Diffstat (limited to 'contrib/llvm/lib/Target/PowerPC')
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp | 17
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp | 5
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp | 24
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp | 2
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h | 34
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp | 4
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp | 35
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp | 2
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp | 36
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h | 9
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp | 12
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPC.h | 9
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 108
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp | 42
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp | 4
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp | 203
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp | 2
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp | 458
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp | 7
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp | 96
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 770
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 1159
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h | 141
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td | 60
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td | 56
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 137
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h | 7
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td | 287
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td | 339
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp | 6
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp | 4
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp | 22
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp | 5
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h | 73
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp | 132
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h | 1
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td | 2
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCScheduleP9.td | 4
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp | 24
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h | 13
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp | 25
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp | 2
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp | 120
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h | 31
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCTargetStreamer.h | 14
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 40
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h | 14
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp | 6
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp | 2
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp | 65
50 files changed, 3663 insertions(+), 1007 deletions(-)
diff --git a/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
index 12ffbfd..11d2237 100644
--- a/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -204,6 +204,17 @@ static const unsigned G8Regs[] = {
PPC::X28, PPC::X29, PPC::X30, PPC::X31
};
+static const unsigned G80Regs[] = {
+ PPC::ZERO8, PPC::X1, PPC::X2, PPC::X3,
+ PPC::X4, PPC::X5, PPC::X6, PPC::X7,
+ PPC::X8, PPC::X9, PPC::X10, PPC::X11,
+ PPC::X12, PPC::X13, PPC::X14, PPC::X15,
+ PPC::X16, PPC::X17, PPC::X18, PPC::X19,
+ PPC::X20, PPC::X21, PPC::X22, PPC::X23,
+ PPC::X24, PPC::X25, PPC::X26, PPC::X27,
+ PPC::X28, PPC::X29, PPC::X30, PPC::X31
+};
+
static const unsigned QFRegs[] = {
PPC::QF0, PPC::QF1, PPC::QF2, PPC::QF3,
PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7,
@@ -301,6 +312,12 @@ static DecodeStatus DecodeG8RCRegisterClass(MCInst &Inst, uint64_t RegNo,
return decodeRegisterClass(Inst, RegNo, G8Regs);
}
+static DecodeStatus DecodeG8RC_NOX0RegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, G80Regs);
+}
+
#define DecodePointerLikeRegClass0 DecodeGPRCRegisterClass
#define DecodePointerLikeRegClass1 DecodeGPRC_NOR0RegisterClass
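The new G80Regs table is identical to G8Regs except that entry 0 is PPC::ZERO8 rather than PPC::X0: in the G8RC_NOX0 class a register field of 0 denotes a literal zero, not r0, so the decoder must select the zero pseudo-register. A minimal standalone model of the table-driven decode (the enum is illustrative, not LLVM's):

    #include <cassert>
    #include <cstdint>

    enum Reg { ZERO8, X0, X1 }; // illustrative stand-in for LLVM's register enum

    static const Reg G8Regs[]  = {X0, X1};    // first entries only
    static const Reg G80Regs[] = {ZERO8, X1};

    static Reg decodeReg(uint64_t RegNo, const Reg *Table) { return Table[RegNo]; }

    int main() {
      assert(decodeReg(0, G8Regs) == X0);     // plain G8RC: field 0 is X0
      assert(decodeReg(0, G80Regs) == ZERO8); // G8RC_NOX0: field 0 means literal 0
    }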
diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index 609d959..baf5902 100644
--- a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -12,9 +12,9 @@
//===----------------------------------------------------------------------===//
#include "PPCInstPrinter.h"
-#include "PPCInstrInfo.h"
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCPredicates.h"
+#include "PPCInstrInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
@@ -95,7 +95,8 @@ void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
return;
}
- if (MI->getOpcode() == PPC::RLDICR) {
+ if (MI->getOpcode() == PPC::RLDICR ||
+ MI->getOpcode() == PPC::RLDICR_32) {
unsigned char SH = MI->getOperand(2).getImm();
unsigned char ME = MI->getOperand(3).getImm();
// rldicr RA, RS, SH, 63-SH == sldi RA, RS, SH
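The alias holds because rotating left by SH and keeping only IBM bits 0 through 63-SH (the high 64-SH bits) discards exactly the SH bits that wrapped around, leaving a plain left shift. A standalone check of the identity, assuming nothing beyond the semantics quoted in the comment:

    #include <cassert>
    #include <cstdint>

    static uint64_t rotl64(uint64_t v, unsigned n) {
      return n ? (v << n) | (v >> (64 - n)) : v;
    }

    // rldicr: rotate left, then keep IBM bits 0..ME (the high ME+1 bits).
    static uint64_t rldicr(uint64_t rs, unsigned sh, unsigned me) {
      uint64_t mask = ~0ULL << (63 - me);
      return rotl64(rs, sh) & mask;
    }

    int main() {
      uint64_t x = 0x123456789ABCDEF0ULL;
      for (unsigned sh = 0; sh < 64; ++sh)
        assert(rldicr(x, sh, 63 - sh) == x << sh); // the sldi alias
    }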
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index 5847b3a..bdad2fe 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -7,8 +7,10 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCFixupKinds.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/BinaryFormat/MachO.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCELFObjectWriter.h"
@@ -18,9 +20,7 @@
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachO.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -113,8 +113,9 @@ public:
return (IsLittleEndian? InfosLE : InfosBE)[Kind - FirstTargetFixupKind];
}
- void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel) const override {
+ void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target, MutableArrayRef<char> Data,
+ uint64_t Value, bool IsResolved) const override {
Value = adjustFixupValue(Fixup.getKind(), Value);
if (!Value) return; // Doesn't change encoding.
@@ -130,12 +131,11 @@ public:
}
}
- void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout,
- const MCFixup &Fixup, const MCFragment *DF,
- const MCValue &Target, uint64_t &Value,
- bool &IsResolved) override {
+ bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target) override {
switch ((PPC::Fixups)Fixup.getKind()) {
- default: break;
+ default:
+ return false;
case PPC::fixup_ppc_br24:
case PPC::fixup_ppc_br24abs:
// If the target symbol has a local entry point we must not attempt
@@ -148,10 +148,10 @@ public:
// and thus the shift to pack it.
unsigned Other = S->getOther() << 2;
if ((Other & ELF::STO_PPC64_LOCAL_MASK) != 0)
- IsResolved = false;
+ return true;
}
}
- break;
+ return false;
}
}
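The rewritten hook returns true for branch fixups against ELFv2 symbols whose st_other bits record a local entry point, leaving the branch for the linker, which may need to insert a TOC restore after the call. A standalone sketch of the test (STO_PPC64_LOCAL_MASK is the standard ELF value; the localentry example is assumed from the ABI encoding):

    #include <cassert>
    #include <cstdint>

    constexpr uint8_t STO_PPC64_LOCAL_MASK = 7u << 5; // 0xe0, ELFv2 st_other field

    // A nonzero STO_PPC64_LOCAL field means the symbol's local entry point
    // differs from its global one, so the fixup must become a relocation.
    static bool shouldForceBranchReloc(uint8_t StOther) {
      return (StOther & STO_PPC64_LOCAL_MASK) != 0;
    }

    int main() {
      assert(!shouldForceBranchReloc(0x00));   // no local entry: resolvable locally
      assert(shouldForceBranchReloc(3u << 5)); // e.g. ".localentry f, 8"
    }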
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index fd279c6..1488bd5 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -7,9 +7,9 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCFixupKinds.h"
#include "MCTargetDesc/PPCMCExpr.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
index ae43e59d..dce4439 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
@@ -17,35 +17,31 @@
namespace llvm {
namespace PPC {
enum Fixups {
- // fixup_ppc_br24 - 24-bit PC relative relocation for direct branches like 'b'
- // and 'bl'.
+ // 24-bit PC relative relocation for direct branches like 'b' and 'bl'.
fixup_ppc_br24 = FirstTargetFixupKind,
-
- /// fixup_ppc_brcond14 - 14-bit PC relative relocation for conditional
- /// branches.
+
+ /// 14-bit PC relative relocation for conditional branches.
fixup_ppc_brcond14,
-
- /// fixup_ppc_br24abs - 24-bit absolute relocation for direct branches
- /// like 'ba' and 'bla'.
+
+ /// 24-bit absolute relocation for direct branches like 'ba' and 'bla'.
fixup_ppc_br24abs,
- /// fixup_ppc_brcond14abs - 14-bit absolute relocation for conditional
- /// branches.
+ /// 14-bit absolute relocation for conditional branches.
fixup_ppc_brcond14abs,
- /// fixup_ppc_half16 - A 16-bit fixup corresponding to lo16(_foo)
- /// or ha16(_foo) for instrs like 'li' or 'addis'.
+ /// A 16-bit fixup corresponding to lo16(_foo) or ha16(_foo) for instrs like
+ /// 'li' or 'addis'.
fixup_ppc_half16,
-
- /// fixup_ppc_half16ds - A 14-bit fixup corresponding to lo16(_foo) with
- /// implied 2 zero bits for instrs like 'std'.
+
+ /// A 14-bit fixup corresponding to lo16(_foo) with implied 2 zero bits for
+ /// instrs like 'std'.
fixup_ppc_half16ds,
- /// fixup_ppc_nofixup - Not a true fixup, but ties a symbol to a call
- /// to __tls_get_addr for the TLS general and local dynamic models,
- /// or inserts the thread-pointer register number.
+ /// Not a true fixup, but ties a symbol to a call to __tls_get_addr for the
+ /// TLS general and local dynamic models, or inserts the thread-pointer
+ /// register number.
fixup_ppc_nofixup,
-
+
// Marker
LastTargetFixupKind,
NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index d8fab5b..d30bf1a 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -20,7 +20,7 @@ void PPCMCAsmInfoDarwin::anchor() { }
PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit, const Triple& T) {
if (is64Bit) {
- PointerSize = CalleeSaveStackSlotSize = 8;
+ CodePointerSize = CalleeSaveStackSlotSize = 8;
}
IsLittleEndian = false;
@@ -50,7 +50,7 @@ PPCELFMCAsmInfo::PPCELFMCAsmInfo(bool is64Bit, const Triple& T) {
NeedsLocalForSize = true;
if (is64Bit) {
- PointerSize = CalleeSaveStackSlotSize = 8;
+ CodePointerSize = CalleeSaveStackSlotSize = 8;
}
IsLittleEndian = T.getArch() == Triple::ppc64le;
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index 017d21a..92c8c22 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -11,22 +11,28 @@
//
//===----------------------------------------------------------------------===//
-#include "PPCInstrInfo.h"
-#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCFixupKinds.h"
+#include "PPCInstrInfo.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetOpcodes.h"
+#include <cassert>
+#include <cstdint>
+
using namespace llvm;
#define DEBUG_TYPE "mccodeemitter"
@@ -34,10 +40,8 @@ using namespace llvm;
STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
namespace {
-class PPCMCCodeEmitter : public MCCodeEmitter {
- PPCMCCodeEmitter(const PPCMCCodeEmitter &) = delete;
- void operator=(const PPCMCCodeEmitter &) = delete;
+class PPCMCCodeEmitter : public MCCodeEmitter {
const MCInstrInfo &MCII;
const MCContext &CTX;
bool IsLittleEndian;
@@ -46,8 +50,9 @@ public:
PPCMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx)
: MCII(mcii), CTX(ctx),
IsLittleEndian(ctx.getAsmInfo()->isLittleEndian()) {}
-
- ~PPCMCCodeEmitter() override {}
+ PPCMCCodeEmitter(const PPCMCCodeEmitter &) = delete;
+ void operator=(const PPCMCCodeEmitter &) = delete;
+ ~PPCMCCodeEmitter() override = default;
unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
@@ -103,6 +108,7 @@ public:
uint64_t getBinaryCodeForInstr(const MCInst &MI,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+
void encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override {
@@ -137,7 +143,7 @@ public:
}
break;
default:
- llvm_unreachable ("Invalid instruction size");
+ llvm_unreachable("Invalid instruction size");
}
++MCNumEmitted; // Keep track of the # of mi's emitted.
@@ -238,7 +244,6 @@ unsigned PPCMCCodeEmitter::getMemRIEncoding(const MCInst &MI, unsigned OpNo,
return RegBits;
}
-
unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
@@ -266,7 +271,8 @@ unsigned PPCMCCodeEmitter::getMemRIX16Encoding(const MCInst &MI, unsigned OpNo,
unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 12;
const MCOperand &MO = MI.getOperand(OpNo);
- assert(MO.isImm());
+ assert(MO.isImm() && !(MO.getImm() % 16) &&
+ "Expecting an immediate that is a multiple of 16");
return ((getMachineOpValue(MI, MO, Fixups, STI) >> 4) & 0xFFF) | RegBits;
}
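getMemRIX16Encoding handles DQ-form memory operands (such as the Power9 lxv/stxv), whose 16-bit displacement must be 16-byte aligned; only the upper 12 bits are encoded, which is why the strengthened assert rejects immediates that are not multiples of 16. A standalone model of the displacement packing:

    #include <cassert>
    #include <cstdint>

    // The DQ-form field stores disp >> 4 in 12 bits; the required alignment
    // makes the dropped low bits provably zero.
    static unsigned encodeDQDisp(int16_t disp) {
      assert(disp % 16 == 0 && "expecting a multiple of 16");
      return (static_cast<uint16_t>(disp) >> 4) & 0xFFF;
    }

    int main() {
      assert(encodeDQDisp(32) == 0x002);
      assert(encodeDQDisp(-16) == 0xFFF); // sign bits survive in the 12-bit field
    }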
@@ -286,7 +292,6 @@ unsigned PPCMCCodeEmitter::getSPE8DisEncoding(const MCInst &MI, unsigned OpNo,
return reverseBits(Imm | RegBits) >> 22;
}
-
unsigned PPCMCCodeEmitter::getSPE4DisEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI)
@@ -302,7 +307,6 @@ unsigned PPCMCCodeEmitter::getSPE4DisEncoding(const MCInst &MI, unsigned OpNo,
return reverseBits(Imm | RegBits) >> 22;
}
-
unsigned PPCMCCodeEmitter::getSPE2DisEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI)
@@ -318,7 +322,6 @@ unsigned PPCMCCodeEmitter::getSPE2DisEncoding(const MCInst &MI, unsigned OpNo,
return reverseBits(Imm | RegBits) >> 22;
}
-
unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
@@ -383,7 +386,5 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
return MO.getImm();
}
-
-
#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "PPCGenMCCodeEmitter.inc"
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
index 6b97d4c..54f6643 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#include "PPCFixupKinds.h"
#include "PPCMCExpr.h"
+#include "PPCFixupKinds.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index bbd10e5..e8f220e 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -11,23 +11,30 @@
//
//===----------------------------------------------------------------------===//
-#include "PPCMCTargetDesc.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "InstPrinter/PPCInstPrinter.h"
-#include "PPCMCAsmInfo.h"
+#include "MCTargetDesc/PPCMCAsmInfo.h"
#include "PPCTargetStreamer.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolELF.h"
-#include "llvm/MC/MachineLocation.h"
-#include "llvm/Support/ELF.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -41,9 +48,10 @@ using namespace llvm;
#include "PPCGenRegisterInfo.inc"
// Pin the vtable to this file.
-PPCTargetStreamer::~PPCTargetStreamer() {}
PPCTargetStreamer::PPCTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
+PPCTargetStreamer::~PPCTargetStreamer() = default;
+
static MCInstrInfo *createPPCMCInstrInfo() {
MCInstrInfo *X = new MCInstrInfo();
InitPPCMCInstrInfo(X);
@@ -96,12 +104,14 @@ static void adjustCodeGenOpts(const Triple &TT, Reloc::Model RM,
}
namespace {
+
class PPCTargetAsmStreamer : public PPCTargetStreamer {
formatted_raw_ostream &OS;
public:
PPCTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS)
: PPCTargetStreamer(S), OS(OS) {}
+
void emitTCEntry(const MCSymbol &S) override {
OS << "\t.tc ";
OS << S.getName();
@@ -109,12 +119,15 @@ public:
OS << S.getName();
OS << '\n';
}
+
void emitMachine(StringRef CPU) override {
OS << "\t.machine " << CPU << '\n';
}
+
void emitAbiVersion(int AbiVersion) override {
OS << "\t.abiversion " << AbiVersion << '\n';
}
+
void emitLocalEntry(MCSymbolELF *S, const MCExpr *LocalOffset) override {
const MCAsmInfo *MAI = Streamer.getContext().getAsmInfo();
@@ -129,18 +142,22 @@ public:
class PPCTargetELFStreamer : public PPCTargetStreamer {
public:
PPCTargetELFStreamer(MCStreamer &S) : PPCTargetStreamer(S) {}
+
MCELFStreamer &getStreamer() {
return static_cast<MCELFStreamer &>(Streamer);
}
+
void emitTCEntry(const MCSymbol &S) override {
// Creates a R_PPC64_TOC relocation
Streamer.EmitValueToAlignment(8);
Streamer.EmitSymbolValue(&S, 8);
}
+
void emitMachine(StringRef CPU) override {
// FIXME: Is there anything to do in here or does this directive only
// limit the parser?
}
+
void emitAbiVersion(int AbiVersion) override {
MCAssembler &MCA = getStreamer().getAssembler();
unsigned Flags = MCA.getELFHeaderEFlags();
@@ -148,6 +165,7 @@ public:
Flags |= (AbiVersion & ELF::EF_PPC64_ABI);
MCA.setELFHeaderEFlags(Flags);
}
+
void emitLocalEntry(MCSymbolELF *S, const MCExpr *LocalOffset) override {
MCAssembler &MCA = getStreamer().getAssembler();
@@ -170,6 +188,7 @@ public:
if ((Flags & ELF::EF_PPC64_ABI) == 0)
MCA.setELFHeaderEFlags(Flags | 2);
}
+
void emitAssignment(MCSymbol *S, const MCExpr *Value) override {
auto *Symbol = cast<MCSymbolELF>(S);
// When encoding an assignment to set symbol A to symbol B, also copy
@@ -188,21 +207,26 @@ public:
class PPCTargetMachOStreamer : public PPCTargetStreamer {
public:
PPCTargetMachOStreamer(MCStreamer &S) : PPCTargetStreamer(S) {}
+
void emitTCEntry(const MCSymbol &S) override {
llvm_unreachable("Unknown pseudo-op: .tc");
}
+
void emitMachine(StringRef CPU) override {
// FIXME: We should update the CPUType, CPUSubType in the Object file if
// the new values are different from the defaults.
}
+
void emitAbiVersion(int AbiVersion) override {
llvm_unreachable("Unknown pseudo-op: .abiversion");
}
+
void emitLocalEntry(MCSymbolELF *S, const MCExpr *LocalOffset) override {
llvm_unreachable("Unknown pseudo-op: .localentry");
}
};
-}
+
+} // end anonymous namespace
static MCTargetStreamer *createAsmTargetStreamer(MCStreamer &S,
formatted_raw_ostream &OS,
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index 0989e0c..893233e 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -17,23 +17,22 @@
// GCC #defines PPC on Linux but we use it as our namespace name
#undef PPC
-#include "llvm/Support/DataTypes.h"
#include "llvm/Support/MathExtras.h"
+#include <cstdint>
namespace llvm {
+
class MCAsmBackend;
class MCCodeEmitter;
class MCContext;
class MCInstrInfo;
class MCObjectWriter;
class MCRegisterInfo;
-class MCSubtargetInfo;
class MCTargetOptions;
class Target;
class Triple;
class StringRef;
class raw_pwrite_stream;
-class raw_ostream;
Target &getThePPC32Target();
Target &getThePPC64Target();
@@ -83,7 +82,7 @@ static inline bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
return false;
}
-} // End llvm namespace
+} // end namespace llvm
// Generated files will use "namespace PPC". To avoid symbol clash,
// undefine PPC here. PPC may be predefined on some hosts.
@@ -103,4 +102,4 @@ static inline bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
#define GET_SUBTARGETINFO_ENUM
#include "PPCGenSubtargetInfo.inc"
-#endif
+#endif // LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCTARGETDESC_H
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
index 1f38a8c..d550627 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
@@ -7,9 +7,10 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCFixupKinds.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/MachO.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
@@ -18,7 +19,6 @@
#include "llvm/MC/MCValue.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
-#include "llvm/Support/MachO.h"
using namespace llvm;
@@ -151,7 +151,7 @@ static void makeRelocationInfo(MachO::any_relocation_info &MRE,
// The bitfield offsets that work (as determined by trial-and-error)
// are different than what is documented in the mach-o manuals.
// This appears to be an endianness issue; reversing the order of the
- // documented bitfields in <llvm/Support/MachO.h> fixes this (but
+ // documented bitfields in <llvm/BinaryFormat/MachO.h> fixes this (but
// breaks x86/ARM assembly).
MRE.r_word1 = ((Index << 8) | // was << 0
(IsPCRel << 7) | // was << 24
@@ -219,11 +219,11 @@ bool PPCMachObjectWriter::recordScatteredRelocation(
const MCSymbol *SB = &B->getSymbol();
if (!SB->getFragment())
- report_fatal_error("symbol '" + B->getSymbol().getName() +
+ report_fatal_error("symbol '" + SB->getName() +
"' can not be undefined in a subtraction expression");
- // FIXME: is Type correct? see include/llvm/Support/MachO.h
- Value2 = Writer->getSymbolAddress(B->getSymbol(), Layout);
+ // FIXME: is Type correct? see include/llvm/BinaryFormat/MachO.h
+ Value2 = Writer->getSymbolAddress(*SB, Layout);
FixedValue -= Writer->getSectionAddress(SB->getFragment()->getParent());
}
// FIXME: does FixedValue get used??
diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.h b/contrib/llvm/lib/Target/PowerPC/PPC.h
index e01f49d..ad92ac8 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPC.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPC.h
@@ -15,6 +15,7 @@
#ifndef LLVM_LIB_TARGET_POWERPC_PPC_H
#define LLVM_LIB_TARGET_POWERPC_PPC_H
+#include "llvm/Support/CodeGen.h"
#include "MCTargetDesc/PPCMCTargetDesc.h"
// GCC #defines PPC on Linux but we use it as our namespace name
@@ -24,12 +25,11 @@ namespace llvm {
class PPCTargetMachine;
class PassRegistry;
class FunctionPass;
- class ImmutablePass;
class MachineInstr;
class AsmPrinter;
class MCInst;
- FunctionPass *createPPCCTRLoops(PPCTargetMachine &TM);
+ FunctionPass *createPPCCTRLoops();
#ifndef NDEBUG
FunctionPass *createPPCCTRLoopsVerify();
#endif
@@ -42,14 +42,17 @@ namespace llvm {
FunctionPass *createPPCMIPeepholePass();
FunctionPass *createPPCBranchSelectionPass();
FunctionPass *createPPCQPXLoadSplatPass();
- FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
+ FunctionPass *createPPCISelDag(PPCTargetMachine &TM, CodeGenOpt::Level OL);
FunctionPass *createPPCTLSDynamicCallPass();
FunctionPass *createPPCBoolRetToIntPass();
+ FunctionPass *createPPCExpandISELPass();
void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
AsmPrinter &AP, bool isDarwin);
void initializePPCVSXFMAMutatePass(PassRegistry&);
void initializePPCBoolRetToIntPass(PassRegistry&);
+ void initializePPCExpandISELPass(PassRegistry &);
+ void initializePPCTLSDynamicCallPass(PassRegistry &);
extern char &PPCVSXFMAMutateID;
namespace PPCII {
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index f0e0ebc..841b8c5 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -16,11 +16,11 @@
//
//===----------------------------------------------------------------------===//
-#include "PPC.h"
-#include "PPCInstrInfo.h"
#include "InstPrinter/PPCInstPrinter.h"
#include "MCTargetDesc/PPCMCExpr.h"
#include "MCTargetDesc/PPCMCTargetDesc.h"
+#include "PPC.h"
+#include "PPCInstrInfo.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
@@ -29,6 +29,8 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/BinaryFormat/MachO.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -55,11 +57,9 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachO.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
@@ -112,7 +112,9 @@ public:
void EmitTlsCall(const MachineInstr *MI, MCSymbolRefExpr::VariantKind VK);
bool runOnMachineFunction(MachineFunction &MF) override {
Subtarget = &MF.getSubtarget<PPCSubtarget>();
- return AsmPrinter::runOnMachineFunction(MF);
+ bool Changed = AsmPrinter::runOnMachineFunction(MF);
+ emitXRayTable();
+ return Changed;
}
};
@@ -134,6 +136,7 @@ public:
void EmitFunctionBodyStart() override;
void EmitFunctionBodyEnd() override;
+ void EmitInstruction(const MachineInstr *MI) override;
};
/// PPCDarwinAsmPrinter - PowerPC assembly printer, customized for Darwin/Mac
@@ -402,7 +405,7 @@ void PPCAsmPrinter::LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI) {
.addImm(CallTarget & 0xFFFF));
// Save the current TOC pointer before the remote call.
- int TOCSaveOffset = Subtarget->isELFv2ABI() ? 24 : 40;
+ int TOCSaveOffset = Subtarget->getFrameLowering()->getTOCSaveOffset();
EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::STD)
.addReg(PPC::X2)
.addImm(TOCSaveOffset)
@@ -1046,6 +1049,97 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
EmitToStreamer(*OutStreamer, TmpInst);
}
+void PPCLinuxAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ if (!Subtarget->isPPC64())
+ return PPCAsmPrinter::EmitInstruction(MI);
+
+ switch (MI->getOpcode()) {
+ default:
+ return PPCAsmPrinter::EmitInstruction(MI);
+ case TargetOpcode::PATCHABLE_FUNCTION_ENTER: {
+ // .begin:
+ // b .end # lis 0, FuncId[16..31]
+ // nop # li 0, FuncId[0..15]
+ // std 0, -8(1)
+ // mflr 0
+ // bl __xray_FunctionEntry
+ // mtlr 0
+ // .end:
+ //
+ // Update compiler-rt/lib/xray/xray_powerpc64.cc accordingly when the
+ // number of instructions changes.
+ MCSymbol *BeginOfSled = OutContext.createTempSymbol();
+ MCSymbol *EndOfSled = OutContext.createTempSymbol();
+ OutStreamer->EmitLabel(BeginOfSled);
+ EmitToStreamer(*OutStreamer,
+ MCInstBuilder(PPC::B).addExpr(
+ MCSymbolRefExpr::create(EndOfSled, OutContext)));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::NOP));
+ EmitToStreamer(
+ *OutStreamer,
+ MCInstBuilder(PPC::STD).addReg(PPC::X0).addImm(-8).addReg(PPC::X1));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MFLR8).addReg(PPC::X0));
+ EmitToStreamer(*OutStreamer,
+ MCInstBuilder(PPC::BL8_NOP)
+ .addExpr(MCSymbolRefExpr::create(
+ OutContext.getOrCreateSymbol("__xray_FunctionEntry"),
+ OutContext)));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MTLR8).addReg(PPC::X0));
+ OutStreamer->EmitLabel(EndOfSled);
+ recordSled(BeginOfSled, *MI, SledKind::FUNCTION_ENTER);
+ break;
+ }
+ case TargetOpcode::PATCHABLE_FUNCTION_EXIT: {
+ // .p2align 3
+ // .begin:
+ // b(lr)? # lis 0, FuncId[16..31]
+ // nop # li 0, FuncId[0..15]
+ // std 0, -8(1)
+ // mflr 0
+ // bl __xray_FunctionExit
+ // mtlr 0
+ // .end:
+ // b(lr)?
+ //
+ // Update compiler-rt/lib/xray/xray_powerpc64.cc accordingly when the
+ // number of instructions changes.
+ const MachineInstr *Next = [&] {
+ MachineBasicBlock::const_iterator It(MI);
+ assert(It != MI->getParent()->end());
+ ++It;
+ assert(It->isReturn());
+ return &*It;
+ }();
+ OutStreamer->EmitCodeAlignment(8);
+ MCSymbol *BeginOfSled = OutContext.createTempSymbol();
+ OutStreamer->EmitLabel(BeginOfSled);
+ MCInst TmpInst;
+ LowerPPCMachineInstrToMCInst(Next, TmpInst, *this, false);
+ EmitToStreamer(*OutStreamer, TmpInst);
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::NOP));
+ EmitToStreamer(
+ *OutStreamer,
+ MCInstBuilder(PPC::STD).addReg(PPC::X0).addImm(-8).addReg(PPC::X1));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MFLR8).addReg(PPC::X0));
+ EmitToStreamer(*OutStreamer,
+ MCInstBuilder(PPC::BL8_NOP)
+ .addExpr(MCSymbolRefExpr::create(
+ OutContext.getOrCreateSymbol("__xray_FunctionExit"),
+ OutContext)));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MTLR8).addReg(PPC::X0));
+ recordSled(BeginOfSled, *MI, SledKind::FUNCTION_EXIT);
+ break;
+ }
+ case TargetOpcode::PATCHABLE_TAIL_CALL:
+ case TargetOpcode::PATCHABLE_RET:
+ // PPC's tail call instruction, e.g. PPC::TCRETURNdi8, doesn't really
+ // lower to a PPC::B instruction. The PPC::B instruction is generated
+ // before it, and handled by the normal case.
+ llvm_unreachable("Tail call is handled in the normal case. See comments"
+ "around this assert.");
+ }
+}
+
void PPCLinuxAsmPrinter::EmitStartOfAsmFile(Module &M) {
if (static_cast<const PPCTargetMachine &>(TM).isELFv2ABI()) {
PPCTargetStreamer *TS =
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp b/contrib/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp
index 93c201d..55e105d 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp
@@ -7,15 +7,15 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements converting i1 values to i32 if they could be more
+// This file implements converting i1 values to i32/i64 if they could be more
// profitably allocated as GPRs rather than CRs. This pass will become totally
// unnecessary if Register Bank Allocation and Global Instruction Selection ever
// go upstream.
//
-// Presently, the pass converts i1 Constants, and Arguments to i32 if the
+// Presently, the pass converts i1 Constants, and Arguments to i32/i64 if the
// transitive closure of their uses includes only PHINodes, CallInsts, and
// ReturnInsts. The rationale is that arguments are generally passed and returned
-// in GPRs rather than CRs, so casting them to i32 at the LLVM IR level will
+// in GPRs rather than CRs, so casting them to i32/i64 at the LLVM IR level will
// actually save casts at the Machine Instruction level.
//
// It might be useful to expand this pass to add bit-wise operations to the list
@@ -33,11 +33,12 @@
//===----------------------------------------------------------------------===//
#include "PPC.h"
+#include "PPCTargetMachine.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
@@ -50,8 +51,9 @@
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
-#include "llvm/Support/Casting.h"
#include "llvm/Pass.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/Support/Casting.h"
#include <cassert>
using namespace llvm;
@@ -87,17 +89,19 @@ class PPCBoolRetToInt : public FunctionPass {
return Defs;
}
- // Translate a i1 value to an equivalent i32 value:
- static Value *translate(Value *V) {
- Type *Int32Ty = Type::getInt32Ty(V->getContext());
+ // Translate a i1 value to an equivalent i32/i64 value:
+ Value *translate(Value *V) {
+ Type *IntTy = ST->isPPC64() ? Type::getInt64Ty(V->getContext())
+ : Type::getInt32Ty(V->getContext());
+
if (auto *C = dyn_cast<Constant>(V))
- return ConstantExpr::getZExt(C, Int32Ty);
+ return ConstantExpr::getZExt(C, IntTy);
if (auto *P = dyn_cast<PHINode>(V)) {
// Temporarily set the operands to 0. We'll fix this later in
// runOnUse.
- Value *Zero = Constant::getNullValue(Int32Ty);
+ Value *Zero = Constant::getNullValue(IntTy);
PHINode *Q =
- PHINode::Create(Int32Ty, P->getNumIncomingValues(), P->getName(), P);
+ PHINode::Create(IntTy, P->getNumIncomingValues(), P->getName(), P);
for (unsigned i = 0; i < P->getNumOperands(); ++i)
Q->addIncoming(Zero, P->getIncomingBlock(i));
return Q;
@@ -109,7 +113,7 @@ class PPCBoolRetToInt : public FunctionPass {
auto InstPt =
A ? &*A->getParent()->getEntryBlock().begin() : I->getNextNode();
- return new ZExtInst(V, Int32Ty, "", InstPt);
+ return new ZExtInst(V, IntTy, "", InstPt);
}
typedef SmallPtrSet<const PHINode *, 8> PHINodeSet;
@@ -185,6 +189,13 @@ class PPCBoolRetToInt : public FunctionPass {
if (skipFunction(F))
return false;
+ auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+ if (!TPC)
+ return false;
+
+ auto &TM = TPC->getTM<PPCTargetMachine>();
+ ST = TM.getSubtargetImpl(F);
+
PHINodeSet PromotablePHINodes = getPromotablePHINodes(F);
B2IMap Bool2IntMap;
bool Changed = false;
@@ -205,7 +216,7 @@ class PPCBoolRetToInt : public FunctionPass {
return Changed;
}
- static bool runOnUse(Use &U, const PHINodeSet &PromotablePHINodes,
+ bool runOnUse(Use &U, const PHINodeSet &PromotablePHINodes,
B2IMap &BoolToIntMap) {
auto Defs = findAllDefs(U);
@@ -262,13 +273,16 @@ class PPCBoolRetToInt : public FunctionPass {
AU.addPreserved<DominatorTreeWrapperPass>();
FunctionPass::getAnalysisUsage(AU);
}
+
+private:
+ const PPCSubtarget *ST;
};
} // end anonymous namespace
char PPCBoolRetToInt::ID = 0;
INITIALIZE_PASS(PPCBoolRetToInt, "bool-ret-to-int",
- "Convert i1 constants to i32 if they are returned",
+ "Convert i1 constants to i32/i64 if they are returned",
false, false)
FunctionPass *llvm::createPPCBoolRetToIntPass() { return new PPCBoolRetToInt(); }
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp b/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
index ae76386..d0b66f9 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -15,8 +15,8 @@
//
//===----------------------------------------------------------------------===//
-#include "PPC.h"
#include "MCTargetDesc/PPCPredicates.h"
+#include "PPC.h"
#include "PPCInstrBuilder.h"
#include "PPCInstrInfo.h"
#include "PPCSubtarget.h"
@@ -78,7 +78,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
BlockSizes.resize(Fn.getNumBlockIDs());
auto GetAlignmentAdjustment =
- [TII](MachineBasicBlock &MBB, unsigned Offset) -> unsigned {
+ [](MachineBasicBlock &MBB, unsigned Offset) -> unsigned {
unsigned Align = MBB.getAlignment();
if (!Align)
return 0;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp b/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
index 2c62a0f..53f33ac 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -23,14 +23,15 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
#include "PPC.h"
+#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
@@ -43,6 +44,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
@@ -81,10 +83,7 @@ namespace {
public:
static char ID;
- PPCCTRLoops() : FunctionPass(ID), TM(nullptr) {
- initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
- }
- PPCCTRLoops(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) {
+ PPCCTRLoops() : FunctionPass(ID) {
initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
}
@@ -99,16 +98,18 @@ namespace {
}
private:
- bool mightUseCTR(const Triple &TT, BasicBlock *BB);
+ bool mightUseCTR(BasicBlock *BB);
bool convertToCTRLoop(Loop *L);
private:
- PPCTargetMachine *TM;
+ const PPCTargetMachine *TM;
+ const PPCSubtarget *STI;
+ const PPCTargetLowering *TLI;
+ const DataLayout *DL;
+ const TargetLibraryInfo *LibInfo;
LoopInfo *LI;
ScalarEvolution *SE;
- const DataLayout *DL;
DominatorTree *DT;
- const TargetLibraryInfo *LibInfo;
bool PreserveLCSSA;
};
@@ -149,9 +150,7 @@ INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
false, false)
-FunctionPass *llvm::createPPCCTRLoops(PPCTargetMachine &TM) {
- return new PPCCTRLoops(TM);
-}
+FunctionPass *llvm::createPPCCTRLoops() { return new PPCCTRLoops(); }
#ifndef NDEBUG
INITIALIZE_PASS_BEGIN(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
@@ -169,6 +168,14 @@ bool PPCCTRLoops::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
+ auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+ if (!TPC)
+ return false;
+
+ TM = &TPC->getTM<PPCTargetMachine>();
+ STI = TM->getSubtargetImpl(F);
+ TLI = STI->getTargetLowering();
+
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
@@ -198,8 +205,7 @@ static bool isLargeIntegerTy(bool Is32Bit, Type *Ty) {
// Determining the address of a TLS variable results in a function call in
// certain TLS models.
-static bool memAddrUsesCTR(const PPCTargetMachine *TM,
- const Value *MemAddr) {
+static bool memAddrUsesCTR(const PPCTargetMachine &TM, const Value *MemAddr) {
const auto *GV = dyn_cast<GlobalValue>(MemAddr);
if (!GV) {
// Recurse to check for constants that refer to TLS global variables.
@@ -213,35 +219,35 @@ static bool memAddrUsesCTR(const PPCTargetMachine *TM,
if (!GV->isThreadLocal())
return false;
- if (!TM)
- return true;
- TLSModel::Model Model = TM->getTLSModel(GV);
+ TLSModel::Model Model = TM.getTLSModel(GV);
return Model == TLSModel::GeneralDynamic || Model == TLSModel::LocalDynamic;
}
-bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
+// Loop through the inline asm constraints and look for something that clobbers
+// ctr.
+static bool asmClobbersCTR(InlineAsm *IA) {
+ InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints();
+ for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) {
+ InlineAsm::ConstraintInfo &C = CIV[i];
+ if (C.Type != InlineAsm::isInput)
+ for (unsigned j = 0, je = C.Codes.size(); j < je; ++j)
+ if (StringRef(C.Codes[j]).equals_lower("{ctr}"))
+ return true;
+ }
+ return false;
+}
+
+bool PPCCTRLoops::mightUseCTR(BasicBlock *BB) {
for (BasicBlock::iterator J = BB->begin(), JE = BB->end();
J != JE; ++J) {
if (CallInst *CI = dyn_cast<CallInst>(J)) {
+ // Inline ASM is okay, unless it clobbers the ctr register.
if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue())) {
- // Inline ASM is okay, unless it clobbers the ctr register.
- InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints();
- for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) {
- InlineAsm::ConstraintInfo &C = CIV[i];
- if (C.Type != InlineAsm::isInput)
- for (unsigned j = 0, je = C.Codes.size(); j < je; ++j)
- if (StringRef(C.Codes[j]).equals_lower("{ctr}"))
- return true;
- }
-
+ if (asmClobbersCTR(IA))
+ return true;
continue;
}
- if (!TM)
- return true;
- const TargetLowering *TLI =
- TM->getSubtargetImpl(*BB->getParent())->getTargetLowering();
-
if (Function *F = CI->getCalledFunction()) {
// Most intrinsics don't become function calls, but some might.
// sin, cos, exp and log are always calls.
@@ -298,15 +304,17 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
return true;
else
continue; // ISD::FCOPYSIGN is never a library call.
- case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
- case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
- case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
- case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
- case Intrinsic::rint: Opcode = ISD::FRINT; break;
- case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
- case Intrinsic::round: Opcode = ISD::FROUND; break;
- case Intrinsic::minnum: Opcode = ISD::FMINNUM; break;
- case Intrinsic::maxnum: Opcode = ISD::FMAXNUM; break;
+ case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
+ case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
+ case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
+ case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
+ case Intrinsic::rint: Opcode = ISD::FRINT; break;
+ case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
+ case Intrinsic::round: Opcode = ISD::FROUND; break;
+ case Intrinsic::minnum: Opcode = ISD::FMINNUM; break;
+ case Intrinsic::maxnum: Opcode = ISD::FMAXNUM; break;
+ case Intrinsic::umul_with_overflow: Opcode = ISD::UMULO; break;
+ case Intrinsic::smul_with_overflow: Opcode = ISD::SMULO; break;
}
}
@@ -315,7 +323,7 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
// (i.e. soft float or atomics). If adapting for targets that do,
// additional care is required here.
- LibFunc::Func Func;
+ LibFunc Func;
if (!F->hasLocalLinkage() && F->hasName() && LibInfo &&
LibInfo->getLibFunc(F->getName(), Func) &&
LibInfo->hasOptimizedCodeGen(Func)) {
@@ -329,58 +337,57 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
switch (Func) {
default: return true;
- case LibFunc::copysign:
- case LibFunc::copysignf:
+ case LibFunc_copysign:
+ case LibFunc_copysignf:
continue; // ISD::FCOPYSIGN is never a library call.
- case LibFunc::copysignl:
+ case LibFunc_copysignl:
return true;
- case LibFunc::fabs:
- case LibFunc::fabsf:
- case LibFunc::fabsl:
+ case LibFunc_fabs:
+ case LibFunc_fabsf:
+ case LibFunc_fabsl:
continue; // ISD::FABS is never a library call.
- case LibFunc::sqrt:
- case LibFunc::sqrtf:
- case LibFunc::sqrtl:
+ case LibFunc_sqrt:
+ case LibFunc_sqrtf:
+ case LibFunc_sqrtl:
Opcode = ISD::FSQRT; break;
- case LibFunc::floor:
- case LibFunc::floorf:
- case LibFunc::floorl:
+ case LibFunc_floor:
+ case LibFunc_floorf:
+ case LibFunc_floorl:
Opcode = ISD::FFLOOR; break;
- case LibFunc::nearbyint:
- case LibFunc::nearbyintf:
- case LibFunc::nearbyintl:
+ case LibFunc_nearbyint:
+ case LibFunc_nearbyintf:
+ case LibFunc_nearbyintl:
Opcode = ISD::FNEARBYINT; break;
- case LibFunc::ceil:
- case LibFunc::ceilf:
- case LibFunc::ceill:
+ case LibFunc_ceil:
+ case LibFunc_ceilf:
+ case LibFunc_ceill:
Opcode = ISD::FCEIL; break;
- case LibFunc::rint:
- case LibFunc::rintf:
- case LibFunc::rintl:
+ case LibFunc_rint:
+ case LibFunc_rintf:
+ case LibFunc_rintl:
Opcode = ISD::FRINT; break;
- case LibFunc::round:
- case LibFunc::roundf:
- case LibFunc::roundl:
+ case LibFunc_round:
+ case LibFunc_roundf:
+ case LibFunc_roundl:
Opcode = ISD::FROUND; break;
- case LibFunc::trunc:
- case LibFunc::truncf:
- case LibFunc::truncl:
+ case LibFunc_trunc:
+ case LibFunc_truncf:
+ case LibFunc_truncl:
Opcode = ISD::FTRUNC; break;
- case LibFunc::fmin:
- case LibFunc::fminf:
- case LibFunc::fminl:
+ case LibFunc_fmin:
+ case LibFunc_fminf:
+ case LibFunc_fminl:
Opcode = ISD::FMINNUM; break;
- case LibFunc::fmax:
- case LibFunc::fmaxf:
- case LibFunc::fmaxl:
+ case LibFunc_fmax:
+ case LibFunc_fmaxf:
+ case LibFunc_fmaxl:
Opcode = ISD::FMAXNUM; break;
}
}
if (Opcode) {
- auto &DL = CI->getModule()->getDataLayout();
- MVT VTy = TLI->getSimpleValueType(DL, CI->getArgOperand(0)->getType(),
- true);
+ MVT VTy = TLI->getSimpleValueType(
+ *DL, CI->getArgOperand(0)->getType(), true);
if (VTy == MVT::Other)
return true;
@@ -404,17 +411,17 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
CastInst *CI = cast<CastInst>(J);
if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
CI->getDestTy()->getScalarType()->isPPC_FP128Ty() ||
- isLargeIntegerTy(TT.isArch32Bit(), CI->getSrcTy()->getScalarType()) ||
- isLargeIntegerTy(TT.isArch32Bit(), CI->getDestTy()->getScalarType()))
+ isLargeIntegerTy(!TM->isPPC64(), CI->getSrcTy()->getScalarType()) ||
+ isLargeIntegerTy(!TM->isPPC64(), CI->getDestTy()->getScalarType()))
return true;
- } else if (isLargeIntegerTy(TT.isArch32Bit(),
+ } else if (isLargeIntegerTy(!TM->isPPC64(),
J->getType()->getScalarType()) &&
(J->getOpcode() == Instruction::UDiv ||
J->getOpcode() == Instruction::SDiv ||
J->getOpcode() == Instruction::URem ||
J->getOpcode() == Instruction::SRem)) {
return true;
- } else if (TT.isArch32Bit() &&
+ } else if (!TM->isPPC64() &&
isLargeIntegerTy(false, J->getType()->getScalarType()) &&
(J->getOpcode() == Instruction::Shl ||
J->getOpcode() == Instruction::AShr ||
@@ -426,16 +433,11 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
// On PowerPC, indirect jumps use the counter register.
return true;
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) {
- if (!TM)
- return true;
- const TargetLowering *TLI =
- TM->getSubtargetImpl(*BB->getParent())->getTargetLowering();
-
if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries())
return true;
}
- if (TM->getSubtargetImpl(*BB->getParent())->getTargetLowering()->useSoftFloat()) {
+ if (STI->useSoftFloat()) {
switch(J->getOpcode()) {
case Instruction::FAdd:
case Instruction::FSub:
@@ -454,7 +456,7 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
}
for (Value *Operand : J->operands())
- if (memAddrUsesCTR(TM, Operand))
+ if (memAddrUsesCTR(*TM, Operand))
return true;
}
@@ -464,11 +466,6 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
bool MadeChange = false;
- const Triple TT =
- Triple(L->getHeader()->getParent()->getParent()->getTargetTriple());
- if (!TT.isArch32Bit() && !TT.isArch64Bit())
- return MadeChange; // Unknown arch. type.
-
// Process nested loops first.
for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
MadeChange |= convertToCTRLoop(*I);
@@ -493,7 +490,7 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
// want to use the counter register if the loop contains calls.
for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
I != IE; ++I)
- if (mightUseCTR(TT, *I))
+ if (mightUseCTR(*I))
return MadeChange;
SmallVector<BasicBlock*, 4> ExitingBlocks;
@@ -515,7 +512,7 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
} else if (!SE->isLoopInvariant(EC, L))
continue;
- if (SE->getTypeSizeInBits(EC->getType()) > (TT.isArch64Bit() ? 64 : 32))
+ if (SE->getTypeSizeInBits(EC->getType()) > (TM->isPPC64() ? 64 : 32))
continue;
// We now have a loop-invariant count of loop iterations (which is not the
@@ -569,7 +566,7 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
// preheader, then we can use it (except if the preheader contains a use of
// the CTR register because some such uses might be reordered by the
// selection DAG after the mtctr instruction).
- if (!Preheader || mightUseCTR(TT, Preheader))
+ if (!Preheader || mightUseCTR(Preheader))
Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
if (!Preheader)
return MadeChange;
@@ -580,10 +577,9 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
// selected branch.
MadeChange = true;
- SCEVExpander SCEVE(*SE, Preheader->getModule()->getDataLayout(), "loopcnt");
+ SCEVExpander SCEVE(*SE, *DL, "loopcnt");
LLVMContext &C = SE->getContext();
- Type *CountType = TT.isArch64Bit() ? Type::getInt64Ty(C) :
- Type::getInt32Ty(C);
+ Type *CountType = TM->isPPC64() ? Type::getInt64Ty(C) : Type::getInt32Ty(C);
if (!ExitCount->getType()->isPointerTy() &&
ExitCount->getType() != CountType)
ExitCount = SE->getZeroExtendExpr(ExitCount, CountType);
@@ -611,7 +607,10 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
// The old condition may be dead now, and may have even created a dead PHI
// (the original induction variable).
RecursivelyDeleteTriviallyDeadInstructions(OldCond);
- DeleteDeadPHIs(CountedExitBlock);
+ // Run through the basic blocks of the loop and see if any of them have dead
+ // PHIs that can be removed.
+ for (auto I : L->blocks())
+ DeleteDeadPHIs(I);
++NumCTRLoops;
return MadeChange;
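For illustration, a hypothetical source-level snippet that would trip the new asmClobbersCTR check above: the inline asm names ctr in its clobber list (which ParseConstraints surfaces as the "{ctr}" code), so converting the enclosing loop to an mtctr/bdnz counter loop would corrupt the asm's state. PowerPC-only, GNU inline-asm syntax:

    // Hypothetical example: this asm clobbers CTR, so a loop around it must
    // keep its trip count in a GPR rather than in the counter register.
    static long spinOnce(long n) {
      long out;
      asm volatile("mtctr %1\n\t"
                   "mfctr %0"
                   : "=r"(out)
                   : "r"(n)
                   : "ctr"); // the clobber the pass scans for
      return out;
    }

    int main() { return spinOnce(5) == 5 ? 0 : 1; }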
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp b/contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp
index 6bd2296..811e4dd 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#include "PPC.h"
#include "MCTargetDesc/PPCPredicates.h"
+#include "PPC.h"
#include "PPCInstrBuilder.h"
#include "PPCInstrInfo.h"
#include "PPCMachineFunctionInfo.h"
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp b/contrib/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp
new file mode 100644
index 0000000..41e3190
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp
@@ -0,0 +1,458 @@
+//===------------- PPCExpandISEL.cpp - Expand ISEL instruction ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// A pass that expands the ISEL instruction into an if-then-else sequence.
+// This pass must be run post-RA since all operands must be physical registers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "PPCInstrInfo.h"
+#include "PPCSubtarget.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ppc-expand-isel"
+
+STATISTIC(NumExpanded, "Number of ISEL instructions expanded");
+STATISTIC(NumRemoved, "Number of ISEL instructions removed");
+STATISTIC(NumFolded, "Number of ISEL instructions folded");
+
+// If -ppc-gen-isel=false is set, we will disable generating the ISEL
+// instruction on all PPC targets. Otherwise, if the user sets the option
+// -misel or the platform supports ISEL by default, the ISEL instruction is
+// still generated; otherwise it is expanded.
+static cl::opt<bool>
+ GenerateISEL("ppc-gen-isel",
+ cl::desc("Enable generating the ISEL instruction."),
+ cl::init(true), cl::Hidden);
+
+namespace {
+class PPCExpandISEL : public MachineFunctionPass {
+ DebugLoc dl;
+ MachineFunction *MF;
+ const TargetInstrInfo *TII;
+ bool IsTrueBlockRequired;
+ bool IsFalseBlockRequired;
+ MachineBasicBlock *TrueBlock;
+ MachineBasicBlock *FalseBlock;
+ MachineBasicBlock *NewSuccessor;
+ MachineBasicBlock::iterator TrueBlockI;
+ MachineBasicBlock::iterator FalseBlockI;
+
+ typedef SmallVector<MachineInstr *, 4> BlockISELList;
+ typedef SmallDenseMap<int, BlockISELList> ISELInstructionList;
+
+ // A map of MBB numbers to their lists of contained ISEL instructions.
+ ISELInstructionList ISELInstructions;
+
+ /// Initialize the object.
+ void initialize(MachineFunction &MFParam);
+
+ void handleSpecialCases(BlockISELList &BIL, MachineBasicBlock *MBB);
+ void reorganizeBlockLayout(BlockISELList &BIL, MachineBasicBlock *MBB);
+ void populateBlocks(BlockISELList &BIL);
+ void expandMergeableISELs(BlockISELList &BIL);
+ void expandAndMergeISELs();
+
+ bool canMerge(MachineInstr *PrevPushedMI, MachineInstr *MI);
+
+ /// Is this instruction an ISEL or ISEL8?
+ static bool isISEL(const MachineInstr &MI) {
+ return (MI.getOpcode() == PPC::ISEL || MI.getOpcode() == PPC::ISEL8);
+ }
+
+ /// Is this instruction an ISEL8?
+ static bool isISEL8(const MachineInstr &MI) {
+ return (MI.getOpcode() == PPC::ISEL8);
+ }
+
+ /// Are the two operands using the same register?
+ bool useSameRegister(const MachineOperand &Op1, const MachineOperand &Op2) {
+ return (Op1.getReg() == Op2.getReg());
+ }
+
+ ///
+ /// Collect all ISEL instructions from the current function.
+ ///
+ /// Walk the current function and collect all the ISEL instructions that are
+ /// found. The instructions are placed in the ISELInstructions vector.
+ ///
+ /// \return true if any ISEL instructions were found, false otherwise
+ ///
+ bool collectISELInstructions();
+
+public:
+ static char ID;
+ PPCExpandISEL() : MachineFunctionPass(ID) {
+ initializePPCExpandISELPass(*PassRegistry::getPassRegistry());
+ }
+
+ ///
+ /// Determine whether to generate the ISEL instruction or expand it.
+ ///
+ /// Expand the ISEL instruction into an if-then-else sequence when one of
+ /// the following two conditions holds:
+ /// (1) -ppc-gen-isel=false
+ /// (2) hasISEL() returns false
+ /// Otherwise, still generate the ISEL instruction.
+ /// The -ppc-gen-isel option is set to true by default, which means the ISEL
+ /// instruction is still generated by default on targets that support it.
+ ///
+ /// \return true if ISEL should be expanded into if-then-else code sequence;
+ /// false if the ISEL instruction should be generated, i.e. not expanded.
+ ///
+ static bool isExpandISELEnabled(const MachineFunction &MF);
+
+#ifndef NDEBUG
+ void DumpISELInstructions() const;
+#endif
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ if (!isExpandISELEnabled(MF))
+ return false;
+
+ DEBUG(dbgs() << "Function: "; MF.dump(); dbgs() << "\n");
+ initialize(MF);
+
+ if (!collectISELInstructions()) {
+ DEBUG(dbgs() << "No ISEL instructions in this function\n");
+ return false;
+ }
+
+#ifndef NDEBUG
+ DumpISELInstructions();
+#endif
+
+ expandAndMergeISELs();
+
+ return true;
+ }
+};
+} // end anonymous namespace
+
+void PPCExpandISEL::initialize(MachineFunction &MFParam) {
+ MF = &MFParam;
+ TII = MF->getSubtarget().getInstrInfo();
+ ISELInstructions.clear();
+}
+
+bool PPCExpandISEL::isExpandISELEnabled(const MachineFunction &MF) {
+ return !GenerateISEL || !MF.getSubtarget<PPCSubtarget>().hasISEL();
+}
+
+bool PPCExpandISEL::collectISELInstructions() {
+ for (MachineBasicBlock &MBB : *MF) {
+ BlockISELList thisBlockISELs;
+ for (MachineInstr &MI : MBB)
+ if (isISEL(MI))
+ thisBlockISELs.push_back(&MI);
+ if (!thisBlockISELs.empty())
+ ISELInstructions.insert(std::make_pair(MBB.getNumber(), thisBlockISELs));
+ }
+ return !ISELInstructions.empty();
+}
+
+#ifndef NDEBUG
+void PPCExpandISEL::DumpISELInstructions() const {
+ for (const auto &I : ISELInstructions) {
+ DEBUG(dbgs() << "BB#" << I.first << ":\n");
+ for (const auto &VI : I.second)
+ DEBUG(dbgs() << " "; VI->print(dbgs()));
+ }
+}
+#endif
+
+/// Contiguous ISELs that have the same condition can be merged.
+bool PPCExpandISEL::canMerge(MachineInstr *PrevPushedMI, MachineInstr *MI) {
+ // Same Condition Register?
+ if (!useSameRegister(PrevPushedMI->getOperand(3), MI->getOperand(3)))
+ return false;
+
+ MachineBasicBlock::iterator PrevPushedMBBI = *PrevPushedMI;
+ MachineBasicBlock::iterator MBBI = *MI;
+ return (std::prev(MBBI) == PrevPushedMBBI); // Contiguous ISELs?
+}
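+
+// Illustrative sketch (register names are made up, not from this change):
+// two adjacent ISELs that test the same CR bit, e.g.
+//   %r3 = ISEL %r4, %r5, %cr0gt
+//   %r6 = ISEL %r7, %r8, %cr0gt
+// can share a single bc/b diamond when expanded, while a different condition
+// register or an intervening instruction forces a separate expansion.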
+
+void PPCExpandISEL::expandAndMergeISELs() {
+ for (auto &BlockList : ISELInstructions) {
+ DEBUG(dbgs() << "Expanding ISEL instructions in BB#" << BlockList.first
+ << "\n");
+
+ BlockISELList &CurrentISELList = BlockList.second;
+ auto I = CurrentISELList.begin();
+ auto E = CurrentISELList.end();
+
+ while (I != E) {
+ BlockISELList SubISELList;
+
+ SubISELList.push_back(*I++);
+
+ // Collect the ISELs that can be merged together.
+ while (I != E && canMerge(SubISELList.back(), *I))
+ SubISELList.push_back(*I++);
+
+ expandMergeableISELs(SubISELList);
+ }
+ }
+}
+
+void PPCExpandISEL::handleSpecialCases(BlockISELList &BIL,
+ MachineBasicBlock *MBB) {
+ IsTrueBlockRequired = false;
+ IsFalseBlockRequired = false;
+
+ auto MI = BIL.begin();
+ while (MI != BIL.end()) {
+ assert(isISEL(**MI) && "Expecting an ISEL instruction");
+ DEBUG(dbgs() << "ISEL: " << **MI << "\n");
+
+ MachineOperand &Dest = (*MI)->getOperand(0);
+ MachineOperand &TrueValue = (*MI)->getOperand(1);
+ MachineOperand &FalseValue = (*MI)->getOperand(2);
+
+    // If at least one of the ISEL instructions satisfies the following
+    // condition, we need the True Block:
+    //   the Dest Register and True Value Register are not the same.
+    // Similarly, if at least one of the ISEL instructions satisfies the
+    // following condition, we need the False Block:
+    //   the Dest Register and False Value Register are not the same.
+
+ bool IsADDIInstRequired = !useSameRegister(Dest, TrueValue);
+ bool IsORIInstRequired = !useSameRegister(Dest, FalseValue);
+
+ // Special case 1, all registers used by ISEL are the same one.
+ if (!IsADDIInstRequired && !IsORIInstRequired) {
+      DEBUG(dbgs() << "Remove redundant ISEL instruction.");
+      NumRemoved++;
+      (*MI)->eraseFromParent();
+      // Setting MI to the erase result keeps the iterator valid and advanced.
+ MI = BIL.erase(MI);
+ continue;
+ }
+
+ // Special case 2, the two input registers used by ISEL are the same.
+    // Note 1: We favor merging ISEL expansions over folding a single one. If
+    // the passed list has multiple mergeable ISELs, we won't fold any.
+ // Note 2: There is no need to test for PPC::R0/PPC::X0 because PPC::ZERO/
+ // PPC::ZERO8 will be used for the first operand if the value is meant to
+ // be zero. In this case, the useSameRegister method will return false,
+ // thereby preventing this ISEL from being folded.
+
+ if (useSameRegister(TrueValue, FalseValue) && (BIL.size() == 1)) {
+      DEBUG(dbgs() << "Fold the ISEL instruction to an unconditional copy.");
+ NumFolded++;
+ BuildMI(*MBB, (*MI), dl, TII->get(isISEL8(**MI) ? PPC::ADDI8 : PPC::ADDI))
+ .add(Dest)
+ .add(TrueValue)
+ .add(MachineOperand::CreateImm(0));
+ (*MI)->eraseFromParent();
+      // Setting MI to the erase result keeps the iterator valid and advanced.
+ MI = BIL.erase(MI);
+ continue;
+ }
+
+ IsTrueBlockRequired |= IsADDIInstRequired;
+ IsFalseBlockRequired |= IsORIInstRequired;
+ MI++;
+ }
+}
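+
+// Worked sketch of the two special cases (registers are illustrative):
+//   %r3 = ISEL %r3, %r3, %cr0gt  // case 1: all regs equal -> erased outright
+//   %r3 = ISEL %r5, %r5, %cr0gt  // case 2: true == false, single ISEL in
+//                                //   the list -> folded to %r3 = ADDI %r5, 0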
+
+void PPCExpandISEL::reorganizeBlockLayout(BlockISELList &BIL,
+ MachineBasicBlock *MBB) {
+ if (BIL.empty())
+ return;
+
+ assert((IsTrueBlockRequired || IsFalseBlockRequired) &&
+ "Should have been handled by special cases earlier!");
+
+ MachineBasicBlock *Successor = nullptr;
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ MachineBasicBlock::iterator MBBI = (*BIL.back());
+ NewSuccessor = (MBBI != MBB->getLastNonDebugInstr() || !MBB->canFallThrough())
+ // Another BB is needed to move the instructions that
+ // follow this ISEL. If the ISEL is the last instruction
+ // in a block that can't fall through, we also need a block
+ // to branch to.
+ ? MF->CreateMachineBasicBlock(LLVM_BB)
+ : nullptr;
+
+ MachineFunction::iterator It = MBB->getIterator();
+ ++It; // Point to the successor block of MBB.
+
+ // If NewSuccessor is NULL then the last ISEL in this group is the last
+ // non-debug instruction in this block. Find the fall-through successor
+ // of this block to use when updating the CFG below.
+ if (!NewSuccessor) {
+ for (auto &Succ : MBB->successors()) {
+ if (MBB->isLayoutSuccessor(Succ)) {
+ Successor = Succ;
+ break;
+ }
+ }
+ } else
+ Successor = NewSuccessor;
+
+  // The FalseBlock and TrueBlock are inserted after the MBB block but before
+  // its successor.
+  // Note: this needs to be done *after* the Successor is computed above.
+ if (IsFalseBlockRequired) {
+ FalseBlock = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, FalseBlock);
+ }
+
+ if (IsTrueBlockRequired) {
+ TrueBlock = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, TrueBlock);
+ }
+
+ if (NewSuccessor) {
+ MF->insert(It, NewSuccessor);
+
+ // Transfer the rest of this block into the new successor block.
+ NewSuccessor->splice(NewSuccessor->end(), MBB,
+ std::next(MachineBasicBlock::iterator(BIL.back())),
+ MBB->end());
+ NewSuccessor->transferSuccessorsAndUpdatePHIs(MBB);
+
+ // Copy the original liveIns of MBB to NewSuccessor.
+ for (auto &LI : MBB->liveins())
+ NewSuccessor->addLiveIn(LI);
+
+    // After splitting the NewSuccessor block, Regs defined but not killed
+    // in MBB should be treated as liveins of NewSuccessor.
+    // Note: we cannot use stepBackward here since we are using the Reg
+    // liveness state at the end of MBB (the liveOut of MBB) as the liveIn of
+    // NewSuccessor; doing otherwise would create a cyclic dependence.
+ LivePhysRegs LPR(*MF->getSubtarget<PPCSubtarget>().getRegisterInfo());
+ SmallVector<std::pair<unsigned, const MachineOperand *>, 2> Clobbers;
+ for (MachineInstr &MI : *MBB)
+ LPR.stepForward(MI, Clobbers);
+ for (auto &LI : LPR)
+ NewSuccessor->addLiveIn(LI);
+ } else {
+ // Remove successor from MBB.
+ MBB->removeSuccessor(Successor);
+ }
+
+  // Note that this needs to be done *after* transferring the successors from
+  // MBB to the NewSuccessor block, otherwise these blocks will also be
+  // transferred as successors!
+ MBB->addSuccessor(IsTrueBlockRequired ? TrueBlock : Successor);
+ MBB->addSuccessor(IsFalseBlockRequired ? FalseBlock : Successor);
+
+ if (IsTrueBlockRequired) {
+ TrueBlockI = TrueBlock->begin();
+ TrueBlock->addSuccessor(Successor);
+ }
+
+ if (IsFalseBlockRequired) {
+ FalseBlockI = FalseBlock->begin();
+ FalseBlock->addSuccessor(Successor);
+ }
+
+ // Conditional branch to the TrueBlock or Successor
+ BuildMI(*MBB, BIL.back(), dl, TII->get(PPC::BC))
+ .add(BIL.back()->getOperand(3))
+ .addMBB(IsTrueBlockRequired ? TrueBlock : Successor);
+
+ // Jump over the true block to the new successor if the condition is false.
+ BuildMI(*(IsFalseBlockRequired ? FalseBlock : MBB),
+ (IsFalseBlockRequired ? FalseBlockI : BIL.back()), dl,
+ TII->get(PPC::B))
+ .addMBB(Successor);
+
+ if (IsFalseBlockRequired)
+ FalseBlockI = FalseBlock->begin(); // get the position of PPC::B
+}
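+
+// Resulting layout, roughly (assuming both new blocks are required):
+//   MBB:        ...  bc <cr-bit>, TrueBlock  // falls through to FalseBlock
+//   FalseBlock: ORI copies;  b Successor
+//   TrueBlock:  ADDI copies                  // falls through to Successor
+//   Successor:  the instructions that followed the ISEL group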
+
+void PPCExpandISEL::populateBlocks(BlockISELList &BIL) {
+ for (auto &MI : BIL) {
+ assert(isISEL(*MI) && "Expecting an ISEL instruction");
+
+ MachineOperand &Dest = MI->getOperand(0); // location to store to
+ MachineOperand &TrueValue = MI->getOperand(1); // Value to store if
+ // condition is true
+ MachineOperand &FalseValue = MI->getOperand(2); // Value to store if
+ // condition is false
+ MachineOperand &ConditionRegister = MI->getOperand(3); // Condition
+
+ DEBUG(dbgs() << "Dest: " << Dest << "\n");
+ DEBUG(dbgs() << "TrueValue: " << TrueValue << "\n");
+ DEBUG(dbgs() << "FalseValue: " << FalseValue << "\n");
+ DEBUG(dbgs() << "ConditionRegister: " << ConditionRegister << "\n");
+
+    // If the Dest Register and True Value Register are not the same one, we
+    // need the True Block; likewise, if the Dest Register and False Value
+    // Register are not the same, we need the False Block.
+ bool IsADDIInstRequired = !useSameRegister(Dest, TrueValue);
+ bool IsORIInstRequired = !useSameRegister(Dest, FalseValue);
+
+ if (IsADDIInstRequired) {
+ // Copy the result into the destination if the condition is true.
+ BuildMI(*TrueBlock, TrueBlockI, dl,
+ TII->get(isISEL8(*MI) ? PPC::ADDI8 : PPC::ADDI))
+ .add(Dest)
+ .add(TrueValue)
+ .add(MachineOperand::CreateImm(0));
+
+ // Add the LiveIn registers required by true block.
+ TrueBlock->addLiveIn(TrueValue.getReg());
+ }
+
+ if (IsORIInstRequired) {
+ // Add the LiveIn registers required by false block.
+ FalseBlock->addLiveIn(FalseValue.getReg());
+ }
+
+ if (NewSuccessor) {
+ // Add the LiveIn registers required by NewSuccessor block.
+ NewSuccessor->addLiveIn(Dest.getReg());
+ NewSuccessor->addLiveIn(TrueValue.getReg());
+ NewSuccessor->addLiveIn(FalseValue.getReg());
+ NewSuccessor->addLiveIn(ConditionRegister.getReg());
+ }
+
+ // Copy the value into the destination if the condition is false.
+ if (IsORIInstRequired)
+ BuildMI(*FalseBlock, FalseBlockI, dl,
+ TII->get(isISEL8(*MI) ? PPC::ORI8 : PPC::ORI))
+ .add(Dest)
+ .add(FalseValue)
+ .add(MachineOperand::CreateImm(0));
+
+ MI->eraseFromParent(); // Remove the ISEL instruction.
+
+ NumExpanded++;
+ }
+}
+
+void PPCExpandISEL::expandMergeableISELs(BlockISELList &BIL) {
+ // At this stage all the ISELs of BIL are in the same MBB.
+ MachineBasicBlock *MBB = BIL.back()->getParent();
+
+ handleSpecialCases(BIL, MBB);
+ reorganizeBlockLayout(BIL, MBB);
+ populateBlocks(BIL);
+}
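+
+// End-to-end sketch (illustrative register/label names): a single
+//   %r3 = ISEL %r4, %r5, %cr0gt
+// expands into roughly:
+//   bc %cr0gt, .LTrue
+//   ori %r3, %r5, 0     // false path
+//   b .LSucc
+// .LTrue:
+//   addi %r3, %r4, 0    // true path
+// .LSucc: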
+
+INITIALIZE_PASS(PPCExpandISEL, DEBUG_TYPE, "PowerPC Expand ISEL Generation",
+ false, false)
+char PPCExpandISEL::ID = 0;
+
+FunctionPass *llvm::createPPCExpandISELPass() { return new PPCExpandISEL(); }
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp
index 9b91b9a..bc99571 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp
@@ -13,10 +13,10 @@
//
//===----------------------------------------------------------------------===//
-#include "PPC.h"
#include "MCTargetDesc/PPCPredicates.h"
-#include "PPCCallingConv.h"
+#include "PPC.h"
#include "PPCCCState.h"
+#include "PPCCallingConv.h"
#include "PPCISelLowering.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCSubtarget.h"
@@ -1330,7 +1330,7 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
// Issue CALLSEQ_START.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TII.getCallFrameSetupOpcode()))
- .addImm(NumBytes);
+ .addImm(NumBytes).addImm(0);
// Prepare to assign register arguments. Every argument uses up a
// GPR protocol register even if it's passed in a floating-point
@@ -2246,6 +2246,7 @@ bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
}
case PPC::EXTSW:
+ case PPC::EXTSW_32:
case PPC::EXTSW_32_64: {
if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8)
return false;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index f9ea871..b49c334 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -433,25 +433,21 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
unsigned MaxAlign = MFI.getMaxAlignment(); // algmt required by data in frame
unsigned AlignMask = std::max(MaxAlign, TargetAlign) - 1;
- const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
-
- // If we are a leaf function, and use up to 224 bytes of stack space,
- // don't have a frame pointer, calls, or dynamic alloca then we do not need
- // to adjust the stack pointer (we fit in the Red Zone).
- // The 32-bit SVR4 ABI has no Red Zone. However, it can still generate
- // stackless code if all local vars are reg-allocated.
- bool DisableRedZone = MF.getFunction()->hasFnAttribute(Attribute::NoRedZone);
+ const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+
unsigned LR = RegInfo->getRARegister();
- if (!DisableRedZone &&
- (Subtarget.isPPC64() || // 32-bit SVR4, no stack-
- !Subtarget.isSVR4ABI() || // allocated locals.
- FrameSize == 0) &&
- FrameSize <= 224 && // Fits in red zone.
- !MFI.hasVarSizedObjects() && // No dynamic alloca.
- !MFI.adjustsStack() && // No calls.
- !MustSaveLR(MF, LR) &&
- !RegInfo->hasBasePointer(MF)) { // No special alignment.
+ bool DisableRedZone = MF.getFunction()->hasFnAttribute(Attribute::NoRedZone);
+ bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca.
+ !MFI.adjustsStack() && // No calls.
+ !MustSaveLR(MF, LR) && // No need to save LR.
+ !RegInfo->hasBasePointer(MF); // No special alignment.
+
+ // Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless
+ // code if all local vars are reg-allocated.
+ bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize();
+
+ // Check whether we can skip adjusting the stack pointer (by using red zone)
+ if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
// No need for frame
if (UpdateMF)
MFI.setStackSize(0);
@@ -519,11 +515,10 @@ void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
unsigned FPReg = is31 ? PPC::R31 : PPC::R1;
unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
- const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
+ const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
bool HasBP = RegInfo->hasBasePointer(MF);
unsigned BPReg = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
- unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FPReg;
+ unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg;
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
BI != BE; ++BI)
@@ -616,8 +611,7 @@ PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
return true;
// Get the list of callee-saved registers for the target.
- const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
+ const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent());
// Get all the available registers in the block.
@@ -663,8 +657,7 @@ PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
// and the stack frame is large, we need two scratch registers.
bool
PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
- const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
+ const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
MachineFunction &MF = *(MBB->getParent());
bool HasBP = RegInfo->hasBasePointer(MF);
unsigned FrameSize = determineFrameLayout(MF, false);
@@ -694,10 +687,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
MachineFrameInfo &MFI = MF.getFrameInfo();
- const PPCInstrInfo &TII =
- *static_cast<const PPCInstrInfo *>(Subtarget.getInstrInfo());
- const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
+ const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
+ const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
MachineModuleInfo &MMI = MF.getMMI();
const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
@@ -1221,10 +1212,8 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
if (MBBI != MBB.end())
dl = MBBI->getDebugLoc();
- const PPCInstrInfo &TII =
- *static_cast<const PPCInstrInfo *>(Subtarget.getInstrInfo());
- const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
+ const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
+ const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
// Get alignment info so we know how to restore the SP.
const MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -1549,8 +1538,7 @@ void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
if (MBBI != MBB.end())
dl = MBBI->getDebugLoc();
- const PPCInstrInfo &TII =
- *static_cast<const PPCInstrInfo *>(Subtarget.getInstrInfo());
+ const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
// Create branch instruction for pseudo tail call return instruction
unsigned RetOpcode = MBBI->getOpcode();
@@ -1588,8 +1576,7 @@ void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
RegScavenger *RS) const {
TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
- const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
+ const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
// Save and clear the LR state.
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
@@ -1791,8 +1778,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
HasGPSaveArea = true;
}
- const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
+ const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
if (RegInfo->hasBasePointer(MF)) {
int FI = PFI->getBasePointerSaveIndex();
assert(FI && "No Base Pointer Save Slot!");
@@ -1880,8 +1866,13 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
}
if (HasVRSaveArea) {
- // Insert alignment padding, we need 16-byte alignment.
- LowerBound = (LowerBound - 15) & ~(15);
+    // Insert alignment padding; we need 16-byte alignment. Note: for a
+    // positive number the alignment formula is y = (x + (n-1)) & ~(n-1), but
+    // since we are dealing with a negative number here (the stack grows
+    // downward) we should use y = x & ~(n-1), where x is the size before
+    // aligning, n is the alignment (n = 16 here), and y is the size after
+    // aligning.
+    assert(LowerBound <= 0 && "Expect LowerBound to have a non-positive value!");
+ LowerBound &= ~(15);
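+    // For example (illustrative value, not from the source): with
+    // LowerBound = -100, -100 & ~15 = -112, i.e. 12 bytes of padding.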
for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
int FI = VRegs[i].getFrameIdx();
@@ -1913,12 +1904,13 @@ PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
MachineFrameInfo &MFI = MF.getFrameInfo();
if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) ||
hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) {
- const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
- const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC;
- RS->addScavengingFrameIndex(MFI.CreateStackObject(RC->getSize(),
- RC->getAlignment(),
- false));
+ const TargetRegisterClass &GPRC = PPC::GPRCRegClass;
+ const TargetRegisterClass &G8RC = PPC::G8RCRegClass;
+ const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC;
+ const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo();
+ unsigned Size = TRI.getSpillSize(RC);
+ unsigned Align = TRI.getSpillAlignment(RC);
+ RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
// Might we have over-aligned allocas?
bool HasAlVars = MFI.hasVarSizedObjects() &&
@@ -1926,9 +1918,7 @@ PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
// These kinds of spills might need two registers.
if (spillsCR(MF) || spillsVRSAVE(MF) || HasAlVars)
- RS->addScavengingFrameIndex(MFI.CreateStackObject(RC->getSize(),
- RC->getAlignment(),
- false));
+ RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
}
}
@@ -1945,8 +1935,7 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
return false;
MachineFunction *MF = MBB.getParent();
- const PPCInstrInfo &TII =
- *static_cast<const PPCInstrInfo *>(Subtarget.getInstrInfo());
+ const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
DebugLoc DL;
bool CRSpilled = false;
MachineInstrBuilder CRMIB;
@@ -2087,8 +2076,7 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
return false;
MachineFunction *MF = MBB.getParent();
- const PPCInstrInfo &TII =
- *static_cast<const PPCInstrInfo *>(Subtarget.getInstrInfo());
+ const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
bool CR2Spilled = false;
bool CR3Spilled = false;
bool CR4Spilled = false;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 1e51c1f..901539b 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -12,34 +12,76 @@
//
//===----------------------------------------------------------------------===//
-#include "PPC.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCPredicates.h"
+#include "PPC.h"
+#include "PPCISelLowering.h"
#include "PPCMachineFunctionInfo.h"
+#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/IR/Constants.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
-#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Module.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <limits>
+#include <memory>
+#include <new>
+#include <tuple>
+#include <utility>
+
using namespace llvm;
#define DEBUG_TYPE "ppc-codegen"
+STATISTIC(NumSextSetcc,
+ "Number of (sext(setcc)) nodes expanded into GPR sequence.");
+STATISTIC(NumZextSetcc,
+ "Number of (zext(setcc)) nodes expanded into GPR sequence.");
+STATISTIC(SignExtensionsAdded,
+ "Number of sign extensions for compare inputs added.");
+STATISTIC(ZeroExtensionsAdded,
+ "Number of zero extensions for compare inputs added.");
+STATISTIC(NumLogicOpsOnComparison,
+ "Number of logical ops on i1 values calculated in GPR.");
+STATISTIC(OmittedForNonExtendUses,
+ "Number of compares not eliminated as they have non-extending uses.");
+
// FIXME: Remove this once the bug has been fixed!
cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden);
@@ -60,6 +102,7 @@ static cl::opt<bool> EnableBranchHint(
cl::Hidden);
namespace {
+
//===--------------------------------------------------------------------===//
/// PPCDAGToDAGISel - PPC specific code to select PPC machine
/// instructions for SelectionDAG operations.
@@ -69,9 +112,10 @@ namespace {
const PPCSubtarget *PPCSubTarget;
const PPCTargetLowering *PPCLowering;
unsigned GlobalBaseReg;
+
public:
- explicit PPCDAGToDAGISel(PPCTargetMachine &tm)
- : SelectionDAGISel(tm), TM(tm) {}
+ explicit PPCDAGToDAGISel(PPCTargetMachine &tm, CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(tm, OptLevel), TM(tm) {}
bool runOnMachineFunction(MachineFunction &MF) override {
// Make sure we re-emit a set of the global base reg if necessary
@@ -134,7 +178,7 @@ namespace {
/// a base register plus a signed 16-bit displacement [r+imm].
bool SelectAddrImm(SDValue N, SDValue &Disp,
SDValue &Base) {
- return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, false);
+ return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 0);
}
/// SelectAddrImmOffs - Return true if the operand is valid for a preinc
@@ -167,7 +211,11 @@ namespace {
/// a base register plus a signed 16-bit displacement that is a multiple of 4.
/// Suitable for use by STD and friends.
bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
- return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, true);
+ return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 4);
+ }
+
+ bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) {
+ return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 16);
}
// Select an address into a single register.
@@ -184,7 +232,6 @@ namespace {
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
unsigned ConstraintID,
std::vector<SDValue> &OutOps) override {
-
switch(ConstraintID) {
default:
errs() << "ConstraintID: " << ConstraintID << "\n";
@@ -223,7 +270,34 @@ namespace {
#include "PPCGenDAGISel.inc"
private:
+ // Conversion type for interpreting results of a 32-bit instruction as
+ // a 64-bit value or vice versa.
+ enum ExtOrTruncConversion { Ext, Trunc };
+
+ // Modifiers to guide how an ISD::SETCC node's result is to be computed
+ // in a GPR.
+ // ZExtOrig - use the original condition code, zero-extend value
+ // ZExtInvert - invert the condition code, zero-extend value
+ // SExtOrig - use the original condition code, sign-extend value
+ // SExtInvert - invert the condition code, sign-extend value
+ enum SetccInGPROpts { ZExtOrig, ZExtInvert, SExtOrig, SExtInvert };
+
bool trySETCC(SDNode *N);
+ bool tryEXTEND(SDNode *N);
+ bool tryLogicOpOfCompares(SDNode *N);
+ SDValue computeLogicOpInGPR(SDValue LogicOp);
+ SDValue signExtendInputIfNeeded(SDValue Input);
+ SDValue zeroExtendInputIfNeeded(SDValue Input);
+ SDValue addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv);
+ SDValue get32BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
+ int64_t RHSValue, SDLoc dl);
+ SDValue get32BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
+ int64_t RHSValue, SDLoc dl);
+ SDValue get64BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
+ int64_t RHSValue, SDLoc dl);
+ SDValue get64BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
+ int64_t RHSValue, SDLoc dl);
+ SDValue getSETCCInGPR(SDValue Compare, SetccInGPROpts ConvOpts);
void PeepholePPC64();
void PeepholePPC64ZExt();
@@ -235,9 +309,11 @@ private:
bool AllUsersSelectZero(SDNode *N);
void SwapAllSelectUsers(SDNode *N);
+ bool isOffsetMultipleOf(SDNode *N, unsigned Val) const;
void transferMemOperands(SDNode *N, SDNode *Result);
};
-}
+
+} // end anonymous namespace
/// InsertVRSaveCode - Once the entire function has been instruction selected,
/// all virtual registers are created and all machine instructions are built,
@@ -303,7 +379,6 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
}
}
-
/// getGlobalBaseReg - Output the instructions required to put the
/// base address to use for accessing globals into a register.
///
@@ -349,26 +424,6 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
.getNode();
}
-/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
-/// or 64-bit immediate, and if the value can be accurately represented as a
-/// sign extension from a 16-bit value. If so, this returns true and the
-/// immediate.
-static bool isIntS16Immediate(SDNode *N, short &Imm) {
- if (N->getOpcode() != ISD::Constant)
- return false;
-
- Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
- if (N->getValueType(0) == MVT::i32)
- return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
- else
- return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
-}
-
-static bool isIntS16Immediate(SDValue Op, short &Imm) {
- return isIntS16Immediate(Op.getNode(), Imm);
-}
-
-
/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
/// operand. If so Imm will receive the 32-bit value.
static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
@@ -515,12 +570,12 @@ bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
SDValue Op1 = N->getOperand(1);
SDLoc dl(N);
- APInt LKZ, LKO, RKZ, RKO;
- CurDAG->computeKnownBits(Op0, LKZ, LKO);
- CurDAG->computeKnownBits(Op1, RKZ, RKO);
+ KnownBits LKnown, RKnown;
+ CurDAG->computeKnownBits(Op0, LKnown);
+ CurDAG->computeKnownBits(Op1, RKnown);
- unsigned TargetMask = LKZ.getZExtValue();
- unsigned InsertMask = RKZ.getZExtValue();
+ unsigned TargetMask = LKnown.Zero.getZExtValue();
+ unsigned InsertMask = RKnown.Zero.getZExtValue();
if ((TargetMask | InsertMask) == 0xFFFFFFFF) {
unsigned Op0Opc = Op0.getOpcode();
@@ -563,9 +618,9 @@ bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
// The AND mask might not be a constant, and we need to make sure that
// if we're going to fold the masking with the insert, all bits not
// know to be zero in the mask are known to be one.
- APInt MKZ, MKO;
- CurDAG->computeKnownBits(Op1.getOperand(1), MKZ, MKO);
- bool CanFoldMask = InsertMask == MKO.getZExtValue();
+ KnownBits MKnown;
+ CurDAG->computeKnownBits(Op1.getOperand(1), MKnown);
+ bool CanFoldMask = InsertMask == MKnown.One.getZExtValue();
unsigned SHOpc = Op1.getOperand(0).getOpcode();
if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask &&
@@ -659,7 +714,10 @@ static uint64_t Rot64(uint64_t Imm, unsigned R) {
static unsigned getInt64Count(int64_t Imm) {
unsigned Count = getInt64CountDirect(Imm);
- if (Count == 1)
+
+ // If the instruction count is 1 or 2, we do not need further analysis
+ // since rotate + load constant requires at least 2 instructions.
+ if (Count <= 2)
return Count;
for (unsigned r = 1; r < 63; ++r) {
@@ -769,7 +827,10 @@ static SDNode *getInt64Direct(SelectionDAG *CurDAG, const SDLoc &dl,
static SDNode *getInt64(SelectionDAG *CurDAG, const SDLoc &dl, int64_t Imm) {
unsigned Count = getInt64CountDirect(Imm);
- if (Count == 1)
+
+ // If the instruction count is 1 or 2, we do not need further analysis
+ // since rotate + load constant requires at least 2 instructions.
+ if (Count <= 2)
return getInt64Direct(CurDAG, dl, Imm);
unsigned RMin = 0;
@@ -833,6 +894,7 @@ static SDNode *getInt64(SelectionDAG *CurDAG, SDNode *N) {
}
namespace {
+
class BitPermutationSelector {
struct ValueBit {
SDValue V;
@@ -898,14 +960,12 @@ class BitPermutationSelector {
// associated with each) used to choose the lowering method.
struct ValueRotInfo {
SDValue V;
- unsigned RLAmt;
- unsigned NumGroups;
- unsigned FirstGroupStartIdx;
- bool Repl32;
+ unsigned RLAmt = std::numeric_limits<unsigned>::max();
+ unsigned NumGroups = 0;
+ unsigned FirstGroupStartIdx = std::numeric_limits<unsigned>::max();
+ bool Repl32 = false;
- ValueRotInfo()
- : RLAmt(UINT32_MAX), NumGroups(0), FirstGroupStartIdx(UINT32_MAX),
- Repl32(false) {}
+ ValueRotInfo() = default;
// For sorting (in reverse order) by NumGroups, and then by
// FirstGroupStartIdx.
@@ -1985,7 +2045,8 @@ public:
return RNLM;
}
};
-} // anonymous namespace
+
+} // end anonymous namespace
bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) {
if (N->getValueType(0) != MVT::i32 &&
@@ -2057,7 +2118,7 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
getI32Imm(Imm & 0xFFFF, dl)), 0);
Opc = PPC::CMPLW;
} else {
- short SImm;
+ int16_t SImm;
if (isIntS16Immediate(RHS, SImm))
return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
getI32Imm((int)SImm & 0xFFFF,
@@ -2104,7 +2165,7 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
getI64Imm(Imm & 0xFFFF, dl)), 0);
Opc = PPC::CMPLD;
} else {
- short SImm;
+ int16_t SImm;
if (isIntS16Immediate(RHS, SImm))
return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
getI64Imm(SImm & 0xFFFF, dl)),
@@ -2443,6 +2504,525 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
return true;
}
+// Is this opcode a bitwise logical operation?
+static bool isLogicOp(unsigned Opc) {
+ return Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR;
+}
+
+/// If this node is a sign/zero extension of an integer comparison,
+/// it can usually be computed in GPRs rather than using comparison
+/// instructions and ISEL. We only do this on 64-bit targets for now
+/// as the code is specialized for 64-bit (it uses 64-bit instructions
+/// and assumes 64-bit registers).
+bool PPCDAGToDAGISel::tryEXTEND(SDNode *N) {
+ if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64())
+ return false;
+ assert((N->getOpcode() == ISD::ZERO_EXTEND ||
+ N->getOpcode() == ISD::SIGN_EXTEND) &&
+ "Expecting a zero/sign extend node!");
+
+ SDValue WideRes;
+ // If we are zero-extending the result of a logical operation on i1
+ // values, we can keep the values in GPRs.
+ if (isLogicOp(N->getOperand(0).getOpcode()) &&
+ N->getOperand(0).getValueType() == MVT::i1 &&
+ N->getOpcode() == ISD::ZERO_EXTEND)
+ WideRes = computeLogicOpInGPR(N->getOperand(0));
+ else if (N->getOperand(0).getOpcode() != ISD::SETCC)
+ return false;
+ else
+ WideRes =
+ getSETCCInGPR(N->getOperand(0),
+ N->getOpcode() == ISD::SIGN_EXTEND ?
+ SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig);
+
+ if (!WideRes)
+ return false;
+
+ SDLoc dl(N);
+ bool Inputs32Bit = N->getOperand(0).getOperand(0).getValueType() == MVT::i32;
+ bool Output32Bit = N->getValueType(0) == MVT::i32;
+
+ NumSextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 1 : 0;
+ NumZextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 0 : 1;
+
+ SDValue ConvOp = WideRes;
+ if (Inputs32Bit != Output32Bit)
+ ConvOp = addExtOrTrunc(WideRes, Inputs32Bit ? ExtOrTruncConversion::Ext :
+ ExtOrTruncConversion::Trunc);
+ ReplaceNode(N, ConvOp.getNode());
+
+ return true;
+}
+
+// Lower a logical operation on i1 values into a GPR sequence if possible.
+// The result can be kept in a GPR if requested.
+// Three types of inputs can be handled:
+// - SETCC
+// - TRUNCATE
+// - Logical operation (AND/OR/XOR)
+// There is also a special case that is handled (namely a complement operation
+// achieved with xor %a, -1).
+SDValue PPCDAGToDAGISel::computeLogicOpInGPR(SDValue LogicOp) {
+ assert(isLogicOp(LogicOp.getOpcode()) &&
+ "Can only handle logic operations here.");
+ assert(LogicOp.getValueType() == MVT::i1 &&
+ "Can only handle logic operations on i1 values here.");
+ SDLoc dl(LogicOp);
+ SDValue LHS, RHS;
+
+ // Special case: xor %a, -1
+ bool IsBitwiseNegation = isBitwiseNot(LogicOp);
+
+ // Produces a GPR sequence for each operand of the binary logic operation.
+ // For SETCC, it produces the respective comparison, for TRUNCATE it truncates
+ // the value in a GPR and for logic operations, it will recursively produce
+ // a GPR sequence for the operation.
+ auto getLogicOperand = [&] (SDValue Operand) -> SDValue {
+ unsigned OperandOpcode = Operand.getOpcode();
+ if (OperandOpcode == ISD::SETCC)
+ return getSETCCInGPR(Operand, SetccInGPROpts::ZExtOrig);
+ else if (OperandOpcode == ISD::TRUNCATE) {
+ SDValue InputOp = Operand.getOperand(0);
+ EVT InVT = InputOp.getValueType();
+ return
+ SDValue(CurDAG->getMachineNode(InVT == MVT::i32 ? PPC::RLDICL_32 :
+ PPC::RLDICL, dl, InVT, InputOp,
+ getI64Imm(0, dl), getI64Imm(63, dl)), 0);
+ } else if (isLogicOp(OperandOpcode))
+ return computeLogicOpInGPR(Operand);
+ return SDValue();
+ };
+ LHS = getLogicOperand(LogicOp.getOperand(0));
+ RHS = getLogicOperand(LogicOp.getOperand(1));
+
+ // If a GPR sequence can't be produced for the LHS we can't proceed.
+ // Not producing a GPR sequence for the RHS is only a problem if this isn't
+ // a bitwise negation operation.
+ if (!LHS || (!RHS && !IsBitwiseNegation))
+ return SDValue();
+
+ NumLogicOpsOnComparison++;
+
+ // We will use the inputs as 64-bit values.
+ if (LHS.getValueType() == MVT::i32)
+ LHS = addExtOrTrunc(LHS, ExtOrTruncConversion::Ext);
+ if (!IsBitwiseNegation && RHS.getValueType() == MVT::i32)
+ RHS = addExtOrTrunc(RHS, ExtOrTruncConversion::Ext);
+
+ unsigned NewOpc;
+ switch (LogicOp.getOpcode()) {
+ default: llvm_unreachable("Unknown logic operation.");
+ case ISD::AND: NewOpc = PPC::AND8; break;
+ case ISD::OR: NewOpc = PPC::OR8; break;
+ case ISD::XOR: NewOpc = PPC::XOR8; break;
+ }
+
+ if (IsBitwiseNegation) {
+ RHS = getI64Imm(1, dl);
+ NewOpc = PPC::XORI8;
+ }
+
+ return SDValue(CurDAG->getMachineNode(NewOpc, dl, MVT::i64, LHS, RHS), 0);
+}
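+
+// Note (illustrative): since the i1 value is kept zero-extended in bit 0 of
+// a GPR here, the xor %a, -1 complement only needs that bit flipped, which
+// is why the bitwise-negation case is emitted as XORI8 with immediate 1
+// rather than a full 64-bit complement.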
+
+/// Try performing logical operations on results of comparisons in GPRs.
+/// It is typically preferred from a performance perspective over performing
+/// the operations on individual bits in the CR. We only do this on 64-bit
+/// targets for now as the code is specialized for 64-bit (it uses 64-bit
+/// instructions and assumes 64-bit registers).
+bool PPCDAGToDAGISel::tryLogicOpOfCompares(SDNode *N) {
+ if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64())
+ return false;
+ if (N->getValueType(0) != MVT::i1)
+ return false;
+ assert(isLogicOp(N->getOpcode()) &&
+ "Expected a logic operation on setcc results.");
+ SDValue LoweredLogical = computeLogicOpInGPR(SDValue(N, 0));
+ if (!LoweredLogical)
+ return false;
+
+ SDLoc dl(N);
+ bool IsBitwiseNegate = LoweredLogical.getMachineOpcode() == PPC::XORI8;
+ unsigned SubRegToExtract = IsBitwiseNegate ? PPC::sub_eq : PPC::sub_gt;
+ SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
+ SDValue LHS = LoweredLogical.getOperand(0);
+ SDValue RHS = LoweredLogical.getOperand(1);
+ SDValue WideOp;
+ SDValue OpToConvToRecForm;
+
+ // Look through any 32-bit to 64-bit implicit extend nodes to find the opcode
+ // that is input to the XORI.
+ if (IsBitwiseNegate &&
+ LoweredLogical.getOperand(0).getMachineOpcode() == PPC::INSERT_SUBREG)
+ OpToConvToRecForm = LoweredLogical.getOperand(0).getOperand(1);
+ else if (IsBitwiseNegate)
+ // If the input to the XORI isn't an extension, that's what we're after.
+ OpToConvToRecForm = LoweredLogical.getOperand(0);
+ else
+ // If this is not an XORI, it is a reg-reg logical op and we can convert it
+ // to record-form.
+ OpToConvToRecForm = LoweredLogical;
+
+ // Get the record-form version of the node we're looking to use to get the
+ // CR result from.
+ uint16_t NonRecOpc = OpToConvToRecForm.getMachineOpcode();
+ int NewOpc = PPCInstrInfo::getRecordFormOpcode(NonRecOpc);
+
+ // Convert the right node to record-form. This is either the logical we're
+ // looking at or it is the input node to the negation (if we're looking at
+ // a bitwise negation).
+ if (NewOpc != -1 && IsBitwiseNegate) {
+ // The input to the XORI has a record-form. Use it.
+ assert(LoweredLogical.getConstantOperandVal(1) == 1 &&
+ "Expected a PPC::XORI8 only for bitwise negation.");
+ // Emit the record-form instruction.
+ std::vector<SDValue> Ops;
+ for (int i = 0, e = OpToConvToRecForm.getNumOperands(); i < e; i++)
+ Ops.push_back(OpToConvToRecForm.getOperand(i));
+
+ WideOp =
+ SDValue(CurDAG->getMachineNode(NewOpc, dl,
+ OpToConvToRecForm.getValueType(),
+ MVT::Glue, Ops), 0);
+ } else {
+ assert((NewOpc != -1 || !IsBitwiseNegate) &&
+ "No record form available for AND8/OR8/XOR8?");
+ WideOp =
+ SDValue(CurDAG->getMachineNode(NewOpc == -1 ? PPC::ANDIo8 : NewOpc, dl,
+ MVT::i64, MVT::Glue, LHS, RHS), 0);
+ }
+
+ // Select this node to a single bit from CR0 set by the record-form node
+ // just created. For bitwise negation, use the EQ bit which is the equivalent
+ // of negating the result (i.e. it is a bit set when the result of the
+ // operation is zero).
+ SDValue SRIdxVal =
+ CurDAG->getTargetConstant(SubRegToExtract, dl, MVT::i32);
+ SDValue CRBit =
+ SDValue(CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
+ MVT::i1, CR0Reg, SRIdxVal,
+ WideOp.getValue(1)), 0);
+ ReplaceNode(N, CRBit.getNode());
+ return true;
+}
+
+/// If the value isn't guaranteed to be sign-extended to 64-bits, extend it.
+/// Useful when emitting comparison code for 32-bit values without using
+/// the compare instruction (which only considers the lower 32-bits).
+SDValue PPCDAGToDAGISel::signExtendInputIfNeeded(SDValue Input) {
+ assert(Input.getValueType() == MVT::i32 &&
+ "Can only sign-extend 32-bit values here.");
+ unsigned Opc = Input.getOpcode();
+
+ // The value was sign extended and then truncated to 32-bits. No need to
+ // sign extend it again.
+ if (Opc == ISD::TRUNCATE &&
+ (Input.getOperand(0).getOpcode() == ISD::AssertSext ||
+ Input.getOperand(0).getOpcode() == ISD::SIGN_EXTEND))
+ return Input;
+
+ LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
+ // The input is a sign-extending load. No reason to sign-extend.
+ if (InputLoad && InputLoad->getExtensionType() == ISD::SEXTLOAD)
+ return Input;
+
+ ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
+ // We don't sign-extend constants and already sign-extended values.
+ if (InputConst || Opc == ISD::AssertSext || Opc == ISD::SIGN_EXTEND_INREG ||
+ Opc == ISD::SIGN_EXTEND)
+ return Input;
+
+ SDLoc dl(Input);
+ SignExtensionsAdded++;
+ return SDValue(CurDAG->getMachineNode(PPC::EXTSW_32, dl, MVT::i32, Input), 0);
+}
+
+/// If the value isn't guaranteed to be zero-extended to 64-bits, extend it.
+/// Useful when emitting comparison code for 32-bit values without using
+/// the compare instruction (which only considers the lower 32-bits).
+SDValue PPCDAGToDAGISel::zeroExtendInputIfNeeded(SDValue Input) {
+ assert(Input.getValueType() == MVT::i32 &&
+ "Can only zero-extend 32-bit values here.");
+ LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
+ unsigned Opc = Input.getOpcode();
+
+ // No need to zero-extend loaded values (unless they're loaded with
+ // a sign-extending load).
+ if (InputLoad && InputLoad->getExtensionType() != ISD::SEXTLOAD)
+ return Input;
+
+ ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
+ bool InputZExtConst = InputConst && InputConst->getSExtValue() >= 0;
+  // An ISD::TRUNCATE will be lowered to an EXTRACT_SUBREG, so the high bits
+  // are not known to be clear and we have to conservatively clear them
+  // ourselves. We also don't need to zero-extend constants or values that
+  // are already zero-extended.
+ if (InputZExtConst || Opc == ISD::AssertZext || Opc == ISD::ZERO_EXTEND)
+ return Input;
+
+ SDLoc dl(Input);
+ ZeroExtensionsAdded++;
+ return SDValue(CurDAG->getMachineNode(PPC::RLDICL_32, dl, MVT::i32, Input,
+ getI64Imm(0, dl), getI64Imm(32, dl)),
+ 0);
+}
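+
+// Note (illustrative): the RLDICL_32 above, with SH = 0 and MB = 32, rotates
+// by zero and keeps only bits 32-63 (the low word), i.e. it clears the high
+// 32 bits.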
+
+// Handle a 32-bit value in a 64-bit register and vice versa. These are of
+// course not actual zero/sign extensions that will generate machine code;
+// they're just a way to reinterpret a 32-bit value in a register as a
+// 64-bit value and vice versa.
+SDValue PPCDAGToDAGISel::addExtOrTrunc(SDValue NatWidthRes,
+ ExtOrTruncConversion Conv) {
+ SDLoc dl(NatWidthRes);
+
+ // For reinterpreting 32-bit values as 64 bit values, we generate
+ // INSERT_SUBREG IMPLICIT_DEF:i64, <input>, TargetConstant:i32<1>
+ if (Conv == ExtOrTruncConversion::Ext) {
+ SDValue ImDef(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, MVT::i64), 0);
+ SDValue SubRegIdx =
+ CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
+ return SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, MVT::i64,
+ ImDef, NatWidthRes, SubRegIdx), 0);
+ }
+
+  assert(Conv == ExtOrTruncConversion::Trunc &&
+         "Unknown conversion between 32 and 64 bit values.");
+ // For reinterpreting 64-bit values as 32-bit values, we just need to
+ // EXTRACT_SUBREG (i.e. extract the low word).
+ SDValue SubRegIdx =
+ CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
+ return SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, MVT::i32,
+ NatWidthRes, SubRegIdx), 0);
+}
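+
+// Sketch of the emitted nodes (illustrative):
+//   Ext:   i64 = INSERT_SUBREG (IMPLICIT_DEF), <input>, sub_32
+//   Trunc: i32 = EXTRACT_SUBREG <input>, sub_32
+// Neither emits a real extension; both merely re-label the register width.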
+
+/// Produces a zero-extended result of comparing two 32-bit values according to
+/// the passed condition code.
+SDValue PPCDAGToDAGISel::get32BitZExtCompare(SDValue LHS, SDValue RHS,
+ ISD::CondCode CC,
+ int64_t RHSValue, SDLoc dl) {
+ bool IsRHSZero = RHSValue == 0;
+ switch (CC) {
+ default: return SDValue();
+ case ISD::SETEQ: {
+ // (zext (setcc %a, %b, seteq)) -> (lshr (cntlzw (xor %a, %b)), 5)
+ // (zext (setcc %a, 0, seteq)) -> (lshr (cntlzw %a), 5)
+ SDValue Xor = IsRHSZero ? LHS :
+ SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
+ SDValue Clz =
+ SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
+ SDValue ShiftOps[] = { Clz, getI32Imm(27, dl), getI32Imm(5, dl),
+ getI32Imm(31, dl) };
+ return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
+ ShiftOps), 0);
+ }
+ case ISD::SETNE: {
+ // (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1)
+ // (zext (setcc %a, 0, setne)) -> (xor (lshr (cntlzw %a), 5), 1)
+ SDValue Xor = IsRHSZero ? LHS :
+ SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
+ SDValue Clz =
+ SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
+ SDValue ShiftOps[] = { Clz, getI32Imm(27, dl), getI32Imm(5, dl),
+ getI32Imm(31, dl) };
+ SDValue Shift =
+ SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
+ return SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
+ getI32Imm(1, dl)), 0);
+ }
+ }
+}
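+
+// Why the cntlzw trick above works (illustrative): cntlzw returns 32 only
+// for a zero input, i.e. only when %a == %b; the shift right by 5 then maps
+// 32 -> 1 and any count in [0, 31] -> 0, yielding the zero-extended result.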
+
+/// Produces a sign-extended result of comparing two 32-bit values according to
+/// the passed condition code.
+SDValue PPCDAGToDAGISel::get32BitSExtCompare(SDValue LHS, SDValue RHS,
+ ISD::CondCode CC,
+ int64_t RHSValue, SDLoc dl) {
+ bool IsRHSZero = RHSValue == 0;
+ switch (CC) {
+ default: return SDValue();
+ case ISD::SETEQ: {
+ // (sext (setcc %a, %b, seteq)) ->
+ // (ashr (shl (ctlz (xor %a, %b)), 58), 63)
+ // (sext (setcc %a, 0, seteq)) ->
+ // (ashr (shl (ctlz %a), 58), 63)
+ SDValue CountInput = IsRHSZero ? LHS :
+ SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
+ SDValue Cntlzw =
+ SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, CountInput), 0);
+ SDValue SHLOps[] = { Cntlzw, getI32Imm(58, dl), getI32Imm(0, dl) };
+ SDValue Sldi =
+ SDValue(CurDAG->getMachineNode(PPC::RLDICR_32, dl, MVT::i32, SHLOps), 0);
+ return SDValue(CurDAG->getMachineNode(PPC::SRADI_32, dl, MVT::i32, Sldi,
+ getI32Imm(63, dl)), 0);
+ }
+ case ISD::SETNE: {
+ // Bitwise xor the operands, count leading zeros, shift right by 5 bits and
+ // flip the bit, finally take 2's complement.
+ // (sext (setcc %a, %b, setne)) ->
+ // (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1))
+ // Same as above, but the first xor is not needed.
+ // (sext (setcc %a, 0, setne)) ->
+ // (neg (xor (lshr (ctlz %a), 5), 1))
+ SDValue Xor = IsRHSZero ? LHS :
+ SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
+ SDValue Clz =
+ SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
+ SDValue ShiftOps[] =
+ { Clz, getI32Imm(27, dl), getI32Imm(5, dl), getI32Imm(31, dl) };
+ SDValue Shift =
+ SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
+ SDValue Xori =
+ SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
+ getI32Imm(1, dl)), 0);
+ return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Xori), 0);
+ }
+ }
+}
+
+/// Produces a zero-extended result of comparing two 64-bit values according to
+/// the passed condition code.
+SDValue PPCDAGToDAGISel::get64BitZExtCompare(SDValue LHS, SDValue RHS,
+ ISD::CondCode CC,
+ int64_t RHSValue, SDLoc dl) {
+ bool IsRHSZero = RHSValue == 0;
+ switch (CC) {
+ default: return SDValue();
+ case ISD::SETEQ: {
+ // (zext (setcc %a, %b, seteq)) -> (lshr (ctlz (xor %a, %b)), 6)
+ // (zext (setcc %a, 0, seteq)) -> (lshr (ctlz %a), 6)
+ SDValue Xor = IsRHSZero ? LHS :
+ SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
+ SDValue Clz =
+ SDValue(CurDAG->getMachineNode(PPC::CNTLZD, dl, MVT::i64, Xor), 0);
+ return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Clz,
+ getI64Imm(58, dl), getI64Imm(63, dl)),
+ 0);
+ }
+ }
+}
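+
+// Same idea as the 32-bit case (illustrative): cntlzd returns 64 only for a
+// zero input, and the rldicl above is effectively a right shift by 6 that
+// maps 64 -> 1 and any smaller count -> 0.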
+
+/// Produces a sign-extended result of comparing two 64-bit values according to
+/// the passed condition code.
+SDValue PPCDAGToDAGISel::get64BitSExtCompare(SDValue LHS, SDValue RHS,
+ ISD::CondCode CC,
+ int64_t RHSValue, SDLoc dl) {
+ bool IsRHSZero = RHSValue == 0;
+ switch (CC) {
+ default: return SDValue();
+ case ISD::SETEQ: {
+ // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
+ // (sext (setcc %a, %b, seteq)) -> (sube addc.reg, addc.reg, addc.CA)
+ // {addcz.reg, addcz.CA} = (addcarry %a, -1)
+ // (sext (setcc %a, 0, seteq)) -> (sube addcz.reg, addcz.reg, addcz.CA)
+ SDValue AddInput = IsRHSZero ? LHS :
+ SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
+ SDValue Addic =
+ SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
+ AddInput, getI32Imm(~0U, dl)), 0);
+ return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, Addic,
+ Addic, Addic.getValue(1)), 0);
+ }
+ }
+}
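+
+// Why this works (illustrative): addic x, -1 sets CA iff x != 0, and
+// subfe r, r computes ~r + r + CA = CA - 1. The result is therefore -1 (all
+// ones) exactly when the xor (or %a itself when the RHS is zero) is zero,
+// i.e. the sign-extended seteq.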
+
+/// Does this SDValue have any uses for which keeping the value in a GPR is
+/// appropriate? This is meant to be used on values that have type i1 since
+/// it is somewhat meaningless to ask if values of other types can be kept in
+/// GPRs.
+static bool allUsesExtend(SDValue Compare, SelectionDAG *CurDAG) {
+ assert(Compare.getOpcode() == ISD::SETCC &&
+ "An ISD::SETCC node required here.");
+
+ // For values that have a single use, the caller should obviously already have
+ // checked if that use is an extending use. We check the other uses here.
+ if (Compare.hasOneUse())
+ return true;
+ // We want the value in a GPR if it is being extended, used for a select, or
+ // used in logical operations.
+ for (auto CompareUse : Compare.getNode()->uses())
+ if (CompareUse->getOpcode() != ISD::SIGN_EXTEND &&
+ CompareUse->getOpcode() != ISD::ZERO_EXTEND &&
+ CompareUse->getOpcode() != ISD::SELECT &&
+ !isLogicOp(CompareUse->getOpcode())) {
+ OmittedForNonExtendUses++;
+ return false;
+ }
+ return true;
+}
+
+/// Returns an equivalent of a SETCC node but with the result the same width
+/// as the inputs. This can also be used for SELECT_CC if either the true or
+/// false value is a power of two while the other is zero.
+SDValue PPCDAGToDAGISel::getSETCCInGPR(SDValue Compare,
+ SetccInGPROpts ConvOpts) {
+ assert((Compare.getOpcode() == ISD::SETCC ||
+ Compare.getOpcode() == ISD::SELECT_CC) &&
+ "An ISD::SETCC node required here.");
+
+ // Don't convert this comparison to a GPR sequence because there are uses
+ // of the i1 result (i.e. uses that require the result in the CR).
+ if ((Compare.getOpcode() == ISD::SETCC) && !allUsesExtend(Compare, CurDAG))
+ return SDValue();
+
+ SDValue LHS = Compare.getOperand(0);
+ SDValue RHS = Compare.getOperand(1);
+
+ // The condition code is operand 2 for SETCC and operand 4 for SELECT_CC.
+ int CCOpNum = Compare.getOpcode() == ISD::SELECT_CC ? 4 : 2;
+ ISD::CondCode CC =
+ cast<CondCodeSDNode>(Compare.getOperand(CCOpNum))->get();
+ EVT InputVT = LHS.getValueType();
+ if (InputVT != MVT::i32 && InputVT != MVT::i64)
+ return SDValue();
+
+ if (ConvOpts == SetccInGPROpts::ZExtInvert ||
+ ConvOpts == SetccInGPROpts::SExtInvert)
+ CC = ISD::getSetCCInverse(CC, true);
+
+ bool Inputs32Bit = InputVT == MVT::i32;
+ if (ISD::isSignedIntSetCC(CC) && Inputs32Bit) {
+ LHS = signExtendInputIfNeeded(LHS);
+ RHS = signExtendInputIfNeeded(RHS);
+ } else if (ISD::isUnsignedIntSetCC(CC) && Inputs32Bit) {
+ LHS = zeroExtendInputIfNeeded(LHS);
+ RHS = zeroExtendInputIfNeeded(RHS);
+ }
+
+ SDLoc dl(Compare);
+ ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
+ int64_t RHSValue = RHSConst ? RHSConst->getSExtValue() : INT64_MAX;
+ bool IsSext = ConvOpts == SetccInGPROpts::SExtOrig ||
+ ConvOpts == SetccInGPROpts::SExtInvert;
+
+ if (IsSext && Inputs32Bit)
+ return get32BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
+ else if (Inputs32Bit)
+ return get32BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
+ else if (IsSext)
+ return get64BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
+ return get64BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
+}
+
+/// Does this node represent a load/store whose address can be represented
+/// with a register plus an immediate that's a multiple of \p Val.
+bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const {
+ LoadSDNode *LDN = dyn_cast<LoadSDNode>(N);
+ StoreSDNode *STN = dyn_cast<StoreSDNode>(N);
+ SDValue AddrOp;
+ if (LDN)
+ AddrOp = LDN->getOperand(1);
+ else if (STN)
+ AddrOp = STN->getOperand(2);
+
+ short Imm = 0;
+ if (AddrOp.getOpcode() == ISD::ADD)
+ return isIntS16Immediate(AddrOp.getOperand(1), Imm) && !(Imm % Val);
+
+ // If the address comes from the outside, the offset will be zero.
+ return AddrOp.getOpcode() == ISD::CopyFromReg;
+}
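+
+// Presumably this feeds the displacement-form address selectors above
+// (e.g. SelectAddrImmX4 / SelectAddrImmX16, whose offsets must be multiples
+// of 4 and 16, respectively).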
+
void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
// Transfer memoperands.
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
@@ -2450,7 +3030,6 @@ void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
}
-
// Select - Convert the specified operand from a target-independent to a
// target-specific node if it hasn't already been changed.
void PPCDAGToDAGISel::Select(SDNode *N) {
@@ -2474,19 +3053,24 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
switch (N->getOpcode()) {
default: break;
- case ISD::Constant: {
+ case ISD::Constant:
if (N->getValueType(0) == MVT::i64) {
ReplaceNode(N, getInt64(CurDAG, N));
return;
}
break;
- }
- case ISD::SETCC: {
+ case ISD::ZERO_EXTEND:
+ case ISD::SIGN_EXTEND:
+ if (tryEXTEND(N))
+ return;
+ break;
+
+ case ISD::SETCC:
if (trySETCC(N))
return;
break;
- }
+
case PPCISD::GlobalBaseReg:
ReplaceNode(N, getGlobalBaseReg());
return;
@@ -2502,11 +3086,10 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
return;
}
- case PPCISD::READ_TIME_BASE: {
+ case PPCISD::READ_TIME_BASE:
ReplaceNode(N, CurDAG->getMachineNode(PPC::ReadTB, dl, MVT::i32, MVT::i32,
MVT::Other, N->getOperand(0)));
return;
- }
case PPCISD::SRA_ADDZE: {
SDValue N0 = N->getOperand(0);
@@ -2626,6 +3209,9 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
}
case ISD::AND: {
+ if (tryLogicOpOfCompares(N))
+ return;
+
unsigned Imm, Imm2, SH, MB, ME;
uint64_t Imm64;
@@ -2690,6 +3276,19 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
return;
}
+    // If this is a negated 64-bit zero-extension mask,
+    // i.e. the immediate is a sequence of ones from the most significant
+    // side and all zeros for the remainder, we should use rldicr.
+ if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) &&
+ isMask_64(~Imm64)) {
+ SDValue Val = N->getOperand(0);
+ MB = 63 - countTrailingOnes(~Imm64);
+ SH = 0;
+ SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) };
+ CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops);
+ return;
+ }
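+    // Worked example for the rldicr path above (illustrative):
+    // Imm64 = 0xFFFF000000000000 gives ~Imm64 = a mask of 48 trailing ones,
+    // so MB = 63 - 48 = 15 and we emit rldicr val, 0, 15, keeping PPC bits
+    // 0-15 (the 16 most significant bits) and clearing the rest.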
+
// AND X, 0 -> 0, not "rlwinm 32".
if (isInt32Immediate(N->getOperand(1), Imm) && (Imm == 0)) {
ReplaceUses(SDValue(N, 0), N->getOperand(1));
@@ -2732,15 +3331,18 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
if (tryBitfieldInsert(N))
return;
- short Imm;
+ if (tryLogicOpOfCompares(N))
+ return;
+
+ int16_t Imm;
if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
isIntS16Immediate(N->getOperand(1), Imm)) {
- APInt LHSKnownZero, LHSKnownOne;
- CurDAG->computeKnownBits(N->getOperand(0), LHSKnownZero, LHSKnownOne);
+ KnownBits LHSKnown;
+ CurDAG->computeKnownBits(N->getOperand(0), LHSKnown);
// If this is equivalent to an add, then we can fold it with the
// FrameIndex calculation.
- if ((LHSKnownZero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) {
+ if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) {
selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm);
return;
}
@@ -2749,8 +3351,13 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
// Other cases are autogenerated.
break;
}
+ case ISD::XOR: {
+ if (tryLogicOpOfCompares(N))
+ return;
+ break;
+ }
case ISD::ADD: {
- short Imm;
+ int16_t Imm;
if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
isIntS16Immediate(N->getOperand(1), Imm)) {
selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm);
@@ -2911,8 +3518,8 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
CurDAG->SelectNodeTo(N, PPC::XXSEL, N->getValueType(0), Ops);
return;
}
-
break;
+
case ISD::VECTOR_SHUFFLE:
if (PPCSubTarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 ||
N->getValueType(0) == MVT::v2i64)) {
@@ -2940,7 +3547,11 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {
SDValue Chain = LD->getChain();
SDValue Ops[] = { Base, Offset, Chain };
- CurDAG->SelectNodeTo(N, PPC::LXVDSX, N->getValueType(0), Ops);
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX,
+ N->getValueType(0), Ops);
+ cast<MachineSDNode>(NewN)->setMemRefs(MemOp, MemOp + 1);
return;
}
}
@@ -3088,7 +3699,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
SDValue(Tmp, 0), GA));
return;
}
- case PPCISD::PPC32_PICGOT: {
+ case PPCISD::PPC32_PICGOT:
// Generate a PIC-safe GOT reference.
assert(!PPCSubTarget->isPPC64() && PPCSubTarget->isSVR4ABI() &&
"PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4");
@@ -3096,7 +3707,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
PPCLowering->getPointerTy(CurDAG->getDataLayout()),
MVT::i32);
return;
- }
+
case PPCISD::VADD_SPLAT: {
// This expands into one of three sequences, depending on whether
// the first operand is odd or even, positive or negative.
@@ -3139,7 +3750,6 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
SDValue TmpVal = SDValue(Tmp, 0);
ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal));
return;
-
} else if (Elt > 0) {
// Elt is odd and positive, in the range [17,31].
//
@@ -3154,7 +3764,6 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
ReplaceNode(N, CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0),
SDValue(Tmp2, 0)));
return;
-
} else {
// Elt is odd and negative, in the range [-31,-17].
//
@@ -3199,7 +3808,7 @@ SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
EVT VT = N->getValueType(0);
SDValue RHS, LHS;
- bool BytesFound[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
+ bool BytesFound[8] = {false, false, false, false, false, false, false, false};
uint64_t Mask = 0, Alt = 0;
auto IsByteSelectCC = [this](SDValue O, unsigned &b,
@@ -3436,11 +4045,13 @@ void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) {
O0.getNode(), O1.getNode());
};
+ // FIXME: When the semantics of the interaction between select and undef
+ // are clearly defined, it may turn out to be unnecessary to break here.
SDValue TrueRes = TryFold(ConstTrue);
- if (!TrueRes)
+ if (!TrueRes || TrueRes.isUndef())
break;
SDValue FalseRes = TryFold(ConstFalse);
- if (!FalseRes)
+ if (!FalseRes || FalseRes.isUndef())
break;
// For us to materialize these using one instruction, we must be able to
@@ -3499,7 +4110,6 @@ void PPCDAGToDAGISel::PreprocessISelDAG() {
/// PostprocessISelDAG - Perform some late peephole optimizations
/// on the DAG representation.
void PPCDAGToDAGISel::PostprocessISelDAG() {
-
// Skip peepholes at -O0.
if (TM.getOptLevel() == CodeGenOpt::None)
return;
@@ -3515,10 +4125,6 @@ void PPCDAGToDAGISel::PostprocessISelDAG() {
// be folded with the isel so that we don't need to materialize a register
// containing zero.
bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) {
- // If we're not using isel, then this does not matter.
- if (!PPCSubTarget->hasISEL())
- return false;
-
for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
UI != UE; ++UI) {
SDNode *User = *UI;
@@ -4520,10 +5126,10 @@ void PPCDAGToDAGISel::PeepholePPC64() {
}
}
-
/// createPPCISelDag - This pass converts a legalized DAG into a
/// PowerPC-specific DAG, ready for instruction scheduling.
///
-FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM) {
- return new PPCDAGToDAGISel(TM);
+FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new PPCDAGToDAGISel(TM, OptLevel);
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 2b9195b..b3a3c73 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -13,37 +13,87 @@
#include "PPCISelLowering.h"
#include "MCTargetDesc/PPCPredicates.h"
-#include "PPCCallingConv.h"
+#include "PPC.h"
#include "PPCCCState.h"
+#include "PPCCallingConv.h"
+#include "PPCFrameLowering.h"
+#include "PPCInstrInfo.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
+#include "PPCRegisterInfo.h"
+#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
-#include "PPCTargetObjectFile.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/Value.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
#include <list>
+#include <utility>
+#include <vector>
using namespace llvm;
@@ -86,7 +136,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
}
- // PowerPC has an i16 but no i8 (or i1) SEXTLOAD
+  // Match BITREVERSE to a customized fast code sequence in the td file.
+ setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
+ setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
+
+ // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
for (MVT VT : MVT::integer_valuetypes()) {
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
@@ -125,7 +179,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
}
- // PowerPC does not support direct load / store of condition registers
+ // PowerPC does not support direct load/store of condition registers.
setOperationAction(ISD::LOAD, MVT::i1, Custom);
setOperationAction(ISD::STORE, MVT::i1, Custom);
@@ -154,11 +208,23 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
- // PowerPC has no SREM/UREM instructions
- setOperationAction(ISD::SREM, MVT::i32, Expand);
- setOperationAction(ISD::UREM, MVT::i32, Expand);
- setOperationAction(ISD::SREM, MVT::i64, Expand);
- setOperationAction(ISD::UREM, MVT::i64, Expand);
+  // PowerPC has no SREM/UREM instructions unless we are on P9.
+  // On P9 we may use a hardware instruction to compute the remainder.
+ // The instructions are not legalized directly because in the cases where the
+ // result of both the remainder and the division is required it is more
+ // efficient to compute the remainder from the result of the division rather
+ // than use the remainder instruction.
+ if (Subtarget.isISA3_0()) {
+ setOperationAction(ISD::SREM, MVT::i32, Custom);
+ setOperationAction(ISD::UREM, MVT::i32, Custom);
+ setOperationAction(ISD::SREM, MVT::i64, Custom);
+ setOperationAction(ISD::UREM, MVT::i64, Custom);
+ } else {
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i64, Expand);
+ setOperationAction(ISD::UREM, MVT::i64, Expand);
+ }
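The tradeoff the comment describes is the classic multiply-subtract expansion; a short sketch (plain C++) of why the remainder is nearly free once the quotient exists:

#include <cassert>
#include <cstdint>

// When both A/B and A%B are needed, expanding the remainder as a
// multiply-subtract reuses the division instead of issuing a second
// (P9 modulo) instruction.
static int64_t remFromDiv(int64_t A, int64_t B) {
  return A - (A / B) * B;
}

int main() {
  assert(remFromDiv(23, 5) == 23 % 5);
  assert(remFromDiv(-23, 5) == -23 % 5); // C++ division truncates toward zero
  return 0;
}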
// Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
@@ -360,6 +426,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// To handle counter-based loop conditions.
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
+
// Comparisons that require checking two conditions.
setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
@@ -484,7 +555,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FSIN, VT, Expand);
setOperationAction(ISD::FCOS, VT, Expand);
setOperationAction(ISD::FABS, VT, Expand);
- setOperationAction(ISD::FPOWI, VT, Expand);
setOperationAction(ISD::FFLOOR, VT, Expand);
setOperationAction(ISD::FCEIL, VT, Expand);
setOperationAction(ISD::FTRUNC, VT, Expand);
@@ -634,6 +704,14 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::SRA, MVT::v2i64, Legal);
setOperationAction(ISD::SRL, MVT::v2i64, Legal);
+    // 128 bit shifts can be accomplished via 3 instructions for SHL and
+    // SRL, but not for SRA because of the instructions available:
+    // VS{RL} and VS{RL}O. However, due to direct move costs, it's not
+    // worth doing.
+ setOperationAction(ISD::SHL, MVT::v1i128, Expand);
+ setOperationAction(ISD::SRL, MVT::v1i128, Expand);
+ setOperationAction(ISD::SRA, MVT::v1i128, Expand);
+
setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
}
else {
@@ -687,6 +765,13 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
if (Subtarget.hasP9Vector()) {
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
+
+ // 128 bit shifts can be accomplished via 3 instructions for SHL and
+ // SRL, but not for SRA because of the instructions available:
+ // VS{RL} and VS{RL}O.
+ setOperationAction(ISD::SHL, MVT::v1i128, Legal);
+ setOperationAction(ISD::SRL, MVT::v1i128, Legal);
+ setOperationAction(ISD::SRA, MVT::v1i128, Expand);
}
}
@@ -728,7 +813,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FABS , MVT::v4f64, Legal);
setOperationAction(ISD::FSIN , MVT::v4f64, Expand);
setOperationAction(ISD::FCOS , MVT::v4f64, Expand);
- setOperationAction(ISD::FPOWI , MVT::v4f64, Expand);
setOperationAction(ISD::FPOW , MVT::v4f64, Expand);
setOperationAction(ISD::FLOG , MVT::v4f64, Expand);
setOperationAction(ISD::FLOG2 , MVT::v4f64, Expand);
@@ -774,7 +858,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FABS , MVT::v4f32, Legal);
setOperationAction(ISD::FSIN , MVT::v4f32, Expand);
setOperationAction(ISD::FCOS , MVT::v4f32, Expand);
- setOperationAction(ISD::FPOWI , MVT::v4f32, Expand);
setOperationAction(ISD::FPOW , MVT::v4f32, Expand);
setOperationAction(ISD::FLOG , MVT::v4f32, Expand);
setOperationAction(ISD::FLOG2 , MVT::v4f32, Expand);
@@ -873,6 +956,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
// We have target-specific dag combine patterns for the following nodes:
+ setTargetDAGCombine(ISD::SHL);
+ setTargetDAGCombine(ISD::SRA);
+ setTargetDAGCombine(ISD::SRL);
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::BUILD_VECTOR);
if (Subtarget.hasFPCVT())
@@ -971,6 +1057,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
MaxStoresPerMemset = 128;
MaxStoresPerMemcpy = 128;
MaxStoresPerMemmove = 128;
+ MaxLoadsPerMemcmp = 128;
+ } else {
+ MaxLoadsPerMemcmp = 8;
+ MaxLoadsPerMemcmpOptSize = 4;
}
}
@@ -1042,6 +1132,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::VPERM: return "PPCISD::VPERM";
case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
case PPCISD::XXINSERT: return "PPCISD::XXINSERT";
+ case PPCISD::XXREVERSE: return "PPCISD::XXREVERSE";
+ case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
case PPCISD::VECSHL: return "PPCISD::VECSHL";
case PPCISD::CMPB: return "PPCISD::CMPB";
case PPCISD::Hi: return "PPCISD::Hi";
@@ -1080,6 +1172,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
case PPCISD::STXSIX: return "PPCISD::STXSIX";
case PPCISD::VEXTS: return "PPCISD::VEXTS";
+ case PPCISD::SExtVElems: return "PPCISD::SExtVElems";
case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
@@ -1523,21 +1616,47 @@ bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
return true;
}
-bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
- unsigned &InsertAtByte, bool &Swap, bool IsLE) {
-
- // Check that the mask is shuffling words
- for (unsigned i = 0; i < 4; ++i) {
- unsigned B0 = N->getMaskElt(i*4);
- unsigned B1 = N->getMaskElt(i*4+1);
- unsigned B2 = N->getMaskElt(i*4+2);
- unsigned B3 = N->getMaskElt(i*4+3);
- if (B0 % 4)
+/// Check that the mask is shuffling N byte elements. Within each N byte
+/// element of the mask, the indices could be either in increasing or
+/// decreasing order as long as they are consecutive.
+/// \param[in] N the shuffle vector SD Node to analyze
+/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
+/// Word/DoubleWord/QuadWord).
+/// \param[in] StepLen the step between consecutive indices within an
+///            element: 1 if the mask is in increasing order, -1 if decreasing.
+/// \return true iff the mask is shuffling N byte elements.
+static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
+ int StepLen) {
+ assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
+ "Unexpected element width.");
+  assert((StepLen == 1 || StepLen == -1) && "Unexpected step length.");
+
+ unsigned NumOfElem = 16 / Width;
+ unsigned MaskVal[16]; // Width is never greater than 16
+ for (unsigned i = 0; i < NumOfElem; ++i) {
+ MaskVal[0] = N->getMaskElt(i * Width);
+ if ((StepLen == 1) && (MaskVal[0] % Width)) {
return false;
- if (B1 != B0+1 || B2 != B1+1 || B3 != B2+1)
+ } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
return false;
+ }
+
+ for (unsigned int j = 1; j < Width; ++j) {
+ MaskVal[j] = N->getMaskElt(i * Width + j);
+ if (MaskVal[j] != MaskVal[j-1] + StepLen) {
+ return false;
+ }
+ }
}
+ return true;
+}
+
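A self-contained mirror of the walk above (plain C++; it assumes a fully defined 16-entry mask, whereas the real shuffle node may also contain undef sentinels):

#include <cassert>

static bool isNByteElemMask(const int Mask[16], unsigned Width, int StepLen) {
  for (unsigned i = 0; i < 16 / Width; ++i) {
    int First = Mask[i * Width];
    // Increasing elements start on a multiple of Width; decreasing ones
    // end on a multiple of Width.
    if (StepLen == 1 && First % (int)Width != 0)
      return false;
    if (StepLen == -1 && (First + 1) % (int)Width != 0)
      return false;
    for (unsigned j = 1; j < Width; ++j)
      if (Mask[i * Width + j] != Mask[i * Width + j - 1] + StepLen)
        return false;
  }
  return true;
}

int main() {
  int Words[16] = {4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11};
  assert(isNByteElemMask(Words, 4, 1));  // word shuffle, increasing order
  int RevH[16]  = {1,0, 3,2, 5,4, 7,6, 9,8, 11,10, 13,12, 15,14};
  assert(isNByteElemMask(RevH, 2, -1));  // bytes reversed per halfword
  return 0;
}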
+bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
+ unsigned &InsertAtByte, bool &Swap, bool IsLE) {
+ if (!isNByteElemShuffleMask(N, 4, 1))
+ return false;
+
// Now we look at mask elements 0,4,8,12
unsigned M0 = N->getMaskElt(0) / 4;
unsigned M1 = N->getMaskElt(4) / 4;
@@ -1608,6 +1727,158 @@ bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
return false;
}
+bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
+ bool &Swap, bool IsLE) {
+ assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
+ // Ensure each byte index of the word is consecutive.
+ if (!isNByteElemShuffleMask(N, 4, 1))
+ return false;
+
+ // Now we look at mask elements 0,4,8,12, which are the beginning of words.
+ unsigned M0 = N->getMaskElt(0) / 4;
+ unsigned M1 = N->getMaskElt(4) / 4;
+ unsigned M2 = N->getMaskElt(8) / 4;
+ unsigned M3 = N->getMaskElt(12) / 4;
+
+ // If both vector operands for the shuffle are the same vector, the mask will
+ // contain only elements from the first one and the second one will be undef.
+ if (N->getOperand(1).isUndef()) {
+ assert(M0 < 4 && "Indexing into an undef vector?");
+ if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
+ return false;
+
+ ShiftElts = IsLE ? (4 - M0) % 4 : M0;
+ Swap = false;
+ return true;
+ }
+
+ // Ensure each word index of the ShuffleVector Mask is consecutive.
+ if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
+ return false;
+
+ if (IsLE) {
+ if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
+ // Input vectors don't need to be swapped if the leading element
+      // of the result is one of the 3 leftmost elements of the second vector
+ // (or if there is no shift to be done at all).
+ Swap = false;
+ ShiftElts = (8 - M0) % 8;
+ } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
+ // Input vectors need to be swapped if the leading element
+      // of the result is one of the 3 leftmost elements of the first vector
+ // (or if we're shifting by 4 - thereby simply swapping the vectors).
+ Swap = true;
+ ShiftElts = (4 - M0) % 4;
+ }
+
+ return true;
+ } else { // BE
+ if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
+ // Input vectors don't need to be swapped if the leading element
+ // of the result is one of the 4 elements of the first vector.
+ Swap = false;
+ ShiftElts = M0;
+ } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
+ // Input vectors need to be swapped if the leading element
+ // of the result is one of the 4 elements of the right vector.
+ Swap = true;
+ ShiftElts = M0 - 4;
+ }
+
+ return true;
+ }
+}
+
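A quick check of the single-input arithmetic above (plain C++): with the second operand undef, word indices {1,2,3,0} pass the consecutive-mod-4 test and yield a little-endian shift of 3.

#include <cassert>

int main() {
  unsigned M0 = 1, M1 = 2, M2 = 3, M3 = 0;
  bool IsLE = true;
  assert(M1 == (M0 + 1) % 4 && M2 == (M1 + 1) % 4 && M3 == (M2 + 1) % 4);
  unsigned ShiftElts = IsLE ? (4 - M0) % 4 : M0; // xxsldwi word shift
  assert(ShiftElts == 3);
  return 0;
}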
+bool static isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
+ assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
+
+ if (!isNByteElemShuffleMask(N, Width, -1))
+ return false;
+
+ for (int i = 0; i < 16; i += Width)
+ if (N->getMaskElt(i) != i + Width - 1)
+ return false;
+
+ return true;
+}
+
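Beyond the per-element decreasing check, the helper pins each element in place; a sketch of the combined condition for the word case (plain C++):

#include <cassert>

int main() {
  const int Width = 4; // the isXXBRWShuffleMask case
  // Bytes reversed inside each word, words themselves not permuted.
  int Mask[16] = {3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12};
  for (int i = 0; i < 16; i += Width)
    assert(Mask[i] == i + Width - 1); // last byte of element i leads it
  return 0;
}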
+bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
+ return isXXBRShuffleMaskHelper(N, 2);
+}
+
+bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
+ return isXXBRShuffleMaskHelper(N, 4);
+}
+
+bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
+ return isXXBRShuffleMaskHelper(N, 8);
+}
+
+bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
+ return isXXBRShuffleMaskHelper(N, 16);
+}
+
+/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
+/// if the inputs to the instruction should be swapped and set \p DM to the
+/// value for the immediate.
+/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
+/// AND element 0 of the result comes from the first input (LE) or second input
+/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
+/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
+/// mask.
+bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
+ bool &Swap, bool IsLE) {
+ assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
+
+ // Ensure each byte index of the double word is consecutive.
+ if (!isNByteElemShuffleMask(N, 8, 1))
+ return false;
+
+ unsigned M0 = N->getMaskElt(0) / 8;
+ unsigned M1 = N->getMaskElt(8) / 8;
+ assert(((M0 | M1) < 4) && "A mask element out of bounds?");
+
+ // If both vector operands for the shuffle are the same vector, the mask will
+ // contain only elements from the first one and the second one will be undef.
+ if (N->getOperand(1).isUndef()) {
+ if ((M0 | M1) < 2) {
+ DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
+ Swap = false;
+ return true;
+ } else
+ return false;
+ }
+
+ if (IsLE) {
+ if (M0 > 1 && M1 < 2) {
+ Swap = false;
+ } else if (M0 < 2 && M1 > 1) {
+ M0 = (M0 + 2) % 4;
+ M1 = (M1 + 2) % 4;
+ Swap = true;
+ } else
+ return false;
+
+    // Note: if control flow reaches here, Swap has already been set above.
+ DM = (((~M1) & 1) << 1) + ((~M0) & 1);
+ return true;
+ } else { // BE
+ if (M0 < 2 && M1 > 1) {
+ Swap = false;
+ } else if (M0 > 1 && M1 < 2) {
+ M0 = (M0 + 2) % 4;
+ M1 = (M1 + 2) % 4;
+ Swap = true;
+ } else
+ return false;
+
+    // Note: if control flow reaches here, Swap has already been set above.
+ DM = (M0 << 1) + (M1 & 1);
+ return true;
+ }
+}
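A worked instance of the little-endian immediate computation above (plain C++): doubleword indices M0 = 3, M1 = 1 need no swap and encode to DM = 0.

#include <cassert>

int main() {
  unsigned M0 = 3, M1 = 1;  // two-input LE case: M0 > 1 and M1 < 2
  bool Swap = false;
  unsigned DM = (((~M1) & 1) << 1) + ((~M0) & 1);
  assert(!Swap && DM == 0); // immediate handed to xxpermdi
  return 0;
}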
+
/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
@@ -1643,7 +1914,6 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
// If the element isn't a constant, bail fully out.
if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
-
if (!UniquedVals[i&(Multiple-1)].getNode())
UniquedVals[i&(Multiple-1)] = N->getOperand(i);
else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
@@ -1763,17 +2033,17 @@ int PPC::isQVALIGNIShuffleMask(SDNode *N) {
/// or 64-bit immediate, and if the value can be accurately represented as a
/// sign extension from a 16-bit value. If so, this returns true and the
/// immediate.
-static bool isIntS16Immediate(SDNode *N, short &Imm) {
+bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
if (!isa<ConstantSDNode>(N))
return false;
- Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
+ Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue();
if (N->getValueType(0) == MVT::i32)
return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
else
return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
}
-static bool isIntS16Immediate(SDValue Op, short &Imm) {
+bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
return isIntS16Immediate(Op.getNode(), Imm);
}
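The test is a round-trip through int16_t; a standalone sketch (plain C++):

#include <cassert>
#include <cstdint>

// A constant fits a D-form displacement iff truncating it to 16 bits and
// sign-extending back reproduces the original value.
static bool isIntS16(int64_t V, int16_t &Imm) {
  Imm = static_cast<int16_t>(V);
  return static_cast<int64_t>(Imm) == V;
}

int main() {
  int16_t Imm;
  assert(isIntS16(-32768, Imm) && Imm == -32768);
  assert(!isIntS16(32768, Imm)); // one past the signed 16-bit maximum
  return 0;
}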
@@ -1783,7 +2053,7 @@ static bool isIntS16Immediate(SDValue Op, short &Imm) {
bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
SDValue &Index,
SelectionDAG &DAG) const {
- short imm = 0;
+ int16_t imm = 0;
if (N.getOpcode() == ISD::ADD) {
if (isIntS16Immediate(N.getOperand(1), imm))
return false; // r+i
@@ -1800,17 +2070,14 @@ bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
// If this is an or of disjoint bitfields, we can codegen this as an add
// (for better address arithmetic) if the LHS and RHS of the OR are provably
// disjoint.
- APInt LHSKnownZero, LHSKnownOne;
- APInt RHSKnownZero, RHSKnownOne;
- DAG.computeKnownBits(N.getOperand(0),
- LHSKnownZero, LHSKnownOne);
-
- if (LHSKnownZero.getBoolValue()) {
- DAG.computeKnownBits(N.getOperand(1),
- RHSKnownZero, RHSKnownOne);
+ KnownBits LHSKnown, RHSKnown;
+ DAG.computeKnownBits(N.getOperand(0), LHSKnown);
+
+ if (LHSKnown.Zero.getBoolValue()) {
+ DAG.computeKnownBits(N.getOperand(1), RHSKnown);
// If all of the bits are known zero on the LHS or RHS, the add won't
// carry.
- if (~(LHSKnownZero | RHSKnownZero) == 0) {
+ if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
Base = N.getOperand(0);
Index = N.getOperand(1);
return true;
@@ -1863,12 +2130,12 @@ static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
/// Returns true if the address N can be represented by a base register plus
/// a signed 16-bit displacement [r+imm], and if it is not better
-/// represented as reg+reg. If Aligned is true, only accept displacements
-/// suitable for STD and friends, i.e. multiples of 4.
+/// represented as reg+reg. If \p Alignment is non-zero, only accept
+/// displacements that are multiples of that value.
bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
SDValue &Base,
SelectionDAG &DAG,
- bool Aligned) const {
+ unsigned Alignment) const {
// FIXME dl should come from parent load or store, not from address
SDLoc dl(N);
// If this can be more profitably realized as r+r, fail.
@@ -1876,9 +2143,9 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
return false;
if (N.getOpcode() == ISD::ADD) {
- short imm = 0;
+ int16_t imm = 0;
if (isIntS16Immediate(N.getOperand(1), imm) &&
- (!Aligned || (imm & 3) == 0)) {
+ (!Alignment || (imm % Alignment) == 0)) {
Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
@@ -1900,16 +2167,16 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
return true; // [&g+r]
}
} else if (N.getOpcode() == ISD::OR) {
- short imm = 0;
+ int16_t imm = 0;
if (isIntS16Immediate(N.getOperand(1), imm) &&
- (!Aligned || (imm & 3) == 0)) {
+ (!Alignment || (imm % Alignment) == 0)) {
// If this is an or of disjoint bitfields, we can codegen this as an add
// (for better address arithmetic) if the LHS and RHS of the OR are
// provably disjoint.
- APInt LHSKnownZero, LHSKnownOne;
- DAG.computeKnownBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);
+ KnownBits LHSKnown;
+ DAG.computeKnownBits(N.getOperand(0), LHSKnown);
- if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
+ if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
// If all of the bits are known zero on the LHS or RHS, the add won't
// carry.
if (FrameIndexSDNode *FI =
@@ -1928,8 +2195,8 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
// If this address fits entirely in a 16-bit sext immediate field, codegen
// this as "d, 0"
- short Imm;
- if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) {
+ int16_t Imm;
+ if (isIntS16Immediate(CN, Imm) && (!Alignment || (Imm % Alignment) == 0)) {
Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
CN->getValueType(0));
@@ -1939,7 +2206,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
// Handle 32-bit sext immediates with LIS + addr mode.
if ((CN->getValueType(0) == MVT::i32 ||
(int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
- (!Aligned || (CN->getZExtValue() & 3) == 0)) {
+ (!Alignment || (CN->getZExtValue() % Alignment) == 0)) {
int Addr = (int)CN->getZExtValue();
// Otherwise, break this down into an LIS + disp.
@@ -1973,10 +2240,15 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
if (SelectAddressRegReg(N, Base, Index, DAG))
return true;
- // If the operand is an addition, always emit this as [r+r], since this is
- // better (for code size, and execution, as the memop does the add for free)
- // than emitting an explicit add.
- if (N.getOpcode() == ISD::ADD) {
+ // If the address is the result of an add, we will utilize the fact that the
+ // address calculation includes an implicit add. However, we can reduce
+ // register pressure if we do not materialize a constant just for use as the
+  // index register.  We only fold away the add if it is not an add of a
+  // single-use value and a single-use 16-bit signed constant.
+ int16_t imm = 0;
+ if (N.getOpcode() == ISD::ADD &&
+ (!isIntS16Immediate(N.getOperand(1), imm) ||
+ !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
Base = N.getOperand(0);
Index = N.getOperand(1);
return true;
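Condensing the predicate above into a truth-functional sketch (plain C++; the parameter names are descriptive, not taken from the patch):

#include <cassert>

// Keep the add implicit in the memop ([r+r]) unless it adds a single-use
// 16-bit signed constant to a single-use value, where a D-form access
// avoids materializing the constant in a register.
static bool useRegReg(bool RhsIsS16Imm, bool RhsOneUse, bool LhsOneUse) {
  return !RhsIsS16Imm || !RhsOneUse || !LhsOneUse;
}

int main() {
  assert(useRegReg(false, true, true)); // add of two variables: [r+r]
  assert(!useRegReg(true, true, true)); // value + s16 imm: prefer D-form
  return 0;
}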
@@ -2026,7 +2298,6 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
}
if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
-
// Common code will reject creating a pre-inc form if the base pointer
// is a frame index, or if N is a store and the base pointer is either
// the same as or a predecessor of the value being stored. Check for
@@ -2050,14 +2321,14 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
// LDU/STU can only handle immediates that are a multiple of 4.
if (VT != MVT::i64) {
- if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, false))
+ if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, 0))
return false;
} else {
// LDU/STU need an address with at least 4-byte alignment.
if (Alignment < 4)
return false;
- if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, true))
+ if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, 4))
return false;
}
@@ -2277,7 +2548,6 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
-
// FIXME: TLS addresses currently use medium model code sequences,
// which is the most useful form. Eventually support for small and
// large models could be added if users need it, at the cost of
@@ -2300,8 +2570,9 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
PPCII::MO_TPREL_HA);
SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
PPCII::MO_TPREL_LO);
- SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
- is64bit ? MVT::i64 : MVT::i32);
+ SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
+ : DAG.getRegister(PPC::R2, MVT::i32);
+
SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
}
@@ -2602,10 +2873,9 @@ SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
// Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl).setChain(Chain)
- .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol("__trampoline_setup", PtrVT),
- std::move(Args));
+ CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
+ CallingConv::C, Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
return CallResult.second;
@@ -2737,7 +3007,7 @@ bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
return false;
}
-bool
+bool
llvm::CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(unsigned &ValNo, MVT &ValVT,
MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
@@ -2752,7 +3022,7 @@ llvm::CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(unsigned &ValNo, MVT &ValVT,
unsigned RegNum = State.getFirstUnallocated(ArgRegs);
int RegsLeft = NumArgRegs - RegNum;
- // Skip if there is not enough registers left for long double type (4 gpr regs
+  // Skip if there are not enough registers left for long double type (4 gpr regs
// in soft float mode) and put long double argument on the stack.
if (RegNum != NumArgRegs && RegsLeft < 4) {
for (int i = 0; i < RegsLeft; i++) {
@@ -4066,7 +4336,7 @@ needStackSlotPassParameters(const PPCSubtarget &Subtarget,
static bool
hasSameArgumentList(const Function *CallerFn, ImmutableCallSite *CS) {
- if (CS->arg_size() != CallerFn->getArgumentList().size())
+ if (CS->arg_size() != CallerFn->arg_size())
return false;
ImmutableCallSite::arg_iterator CalleeArgIter = CS->arg_begin();
@@ -4222,11 +4492,12 @@ namespace {
struct TailCallArgumentInfo {
SDValue Arg;
SDValue FrameIdxOp;
- int FrameIdx;
+ int FrameIdx = 0;
- TailCallArgumentInfo() : FrameIdx(0) {}
+ TailCallArgumentInfo() = default;
};
-}
+
+} // end anonymous namespace
/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static void StoreTailCallArgumentsToStackSlot(
@@ -4406,7 +4677,6 @@ PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass,
SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
ImmutableCallSite *CS, const PPCSubtarget &Subtarget) {
-
bool isPPC64 = Subtarget.isPPC64();
bool isSVR4ABI = Subtarget.isSVR4ABI();
bool isELFv2ABI = Subtarget.isELFv2ABI();
@@ -4602,7 +4872,6 @@ SDValue PPCTargetLowering::LowerCallResult(
SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
-
SmallVector<CCValAssign, 16> RVLocs;
CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
@@ -4649,7 +4918,6 @@ SDValue PPCTargetLowering::FinishCall(
SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
SmallVectorImpl<SDValue> &InVals, ImmutableCallSite *CS) const {
-
std::vector<EVT> NodeTys;
SmallVector<SDValue, 8> Ops;
unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl,
@@ -4909,8 +5177,7 @@ SDValue PPCTargetLowering::LowerCall_32SVR4(
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
- dl);
+ Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
SDValue CallSeqStart = Chain;
// Load the return address and frame pointer so it can be moved somewhere else
@@ -4960,9 +5227,8 @@ SDValue PPCTargetLowering::LowerCall_32SVR4(
Flags, DAG, dl);
// This must go outside the CALLSEQ_START..END.
- SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
- CallSeqStart.getNode()->getOperand(1),
- SDLoc(MemcpyCall));
+ SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
+ SDLoc(MemcpyCall));
DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
NewCallSeqStart.getNode());
Chain = CallSeqStart = NewCallSeqStart;
@@ -5043,9 +5309,9 @@ SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
CallSeqStart.getNode()->getOperand(0),
Flags, DAG, dl);
// The MEMCPY must go outside the CALLSEQ_START..END.
- SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
- CallSeqStart.getNode()->getOperand(1),
- SDLoc(MemcpyCall));
+ int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
+ SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
+ SDLoc(MemcpyCall));
DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
NewCallSeqStart.getNode());
return NewCallSeqStart;
@@ -5059,7 +5325,6 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
ImmutableCallSite *CS) const {
-
bool isELFv2ABI = Subtarget.isELFv2ABI();
bool isLittleEndian = Subtarget.isLittleEndian();
unsigned NumOps = Outs.size();
@@ -5105,10 +5370,30 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
};
const unsigned NumGPRs = array_lengthof(GPR);
- const unsigned NumFPRs = 13;
+ const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
const unsigned NumVRs = array_lengthof(VR);
const unsigned NumQFPRs = NumFPRs;
+ // On ELFv2, we can avoid allocating the parameter area if all the arguments
+ // can be passed to the callee in registers.
+ // For the fast calling convention, there is another check below.
+  // Note: this should be kept consistent with LowerFormalArguments_64SVR4().
+  bool HasParameterArea = !isELFv2ABI || isVarArg ||
+                          CallConv == CallingConv::Fast;
+ if (!HasParameterArea) {
+ unsigned ParamAreaSize = NumGPRs * PtrByteSize;
+ unsigned AvailableFPRs = NumFPRs;
+ unsigned AvailableVRs = NumVRs;
+ unsigned NumBytesTmp = NumBytes;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ if (Outs[i].Flags.isNest()) continue;
+ if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
+ PtrByteSize, LinkageSize, ParamAreaSize,
+ NumBytesTmp, AvailableFPRs, AvailableVRs,
+ Subtarget.hasQPX()))
+ HasParameterArea = true;
+ }
+ }
+
// When using the fast calling convention, we don't provide backing for
// arguments that will be in registers.
unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
@@ -5176,13 +5461,18 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
unsigned NumBytesActuallyUsed = NumBytes;
- // The prolog code of the callee may store up to 8 GPR argument registers to
+ // In the old ELFv1 ABI,
+ // the prolog code of the callee may store up to 8 GPR argument registers to
   // the stack, allowing va_start to index over them in memory if it's varargs.
// Because we cannot tell if this is needed on the caller side, we have to
// conservatively assume that it is needed. As such, make sure we have at
// least enough stack space for the caller to store the 8 GPRs.
- // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
- NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
+ // In the ELFv2 ABI, we allocate the parameter area iff a callee
+ // really requires memory operands, e.g. a vararg function.
+ if (HasParameterArea)
+ NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
+ else
+ NumBytes = LinkageSize;
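The resulting stack-size arithmetic, with the usual 64-bit ELFv2 values (LinkageSize = 32, PtrByteSize = 8) assumed for illustration (plain C++):

#include <algorithm>
#include <cassert>

int main() {
  unsigned LinkageSize = 32, PtrByteSize = 8, NumBytes = 48;
  bool HasParameterArea = false; // ELFv2, not varargs, all args in registers
  unsigned Size = HasParameterArea
                      ? std::max(NumBytes, LinkageSize + 8 * PtrByteSize)
                      : LinkageSize;
  assert(Size == 32); // no backing store for the 8 GPRs is allocated
  return 0;
}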
// Tail call needs the stack to be aligned.
if (getTargetMachine().Options.GuaranteedTailCallOpt &&
@@ -5204,8 +5494,7 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
if (!IsSibCall)
- Chain = DAG.getCALLSEQ_START(Chain,
- DAG.getIntPtrConstant(NumBytes, dl, true), dl);
+ Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
SDValue CallSeqStart = Chain;
// Load the return address and frame pointer so it can be move somewhere else
@@ -5401,6 +5690,8 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
if (CallConv == CallingConv::Fast)
ComputePtrOff();
+ assert(HasParameterArea &&
+ "Parameter area must exist to pass an argument in memory.");
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
true, isTailCall, false, MemOpChains,
TailCallArguments, dl);
@@ -5486,6 +5777,8 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
}
+ assert(HasParameterArea &&
+ "Parameter area must exist to pass an argument in memory.");
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
true, isTailCall, false, MemOpChains,
TailCallArguments, dl);
@@ -5520,6 +5813,8 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
// GPRs when within range. For now, we always put the value in both
// locations (or even all three).
if (isVarArg) {
+ assert(HasParameterArea &&
+ "Parameter area must exist if we have a varargs call.");
// We could elide this store in the case where the object fits
// entirely in R registers. Maybe later.
SDValue Store =
@@ -5552,6 +5847,8 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
if (CallConv == CallingConv::Fast)
ComputePtrOff();
+ assert(HasParameterArea &&
+ "Parameter area must exist to pass an argument in memory.");
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
true, isTailCall, true, MemOpChains,
TailCallArguments, dl);
@@ -5572,6 +5869,8 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
case MVT::v4i1: {
bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32;
if (isVarArg) {
+ assert(HasParameterArea &&
+ "Parameter area must exist if we have a varargs call.");
// We could elide this store in the case where the object fits
// entirely in R registers. Maybe later.
SDValue Store =
@@ -5604,6 +5903,8 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
if (CallConv == CallingConv::Fast)
ComputePtrOff();
+ assert(HasParameterArea &&
+ "Parameter area must exist to pass an argument in memory.");
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
true, isTailCall, true, MemOpChains,
TailCallArguments, dl);
@@ -5618,7 +5919,8 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
}
}
- assert(NumBytesActuallyUsed == ArgOffset);
+ assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
+ "mismatch in size of parameter area");
(void)NumBytesActuallyUsed;
if (!MemOpChains.empty())
@@ -5673,7 +5975,6 @@ SDValue PPCTargetLowering::LowerCall_Darwin(
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
ImmutableCallSite *CS) const {
-
unsigned NumOps = Outs.size();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
@@ -5752,8 +6053,7 @@ SDValue PPCTargetLowering::LowerCall_Darwin(
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
- dl);
+ Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
SDValue CallSeqStart = Chain;
// Load the return address and frame pointer so it can be move somewhere else
@@ -6065,7 +6365,6 @@ PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &dl, SelectionDAG &DAG) const {
-
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
@@ -6133,7 +6432,7 @@ PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
- // Get the corect type for integers.
+ // Get the correct type for integers.
EVT IntVT = Op.getValueType();
// Get the inputs.
@@ -6150,7 +6449,7 @@ SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
// When we pop the dynamic allocation we need to restore the SP link.
SDLoc dl(Op);
- // Get the corect type for pointers.
+ // Get the correct type for pointers.
EVT PtrVT = getPointerTy(DAG.getDataLayout());
// Construct the stack pointer operand.
@@ -6225,7 +6524,7 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SDValue Size = Op.getOperand(1);
SDLoc dl(Op);
- // Get the corect type for pointers.
+ // Get the correct type for pointers.
EVT PtrVT = getPointerTy(DAG.getDataLayout());
// Negate the size.
SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
@@ -6356,6 +6655,7 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
default: break; // SETUO etc aren't handled by fsel.
case ISD::SETNE:
std::swap(TV, FV);
+ LLVM_FALLTHROUGH;
case ISD::SETEQ:
if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
@@ -6367,6 +6667,7 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
case ISD::SETULT:
case ISD::SETLT:
std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
+ LLVM_FALLTHROUGH;
case ISD::SETOGE:
case ISD::SETGE:
if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
@@ -6375,6 +6676,7 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
case ISD::SETUGT:
case ISD::SETGT:
std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
+ LLVM_FALLTHROUGH;
case ISD::SETOLE:
case ISD::SETLE:
if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
@@ -6388,8 +6690,9 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
default: break; // SETUO etc aren't handled by fsel.
case ISD::SETNE:
std::swap(TV, FV);
+ LLVM_FALLTHROUGH;
case ISD::SETEQ:
- Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags);
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
@@ -6399,25 +6702,25 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
case ISD::SETULT:
case ISD::SETLT:
- Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags);
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
case ISD::SETOGE:
case ISD::SETGE:
- Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags);
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
case ISD::SETUGT:
case ISD::SETGT:
- Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, &Flags);
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
case ISD::SETOLE:
case ISD::SETLE:
- Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, &Flags);
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
@@ -6585,6 +6888,7 @@ bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
// Given the head of the old chain, ResChain, insert a token factor containing
// it and NewResChain, and make users of ResChain now be users of that token
// factor.
+// TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
SDValue NewResChain,
SelectionDAG &DAG) const {
@@ -7585,6 +7889,53 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
}
+
+ if (Subtarget.hasVSX() &&
+ PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
+ if (Swap)
+ std::swap(V1, V2);
+ SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
+ SDValue Conv2 =
+ DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
+
+ SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
+ DAG.getConstant(ShiftElts, dl, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
+ }
+
+ if (Subtarget.hasVSX() &&
+ PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
+ if (Swap)
+ std::swap(V1, V2);
+ SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
+ SDValue Conv2 =
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
+
+ SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
+ DAG.getConstant(ShiftElts, dl, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
+ }
+
+ if (Subtarget.hasP9Vector()) {
+ if (PPC::isXXBRHShuffleMask(SVOp)) {
+ SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
+ SDValue ReveHWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v8i16, Conv);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
+ } else if (PPC::isXXBRWShuffleMask(SVOp)) {
+ SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
+ SDValue ReveWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v4i32, Conv);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
+ } else if (PPC::isXXBRDShuffleMask(SVOp)) {
+ SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
+ SDValue ReveDWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v2i64, Conv);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
+ } else if (PPC::isXXBRQShuffleMask(SVOp)) {
+ SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
+ SDValue ReveQWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v1i128, Conv);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
+ }
+ }
+
if (Subtarget.hasVSX()) {
if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG);
@@ -7612,7 +7963,6 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
}
-
}
if (Subtarget.hasQPX()) {
@@ -7792,24 +8142,39 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
bool &isDot, const PPCSubtarget &Subtarget) {
unsigned IntrinsicID =
- cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
+ cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
CompareOpc = -1;
isDot = false;
switch (IntrinsicID) {
- default: return false;
- // Comparison predicates.
- case Intrinsic::ppc_altivec_vcmpbfp_p: CompareOpc = 966; isDot = 1; break;
- case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break;
- case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break;
- case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break;
- case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
+ default:
+ return false;
+ // Comparison predicates.
+ case Intrinsic::ppc_altivec_vcmpbfp_p:
+ CompareOpc = 966;
+ isDot = true;
+ break;
+ case Intrinsic::ppc_altivec_vcmpeqfp_p:
+ CompareOpc = 198;
+ isDot = true;
+ break;
+ case Intrinsic::ppc_altivec_vcmpequb_p:
+ CompareOpc = 6;
+ isDot = true;
+ break;
+ case Intrinsic::ppc_altivec_vcmpequh_p:
+ CompareOpc = 70;
+ isDot = true;
+ break;
+ case Intrinsic::ppc_altivec_vcmpequw_p:
+ CompareOpc = 134;
+ isDot = true;
+ break;
case Intrinsic::ppc_altivec_vcmpequd_p:
if (Subtarget.hasP8Altivec()) {
CompareOpc = 199;
- isDot = 1;
+ isDot = true;
} else
return false;
-
break;
case Intrinsic::ppc_altivec_vcmpneb_p:
case Intrinsic::ppc_altivec_vcmpneh_p:
@@ -7818,45 +8183,80 @@ static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
case Intrinsic::ppc_altivec_vcmpnezh_p:
case Intrinsic::ppc_altivec_vcmpnezw_p:
if (Subtarget.hasP9Altivec()) {
- switch(IntrinsicID) {
- default: llvm_unreachable("Unknown comparison intrinsic.");
- case Intrinsic::ppc_altivec_vcmpneb_p: CompareOpc = 7; break;
- case Intrinsic::ppc_altivec_vcmpneh_p: CompareOpc = 71; break;
- case Intrinsic::ppc_altivec_vcmpnew_p: CompareOpc = 135; break;
- case Intrinsic::ppc_altivec_vcmpnezb_p: CompareOpc = 263; break;
- case Intrinsic::ppc_altivec_vcmpnezh_p: CompareOpc = 327; break;
- case Intrinsic::ppc_altivec_vcmpnezw_p: CompareOpc = 391; break;
+ switch (IntrinsicID) {
+ default:
+ llvm_unreachable("Unknown comparison intrinsic.");
+ case Intrinsic::ppc_altivec_vcmpneb_p:
+ CompareOpc = 7;
+ break;
+ case Intrinsic::ppc_altivec_vcmpneh_p:
+ CompareOpc = 71;
+ break;
+ case Intrinsic::ppc_altivec_vcmpnew_p:
+ CompareOpc = 135;
+ break;
+ case Intrinsic::ppc_altivec_vcmpnezb_p:
+ CompareOpc = 263;
+ break;
+ case Intrinsic::ppc_altivec_vcmpnezh_p:
+ CompareOpc = 327;
+ break;
+ case Intrinsic::ppc_altivec_vcmpnezw_p:
+ CompareOpc = 391;
+ break;
}
- isDot = 1;
+ isDot = true;
} else
return false;
-
break;
- case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
- case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
- case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
- case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
- case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgefp_p:
+ CompareOpc = 454;
+ isDot = true;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtfp_p:
+ CompareOpc = 710;
+ isDot = true;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtsb_p:
+ CompareOpc = 774;
+ isDot = true;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtsh_p:
+ CompareOpc = 838;
+ isDot = true;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtsw_p:
+ CompareOpc = 902;
+ isDot = true;
+ break;
case Intrinsic::ppc_altivec_vcmpgtsd_p:
if (Subtarget.hasP8Altivec()) {
CompareOpc = 967;
- isDot = 1;
+ isDot = true;
} else
return false;
-
break;
- case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
- case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
- case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtub_p:
+ CompareOpc = 518;
+ isDot = true;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtuh_p:
+ CompareOpc = 582;
+ isDot = true;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtuw_p:
+ CompareOpc = 646;
+ isDot = true;
+ break;
case Intrinsic::ppc_altivec_vcmpgtud_p:
if (Subtarget.hasP8Altivec()) {
CompareOpc = 711;
- isDot = 1;
+ isDot = true;
} else
return false;
-
break;
- // VSX predicate comparisons use the same infrastructure
+
+ // VSX predicate comparisons use the same infrastructure
case Intrinsic::ppc_vsx_xvcmpeqdp_p:
case Intrinsic::ppc_vsx_xvcmpgedp_p:
case Intrinsic::ppc_vsx_xvcmpgtdp_p:
@@ -7865,33 +8265,51 @@ static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
case Intrinsic::ppc_vsx_xvcmpgtsp_p:
if (Subtarget.hasVSX()) {
switch (IntrinsicID) {
- case Intrinsic::ppc_vsx_xvcmpeqdp_p: CompareOpc = 99; break;
- case Intrinsic::ppc_vsx_xvcmpgedp_p: CompareOpc = 115; break;
- case Intrinsic::ppc_vsx_xvcmpgtdp_p: CompareOpc = 107; break;
- case Intrinsic::ppc_vsx_xvcmpeqsp_p: CompareOpc = 67; break;
- case Intrinsic::ppc_vsx_xvcmpgesp_p: CompareOpc = 83; break;
- case Intrinsic::ppc_vsx_xvcmpgtsp_p: CompareOpc = 75; break;
+ case Intrinsic::ppc_vsx_xvcmpeqdp_p:
+ CompareOpc = 99;
+ break;
+ case Intrinsic::ppc_vsx_xvcmpgedp_p:
+ CompareOpc = 115;
+ break;
+ case Intrinsic::ppc_vsx_xvcmpgtdp_p:
+ CompareOpc = 107;
+ break;
+ case Intrinsic::ppc_vsx_xvcmpeqsp_p:
+ CompareOpc = 67;
+ break;
+ case Intrinsic::ppc_vsx_xvcmpgesp_p:
+ CompareOpc = 83;
+ break;
+ case Intrinsic::ppc_vsx_xvcmpgtsp_p:
+ CompareOpc = 75;
+ break;
}
- isDot = 1;
- }
- else
+ isDot = true;
+ } else
return false;
-
break;
- // Normal Comparisons.
- case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break;
- case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break;
- case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break;
- case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break;
- case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break;
+ // Normal Comparisons.
+ case Intrinsic::ppc_altivec_vcmpbfp:
+ CompareOpc = 966;
+ break;
+ case Intrinsic::ppc_altivec_vcmpeqfp:
+ CompareOpc = 198;
+ break;
+ case Intrinsic::ppc_altivec_vcmpequb:
+ CompareOpc = 6;
+ break;
+ case Intrinsic::ppc_altivec_vcmpequh:
+ CompareOpc = 70;
+ break;
+ case Intrinsic::ppc_altivec_vcmpequw:
+ CompareOpc = 134;
+ break;
case Intrinsic::ppc_altivec_vcmpequd:
- if (Subtarget.hasP8Altivec()) {
+ if (Subtarget.hasP8Altivec())
CompareOpc = 199;
- isDot = 0;
- } else
+ else
return false;
-
break;
case Intrinsic::ppc_altivec_vcmpneb:
case Intrinsic::ppc_altivec_vcmpneh:
@@ -7899,43 +8317,67 @@ static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
case Intrinsic::ppc_altivec_vcmpnezb:
case Intrinsic::ppc_altivec_vcmpnezh:
case Intrinsic::ppc_altivec_vcmpnezw:
- if (Subtarget.hasP9Altivec()) {
+ if (Subtarget.hasP9Altivec())
switch (IntrinsicID) {
- default: llvm_unreachable("Unknown comparison intrinsic.");
- case Intrinsic::ppc_altivec_vcmpneb: CompareOpc = 7; break;
- case Intrinsic::ppc_altivec_vcmpneh: CompareOpc = 71; break;
- case Intrinsic::ppc_altivec_vcmpnew: CompareOpc = 135; break;
- case Intrinsic::ppc_altivec_vcmpnezb: CompareOpc = 263; break;
- case Intrinsic::ppc_altivec_vcmpnezh: CompareOpc = 327; break;
- case Intrinsic::ppc_altivec_vcmpnezw: CompareOpc = 391; break;
+ default:
+ llvm_unreachable("Unknown comparison intrinsic.");
+ case Intrinsic::ppc_altivec_vcmpneb:
+ CompareOpc = 7;
+ break;
+ case Intrinsic::ppc_altivec_vcmpneh:
+ CompareOpc = 71;
+ break;
+ case Intrinsic::ppc_altivec_vcmpnew:
+ CompareOpc = 135;
+ break;
+ case Intrinsic::ppc_altivec_vcmpnezb:
+ CompareOpc = 263;
+ break;
+ case Intrinsic::ppc_altivec_vcmpnezh:
+ CompareOpc = 327;
+ break;
+ case Intrinsic::ppc_altivec_vcmpnezw:
+ CompareOpc = 391;
+ break;
}
- isDot = 0;
- } else
+ else
return false;
break;
- case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break;
- case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break;
- case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break;
- case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break;
- case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgefp:
+ CompareOpc = 454;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtfp:
+ CompareOpc = 710;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtsb:
+ CompareOpc = 774;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtsh:
+ CompareOpc = 838;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtsw:
+ CompareOpc = 902;
+ break;
case Intrinsic::ppc_altivec_vcmpgtsd:
- if (Subtarget.hasP8Altivec()) {
+ if (Subtarget.hasP8Altivec())
CompareOpc = 967;
- isDot = 0;
- } else
+ else
return false;
-
break;
- case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break;
- case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break;
- case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtub:
+ CompareOpc = 518;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtuh:
+ CompareOpc = 582;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtuw:
+ CompareOpc = 646;
+ break;
case Intrinsic::ppc_altivec_vcmpgtud:
- if (Subtarget.hasP8Altivec()) {
+ if (Subtarget.hasP8Altivec())
CompareOpc = 711;
- isDot = 0;
- } else
+ else
return false;
-
break;
}
return true;
@@ -7950,9 +8392,9 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
if (IntrinsicID == Intrinsic::thread_pointer) {
// Reads the thread pointer register, used for __builtin_thread_pointer.
- bool is64bit = Subtarget.isPPC64();
- return DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
- is64bit ? MVT::i64 : MVT::i32);
+ if (Subtarget.isPPC64())
+ return DAG.getRegister(PPC::X13, MVT::i64);
+ return DAG.getRegister(PPC::R2, MVT::i32);
}
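
As a quick usage sketch (hedged; it only restates the register mapping in the
comment above): this path serves the __builtin_thread_pointer() builtin.

    // Minimal sketch, assuming a PowerPC target; lowers to a register copy.
    void *current_thread_pointer(void) {
      return __builtin_thread_pointer(); // X13 on 64-bit, R2 on 32-bit
    }
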
// If this is a lowered altivec predicate compare, CompareOpc is set to the
@@ -8019,6 +8461,40 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return Flags;
}
+SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
+ SelectionDAG &DAG) const {
+ // SelectionDAGBuilder::visitTargetIntrinsic may insert an extra chain
+ // operand at the beginning of the argument list.
+ int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
+ SDLoc DL(Op);
+ switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) {
+ case Intrinsic::ppc_cfence: {
+ assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
+ assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
+ return SDValue(DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
+ Op.getOperand(ArgStart + 1)),
+ Op.getOperand(0)),
+ 0);
+ }
+ default:
+ break;
+ }
+ return SDValue();
+}
+
+SDValue PPCTargetLowering::LowerREM(SDValue Op, SelectionDAG &DAG) const {
+ // Check for a DIV with the same operands as this REM.
+ for (auto UI : Op.getOperand(1)->uses()) {
+ if ((Op.getOpcode() == ISD::SREM && UI->getOpcode() == ISD::SDIV) ||
+ (Op.getOpcode() == ISD::UREM && UI->getOpcode() == ISD::UDIV))
+ if (UI->getOperand(0) == Op.getOperand(0) &&
+ UI->getOperand(1) == Op.getOperand(1))
+ return SDValue();
+ }
+ return Op;
+}
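
A hedged illustration of the intent (the helper names below are mine): when a
division with the same operands is live, returning SDValue() lets the generic
expansion compute the remainder from it, while a lone remainder stays legal and
can select the ISA 3.0 modsd/modud instructions added elsewhere in this change.

    // Sketch only, assuming a POWER9 (ISA 3.0) target.
    long quot_and_rem(long a, long b, long *r) {
      *r = a % b;   // SREM with a matching SDIV: expanded as a - (a / b) * b
      return a / b; // a single divd feeds both results
    }
    long rem_only(long a, long b) {
      return a % b; // no matching SDIV: kept legal, selected as modsd
    }
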
+
SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -8044,7 +8520,7 @@ SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
}
SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
- SelectionDAG &DAG) const {
+ SelectionDAG &DAG) const {
SDLoc dl(Op);
// Create a stack slot that is 16-byte aligned.
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
@@ -8484,6 +8960,12 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
// Frame & Return address.
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+
+ case ISD::INTRINSIC_VOID:
+ return LowerINTRINSIC_VOID(Op, DAG);
+ case ISD::SREM:
+ case ISD::UREM:
+ return LowerREM(Op, DAG);
}
}
@@ -8575,9 +9057,9 @@ static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
// The mappings for emitLeading/TrailingFence are taken from
// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
-Instruction* PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
- AtomicOrdering Ord, bool IsStore,
- bool IsLoad) const {
+Instruction *PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
+ Instruction *Inst,
+ AtomicOrdering Ord) const {
if (Ord == AtomicOrdering::SequentiallyConsistent)
return callIntrinsic(Builder, Intrinsic::ppc_sync);
if (isReleaseOrStronger(Ord))
@@ -8585,15 +9067,22 @@ Instruction* PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
return nullptr;
}
-Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
- AtomicOrdering Ord, bool IsStore,
- bool IsLoad) const {
- if (IsLoad && isAcquireOrStronger(Ord))
+Instruction *PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
+ Instruction *Inst,
+ AtomicOrdering Ord) const {
+ if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
+ // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
+ // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
+ // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
+ if (isa<LoadInst>(Inst) && Subtarget.isPPC64())
+ return Builder.CreateCall(
+ Intrinsic::getDeclaration(
+ Builder.GetInsertBlock()->getParent()->getParent(),
+ Intrinsic::ppc_cfence, {Inst->getType()}),
+ {Inst});
+ // FIXME: Can use isync for rmw operations.
return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
- // FIXME: this is too conservative, a dependent branch + isync is enough.
- // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
- // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
- // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
+ }
return nullptr;
}
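
A hedged source-level view of the effect (plain C++; nothing here is specific
to this patch beyond the mapping the code above implements): an acquire load on
PPC64 is now followed by the cmp+bne+isync sequence that llvm.ppc.cfence
expands to, instead of an lwsync.

    // Minimal sketch; the trailing fence choice follows emitTrailingFence.
    #include <atomic>
    long acquire_load(std::atomic<long> &X) {
      return X.load(std::memory_order_acquire); // ld; cmpd; bne-; isync
    }
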
@@ -8889,6 +9378,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
MachineBasicBlock *MBB) const {
DebugLoc DL = MI.getDebugLoc();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
@@ -8902,7 +9392,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
unsigned DstReg = MI.getOperand(0).getReg();
const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
- assert(RC->hasType(MVT::i32) && "Invalid destination!");
+ assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
unsigned mainDstReg = MRI.createVirtualRegister(RC);
unsigned restoreDstReg = MRI.createVirtualRegister(RC);
@@ -8985,7 +9475,6 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
// Setup
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
- const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
MIB.addRegMask(TRI->getNoPreservedMask());
BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
@@ -9174,10 +9663,9 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineFunction *F = BB->getParent();
- if (Subtarget.hasISEL() &&
- (MI.getOpcode() == PPC::SELECT_CC_I4 ||
+ if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
MI.getOpcode() == PPC::SELECT_CC_I8 ||
- MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) {
+ MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8) {
SmallVector<MachineOperand, 2> Cond;
if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
MI.getOpcode() == PPC::SELECT_CC_I8)
@@ -9417,7 +9905,6 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
BB = EmitAtomicBinary(MI, BB, 4, 0);
else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
BB = EmitAtomicBinary(MI, BB, 8, 0);
-
else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
(Subtarget.hasPartwordAtomics() &&
@@ -10028,14 +10515,12 @@ static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
return false;
}
-
/// This function is called when we have proved that a SETCC node can be replaced
/// by subtraction (and other supporting instructions) so that the result of
/// the comparison is kept in a GPR instead of a CR. This function is purely for
/// codegen purposes and has some flags to guide the codegen process.
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
bool Swap, SDLoc &DL, SelectionDAG &DAG) {
-
assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
// Zero extend the operands to the largest legal integer. Originally, they
@@ -10068,7 +10553,6 @@ static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
DAGCombinerInfo &DCI) const {
-
assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
SelectionDAG &DAG = DCI.DAG;
@@ -10155,17 +10639,16 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
} else {
// This is neither a signed nor an unsigned comparison, just make sure
// that the high bits are equal.
- APInt Op1Zero, Op1One;
- APInt Op2Zero, Op2One;
- DAG.computeKnownBits(N->getOperand(0), Op1Zero, Op1One);
- DAG.computeKnownBits(N->getOperand(1), Op2Zero, Op2One);
+ KnownBits Op1Known, Op2Known;
+ DAG.computeKnownBits(N->getOperand(0), Op1Known);
+ DAG.computeKnownBits(N->getOperand(1), Op2Known);
// We don't really care about what is known about the first bit (if
// anything), so clear it in all masks prior to comparing them.
- Op1Zero.clearBit(0); Op1One.clearBit(0);
- Op2Zero.clearBit(0); Op2One.clearBit(0);
+ Op1Known.Zero.clearBit(0); Op1Known.One.clearBit(0);
+ Op2Known.Zero.clearBit(0); Op2Known.One.clearBit(0);
- if (Op1Zero != Op2Zero || Op1One != Op2One)
+ if (Op1Known.Zero != Op2Known.Zero || Op1Known.One != Op2Known.One)
return SDValue();
}
}
@@ -10842,6 +11325,132 @@ static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
+// This function adds the vector_shuffle needed to place the elements
+// of the vector extract at the positions specified by the CorrectElems
+// encoding.
+static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
+ SDValue Input, uint64_t Elems,
+ uint64_t CorrectElems) {
+ SDLoc dl(N);
+
+ unsigned NumElems = Input.getValueType().getVectorNumElements();
+ SmallVector<int, 16> ShuffleMask(NumElems, -1);
+
+ // Knowing the element indices being extracted from the original
+ // vector and the order in which they're being inserted, just put
+ // them at the element indices required by the instruction.
+ for (unsigned i = 0; i < N->getNumOperands(); i++) {
+ if (DAG.getDataLayout().isLittleEndian())
+ ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
+ else
+ ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
+ CorrectElems = CorrectElems >> 8;
+ Elems = Elems >> 8;
+ }
+
+ SDValue Shuffle =
+ DAG.getVectorShuffle(Input.getValueType(), dl, Input,
+ DAG.getUNDEF(Input.getValueType()), ShuffleMask);
+
+ EVT Ty = N->getValueType(0);
+ SDValue BV = DAG.getNode(PPCISD::SExtVElems, dl, Ty, Shuffle);
+ return BV;
+}
+
+// Look for build vector patterns whose input operands come from sign-extended
+// vector_extract elements of specific indices. If the correct indices aren't
+// used, add a vector shuffle to fix up the indices and create a new
+// PPCISD::SExtVElems node, which selects the vector sign-extend instructions
+// during instruction selection.
+static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
+ // This array encodes the indices that the vector sign extend instructions
+ // extract from when extending from one type to another for both BE and LE.
+ // The right nibble of each byte corresponds to the LE indices,
+ // and the left nibble of each byte corresponds to the BE indices.
+ // For example: 0x3074B8FC byte->word
+ // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
+ // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
+ // For example: 0x000070F8 byte->double word
+ // For LE: the allowed indices are: 0x0,0x8
+ // For BE: the allowed indices are: 0x7,0xF
+ uint64_t TargetElems[] = {
+ 0x3074B8FC, // b->w
+ 0x000070F8, // b->d
+ 0x10325476, // h->w
+ 0x00003074, // h->d
+ 0x00001032, // w->d
+ };
+
+ uint64_t Elems = 0;
+ int Index;
+ SDValue Input;
+
+ auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
+ if (!Op)
+ return false;
+ if (Op.getOpcode() != ISD::SIGN_EXTEND)
+ return false;
+
+ SDValue Extract = Op.getOperand(0);
+ if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return false;
+
+ ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
+ if (!ExtOp)
+ return false;
+
+ Index = ExtOp->getZExtValue();
+ if (Input && Input != Extract.getOperand(0))
+ return false;
+
+ if (!Input)
+ Input = Extract.getOperand(0);
+
+ Elems = Elems << 8;
+ Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
+ Elems |= Index;
+
+ return true;
+ };
+
+ // If the build vector operands aren't sign-extended vector extracts
+ // of the same input vector, then return.
+ for (unsigned i = 0; i < N->getNumOperands(); i++) {
+ if (!isSExtOfVecExtract(N->getOperand(i))) {
+ return SDValue();
+ }
+ }
+
+ // If the vector extract indices are not correct, add the appropriate
+ // vector_shuffle.
+ int TgtElemArrayIdx;
+ int InputSize = Input.getValueType().getScalarSizeInBits();
+ int OutputSize = N->getValueType(0).getScalarSizeInBits();
+ if (InputSize + OutputSize == 40)
+ TgtElemArrayIdx = 0;
+ else if (InputSize + OutputSize == 72)
+ TgtElemArrayIdx = 1;
+ else if (InputSize + OutputSize == 48)
+ TgtElemArrayIdx = 2;
+ else if (InputSize + OutputSize == 80)
+ TgtElemArrayIdx = 3;
+ else if (InputSize + OutputSize == 96)
+ TgtElemArrayIdx = 4;
+ else
+ return SDValue();
+
+ uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
+ CorrectElems = DAG.getDataLayout().isLittleEndian()
+ ? CorrectElems & 0x0F0F0F0F0F0F0F0F
+ : CorrectElems & 0xF0F0F0F0F0F0F0F0;
+ if (Elems != CorrectElems) {
+ return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
+ }
+
+ // Regular lowering will catch cases where a shuffle is not needed.
+ return SDValue();
+}
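
A hedged worked example of the nibble bookkeeping (standalone, with made-up
extract indices): in the little-endian byte-to-word case the allowed encoding
is 0x3074B8FC masked to 0x0004080C, so extracts of elements 1, 5, 9 and 13
accumulate Elems = 0x0105090D and trigger the fix-up shuffle.

    // Self-contained sketch mirroring the LE branch of addShuffleForVecExtend.
    #include <cstdint>
    #include <cstdio>
    int main() {
      uint64_t Correct = 0x3074B8FCull & 0x0F0F0F0F0F0F0F0Full; // 0x0004080C
      uint64_t Elems = 0x0105090Dull; // extracts of elements 1, 5, 9, 13
      int Mask[16];
      for (int &M : Mask)
        M = -1;
      for (int i = 0; i < 4; ++i) { // one byte per BUILD_VECTOR operand
        Mask[Correct & 0xF] = Elems & 0xF; // first iteration: Mask[12] = 13
        Correct >>= 8;
        Elems >>= 8;
      }
      for (int M : Mask)
        printf("%d ", M); // 1 -1 -1 -1 5 -1 -1 -1 9 -1 -1 -1 13 -1 -1 -1
      return 0;
    }
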
+
SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
DAGCombinerInfo &DCI) const {
assert(N->getOpcode() == ISD::BUILD_VECTOR &&
@@ -10869,6 +11478,15 @@ SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
if (Reduced)
return Reduced;
+ // If we're building a vector out of extended elements from another vector,
+ // we can use the P9 vector integer extend instructions.
+ if (Subtarget.hasP9Altivec()) {
+ Reduced = combineBVOfVecSExt(N, DAG);
+ if (Reduced)
+ return Reduced;
+ }
+
if (N->getValueType(0) != MVT::v2f64)
return SDValue();
@@ -11053,6 +11671,14 @@ SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
}
MVT VecTy = N->getValueType(0).getSimpleVT();
+
+ // Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is
+ // 16-byte aligned and the type is a vector with elements up to 4 bytes.
+ if (Subtarget.needsSwapsForVSXMemOps() && !(MMO->getAlignment() % 16) &&
+ VecTy.getScalarSizeInBits() <= 32) {
+ return SDValue();
+ }
+
SDValue LoadOps[] = { Chain, Base };
SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
DAG.getVTList(MVT::v2f64, MVT::Other),
@@ -11117,6 +11743,13 @@ SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
SDValue Src = N->getOperand(SrcOpnd);
MVT VecTy = Src.getValueType().getSimpleVT();
+ // Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the store is
+ // 16-byte aligned and the type is a vector with elements up to 4 bytes.
+ if (Subtarget.needsSwapsForVSXMemOps() && !(MMO->getAlignment() % 16) &&
+ VecTy.getScalarSizeInBits() <= 32) {
+ return SDValue();
+ }
+
// All stores are done as v2f64 and possible bit cast.
if (VecTy != MVT::v2f64) {
Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
@@ -11141,6 +11774,12 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
SDLoc dl(N);
switch (N->getOpcode()) {
default: break;
+ case ISD::SHL:
+ return combineSHL(N, DCI);
+ case ISD::SRA:
+ return combineSRA(N, DCI);
+ case ISD::SRL:
+ return combineSRL(N, DCI);
case PPCISD::SHL:
if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
return N->getOperand(0);
@@ -11227,9 +11866,20 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
if (BSwapOp.getValueType() == MVT::i16)
BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
+ // If the type of the BSWAP operand is wider than the stored memory width,
+ // it needs to be shifted right before the STBRX.
+ EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
+ if (Op1VT.bitsGT(mVT)) {
+ int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
+ BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
+ DAG.getConstant(Shift, dl, MVT::i32));
+ // Need to truncate if this is a bswap of i64 stored as i32/i16.
+ if (Op1VT == MVT::i64)
+ BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
+ }
+
SDValue Ops[] = {
- N->getOperand(0), BSwapOp, N->getOperand(2),
- DAG.getValueType(N->getOperand(1).getValueType())
+ N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
};
return
DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
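
A hedged sketch of the case the new shift handles (the function name is mine):
a truncating store of a wider byte swap keeps the interesting bytes in the low
half of the swapped value, which correspond to the high half of the original,
so the STBRX input is the original value shifted right by the width difference.

    // Standalone check of the identity the combine relies on:
    // (uint16_t)__builtin_bswap32(V) == __builtin_bswap16(V >> 16).
    #include <cstdint>
    #include <cstring>
    void store16_of_bswap32(uint32_t V, void *P) {
      uint16_t T = (uint16_t)__builtin_bswap32(V); // what the IR stores
      std::memcpy(P, &T, sizeof T); // sthbrx would be fed (V >> 16) instead
    }
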
@@ -11570,7 +12220,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
break;
- case ISD::INTRINSIC_W_CHAIN: {
+ case ISD::INTRINSIC_W_CHAIN:
// For little endian, VSX loads require generating lxvd2x/xxswapd.
// Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
if (Subtarget.needsSwapsForVSXMemOps()) {
@@ -11583,8 +12233,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
}
break;
- }
- case ISD::INTRINSIC_VOID: {
+ case ISD::INTRINSIC_VOID:
// For little endian, VSX stores require generating xxswapd/stxvd2x.
// Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
if (Subtarget.needsSwapsForVSXMemOps()) {
@@ -11597,7 +12246,6 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
}
break;
- }
case ISD::BSWAP:
// Turn BSWAP (LOAD) -> lhbrx/lwbrx.
if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
@@ -11635,9 +12283,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
// Return N so it doesn't get rechecked!
return SDValue(N, 0);
}
-
break;
- case PPCISD::VCMP: {
+ case PPCISD::VCMP:
// If a VCMPo node already exists with exactly the same operands as this
// node, use its result instead of this node (VCMPo computes both a CR6 and
// a normal output).
@@ -11687,7 +12334,6 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
return SDValue(VCMPoNode, 0);
}
break;
- }
case ISD::BRCOND: {
SDValue Cond = N->getOperand(1);
SDValue Target = N->getOperand(2);
@@ -11845,17 +12491,17 @@ PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
//===----------------------------------------------------------------------===//
void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
- APInt &KnownZero,
- APInt &KnownOne,
+ KnownBits &Known,
+ const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth) const {
- KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0);
+ Known.resetAll();
switch (Op.getOpcode()) {
default: break;
case PPCISD::LBRX: {
// lhbrx is known to have the top bits cleared out.
if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
- KnownZero = 0xFFFF0000;
+ Known.Zero = 0xFFFF0000;
break;
}
case ISD::INTRINSIC_WO_CHAIN: {
@@ -11877,7 +12523,7 @@ void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
case Intrinsic::ppc_altivec_vcmpgtuh_p:
case Intrinsic::ppc_altivec_vcmpgtuw_p:
case Intrinsic::ppc_altivec_vcmpgtud_p:
- KnownZero = ~1U; // All bits but the low one are known to be zero.
+ Known.Zero = ~1U; // All bits but the low one are known to be zero.
break;
}
}
@@ -12295,7 +12941,6 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I,
unsigned Intrinsic) const {
-
switch (Intrinsic) {
case Intrinsic::ppc_qpx_qvlfd:
case Intrinsic::ppc_qpx_qvlfs:
@@ -12753,7 +13398,6 @@ void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
}
bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
-
if (!VT.isSimple() || !Subtarget.hasVSX())
return false;
@@ -12768,3 +13412,58 @@ bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
return Imm.isPosZero();
}
}
+
+// For vector shift operation op, fold
+// (op x, (and y, (numbits(x) - 1))) -> (target op x, y)
+static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
+ SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ unsigned OpSizeInBits = VT.getScalarSizeInBits();
+ unsigned Opcode = N->getOpcode();
+ unsigned TargetOpcode;
+
+ switch (Opcode) {
+ default:
+ llvm_unreachable("Unexpected shift operation");
+ case ISD::SHL:
+ TargetOpcode = PPCISD::SHL;
+ break;
+ case ISD::SRL:
+ TargetOpcode = PPCISD::SRL;
+ break;
+ case ISD::SRA:
+ TargetOpcode = PPCISD::SRA;
+ break;
+ }
+
+ if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
+ N1->getOpcode() == ISD::AND)
+ if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
+ if (Mask->getZExtValue() == OpSizeInBits - 1)
+ return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
+
+ return SDValue();
+}
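
A hedged scalar model of why stripping the AND is sound (it restates the modulo
semantics documented for the PPCISD shift nodes in PPCISelLowering.h, not a new
claim): the hardware vector shifts already use only the low log2(width) bits of
each shift amount, so masking with width - 1 first is redundant.

    // Per-lane sketch for 32-bit elements; vslw reads only the low 5 bits
    // of each shift amount, so the explicit mask below changes nothing.
    #include <cstdint>
    uint32_t lane_shl(uint32_t X, uint32_t Y) {
      return X << (Y & 31u);
    }
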
+
+SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
+ if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
+ return Value;
+
+ return SDValue();
+}
+
+SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
+ if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
+ return Value;
+
+ return SDValue();
+}
+
+SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
+ if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
+ return Value;
+
+ return SDValue();
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 05acd25..49d7d82 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -17,13 +17,26 @@
#include "PPC.h"
#include "PPCInstrInfo.h"
-#include "PPCRegisterInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Type.h"
#include "llvm/Target/TargetLowering.h"
+#include <utility>
namespace llvm {
+
namespace PPCISD {
+
enum NodeType : unsigned {
// Start the numbering where the builtin ops and target ops leave off.
FIRST_NUMBER = ISD::BUILTIN_OP_END,
@@ -54,6 +67,10 @@ namespace llvm {
/// VSFRC that is sign-extended from ByteWidth to a 64-bit integer.
VEXTS,
+ /// SExtVElems, takes an input vector of a smaller type and sign
+ /// extends to an output vector of a larger type.
+ SExtVElems,
+
/// Reciprocal estimate instructions (unary FP ops).
FRE, FRSQRTE,
@@ -73,10 +90,18 @@ namespace llvm {
///
XXINSERT,
+ /// XXREVERSE - The PPC VSX reverse instruction
+ ///
+ XXREVERSE,
+
/// VECSHL - The PPC VSX shift left instruction
///
VECSHL,
+ /// XXPERMDI - The PPC XXPERMDI instruction
+ ///
+ XXPERMDI,
+
/// The CMPB instruction (takes two operands of i32 or i64).
CMPB,
@@ -104,9 +129,13 @@ namespace llvm {
/// at function entry, used for PIC code.
GlobalBaseReg,
- /// These nodes represent the 32-bit PPC shifts that operate on 6-bit
- /// shift amounts. These nodes are generated by the multi-precision shift
- /// code.
+ /// These nodes represent PPC shifts.
+ ///
+ /// For scalar types, only the last `n + 1` bits of the shift amounts
+ /// are used, where n is log2(sizeof(element) * 8). See sld/slw, etc.
+ /// for exact behaviors.
+ ///
+ /// For vector types, only the last n bits are used. See vsld.
SRL, SRA, SHL,
/// The combination of sra[wd]i and addze used to implemented signed
@@ -398,10 +427,12 @@ namespace llvm {
/// the last operand.
TOC_ENTRY
};
- }
+
+ } // end namespace PPCISD
/// Define some predicates that are used for node matching.
namespace PPC {
+
/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
@@ -431,7 +462,32 @@ namespace llvm {
/// a VMRGEW or VMRGOW instruction
bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
unsigned ShuffleKind, SelectionDAG &DAG);
-
+ /// isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable
+ /// for a XXSLDWI instruction.
+ bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
+ bool &Swap, bool IsLE);
+
+ /// isXXBRHShuffleMask - Return true if this is a shuffle mask suitable
+ /// for a XXBRH instruction.
+ bool isXXBRHShuffleMask(ShuffleVectorSDNode *N);
+
+ /// isXXBRWShuffleMask - Return true if this is a shuffle mask suitable
+ /// for a XXBRW instruction.
+ bool isXXBRWShuffleMask(ShuffleVectorSDNode *N);
+
+ /// isXXBRDShuffleMask - Return true if this is a shuffle mask suitable
+ /// for a XXBRD instruction.
+ bool isXXBRDShuffleMask(ShuffleVectorSDNode *N);
+
+ /// isXXBRQShuffleMask - Return true if this is a shuffle mask suitable
+ /// for a XXBRQ instruction.
+ bool isXXBRQShuffleMask(ShuffleVectorSDNode *N);
+
+ /// isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable
+ /// for a XXPERMDI instruction.
+ bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
+ bool &Swap, bool IsLE);
+
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the
/// shift amount, otherwise return -1.
int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
@@ -465,7 +521,8 @@ namespace llvm {
/// If this is a qvaligni shuffle mask, return the shift
/// amount, otherwise return -1.
int isQVALIGNIShuffleMask(SDNode *N);
- }
+
+ } // end namespace PPC
class PPCTargetLowering : public TargetLowering {
const PPCSubtarget &Subtarget;
@@ -492,6 +549,7 @@ namespace llvm {
return TypeWidenVector;
return TargetLoweringBase::getPreferredVectorAction(VT);
}
+
bool useSoftFloat() const override;
MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
@@ -514,6 +572,10 @@ namespace llvm {
return true;
}
+ bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
+ return VT.isScalarInteger();
+ }
+
bool supportSplitCSR(MachineFunction *MF) const override {
return
MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&
@@ -554,7 +616,7 @@ namespace llvm {
/// is not better represented as reg+reg. If Alignment is non-zero, only
/// accept displacements that are multiples of that value.
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base,
- SelectionDAG &DAG, bool Aligned) const;
+ SelectionDAG &DAG, unsigned Alignment) const;
/// SelectAddressRegRegOnly - Given the specified addressed, force it to be
/// represented as an indexed [r+r] operation.
@@ -585,8 +647,8 @@ namespace llvm {
SelectionDAG &DAG) const override;
void computeKnownBitsForTargetNode(const SDValue Op,
- APInt &KnownZero,
- APInt &KnownOne,
+ KnownBits &Known,
+ const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth = 0) const override;
@@ -596,10 +658,10 @@ namespace llvm {
return true;
}
- Instruction* emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
- bool IsStore, bool IsLoad) const override;
- Instruction* emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
- bool IsStore, bool IsLoad) const override;
+ Instruction *emitLeadingFence(IRBuilder<> &Builder, Instruction *Inst,
+ AtomicOrdering Ord) const override;
+ Instruction *emitTrailingFence(IRBuilder<> &Builder, Instruction *Inst,
+ AtomicOrdering Ord) const override;
MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr &MI,
@@ -694,6 +756,10 @@ namespace llvm {
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const override;
+ bool convertSelectOfConstantsToMath() const override {
+ return true;
+ }
+
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
bool getTgtMemIntrinsic(IntrinsicInfo &Info,
@@ -785,15 +851,13 @@ namespace llvm {
SDValue Chain;
SDValue ResChain;
MachinePointerInfo MPI;
- bool IsDereferenceable;
- bool IsInvariant;
- unsigned Alignment;
+ bool IsDereferenceable = false;
+ bool IsInvariant = false;
+ unsigned Alignment = 0;
AAMDNodes AAInfo;
- const MDNode *Ranges;
+ const MDNode *Ranges = nullptr;
- ReuseLoadInfo()
- : IsDereferenceable(false), IsInvariant(false), Alignment(0),
- Ranges(nullptr) {}
+ ReuseLoadInfo() = default;
MachineMemOperand::Flags MMOFlags() const {
MachineMemOperand::Flags F = MachineMemOperand::MONone;
@@ -878,6 +942,8 @@ namespace llvm {
SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerREM(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
@@ -906,15 +972,13 @@ namespace llvm {
const SDLoc &dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const override;
- SDValue
- LowerCall(TargetLowering::CallLoweringInfo &CLI,
- SmallVectorImpl<SDValue> &InVals) const override;
+ SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const override;
- bool
- CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
- bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- LLVMContext &Context) const override;
+ bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const override;
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
@@ -978,6 +1042,9 @@ namespace llvm {
SDValue DAGCombineBuildVector(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineFPToIntToFP(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineSHL(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineSRA(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineSRL(SDNode *N, DAGCombinerInfo &DCI) const;
/// ConvertSETCCToSubtract - looks at SETCC that compares ints. It replaces
/// SETCC with integer subtraction when (1) there is a legal way of doing it
@@ -994,14 +1061,16 @@ namespace llvm {
CCAssignFn *useFastISelCCs(unsigned Flag) const;
SDValue
- combineElementTruncationToVectorTruncation(SDNode *N,
- DAGCombinerInfo &DCI) const;
+ combineElementTruncationToVectorTruncation(SDNode *N,
+ DAGCombinerInfo &DCI) const;
};
namespace PPC {
+
FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,
const TargetLibraryInfo *LibInfo);
- }
+
+ } // end namespace PPC
bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
@@ -1026,6 +1095,10 @@ namespace llvm {
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State);
-}
-#endif // LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
+ bool isIntS16Immediate(SDNode *N, int16_t &Imm);
+ bool isIntS16Immediate(SDValue Op, int16_t &Imm);
+
+} // end namespace llvm
+
+#endif // LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index fbec878..e2af5e5 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -253,11 +253,11 @@ def LDAT : X_RD5_RS5_IM5<31, 614, (outs g8rc:$rD), (ins g8rc:$rA, u5imm:$FC),
Requires<[IsISA3_0]>;
}
-let Defs = [CR0], mayStore = 1, hasSideEffects = 0 in
+let Defs = [CR0], mayStore = 1, mayLoad = 0, hasSideEffects = 0 in
def STDCX : XForm_1<31, 214, (outs), (ins g8rc:$rS, memrr:$dst),
"stdcx. $rS, $dst", IIC_LdStSTDCX, []>, isDOT;
-let mayStore = 1, hasSideEffects = 0 in
+let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in
def STDAT : X_RD5_RS5_IM5<31, 742, (outs), (ins g8rc:$rS, g8rc:$rA, u5imm:$FC),
"stdat $rS, $rA, $FC", IIC_LdStStore>, isPPC64,
Requires<[IsISA3_0]>;
@@ -634,10 +634,19 @@ let Interpretation64Bit = 1, isCodeGenOnly = 1 in
defm EXTSW_32_64 : XForm_11r<31, 986, (outs g8rc:$rA), (ins gprc:$rS),
"extsw", "$rA, $rS", IIC_IntSimple,
[(set i64:$rA, (sext i32:$rS))]>, isPPC64;
+let isCodeGenOnly = 1 in
+def EXTSW_32 : XForm_11<31, 986, (outs gprc:$rA), (ins gprc:$rS),
+ "extsw $rA, $rS", IIC_IntSimple,
+ []>, isPPC64;
defm SRADI : XSForm_1rc<31, 413, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH),
"sradi", "$rA, $rS, $SH", IIC_IntRotateDI,
[(set i64:$rA, (sra i64:$rS, (i32 imm:$SH)))]>, isPPC64;
+// For fast-isel:
+let isCodeGenOnly = 1 in
+def SRADI_32 : XSForm_1<31, 413, (outs gprc:$rA), (ins gprc:$rS, u6imm:$SH),
+ "sradi $rA, $rS, $SH", IIC_IntRotateDI, []>, isPPC64;
+
defm CNTLZD : XForm_11r<31, 58, (outs g8rc:$rA), (ins g8rc:$rS),
"cntlzd", "$rA, $rS", IIC_IntGeneral,
[(set i64:$rA, (ctlz i64:$rS))]>;
@@ -674,6 +683,16 @@ def DIVDE : XOForm_1<31, 425, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
"divde $rT, $rA, $rB", IIC_IntDivD,
[(set i64:$rT, (int_ppc_divde g8rc:$rA, g8rc:$rB))]>,
isPPC64, Requires<[HasExtDiv]>;
+
+let Predicates = [IsISA3_0] in {
+def MODSD : XForm_8<31, 777, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "modsd $rT, $rA, $rB", IIC_IntDivW,
+ [(set i64:$rT, (srem i64:$rA, i64:$rB))]>;
+def MODUD : XForm_8<31, 265, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "modud $rT, $rA, $rB", IIC_IntDivW,
+ [(set i64:$rT, (urem i64:$rA, i64:$rB))]>;
+}
+
let Defs = [CR0] in
def DIVDEo : XOForm_1<31, 425, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
"divde. $rT, $rA, $rB", IIC_IntDivD,
@@ -721,15 +740,26 @@ defm RLDICL : MDForm_1r<30, 0,
// For fast-isel:
let isCodeGenOnly = 1 in
def RLDICL_32_64 : MDForm_1<30, 0,
- (outs g8rc:$rA),
+ (outs g8rc:$rA),
+ (ins gprc:$rS, u6imm:$SH, u6imm:$MBE),
+ "rldicl $rA, $rS, $SH, $MBE", IIC_IntRotateDI,
+ []>, isPPC64;
+// End fast-isel.
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
+defm RLDICL_32 : MDForm_1r<30, 0,
+ (outs gprc:$rA),
(ins gprc:$rS, u6imm:$SH, u6imm:$MBE),
- "rldicl $rA, $rS, $SH, $MBE", IIC_IntRotateDI,
+ "rldicl", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI,
[]>, isPPC64;
-// End fast-isel.
defm RLDICR : MDForm_1r<30, 1,
(outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE),
"rldicr", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI,
[]>, isPPC64;
+let isCodeGenOnly = 1 in
+def RLDICR_32 : MDForm_1<30, 1,
+ (outs gprc:$rA), (ins gprc:$rS, u6imm:$SH, u6imm:$MBE),
+ "rldicr $rA, $rS, $SH, $MBE", IIC_IntRotateDI,
+ []>, isPPC64;
defm RLDIC : MDForm_1r<30, 2,
(outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE),
"rldic", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI,
@@ -942,13 +972,15 @@ def LDMX : XForm_1<31, 309, (outs g8rc:$rD), (ins memrr:$src),
// Support for medium and large code model.
let hasSideEffects = 0 in {
+let isReMaterializable = 1 in {
def ADDIStocHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp),
"#ADDIStocHA", []>, isPPC64;
+def ADDItocL: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp),
+ "#ADDItocL", []>, isPPC64;
+}
let mayLoad = 1 in
def LDtocL: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc_nox0:$reg),
"#LDtocL", []>, isPPC64;
-def ADDItocL: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp),
- "#ADDItocL", []>, isPPC64;
}
// Support for thread-local storage.
@@ -963,6 +995,10 @@ def LDgotTprelL: Pseudo<(outs g8rc:$rD), (ins s16imm64:$disp, g8rc_nox0:$reg),
[(set i64:$rD,
(PPCldGotTprelL tglobaltlsaddr:$disp, i64:$reg))]>,
isPPC64;
+
+let isPseudo = 1, Defs = [CR7], Itinerary = IIC_LdStSync in
+def CFENCE8 : Pseudo<(outs), (ins g8rc:$cr), "#CFENCE8", []>;
+
def : Pat<(PPCaddTls i64:$in, tglobaltlsaddr:$g),
(ADD8TLS $in, tglobaltlsaddr:$g)>;
def ADDIStlsgdHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
@@ -977,7 +1013,9 @@ def ADDItlsgdL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
isPPC64;
// LR8 is a true define, while the rest of the Defs are clobbers. X3 is
// explicitly defined when this op is created, so not mentioned here.
-let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
+// This is lowered to BL8_NOP_TLS by the assembly printer, so the size must be
+// correct because the branch select pass relies on it.
+let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, Size = 8,
Defs = [X0,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] in
def GETtlsADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
"#GETtlsADDR",
@@ -1082,7 +1120,7 @@ def STDBRX: XForm_8<31, 660, (outs), (ins g8rc:$rS, memrr:$dst),
}
// Stores with Update (pre-inc).
-let PPC970_Unit = 2, mayStore = 1 in {
+let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in {
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
def STBU8 : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst),
"stbu $rS, $dst", IIC_LdStStoreUpd, []>,
@@ -1232,6 +1270,10 @@ def : Pat<(srl i64:$rS, i32:$rB),
def : Pat<(shl i64:$rS, i32:$rB),
(SLD $rS, $rB)>;
+// SUBFIC
+def : Pat<(sub imm64SExt16:$imm, i64:$in),
+ (SUBFIC8 $in, imm:$imm)>;
+
// SHL/SRL
def : Pat<(shl i64:$in, (i32 imm:$imm)),
(RLDICR $in, imm:$imm, (SHL64 imm:$imm))>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
index 5c02274..5465b5f 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -407,7 +407,7 @@ def MTVSCR : VXForm_5<1604, (outs), (ins vrrc:$vB),
"mtvscr $vB", IIC_LdStLoad,
[(int_ppc_altivec_mtvscr v4i32:$vB)]>;
-let PPC970_Unit = 2 in { // Loads.
+let PPC970_Unit = 2, mayLoad = 1, mayStore = 0 in { // Loads.
def LVEBX: XForm_1<31, 7, (outs vrrc:$vD), (ins memrr:$src),
"lvebx $vD, $src", IIC_LdStLoad,
[(set v16i8:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>;
@@ -434,7 +434,7 @@ def LVSR : XForm_1<31, 38, (outs vrrc:$vD), (ins memrr:$src),
[(set v16i8:$vD, (int_ppc_altivec_lvsr xoaddr:$src))]>,
PPC970_Unit_LSU;
-let PPC970_Unit = 2 in { // Stores.
+let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in { // Stores.
def STVEBX: XForm_8<31, 135, (outs), (ins vrrc:$rS, memrr:$dst),
"stvebx $rS, $dst", IIC_LdStStore,
[(int_ppc_altivec_stvebx v16i8:$rS, xoaddr:$dst)]>;
@@ -851,6 +851,10 @@ def V_SETALLONES : VXForm_3<908, (outs vrrc:$vD), (ins),
// Additional Altivec Patterns
//
+// Extended mnemonics
+def : InstAlias<"vmr $vD, $vA", (VOR vrrc:$vD, vrrc:$vA, vrrc:$vA)>;
+def : InstAlias<"vnot $vD, $vA", (VNOR vrrc:$vD, vrrc:$vA, vrrc:$vA)>;
+
// Loads.
def : Pat<(v4i32 (load xoaddr:$src)), (LVX xoaddr:$src)>;
@@ -983,6 +987,16 @@ def : Pat<(v8i16 (shl v8i16:$vA, v8i16:$vB)),
(v8i16 (VSLH $vA, $vB))>;
def : Pat<(v4i32 (shl v4i32:$vA, v4i32:$vB)),
(v4i32 (VSLW $vA, $vB))>;
+def : Pat<(v1i128 (shl v1i128:$vA, v1i128:$vB)),
+ (v1i128 (VSL (VSLO $vA, $vB), (VSPLTB 15, $vB)))>;
+def : Pat<(v16i8 (PPCshl v16i8:$vA, v16i8:$vB)),
+ (v16i8 (VSLB $vA, $vB))>;
+def : Pat<(v8i16 (PPCshl v8i16:$vA, v8i16:$vB)),
+ (v8i16 (VSLH $vA, $vB))>;
+def : Pat<(v4i32 (PPCshl v4i32:$vA, v4i32:$vB)),
+ (v4i32 (VSLW $vA, $vB))>;
+def : Pat<(v1i128 (PPCshl v1i128:$vA, v1i128:$vB)),
+ (v1i128 (VSL (VSLO $vA, $vB), (VSPLTB 15, $vB)))>;
def : Pat<(v16i8 (srl v16i8:$vA, v16i8:$vB)),
(v16i8 (VSRB $vA, $vB))>;
@@ -990,6 +1004,16 @@ def : Pat<(v8i16 (srl v8i16:$vA, v8i16:$vB)),
(v8i16 (VSRH $vA, $vB))>;
def : Pat<(v4i32 (srl v4i32:$vA, v4i32:$vB)),
(v4i32 (VSRW $vA, $vB))>;
+def : Pat<(v1i128 (srl v1i128:$vA, v1i128:$vB)),
+ (v1i128 (VSR (VSRO $vA, $vB), (VSPLTB 15, $vB)))>;
+def : Pat<(v16i8 (PPCsrl v16i8:$vA, v16i8:$vB)),
+ (v16i8 (VSRB $vA, $vB))>;
+def : Pat<(v8i16 (PPCsrl v8i16:$vA, v8i16:$vB)),
+ (v8i16 (VSRH $vA, $vB))>;
+def : Pat<(v4i32 (PPCsrl v4i32:$vA, v4i32:$vB)),
+ (v4i32 (VSRW $vA, $vB))>;
+def : Pat<(v1i128 (PPCsrl v1i128:$vA, v1i128:$vB)),
+ (v1i128 (VSR (VSRO $vA, $vB), (VSPLTB 15, $vB)))>;
def : Pat<(v16i8 (sra v16i8:$vA, v16i8:$vB)),
(v16i8 (VSRAB $vA, $vB))>;
@@ -997,6 +1021,12 @@ def : Pat<(v8i16 (sra v8i16:$vA, v8i16:$vB)),
(v8i16 (VSRAH $vA, $vB))>;
def : Pat<(v4i32 (sra v4i32:$vA, v4i32:$vB)),
(v4i32 (VSRAW $vA, $vB))>;
+def : Pat<(v16i8 (PPCsra v16i8:$vA, v16i8:$vB)),
+ (v16i8 (VSRAB $vA, $vB))>;
+def : Pat<(v8i16 (PPCsra v8i16:$vA, v8i16:$vB)),
+ (v8i16 (VSRAH $vA, $vB))>;
+def : Pat<(v4i32 (PPCsra v4i32:$vA, v4i32:$vB)),
+ (v4i32 (VSRAW $vA, $vB))>;
// Float to integer and integer to float conversions
def : Pat<(v4i32 (fp_to_sint v4f32:$vA)),
@@ -1068,14 +1098,24 @@ def:Pat<(vmrgow_swapped_shuffle v16i8:$vA, v16i8:$vB),
// Vector shifts
def VRLD : VX1_Int_Ty<196, "vrld", int_ppc_altivec_vrld, v2i64>;
def VSLD : VXForm_1<1476, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vsld $vD, $vA, $vB", IIC_VecGeneral,
- [(set v2i64:$vD, (shl v2i64:$vA, v2i64:$vB))]>;
+ "vsld $vD, $vA, $vB", IIC_VecGeneral, []>;
def VSRD : VXForm_1<1732, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vsrd $vD, $vA, $vB", IIC_VecGeneral,
- [(set v2i64:$vD, (srl v2i64:$vA, v2i64:$vB))]>;
+ "vsrd $vD, $vA, $vB", IIC_VecGeneral, []>;
def VSRAD : VXForm_1<964, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vsrad $vD, $vA, $vB", IIC_VecGeneral,
- [(set v2i64:$vD, (sra v2i64:$vA, v2i64:$vB))]>;
+ "vsrad $vD, $vA, $vB", IIC_VecGeneral, []>;
+
+def : Pat<(v2i64 (shl v2i64:$vA, v2i64:$vB)),
+ (v2i64 (VSLD $vA, $vB))>;
+def : Pat<(v2i64 (PPCshl v2i64:$vA, v2i64:$vB)),
+ (v2i64 (VSLD $vA, $vB))>;
+def : Pat<(v2i64 (srl v2i64:$vA, v2i64:$vB)),
+ (v2i64 (VSRD $vA, $vB))>;
+def : Pat<(v2i64 (PPCsrl v2i64:$vA, v2i64:$vB)),
+ (v2i64 (VSRD $vA, $vB))>;
+def : Pat<(v2i64 (sra v2i64:$vA, v2i64:$vB)),
+ (v2i64 (VSRAD $vA, $vB))>;
+def : Pat<(v2i64 (PPCsra v2i64:$vA, v2i64:$vB)),
+ (v2i64 (VSRAD $vA, $vB))>;
// Vector Integer Arithmetic Instructions
let isCommutable = 1 in {
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 2e0b935..e74ba38 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -65,7 +65,9 @@ UseOldLatencyCalc("ppc-old-latency-calc", cl::Hidden,
void PPCInstrInfo::anchor() {}
PPCInstrInfo::PPCInstrInfo(PPCSubtarget &STI)
- : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP),
+ : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP,
+ /* CatchRetOpcode */ -1,
+ STI.isPPC64() ? PPC::BLR8 : PPC::BLR),
Subtarget(STI), RI(STI.getTargetMachine()) {}
/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
@@ -290,6 +292,29 @@ unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
return 0;
}
+// For opcodes with the ReMaterializable flag set, this function is called to
+// verify that the instruction really is rematerializable.
+bool PPCInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
+ AliasAnalysis *AA) const {
+ switch (MI.getOpcode()) {
+ default:
+ // This function should only be called for opcodes with the ReMaterializable
+ // flag set.
+ llvm_unreachable("Unknown rematerializable operation!");
+ break;
+ case PPC::LI:
+ case PPC::LI8:
+ case PPC::LIS:
+ case PPC::LIS8:
+ case PPC::QVGPCI:
+ case PPC::ADDIStocHA:
+ case PPC::ADDItocL:
+ case PPC::LOAD_STACK_GUARD:
+ return true;
+ }
+ return false;
+}
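
Hedged context for the whitelist above (the example is mine): rematerialization
lets the register allocator re-create such a value at its use instead of
spilling it, which is profitable for cheap, operand-free definitions like li.

    // Sketch: a constant live across a call can be re-emitted with li
    // rather than saved and restored through the stack.
    long keep_constant_live(long (*g)(void)) {
      const long K = 42; // materialized by li
      long T = g();      // the call clobbers volatile registers
      return T + K;      // K can be rematerialized here
    }
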
+
unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
int &FrameIndex) const {
// Note: This list must be kept consistent with StoreRegToStackSlot.
@@ -438,8 +463,8 @@ void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB,
BuildMI(MBB, MI, DL, get(Opcode));
}
-/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
-void PPCInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
+/// Return the noop instruction to use for a noop.
+void PPCInstrInfo::getNoop(MCInst &NopInst) const {
NopInst.setOpcode(PPC::NOP);
}
@@ -662,12 +687,14 @@ unsigned PPCInstrInfo::insertBranch(MachineBasicBlock &MBB,
(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
(isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB);
else if (Cond[0].getImm() == PPC::PRED_BIT_SET)
- BuildMI(&MBB, DL, get(PPC::BC)).addOperand(Cond[1]).addMBB(TBB);
+ BuildMI(&MBB, DL, get(PPC::BC)).add(Cond[1]).addMBB(TBB);
else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET)
- BuildMI(&MBB, DL, get(PPC::BCn)).addOperand(Cond[1]).addMBB(TBB);
+ BuildMI(&MBB, DL, get(PPC::BCn)).add(Cond[1]).addMBB(TBB);
else // Conditional branch
BuildMI(&MBB, DL, get(PPC::BCC))
- .addImm(Cond[0].getImm()).addOperand(Cond[1]).addMBB(TBB);
+ .addImm(Cond[0].getImm())
+ .add(Cond[1])
+ .addMBB(TBB);
return 1;
}
@@ -677,12 +704,14 @@ unsigned PPCInstrInfo::insertBranch(MachineBasicBlock &MBB,
(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
(isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB);
else if (Cond[0].getImm() == PPC::PRED_BIT_SET)
- BuildMI(&MBB, DL, get(PPC::BC)).addOperand(Cond[1]).addMBB(TBB);
+ BuildMI(&MBB, DL, get(PPC::BC)).add(Cond[1]).addMBB(TBB);
else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET)
- BuildMI(&MBB, DL, get(PPC::BCn)).addOperand(Cond[1]).addMBB(TBB);
+ BuildMI(&MBB, DL, get(PPC::BCn)).add(Cond[1]).addMBB(TBB);
else
BuildMI(&MBB, DL, get(PPC::BCC))
- .addImm(Cond[0].getImm()).addOperand(Cond[1]).addMBB(TBB);
+ .addImm(Cond[0].getImm())
+ .add(Cond[1])
+ .addMBB(TBB);
BuildMI(&MBB, DL, get(PPC::B)).addMBB(FBB);
return 2;
}
@@ -692,9 +721,6 @@ bool PPCInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
ArrayRef<MachineOperand> Cond,
unsigned TrueReg, unsigned FalseReg,
int &CondCycles, int &TrueCycles, int &FalseCycles) const {
- if (!Subtarget.hasISEL())
- return false;
-
if (Cond.size() != 2)
return false;
@@ -736,9 +762,6 @@ void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB,
assert(Cond.size() == 2 &&
"PPC branch conditions have two components!");
- assert(Subtarget.hasISEL() &&
- "Cannot insert select on target without ISEL support");
-
// Get the register classes.
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
const TargetRegisterClass *RC =
@@ -1493,7 +1516,7 @@ bool PPCInstrInfo::DefinesPredicate(MachineInstr &MI,
return Found;
}
-bool PPCInstrInfo::isPredicable(MachineInstr &MI) const {
+bool PPCInstrInfo::isPredicable(const MachineInstr &MI) const {
unsigned OpC = MI.getOpcode();
switch (OpC) {
default:
@@ -1533,6 +1556,8 @@ bool PPCInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
case PPC::FCMPUD:
SrcReg = MI.getOperand(1).getReg();
SrcReg2 = MI.getOperand(2).getReg();
+ Value = 0;
+ Mask = 0;
return true;
}
}
@@ -1591,9 +1616,12 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
// We can perform this optimization, equality only, if MI is
// zero-extending.
+ // FIXME: Other possible target instructions include ANDISo and
+ // RLWINM aliases, such as ROTRWI, EXTLWI, SLWI and SRWI.
if (MIOpC == PPC::CNTLZW || MIOpC == PPC::CNTLZWo ||
MIOpC == PPC::SLW || MIOpC == PPC::SLWo ||
MIOpC == PPC::SRW || MIOpC == PPC::SRWo ||
+ MIOpC == PPC::ANDIo ||
isZeroExtendingRotate) {
noSub = true;
equalityOnly = true;
@@ -1607,8 +1635,9 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
if (equalityOnly) {
// We need to check the uses of the condition register in order to reject
// non-equality comparisons.
- for (MachineRegisterInfo::use_instr_iterator I =MRI->use_instr_begin(CRReg),
- IE = MRI->use_instr_end(); I != IE; ++I) {
+ for (MachineRegisterInfo::use_instr_iterator
+ I = MRI->use_instr_begin(CRReg), IE = MRI->use_instr_end();
+ I != IE; ++I) {
MachineInstr *UseMI = &*I;
if (UseMI->getOpcode() == PPC::BCC) {
unsigned Pred = UseMI->getOperand(0).getImm();
@@ -1630,8 +1659,9 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
for (MachineBasicBlock::iterator EL = CmpInstr.getParent()->end(); I != EL;
++I) {
bool FoundUse = false;
- for (MachineRegisterInfo::use_instr_iterator J =MRI->use_instr_begin(CRReg),
- JE = MRI->use_instr_end(); J != JE; ++J)
+ for (MachineRegisterInfo::use_instr_iterator
+ J = MRI->use_instr_begin(CRReg), JE = MRI->use_instr_end();
+ J != JE; ++J)
if (&*J == &*I) {
FoundUse = true;
break;
@@ -1641,6 +1671,9 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
break;
}
+ SmallVector<std::pair<MachineOperand*, PPC::Predicate>, 4> PredsToUpdate;
+ SmallVector<std::pair<MachineOperand*, unsigned>, 4> SubRegsToUpdate;
+
// There are two possible candidates which can be changed to set CR[01].
// One is MI, the other is a SUB instruction.
// For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).
@@ -1652,9 +1685,37 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
// same BB as the comparison. This is to allow the check below to avoid calls
// (and other explicit clobbers); instead we should really check for these
// more explicitly (in at least a few predecessors).
- else if (MI->getParent() != CmpInstr.getParent() || Value != 0) {
- // PPC does not have a record-form SUBri.
+ else if (MI->getParent() != CmpInstr.getParent())
return false;
+ else if (Value != 0) {
+ // The record-form instructions set the CR bits based on a signed
+ // comparison against 0.
+ // We try to convert a compare against 1 or -1 into a compare against 0.
+ bool Success = false;
+ if (!equalityOnly && MRI->hasOneUse(CRReg)) {
+ MachineInstr *UseMI = &*MRI->use_instr_begin(CRReg);
+ if (UseMI->getOpcode() == PPC::BCC) {
+ PPC::Predicate Pred = (PPC::Predicate)UseMI->getOperand(0).getImm();
+ int16_t Immed = (int16_t)Value;
+
+ if (Immed == -1 && Pred == PPC::PRED_GT) {
+ // We convert "greater than -1" into "greater than or equal to 0",
+ // since we are assuming a signed comparison, as guaranteed by !equalityOnly.
+ PredsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(0)),
+ PPC::PRED_GE));
+ Success = true;
+ }
+ else if (Immed == 1 && Pred == PPC::PRED_LT) {
+ // We convert "less than 1" into "less than or equal to 0".
+ PredsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(0)),
+ PPC::PRED_LE));
+ Success = true;
+ }
+ }
+ }
+
+ // PPC does not have a record-form SUBri.
+ if (!Success)
+ return false;
}
// Search for Sub.
@@ -1720,15 +1781,14 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
if (NewOpC == -1)
return false;
- SmallVector<std::pair<MachineOperand*, PPC::Predicate>, 4> PredsToUpdate;
- SmallVector<std::pair<MachineOperand*, unsigned>, 4> SubRegsToUpdate;
-
// If we have SUB(r1, r2) and CMP(r2, r1), the condition code based on CMP
// needs to be updated to be based on SUB. Push the condition code
// operands to OperandsToUpdate. If it is safe to remove CmpInstr, the
// condition code of these operands will be modified.
+ // Here, Value == 0 means we haven't converted a comparison against 1 or -1
+ // into a comparison against 0, which may modify the predicate.
bool ShouldSwap = false;
- if (Sub) {
+ if (Sub && Value == 0) {
ShouldSwap = SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
Sub->getOperand(2).getReg() == SrcReg;
@@ -1765,6 +1825,9 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
} else // We need to abort on a user we don't understand.
return false;
}
+ assert(!(Value != 0 && ShouldSwap) &&
+ "Non-zero immediate support and ShouldSwap"
+ "may conflict in updating predicate");
// Create a new virtual register to hold the value of the CR set by the
// record-form instruction. If the instruction was not previously in
@@ -1836,8 +1899,7 @@ unsigned PPCInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
PatchPointOpers Opers(&MI);
return Opers.getNumPatchBytes();
} else {
- const MCInstrDesc &Desc = get(Opcode);
- return Desc.getSize();
+ return get(Opcode).getSize();
}
}
@@ -1874,6 +1936,8 @@ PPCInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
}
bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
+ auto &MBB = *MI.getParent();
+ auto DL = MI.getDebugLoc();
switch (MI.getOpcode()) {
case TargetOpcode::LOAD_STACK_GUARD: {
assert(Subtarget.isTargetLinux() &&
@@ -1892,6 +1956,8 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case PPC::DFSTOREf64: {
assert(Subtarget.hasP9Vector() &&
"Invalid D-Form Pseudo-ops on non-P9 target.");
+ assert(MI.getOperand(2).isReg() && MI.getOperand(1).isImm() &&
+ "D-form op must have register and immediate operands");
unsigned UpperOpcode, LowerOpcode;
switch (MI.getOpcode()) {
case PPC::DFLOADf32:
@@ -1921,6 +1987,17 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MI.setDesc(get(Opcode));
return true;
}
+ case PPC::CFENCE8: {
+ auto Val = MI.getOperand(0).getReg();
+ BuildMI(MBB, MI, DL, get(PPC::CMPD), PPC::CR7).addReg(Val).addReg(Val);
+ BuildMI(MBB, MI, DL, get(PPC::CTRL_DEP))
+ .addImm(PPC::PRED_NE_MINUS)
+ .addReg(PPC::CR7)
+ .addImm(1);
+ MI.setDesc(get(PPC::ISYNC));
+ MI.RemoveOperand(0);
+ return true;
+ }
}
return false;
}
@@ -1931,3 +2008,7 @@ PPCInstrInfo::updatedRC(const TargetRegisterClass *RC) const {
return &PPC::VSRCRegClass;
return RC;
}
+
+int PPCInstrInfo::getRecordFormOpcode(unsigned Opcode) {
+ return PPC::getRecordFormOpcode(Opcode);
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 32b2f00..b0629c8 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -162,6 +162,8 @@ public:
unsigned &SubIdx) const override;
unsigned isLoadFromStackSlot(const MachineInstr &MI,
int &FrameIndex) const override;
+ bool isReallyTriviallyReMaterializable(const MachineInstr &MI,
+ AliasAnalysis *AA) const override;
unsigned isStoreToStackSlot(const MachineInstr &MI,
int &FrameIndex) const override;
@@ -253,7 +255,7 @@ public:
bool DefinesPredicate(MachineInstr &MI,
std::vector<MachineOperand> &Pred) const override;
- bool isPredicable(MachineInstr &MI) const override;
+ bool isPredicable(const MachineInstr &MI) const override;
// Comparison optimization.
@@ -269,7 +271,7 @@ public:
///
unsigned getInstSizeInBytes(const MachineInstr &MI) const override;
- void getNoopForMachoTarget(MCInst &NopInst) const override;
+ void getNoop(MCInst &NopInst) const override;
std::pair<unsigned, unsigned>
decomposeMachineOperandsTargetFlags(unsigned TF) const override;
@@ -290,6 +292,7 @@ public:
return Reg >= PPC::V0 && Reg <= PPC::V31;
}
const TargetRegisterClass *updatedRC(const TargetRegisterClass *RC) const;
+ static int getRecordFormOpcode(unsigned Opcode);
};
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index f615cc7..dd7fc26 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -32,8 +32,12 @@ def SDT_PPCstxsix : SDTypeProfile<0, 3, [
def SDT_PPCVexts : SDTypeProfile<1, 2, [
SDTCisVT<0, f64>, SDTCisVT<1, f64>, SDTCisPtrTy<2>
]>;
+def SDT_PPCSExtVElems : SDTypeProfile<1, 1, [
+ SDTCisVec<0>, SDTCisVec<1>
+]>;
-def SDT_PPCCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+def SDT_PPCCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32>,
+ SDTCisVT<1, i32> ]>;
def SDT_PPCCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
SDTCisVT<1, i32> ]>;
def SDT_PPCvperm : SDTypeProfile<1, 3, [
@@ -45,13 +49,21 @@ def SDT_PPCVecSplat : SDTypeProfile<1, 2, [ SDTCisVec<0>,
]>;
def SDT_PPCVecShift : SDTypeProfile<1, 3, [ SDTCisVec<0>,
- SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>
+ SDTCisVec<1>, SDTCisVec<2>, SDTCisPtrTy<3>
]>;
def SDT_PPCVecInsert : SDTypeProfile<1, 3, [ SDTCisVec<0>,
SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>
]>;
+def SDT_PPCVecReverse: SDTypeProfile<1, 1, [ SDTCisVec<0>,
+ SDTCisVec<1>
+]>;
+
+def SDT_PPCxxpermdi: SDTypeProfile<1, 3, [ SDTCisVec<0>,
+ SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>
+]>;
+
def SDT_PPCvcmp : SDTypeProfile<1, 3, [
SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i32>
]>;
@@ -114,14 +126,15 @@ def PPCfctiwuz: SDNode<"PPCISD::FCTIWUZ",SDTFPUnaryOp, []>;
def PPCstfiwx : SDNode<"PPCISD::STFIWX", SDT_PPCstfiwx,
[SDNPHasChain, SDNPMayStore]>;
def PPClfiwax : SDNode<"PPCISD::LFIWAX", SDT_PPClfiwx,
- [SDNPHasChain, SDNPMayLoad]>;
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def PPClfiwzx : SDNode<"PPCISD::LFIWZX", SDT_PPClfiwx,
- [SDNPHasChain, SDNPMayLoad]>;
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def PPClxsizx : SDNode<"PPCISD::LXSIZX", SDT_PPCLxsizx,
[SDNPHasChain, SDNPMayLoad]>;
def PPCstxsix : SDNode<"PPCISD::STXSIX", SDT_PPCstxsix,
[SDNPHasChain, SDNPMayStore]>;
def PPCVexts : SDNode<"PPCISD::VEXTS", SDT_PPCVexts, []>;
+def PPCSExtVElems : SDNode<"PPCISD::SExtVElems", SDT_PPCSExtVElems, []>;
// Extract FPSCR (not modeled at the DAG level).
def PPCmffs : SDNode<"PPCISD::MFFS",
@@ -169,6 +182,8 @@ def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>;
def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
def PPCxxsplt : SDNode<"PPCISD::XXSPLT", SDT_PPCVecSplat, []>;
def PPCxxinsert : SDNode<"PPCISD::XXINSERT", SDT_PPCVecInsert, []>;
+def PPCxxreverse : SDNode<"PPCISD::XXREVERSE", SDT_PPCVecReverse, []>;
+def PPCxxpermdi : SDNode<"PPCISD::XXPERMDI", SDT_PPCxxpermdi, []>;
def PPCvecshl : SDNode<"PPCISD::VECSHL", SDT_PPCVecShift, []>;
def PPCqvfperm : SDNode<"PPCISD::QVFPERM", SDT_PPCqvfperm, []>;
@@ -243,7 +258,7 @@ def PPCcondbranch : SDNode<"PPCISD::COND_BRANCH", SDT_PPCcondbr,
[SDNPHasChain, SDNPOptInGlue]>;
def PPClbrx : SDNode<"PPCISD::LBRX", SDT_PPClbrx,
- [SDNPHasChain, SDNPMayLoad]>;
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def PPCstbrx : SDNode<"PPCISD::STBRX", SDT_PPCstbrx,
[SDNPHasChain, SDNPMayStore]>;
@@ -390,6 +405,25 @@ def unaligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{
return cast<LoadSDNode>(N)->getAlignment() < 4;
}]>;
+// This is a somewhat weaker condition than actually checking for 16-byte
+// alignment. It simply checks that the displacement can be represented as an
+// immediate that is a multiple of 16 (i.e. it satisfies the requirement for
+// DQ-Form instructions).
+def quadwOffsetLoad : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return isOffsetMultipleOf(N, 16);
+}]>;
+def quadwOffsetStore : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return isOffsetMultipleOf(N, 16);
+}]>;
+def nonQuadwOffsetLoad : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return !isOffsetMultipleOf(N, 16);
+}]>;
+def nonQuadwOffsetStore : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return !isOffsetMultipleOf(N, 16);
+}]>;
+
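[Editor's note] A minimal sketch of the check these fragments delegate to. It is
an illustration only, assuming the usual SelectionDAG API; the actual
isOffsetMultipleOf helper lives with the other selection predicates (this patch
also touches PPCISelDAGToDAG.cpp) and handles more address forms:

    // Sketch: an access qualifies for a DQ-Form displacement when its address
    // is "base + imm" with imm a multiple of Val (16 for lxv/stxv).
    // Assumes <llvm/CodeGen/SelectionDAGNodes.h>; not the patch's exact code.
    static bool isOffsetMultipleOfSketch(llvm::SDNode *N, unsigned Val) {
      auto *LS = llvm::cast<llvm::LSBaseSDNode>(N);
      llvm::SDValue Addr = LS->getBasePtr();
      if (Addr.getOpcode() == llvm::ISD::ADD)
        if (auto *C = llvm::dyn_cast<llvm::ConstantSDNode>(Addr.getOperand(1)))
          return (C->getSExtValue() % Val) == 0;
      return true; // a bare base register has an implicit offset of zero
    }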
//===----------------------------------------------------------------------===//
// PowerPC Flag Definitions.
@@ -770,9 +804,10 @@ def spe2dis : Operand<iPTR> { // SPE displacement where the imm is 2-aligned.
}
// A single-register address. This is used with the SjLj
-// pseudo-instructions.
+// pseudo-instructions, which translate to LD/LWZ. These instructions require
+// G8RC_NOX0 registers.
def memr : Operand<iPTR> {
- let MIOperandInfo = (ops ptr_rc:$ptrreg);
+ let MIOperandInfo = (ops ptr_rc_nor0:$ptrreg);
}
def PPCTLSRegOperand : AsmOperandClass {
let Name = "TLSReg"; let PredicateMethod = "isTLSReg";
@@ -799,7 +834,8 @@ def pred : Operand<OtherVT> {
def iaddr : ComplexPattern<iPTR, 2, "SelectAddrImm", [], []>;
def xaddr : ComplexPattern<iPTR, 2, "SelectAddrIdx", [], []>;
def xoaddr : ComplexPattern<iPTR, 2, "SelectAddrIdxOnly",[], []>;
-def ixaddr : ComplexPattern<iPTR, 2, "SelectAddrImmX4", [], []>; // "std"
+def ixaddr : ComplexPattern<iPTR, 2, "SelectAddrImmX4", [], []>; // "std"
+def iqaddr : ComplexPattern<iPTR, 2, "SelectAddrImmX16", [], []>; // "stxv"
// The address in a single register. This is used with the SjLj
// pseudo-instructions.
@@ -1098,9 +1134,11 @@ multiclass AForm_3r<bits<6> opcode, bits<5> xo, dag OOL, dag IOL,
let hasCtrlDep = 1 in {
let Defs = [R1], Uses = [R1] in {
-def ADJCALLSTACKDOWN : Pseudo<(outs), (ins u16imm:$amt), "#ADJCALLSTACKDOWN $amt",
- [(callseq_start timm:$amt)]>;
-def ADJCALLSTACKUP : Pseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2), "#ADJCALLSTACKUP $amt1 $amt2",
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2),
+ "#ADJCALLSTACKDOWN $amt1 $amt2",
+ [(callseq_start timm:$amt1, timm:$amt2)]>;
+def ADJCALLSTACKUP : Pseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2),
+ "#ADJCALLSTACKUP $amt1 $amt2",
[(callseq_end timm:$amt1, timm:$amt2)]>;
}
@@ -1219,9 +1257,15 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
// FIXME: should be able to write a pattern for PPCcondbranch, but can't use
// a two-value operand where a dag node expects two operands. :(
let isCodeGenOnly = 1 in {
- def BCC : BForm<16, 0, 0, (outs), (ins pred:$cond, condbrtarget:$dst),
- "b${cond:cc}${cond:pm} ${cond:reg}, $dst"
- /*[(PPCcondbranch crrc:$crS, imm:$opc, bb:$dst)]*/>;
+ class BCC_class : BForm<16, 0, 0, (outs), (ins pred:$cond, condbrtarget:$dst),
+ "b${cond:cc}${cond:pm} ${cond:reg}, $dst"
+ /*[(PPCcondbranch crrc:$crS, imm:$opc, bb:$dst)]*/>;
+ def BCC : BCC_class;
+
+ // The same as BCC, except that it's not a terminator. Used for introducing
+ // control flow dependency without creating new blocks.
+ let isTerminator = 0 in def CTRL_DEP : BCC_class;
+
def BCCA : BForm<16, 1, 0, (outs), (ins pred:$cond, abscondbrtarget:$dst),
"b${cond:cc}a${cond:pm} ${cond:reg}, $dst">;
@@ -1648,7 +1692,7 @@ let usesCustomInserter = 1 in {
}
// Instructions to support atomic operations
-let mayLoad = 1, hasSideEffects = 0 in {
+let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in {
def LBARX : XForm_1<31, 52, (outs gprc:$rD), (ins memrr:$src),
"lbarx $rD, $src", IIC_LdStLWARX, []>,
Requires<[HasPartwordAtomics]>;
@@ -1681,7 +1725,7 @@ def LWAT : X_RD5_RS5_IM5<31, 582, (outs gprc:$rD), (ins gprc:$rA, u5imm:$FC),
Requires<[IsISA3_0]>;
}
-let Defs = [CR0], mayStore = 1, hasSideEffects = 0 in {
+let Defs = [CR0], mayStore = 1, mayLoad = 0, hasSideEffects = 0 in {
def STBCX : XForm_1<31, 694, (outs), (ins gprc:$rS, memrr:$dst),
"stbcx. $rS, $dst", IIC_LdStSTWCX, []>,
isDOT, Requires<[HasPartwordAtomics]>;
@@ -1694,7 +1738,7 @@ def STWCX : XForm_1<31, 150, (outs), (ins gprc:$rS, memrr:$dst),
"stwcx. $rS, $dst", IIC_LdStSTWCX, []>, isDOT;
}
-let mayStore = 1, hasSideEffects = 0 in
+let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in
def STWAT : X_RD5_RS5_IM5<31, 710, (outs), (ins gprc:$rS, gprc:$rA, u5imm:$FC),
"stwat $rS, $rA, $FC", IIC_LdStStore>,
Requires<[IsISA3_0]>;
@@ -1740,7 +1784,7 @@ def LFD : DForm_1<50, (outs f8rc:$rD), (ins memri:$src),
// Unindexed (r+i) Loads with Update (preinc).
-let mayLoad = 1, hasSideEffects = 0 in {
+let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in {
def LBZU : DForm_1<35, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lbzu $rD, $addr", IIC_LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
@@ -1813,7 +1857,7 @@ def LFDUX : XForm_1<31, 631, (outs f8rc:$rD, ptr_rc_nor0:$ea_result),
// Indexed (r+r) Loads.
//
-let PPC970_Unit = 2 in {
+let PPC970_Unit = 2, mayLoad = 1, mayStore = 0 in {
def LBZX : XForm_1<31, 87, (outs gprc:$rD), (ins memrr:$src),
"lbzx $rD, $src", IIC_LdStLoad,
[(set i32:$rD, (zextloadi8 xaddr:$src))]>;
@@ -1827,8 +1871,6 @@ def LHZX : XForm_1<31, 279, (outs gprc:$rD), (ins memrr:$src),
def LWZX : XForm_1<31, 23, (outs gprc:$rD), (ins memrr:$src),
"lwzx $rD, $src", IIC_LdStLoad,
[(set i32:$rD, (load xaddr:$src))]>;
-
-
def LHBRX : XForm_1<31, 790, (outs gprc:$rD), (ins memrr:$src),
"lhbrx $rD, $src", IIC_LdStLoad,
[(set i32:$rD, (PPClbrx xoaddr:$src, i16))]>;
@@ -1860,7 +1902,7 @@ def LMW : DForm_1<46, (outs gprc:$rD), (ins memri:$src),
//
// Unindexed (r+i) Stores.
-let PPC970_Unit = 2 in {
+let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in {
def STB : DForm_1<38, (outs), (ins gprc:$rS, memri:$src),
"stb $rS, $src", IIC_LdStStore,
[(truncstorei8 i32:$rS, iaddr:$src)]>;
@@ -1879,7 +1921,7 @@ def STFD : DForm_1<54, (outs), (ins f8rc:$rS, memri:$dst),
}
// Unindexed (r+i) Stores with Update (preinc).
-let PPC970_Unit = 2, mayStore = 1 in {
+let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in {
def STBU : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst),
"stbu $rS, $dst", IIC_LdStStoreUpd, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
@@ -1948,7 +1990,7 @@ def STFDX : XForm_28<31, 727, (outs), (ins f8rc:$frS, memrr:$dst),
}
// Indexed (r+r) Stores with Update (preinc).
-let PPC970_Unit = 2, mayStore = 1 in {
+let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in {
def STBUX : XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memrr:$dst),
"stbux $rS, $dst", IIC_LdStStoreUpd, []>,
RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
@@ -2531,6 +2573,14 @@ let Uses = [RM] in {
"mffs. $rT", IIC_IntMFFS, []>, isDOT;
}
+let Predicates = [IsISA3_0] in {
+def MODSW : XForm_8<31, 779, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "modsw $rT, $rA, $rB", IIC_IntDivW,
+ [(set i32:$rT, (srem i32:$rA, i32:$rB))]>;
+def MODUW : XForm_8<31, 267, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "moduw $rT, $rA, $rB", IIC_IntDivW,
+ [(set i32:$rT, (urem i32:$rA, i32:$rB))]>;
+}
let PPC970_Unit = 1, hasSideEffects = 0 in { // FXU Operations.
// XO-Form instructions. Arithmetic instructions that can set overflow bit
@@ -4164,6 +4214,8 @@ def : InstAlias<"rotldi. $rA, $rS, $n", (RLDICLo g8rc:$rA, g8rc:$rS, u6imm:$n, 0
def : InstAlias<"rotld $rA, $rS, $rB", (RLDCL g8rc:$rA, g8rc:$rS, gprc:$rB, 0)>;
def : InstAlias<"rotld. $rA, $rS, $rB", (RLDCLo g8rc:$rA, g8rc:$rS, gprc:$rB, 0)>;
def : InstAlias<"clrldi $rA, $rS, $n", (RLDICL g8rc:$rA, g8rc:$rS, 0, u6imm:$n)>;
+def : InstAlias<"clrldi $rA, $rS, $n",
+ (RLDICL_32 gprc:$rA, gprc:$rS, 0, u6imm:$n)>;
def : InstAlias<"clrldi. $rA, $rS, $n", (RLDICLo g8rc:$rA, g8rc:$rS, 0, u6imm:$n)>;
def RLWINMbm : PPCAsmPseudo<"rlwinm $rA, $rS, $n, $b",
@@ -4422,3 +4474,190 @@ def MSGSYNC : XForm_0<31, 886, (outs), (ins), "msgsync", IIC_SprMSGSYNC, []>;
def STOP : XForm_0<19, 370, (outs), (ins), "stop", IIC_SprSTOP, []>;
} // IsISA3_0
+
+// Fast 32-bit reverse bits algorithm:
+// Step 1: 1-bit swap (swap odd 1-bit and even 1-bit):
+// n = ((n >> 1) & 0x55555555) | ((n << 1) & 0xAAAAAAAA);
+// Step 2: 2-bit swap (swap odd 2-bit and even 2-bit):
+// n = ((n >> 2) & 0x33333333) | ((n << 2) & 0xCCCCCCCC);
+// Step 3: 4-bit swap (swap odd 4-bit and even 4-bit):
+// n = ((n >> 4) & 0x0F0F0F0F) | ((n << 4) & 0xF0F0F0F0);
+// Step 4: byte reverse (Suppose n = [B1,B2,B3,B4]):
+// Step 4.1: Put B4,B2 in the right position (rotate left 3 bytes):
+// n' = (n rotl 24); After which n' = [B4, B1, B2, B3]
+// Step 4.2: Insert B3 to the right position:
+// n' = rlwimi n', n, 8, 8, 15; After which n' = [B4, B3, B2, B3]
+// Step 4.3: Insert B1 to the right position:
+// n' = rlwimi n', n, 8, 24, 31; After which n' = [B4, B3, B2, B1]
+def MaskValues {
+ dag Lo1 = (ORI (LIS 0x5555), 0x5555);
+ dag Hi1 = (ORI (LIS 0xAAAA), 0xAAAA);
+ dag Lo2 = (ORI (LIS 0x3333), 0x3333);
+ dag Hi2 = (ORI (LIS 0xCCCC), 0xCCCC);
+ dag Lo4 = (ORI (LIS 0x0F0F), 0x0F0F);
+ dag Hi4 = (ORI (LIS 0xF0F0), 0xF0F0);
+}
+
+def Shift1 {
+ dag Right = (RLWINM $A, 31, 1, 31);
+ dag Left = (RLWINM $A, 1, 0, 30);
+}
+
+def Swap1 {
+ dag Bit = (OR (AND Shift1.Right, MaskValues.Lo1),
+ (AND Shift1.Left, MaskValues.Hi1));
+}
+
+def Shift2 {
+ dag Right = (RLWINM Swap1.Bit, 30, 2, 31);
+ dag Left = (RLWINM Swap1.Bit, 2, 0, 29);
+}
+
+def Swap2 {
+ dag Bits = (OR (AND Shift2.Right, MaskValues.Lo2),
+ (AND Shift2.Left, MaskValues.Hi2));
+}
+
+def Shift4 {
+ dag Right = (RLWINM Swap2.Bits, 28, 4, 31);
+ dag Left = (RLWINM Swap2.Bits, 4, 0, 27);
+}
+
+def Swap4 {
+ dag Bits = (OR (AND Shift4.Right, MaskValues.Lo4),
+ (AND Shift4.Left, MaskValues.Hi4));
+}
+
+def Rotate {
+ dag Left3Bytes = (RLWINM Swap4.Bits, 24, 0, 31);
+}
+
+def RotateInsertByte3 {
+ dag Left = (RLWIMI Rotate.Left3Bytes, Swap4.Bits, 8, 8, 15);
+}
+
+def RotateInsertByte1 {
+ dag Left = (RLWIMI RotateInsertByte3.Left, Swap4.Bits, 8, 24, 31);
+}
+
+def : Pat<(i32 (bitreverse i32:$A)),
+ (RLDICL_32 RotateInsertByte1.Left, 0, 32)>;
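[Editor's note] As a cross-check, the expansion above can be written as plain
C++. This is only a scalar rendering of the steps in the comment (the rotate is
RLWINM n, 24, 0, 31 and the two RLWIMIs patch bytes B3 and B1 back in), not
code from the patch:

    #include <cstdint>

    static uint32_t rotl32(uint32_t X, unsigned S) {
      return (X << S) | (X >> (32 - S));
    }

    uint32_t reverseBits32(uint32_t N) {
      N = ((N >> 1) & 0x55555555u) | ((N << 1) & 0xAAAAAAAAu); // 1-bit swap
      N = ((N >> 2) & 0x33333333u) | ((N << 2) & 0xCCCCCCCCu); // 2-bit swap
      N = ((N >> 4) & 0x0F0F0F0Fu) | ((N << 4) & 0xF0F0F0F0u); // 4-bit swap
      uint32_t R = rotl32(N, 24);                              // n rotl 24
      R = (R & ~0x00FF0000u) | (rotl32(N, 8) & 0x00FF0000u);   // rlwimi 8,8,15
      R = (R & ~0x000000FFu) | (rotl32(N, 8) & 0x000000FFu);   // rlwimi 8,24,31
      return R;
    }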
+
+// Fast 64-bit reverse bits algorithm:
+// Step 1: 1-bit swap (swap odd 1-bit and even 1-bit):
+// n = ((n >> 1) & 0x5555555555555555) | ((n << 1) & 0xAAAAAAAAAAAAAAAA);
+// Step 2: 2-bit swap (swap odd 2-bit and even 2-bit):
+// n = ((n >> 2) & 0x3333333333333333) | ((n << 2) & 0xCCCCCCCCCCCCCCCC);
+// Step 3: 4-bit swap (swap odd 4-bit and even 4-bit):
+// n = ((n >> 4) & 0x0F0F0F0F0F0F0F0F) | ((n << 4) & 0xF0F0F0F0F0F0F0F0);
+// Step 4: byte reverse (Suppose n = [B1,B2,B3,B4,B5,B6,B7,B8]):
+// Apply the same byte reverse algorithm used for the fast 32-bit reverse to
+// both the high 32 bits and the low 32 bits of the 64-bit value, then OR
+// them together to get the final result.
+def MaskValues64 {
+ dag Lo1 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Lo1, sub_32));
+ dag Hi1 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Hi1, sub_32));
+ dag Lo2 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Lo2, sub_32));
+ dag Hi2 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Hi2, sub_32));
+ dag Lo4 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Lo4, sub_32));
+ dag Hi4 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Hi4, sub_32));
+}
+
+def DWMaskValues {
+ dag Lo1 = (ORI8 (ORIS8 (RLDICR MaskValues64.Lo1, 32, 31), 0x5555), 0x5555);
+ dag Hi1 = (ORI8 (ORIS8 (RLDICR MaskValues64.Hi1, 32, 31), 0xAAAA), 0xAAAA);
+ dag Lo2 = (ORI8 (ORIS8 (RLDICR MaskValues64.Lo2, 32, 31), 0x3333), 0x3333);
+ dag Hi2 = (ORI8 (ORIS8 (RLDICR MaskValues64.Hi2, 32, 31), 0xCCCC), 0xCCCC);
+ dag Lo4 = (ORI8 (ORIS8 (RLDICR MaskValues64.Lo4, 32, 31), 0x0F0F), 0x0F0F);
+ dag Hi4 = (ORI8 (ORIS8 (RLDICR MaskValues64.Hi4, 32, 31), 0xF0F0), 0xF0F0);
+}
+
+def DWShift1 {
+ dag Right = (RLDICL $A, 63, 1);
+ dag Left = (RLDICR $A, 1, 62);
+}
+
+def DWSwap1 {
+ dag Bit = (OR8 (AND8 DWShift1.Right, DWMaskValues.Lo1),
+ (AND8 DWShift1.Left, DWMaskValues.Hi1));
+}
+
+def DWShift2 {
+ dag Right = (RLDICL DWSwap1.Bit, 62, 2);
+ dag Left = (RLDICR DWSwap1.Bit, 2, 61);
+}
+
+def DWSwap2 {
+ dag Bits = (OR8 (AND8 DWShift2.Right, DWMaskValues.Lo2),
+ (AND8 DWShift2.Left, DWMaskValues.Hi2));
+}
+
+def DWShift4 {
+ dag Right = (RLDICL DWSwap2.Bits, 60, 4);
+ dag Left = (RLDICR DWSwap2.Bits, 4, 59);
+}
+
+def DWSwap4 {
+ dag Bits = (OR8 (AND8 DWShift4.Right, DWMaskValues.Lo4),
+ (AND8 DWShift4.Left, DWMaskValues.Hi4));
+}
+
+// Bit swap is done; now start the byte swap.
+def DWExtractLo32 {
+ dag SubReg = (i32 (EXTRACT_SUBREG DWSwap4.Bits, sub_32));
+}
+
+def DWRotateLo32 {
+ dag Left24 = (RLWINM DWExtractLo32.SubReg, 24, 0, 31);
+}
+
+def DWLo32RotateInsertByte3 {
+ dag Left = (RLWIMI DWRotateLo32.Left24, DWExtractLo32.SubReg, 8, 8, 15);
+}
+
+// Lower 32 bits in the right order
+def DWLo32RotateInsertByte1 {
+ dag Left =
+ (RLWIMI DWLo32RotateInsertByte3.Left, DWExtractLo32.SubReg, 8, 24, 31);
+}
+
+def ExtendLo32 {
+ dag To64Bit =
+ (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ DWLo32RotateInsertByte1.Left, sub_32));
+}
+
+def DWShiftHi32 { // (SRDI DWSwap4.Bits, 32)
+ dag ToLo32 = (RLDICL DWSwap4.Bits, 32, 32);
+}
+
+def DWExtractHi32 {
+ dag SubReg = (i32 (EXTRACT_SUBREG DWShiftHi32.ToLo32, sub_32));
+}
+
+def DWRotateHi32 {
+ dag Left24 = (RLWINM DWExtractHi32.SubReg, 24, 0, 31);
+}
+
+def DWHi32RotateInsertByte3 {
+ dag Left = (RLWIMI DWRotateHi32.Left24, DWExtractHi32.SubReg, 8, 8, 15);
+}
+
+// High 32 bits in the right order, but in the low 32-bit position
+def DWHi32RotateInsertByte1 {
+ dag Left =
+ (RLWIMI DWHi32RotateInsertByte3.Left, DWExtractHi32.SubReg, 8, 24, 31);
+}
+
+def ExtendHi32 {
+ dag To64Bit =
+ (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ DWHi32RotateInsertByte1.Left, sub_32));
+}
+
+def DWShiftLo32 { // SLDI ExtendHi32.To64Bit, 32
+ dag ToHi32 = (RLDICR ExtendHi32.To64Bit, 32, 31);
+}
+
+def : Pat<(i64 (bitreverse i64:$A)),
+ (OR8 DWShiftLo32.ToHi32, ExtendLo32.To64Bit)>;
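[Editor's note] The same scheme in scalar form, again an illustration of the
algorithm rather than the patch's code:

    #include <cstdint>

    uint64_t reverseBits64(uint64_t N) {
      N = ((N >> 1) & 0x5555555555555555ull) |
          ((N << 1) & 0xAAAAAAAAAAAAAAAAull);              // 1-bit swap
      N = ((N >> 2) & 0x3333333333333333ull) |
          ((N << 2) & 0xCCCCCCCCCCCCCCCCull);              // 2-bit swap
      N = ((N >> 4) & 0x0F0F0F0F0F0F0F0Full) |
          ((N << 4) & 0xF0F0F0F0F0F0F0F0ull);              // 4-bit swap
      // The three swaps have reversed the bits inside every byte; a full byte
      // reverse completes the reversal. The patterns realize it one 32-bit
      // half at a time with the same rotlwi/rlwimi trick as the 32-bit case.
      return __builtin_bswap64(N);
    }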
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 0d9e345..942e8b3 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -62,7 +62,7 @@ def SDTVecConv : SDTypeProfile<1, 2, [
]>;
def PPClxvd2x : SDNode<"PPCISD::LXVD2X", SDT_PPClxvd2x,
- [SDNPHasChain, SDNPMayLoad]>;
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def PPCstxvd2x : SDNode<"PPCISD::STXVD2X", SDT_PPCstxvd2x,
[SDNPHasChain, SDNPMayStore]>;
def PPCxxswapd : SDNode<"PPCISD::XXSWAPD", SDT_PPCxxswapd, [SDNPHasChain]>;
@@ -117,7 +117,7 @@ let hasSideEffects = 0 in { // VSX instructions don't have side effects.
let Uses = [RM] in {
// Load indexed instructions
- let mayLoad = 1 in {
+ let mayLoad = 1, mayStore = 0 in {
let CodeSize = 3 in
def LXSDX : XX1Form<31, 588,
(outs vsfrc:$XT), (ins memrr:$src),
@@ -138,11 +138,11 @@ let Uses = [RM] in {
def LXVW4X : XX1Form<31, 780,
(outs vsrc:$XT), (ins memrr:$src),
"lxvw4x $XT, $src", IIC_LdStLFD,
- [(set v4i32:$XT, (int_ppc_vsx_lxvw4x xoaddr:$src))]>;
+ []>;
} // mayLoad
// Store indexed instructions
- let mayStore = 1 in {
+ let mayStore = 1, mayLoad = 0 in {
let CodeSize = 3 in
def STXSDX : XX1Form<31, 716,
(outs), (ins vsfrc:$XT, memrr:$dst),
@@ -160,7 +160,7 @@ let Uses = [RM] in {
def STXVW4X : XX1Form<31, 908,
(outs), (ins vsrc:$XT, memrr:$dst),
"stxvw4x $XT, $dst", IIC_LdStSTFD,
- [(store v4i32:$XT, xoaddr:$dst)]>;
+ []>;
}
} // mayStore
@@ -843,7 +843,9 @@ let Uses = [RM] in {
def XXPERMDI : XX3Form_2<60, 10,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, u2imm:$DM),
- "xxpermdi $XT, $XA, $XB, $DM", IIC_VecPerm, []>;
+ "xxpermdi $XT, $XA, $XB, $DM", IIC_VecPerm,
+ [(set v2i64:$XT, (PPCxxpermdi v2i64:$XA, v2i64:$XB,
+ imm32SExt16:$DM))]>;
let isCodeGenOnly = 1 in
def XXPERMDIs : XX3Form_2s<60, 10, (outs vsrc:$XT), (ins vsfrc:$XA, u2imm:$DM),
"xxpermdi $XT, $XA, $XA, $DM", IIC_VecPerm, []>;
@@ -1041,8 +1043,6 @@ let Predicates = [HasVSX, HasOnlySwappingMemOps] in {
// Stores.
def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst),
(STXVD2X $rS, xoaddr:$dst)>;
- def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst),
- (STXVW4X $rS, xoaddr:$dst)>;
def : Pat<(int_ppc_vsx_stxvd2x_be v2f64:$rS, xoaddr:$dst),
(STXVD2X $rS, xoaddr:$dst)>;
def : Pat<(int_ppc_vsx_stxvw4x_be v4i32:$rS, xoaddr:$dst),
@@ -1053,8 +1053,12 @@ let Predicates = [IsBigEndian, HasVSX, HasOnlySwappingMemOps] in {
def : Pat<(v2f64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
def : Pat<(v2i64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
def : Pat<(v4i32 (load xoaddr:$src)), (LXVW4X xoaddr:$src)>;
+ def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xoaddr:$src)), (LXVW4X xoaddr:$src)>;
def : Pat<(store v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
def : Pat<(store v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
+ def : Pat<(store v4i32:$XT, xoaddr:$dst), (STXVW4X $XT, xoaddr:$dst)>;
+ def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst),
+ (STXVW4X $rS, xoaddr:$dst)>;
}
// Permutes.
@@ -1064,6 +1068,10 @@ def : Pat<(v4f32 (PPCxxswapd v4f32:$src)), (XXPERMDI $src, $src, 2)>;
def : Pat<(v4i32 (PPCxxswapd v4i32:$src)), (XXPERMDI $src, $src, 2)>;
def : Pat<(v2f64 (PPCswapNoChain v2f64:$src)), (XXPERMDI $src, $src, 2)>;
+// PPCvecshl XT, XA, XA, 2 can be selected to both XXSLDWI XT,XA,XA,2 and
+// XXSWAPD XT,XA (i.e. XXPERMDI XT,XA,XA,2); the latter is more profitable.
+def : Pat<(v4i32 (PPCvecshl v4i32:$src, v4i32:$src, 2)), (XXPERMDI $src, $src, 2)>;
+
// Selects.
def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLT)),
(SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
@@ -1197,7 +1205,7 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
[(set v4i32:$XT, (or v4i32:$XA, (vnot_ppc v4i32:$XB)))]>;
// VSX scalar loads introduced in ISA 2.07
- let mayLoad = 1 in {
+ let mayLoad = 1, mayStore = 0 in {
let CodeSize = 3 in
def LXSSPX : XX1Form<31, 524, (outs vssrc:$XT), (ins memrr:$src),
"lxsspx $XT, $src", IIC_LdStLFD,
@@ -1211,7 +1219,7 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
} // mayLoad
// VSX scalar stores introduced in ISA 2.07
- let mayStore = 1 in {
+ let mayStore = 1, mayLoad = 0 in {
let CodeSize = 3 in
def STXSSPX : XX1Form<31, 652, (outs), (ins vssrc:$XT, memrr:$dst),
"stxsspx $XT, $dst", IIC_LdStSTFD,
@@ -1410,6 +1418,11 @@ let Predicates = [HasDirectMove] in {
"mfvsrd $rA, $XT", IIC_VecGeneral,
[(set i64:$rA, (PPCmfvsr f64:$XT))]>,
Requires<[In64BitMode]>;
+ let isCodeGenOnly = 1 in
+ def MFVRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vrrc:$XT),
+ "mfvsrd $rA, $XT", IIC_VecGeneral,
+ []>,
+ Requires<[In64BitMode]>;
def MFVSRWZ : XX1_RS6_RD5_XO<31, 115, (outs gprc:$rA), (ins vsfrc:$XT),
"mfvsrwz $rA, $XT", IIC_VecGeneral,
[(set i32:$rA, (PPCmfvsr f64:$XT))]>;
@@ -1429,7 +1442,7 @@ let Predicates = [IsISA3_0, HasDirectMove] in {
def MTVSRWS: XX1_RS6_RD5_XO<31, 403, (outs vsrc:$XT), (ins gprc:$rA),
"mtvsrws $XT, $rA", IIC_VecGeneral, []>;
- def MTVSRDD: XX1Form<31, 435, (outs vsrc:$XT), (ins g8rc:$rA, g8rc:$rB),
+ def MTVSRDD: XX1Form<31, 435, (outs vsrc:$XT), (ins g8rc_nox0:$rA, g8rc:$rB),
"mtvsrdd $XT, $rA, $rB", IIC_VecGeneral,
[]>, Requires<[In64BitMode]>;
@@ -1440,6 +1453,13 @@ let Predicates = [IsISA3_0, HasDirectMove] in {
} // IsISA3_0, HasDirectMove
} // UseVSXReg = 1
+// We want to parse this from asm, but we don't want to emit it because it
+// would be emitted with a VSX register. So leave Emit = 0 here.
+def : InstAlias<"mfvrd $rA, $XT",
+ (MFVRD g8rc:$rA, vrrc:$XT), 0>;
+def : InstAlias<"mffprd $rA, $src",
+ (MFVSRD g8rc:$rA, f8rc:$src)>;
+
/* Direct moves of various widths from GPR's into VSR's. Each move lines
the value up into element 0 (both BE and LE). Namely, entities smaller than
a doubleword are shifted left and moved for BE. For LE, they're moved, then
@@ -1878,8 +1898,100 @@ let Predicates = [IsLittleEndian, HasVSX] in
def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)),
(f64 VectorExtractions.LE_VARIABLE_DOUBLE)>;
- def : Pat<(v4i32 (int_ppc_vsx_lxvw4x_be xoaddr:$src)), (LXVW4X xoaddr:$src)>;
- def : Pat<(v2f64 (int_ppc_vsx_lxvd2x_be xoaddr:$src)), (LXVD2X xoaddr:$src)>;
+def : Pat<(v4i32 (int_ppc_vsx_lxvw4x_be xoaddr:$src)), (LXVW4X xoaddr:$src)>;
+def : Pat<(v2f64 (int_ppc_vsx_lxvd2x_be xoaddr:$src)), (LXVD2X xoaddr:$src)>;
+
+// Variable index unsigned vector_extract on Power9
+let Predicates = [HasP9Altivec, IsLittleEndian] in {
+ def : Pat<(i64 (anyext (i32 (vector_extract v16i8:$S, i64:$Idx)))),
+ (VEXTUBRX $Idx, $S)>;
+
+ def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, i64:$Idx)))),
+ (VEXTUHRX (RLWINM8 $Idx, 1, 28, 30), $S)>;
+ def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 0)))),
+ (VEXTUHRX (LI8 0), $S)>;
+ def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 1)))),
+ (VEXTUHRX (LI8 2), $S)>;
+ def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 2)))),
+ (VEXTUHRX (LI8 4), $S)>;
+ def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 3)))),
+ (VEXTUHRX (LI8 6), $S)>;
+ def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 4)))),
+ (VEXTUHRX (LI8 8), $S)>;
+ def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 5)))),
+ (VEXTUHRX (LI8 10), $S)>;
+ def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 6)))),
+ (VEXTUHRX (LI8 12), $S)>;
+ def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 7)))),
+ (VEXTUHRX (LI8 14), $S)>;
+
+ def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, i64:$Idx)))),
+ (VEXTUWRX (RLWINM8 $Idx, 2, 28, 29), $S)>;
+ def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 0)))),
+ (VEXTUWRX (LI8 0), $S)>;
+ def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 1)))),
+ (VEXTUWRX (LI8 4), $S)>;
+ def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 2)))),
+ (VEXTUWRX (LI8 8), $S)>;
+ def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 3)))),
+ (VEXTUWRX (LI8 12), $S)>;
+
+ def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, i64:$Idx)))),
+ (EXTSW (VEXTUWRX (RLWINM8 $Idx, 2, 28, 29), $S))>;
+ def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 0)))),
+ (EXTSW (VEXTUWRX (LI8 0), $S))>;
+ def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 1)))),
+ (EXTSW (VEXTUWRX (LI8 4), $S))>;
+ def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 2)))),
+ (EXTSW (VEXTUWRX (LI8 8), $S))>;
+ def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 3)))),
+ (EXTSW (VEXTUWRX (LI8 12), $S))>;
+}
+let Predicates = [HasP9Altivec, IsBigEndian] in {
+ def : Pat<(i64 (anyext (i32 (vector_extract v16i8:$S, i64:$Idx)))),
+ (VEXTUBLX $Idx, $S)>;
+
+ def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, i64:$Idx)))),
+ (VEXTUHLX (RLWINM8 $Idx, 1, 28, 30), $S)>;
+ def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 0)))),
+ (VEXTUHLX (LI8 0), $S)>;
+ def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 1)))),
+ (VEXTUHLX (LI8 2), $S)>;
+ def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 2)))),
+ (VEXTUHLX (LI8 4), $S)>;
+ def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 3)))),
+ (VEXTUHLX (LI8 6), $S)>;
+ def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 4)))),
+ (VEXTUHLX (LI8 8), $S)>;
+ def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 5)))),
+ (VEXTUHLX (LI8 10), $S)>;
+ def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 6)))),
+ (VEXTUHLX (LI8 12), $S)>;
+ def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 7)))),
+ (VEXTUHLX (LI8 14), $S)>;
+
+ def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, i64:$Idx)))),
+ (VEXTUWLX (RLWINM8 $Idx, 2, 28, 29), $S)>;
+ def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 0)))),
+ (VEXTUWLX (LI8 0), $S)>;
+ def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 1)))),
+ (VEXTUWLX (LI8 4), $S)>;
+ def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 2)))),
+ (VEXTUWLX (LI8 8), $S)>;
+ def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 3)))),
+ (VEXTUWLX (LI8 12), $S)>;
+
+ def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, i64:$Idx)))),
+ (EXTSW (VEXTUWLX (RLWINM8 $Idx, 2, 28, 29), $S))>;
+ def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 0)))),
+ (EXTSW (VEXTUWLX (LI8 0), $S))>;
+ def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 1)))),
+ (EXTSW (VEXTUWLX (LI8 4), $S))>;
+ def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 2)))),
+ (EXTSW (VEXTUWLX (LI8 8), $S))>;
+ def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 3)))),
+ (EXTSW (VEXTUWLX (LI8 12), $S))>;
+}
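[Editor's note] The RLWINM8 feeding VEXTUH[LR]X/VEXTUW[LR]X above is just an
index-to-byte-offset conversion; a hedged scalar sketch of the two
rotate-and-mask operands (illustration, not patch code):

    // RLWINM8 $Idx, 1, 28, 30 == (Idx << 1) & 0xE : halfword index -> offset
    // RLWINM8 $Idx, 2, 28, 29 == (Idx << 2) & 0xC : word index     -> offset
    static unsigned halfwordByteOffset(unsigned Idx) { return (Idx << 1) & 0xE; }
    static unsigned wordByteOffset(unsigned Idx)     { return (Idx << 2) & 0xC; }

The constant-index patterns simply materialize the precomputed offset with LI8.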
let Predicates = [IsLittleEndian, HasDirectMove] in {
// v16i8 scalar <-> vector conversions (LE)
@@ -2186,7 +2298,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
} // UseVSXReg = 1
// Pattern for matching Vector HP -> Vector SP intrinsic. Defined as a
- // seperate pattern so that it can convert the input register class from
+ // separate pattern so that it can convert the input register class from
// VRRC(v8i16) to VSRC.
def : Pat<(v4f32 (int_ppc_vsx_xvcvhpsp v8i16:$A)),
(v4f32 (XVCVHPSP (COPY_TO_REGCLASS $A, VSRC)))>;
@@ -2320,6 +2432,16 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
def XXBRD : XX2_XT6_XO5_XB6<60, 23, 475, "xxbrd", vsrc, []>;
def XXBRQ : XX2_XT6_XO5_XB6<60, 31, 475, "xxbrq", vsrc, []>;
+ // Vector Reverse
+ def : Pat<(v8i16 (PPCxxreverse v8i16 :$A)),
+ (v8i16 (COPY_TO_REGCLASS (XXBRH (COPY_TO_REGCLASS $A, VSRC)), VRRC))>;
+ def : Pat<(v4i32 (PPCxxreverse v4i32 :$A)),
+ (v4i32 (XXBRW $A))>;
+ def : Pat<(v2i64 (PPCxxreverse v2i64 :$A)),
+ (v2i64 (XXBRD $A))>;
+ def : Pat<(v1i128 (PPCxxreverse v1i128 :$A)),
+ (v1i128 (COPY_TO_REGCLASS (XXBRQ (COPY_TO_REGCLASS $A, VSRC)), VRRC))>;
+
// Vector Permute
def XXPERM : XX3_XT5_XA5_XB5<60, 26, "xxperm" , vsrc, vsrc, vsrc,
IIC_VecPerm, []>;
@@ -2335,7 +2457,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
// When adding new D-Form loads/stores, be sure to update the ImmToIdxMap in
// PPCRegisterInfo::PPCRegisterInfo and maybe save yourself some debugging.
- let mayLoad = 1 in {
+ let mayLoad = 1, mayStore = 0 in {
// Load Vector
def LXV : DQ_RD6_RS5_DQ12<61, 1, (outs vsrc:$XT), (ins memrix16:$src),
"lxv $XT, $src", IIC_LdStLFD, []>, UseVSXReg;
@@ -2365,8 +2487,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
// Load Vector Indexed
def LXVX : X_XT6_RA5_RB5<31, 268, "lxvx" , vsrc,
- [(set v2f64:$XT, (load xoaddr:$src))]>;
-
+ [(set v2f64:$XT, (load xaddr:$src))]>;
// Load Vector (Left-justified) with Length
def LXVL : XX1Form<31, 269, (outs vsrc:$XT), (ins memr:$src, g8rc:$rB),
"lxvl $XT, $src, $rB", IIC_LdStLoad,
@@ -2383,7 +2504,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
// When adding new D-Form loads/stores, be sure to update the ImmToIdxMap in
// PPCRegisterInfo::PPCRegisterInfo and maybe save yourself some debugging.
- let mayStore = 1 in {
+ let mayStore = 1, mayLoad = 0 in {
// Store Vector
def STXV : DQ_RD6_RS5_DQ12<61, 5, (outs), (ins vsrc:$XT, memrix16:$dst),
"stxv $XT, $dst", IIC_LdStSTFD, []>, UseVSXReg;
@@ -2416,7 +2537,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
// Store Vector Indexed
def STXVX : X_XS6_RA5_RB5<31, 396, "stxvx" , vsrc,
- [(store v2f64:$XT, xoaddr:$dst)]>;
+ [(store v2f64:$XT, xaddr:$dst)]>;
// Store Vector (Left-justified) with Length
def STXVL : XX1Form<31, 397, (outs), (ins vsrc:$XT, memr:$dst, g8rc:$rB),
@@ -2484,21 +2605,42 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
(v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>;
} // IsLittleEndian, HasP9Vector
- def : Pat<(v2f64 (load xoaddr:$src)), (LXVX xoaddr:$src)>;
- def : Pat<(v2i64 (load xoaddr:$src)), (LXVX xoaddr:$src)>;
- def : Pat<(v4f32 (load xoaddr:$src)), (LXVX xoaddr:$src)>;
- def : Pat<(v4i32 (load xoaddr:$src)), (LXVX xoaddr:$src)>;
+ // D-Form Load/Store
+ def : Pat<(v4i32 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>;
+ def : Pat<(v4f32 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>;
+ def : Pat<(v2i64 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>;
+ def : Pat<(v2f64 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>;
+ def : Pat<(v4i32 (int_ppc_vsx_lxvw4x iqaddr:$src)), (LXV memrix16:$src)>;
+ def : Pat<(v2f64 (int_ppc_vsx_lxvd2x iqaddr:$src)), (LXV memrix16:$src)>;
+
+ def : Pat<(quadwOffsetStore v4f32:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>;
+ def : Pat<(quadwOffsetStore v4i32:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>;
+ def : Pat<(quadwOffsetStore v2f64:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>;
+ def : Pat<(quadwOffsetStore v2i64:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>;
+ def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, iqaddr:$dst),
+ (STXV $rS, memrix16:$dst)>;
+ def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, iqaddr:$dst),
+ (STXV $rS, memrix16:$dst)>;
+
+
+ def : Pat<(v2f64 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>;
+ def : Pat<(v2i64 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>;
+ def : Pat<(v4f32 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>;
+ def : Pat<(v4i32 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>;
def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xoaddr:$src)), (LXVX xoaddr:$src)>;
def : Pat<(v2f64 (int_ppc_vsx_lxvd2x xoaddr:$src)), (LXVX xoaddr:$src)>;
- def : Pat<(store v2f64:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>;
- def : Pat<(store v2i64:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>;
- def : Pat<(store v4f32:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>;
- def : Pat<(store v4i32:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>;
+ def : Pat<(nonQuadwOffsetStore v2f64:$rS, xoaddr:$dst),
+ (STXVX $rS, xoaddr:$dst)>;
+ def : Pat<(nonQuadwOffsetStore v2i64:$rS, xoaddr:$dst),
+ (STXVX $rS, xoaddr:$dst)>;
+ def : Pat<(nonQuadwOffsetStore v4f32:$rS, xoaddr:$dst),
+ (STXVX $rS, xoaddr:$dst)>;
+ def : Pat<(nonQuadwOffsetStore v4i32:$rS, xoaddr:$dst),
+ (STXVX $rS, xoaddr:$dst)>;
def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst),
(STXVX $rS, xoaddr:$dst)>;
def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst),
(STXVX $rS, xoaddr:$dst)>;
-
def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))),
(v4i32 (LXVWSX xoaddr:$src))>;
def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))),
@@ -2650,21 +2792,21 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
let isPseudo = 1 in {
def DFLOADf32 : Pseudo<(outs vssrc:$XT), (ins memrix:$src),
"#DFLOADf32",
- [(set f32:$XT, (load iaddr:$src))]>;
+ [(set f32:$XT, (load ixaddr:$src))]>;
def DFLOADf64 : Pseudo<(outs vsfrc:$XT), (ins memrix:$src),
"#DFLOADf64",
- [(set f64:$XT, (load iaddr:$src))]>;
+ [(set f64:$XT, (load ixaddr:$src))]>;
def DFSTOREf32 : Pseudo<(outs), (ins vssrc:$XT, memrix:$dst),
"#DFSTOREf32",
- [(store f32:$XT, iaddr:$dst)]>;
+ [(store f32:$XT, ixaddr:$dst)]>;
def DFSTOREf64 : Pseudo<(outs), (ins vsfrc:$XT, memrix:$dst),
"#DFSTOREf64",
- [(store f64:$XT, iaddr:$dst)]>;
+ [(store f64:$XT, ixaddr:$dst)]>;
}
- def : Pat<(f64 (extloadf32 iaddr:$src)),
- (COPY_TO_REGCLASS (DFLOADf32 iaddr:$src), VSFRC)>;
- def : Pat<(f32 (fpround (extloadf32 iaddr:$src))),
- (f32 (DFLOADf32 iaddr:$src))>;
+ def : Pat<(f64 (extloadf32 ixaddr:$src)),
+ (COPY_TO_REGCLASS (DFLOADf32 ixaddr:$src), VSFRC)>;
+ def : Pat<(f32 (fpround (extloadf32 ixaddr:$src))),
+ (f32 (DFLOADf32 ixaddr:$src))>;
} // end HasP9Vector, AddedComplexity
// Integer extend helper dags 32 -> 64
@@ -2681,6 +2823,58 @@ def DblToFlt {
dag B0 = (f32 (fpround (f64 (extractelt v2f64:$B, 0))));
dag B1 = (f32 (fpround (f64 (extractelt v2f64:$B, 1))));
}
+
+def ByteToWord {
+ dag LE_A0 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 0)), i8));
+ dag LE_A1 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 4)), i8));
+ dag LE_A2 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 8)), i8));
+ dag LE_A3 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 12)), i8));
+ dag BE_A0 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 3)), i8));
+ dag BE_A1 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 7)), i8));
+ dag BE_A2 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 11)), i8));
+ dag BE_A3 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 15)), i8));
+}
+
+def ByteToDWord {
+ dag LE_A0 = (i64 (sext_inreg
+ (i64 (anyext (i32 (vector_extract v16i8:$A, 0)))), i8));
+ dag LE_A1 = (i64 (sext_inreg
+ (i64 (anyext (i32 (vector_extract v16i8:$A, 8)))), i8));
+ dag BE_A0 = (i64 (sext_inreg
+ (i64 (anyext (i32 (vector_extract v16i8:$A, 7)))), i8));
+ dag BE_A1 = (i64 (sext_inreg
+ (i64 (anyext (i32 (vector_extract v16i8:$A, 15)))), i8));
+}
+
+def HWordToWord {
+ dag LE_A0 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 0)), i16));
+ dag LE_A1 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 2)), i16));
+ dag LE_A2 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 4)), i16));
+ dag LE_A3 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 6)), i16));
+ dag BE_A0 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 1)), i16));
+ dag BE_A1 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 3)), i16));
+ dag BE_A2 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 5)), i16));
+ dag BE_A3 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 7)), i16));
+}
+
+def HWordToDWord {
+ dag LE_A0 = (i64 (sext_inreg
+ (i64 (anyext (i32 (vector_extract v8i16:$A, 0)))), i16));
+ dag LE_A1 = (i64 (sext_inreg
+ (i64 (anyext (i32 (vector_extract v8i16:$A, 4)))), i16));
+ dag BE_A0 = (i64 (sext_inreg
+ (i64 (anyext (i32 (vector_extract v8i16:$A, 3)))), i16));
+ dag BE_A1 = (i64 (sext_inreg
+ (i64 (anyext (i32 (vector_extract v8i16:$A, 7)))), i16));
+}
+
+def WordToDWord {
+ dag LE_A0 = (i64 (sext (i32 (vector_extract v4i32:$A, 0))));
+ dag LE_A1 = (i64 (sext (i32 (vector_extract v4i32:$A, 2))));
+ dag BE_A0 = (i64 (sext (i32 (vector_extract v4i32:$A, 1))));
+ dag BE_A1 = (i64 (sext (i32 (vector_extract v4i32:$A, 3))));
+}
+
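[Editor's note] These helper dags describe, per element, the value that the
VEXTSB2W/VEXTSH2D-style instructions produce, so a build_vector of exactly
these extracts can be matched further below. A hedged scalar model of one case
(little-endian element numbering; not patch code):

    #include <cstdint>

    // ByteToWord.LE_A0..LE_A3: each word lane is the sign-extended low byte
    // of that lane, i.e. bytes 0, 4, 8 and 12 of the v16i8 source.
    void vextsb2w_model(int32_t Out[4], const int8_t In[16]) {
      for (int i = 0; i < 4; ++i)
        Out[i] = (int32_t)In[4 * i];
    }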
def FltToIntLoad {
dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (extloadf32 xoaddr:$A)))));
}
@@ -2690,9 +2884,15 @@ def FltToUIntLoad {
def FltToLongLoad {
dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 xoaddr:$A)))));
}
+def FltToLongLoadP9 {
+ dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 ixaddr:$A)))));
+}
def FltToULongLoad {
dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 xoaddr:$A)))));
}
+def FltToULongLoadP9 {
+ dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 ixaddr:$A)))));
+}
def FltToLong {
dag A = (i64 (PPCmfvsr (PPCfctidz (fpextend f32:$A))));
}
@@ -2714,9 +2914,15 @@ def DblToULong {
def DblToIntLoad {
dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load xoaddr:$A)))));
}
+def DblToIntLoadP9 {
+ dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load ixaddr:$A)))));
+}
def DblToUIntLoad {
dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load xoaddr:$A)))));
}
+def DblToUIntLoadP9 {
+ dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load ixaddr:$A)))));
+}
def DblToLongLoad {
dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (load xoaddr:$A)))));
}
@@ -2884,19 +3090,19 @@ let AddedComplexity = 400 in {
(v4i32 (XVCVSPSXWS (LXVWSX xoaddr:$A)))>;
def : Pat<(v4i32 (scalar_to_vector FltToUIntLoad.A)),
(v4i32 (XVCVSPUXWS (LXVWSX xoaddr:$A)))>;
- def : Pat<(v4i32 (scalar_to_vector DblToIntLoad.A)),
+ def : Pat<(v4i32 (scalar_to_vector DblToIntLoadP9.A)),
(v4i32 (XXSPLTW (COPY_TO_REGCLASS
- (XSCVDPSXWS (DFLOADf64 iaddr:$A)), VSRC), 1))>;
- def : Pat<(v4i32 (scalar_to_vector DblToUIntLoad.A)),
+ (XSCVDPSXWS (DFLOADf64 ixaddr:$A)), VSRC), 1))>;
+ def : Pat<(v4i32 (scalar_to_vector DblToUIntLoadP9.A)),
(v4i32 (XXSPLTW (COPY_TO_REGCLASS
- (XSCVDPUXWS (DFLOADf64 iaddr:$A)), VSRC), 1))>;
- def : Pat<(v2i64 (scalar_to_vector FltToLongLoad.A)),
+ (XSCVDPUXWS (DFLOADf64 ixaddr:$A)), VSRC), 1))>;
+ def : Pat<(v2i64 (scalar_to_vector FltToLongLoadP9.A)),
(v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS
- (DFLOADf32 iaddr:$A),
+ (DFLOADf32 ixaddr:$A),
VSFRC)), 0))>;
- def : Pat<(v2i64 (scalar_to_vector FltToULongLoad.A)),
+ def : Pat<(v2i64 (scalar_to_vector FltToULongLoadP9.A)),
(v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS
- (DFLOADf32 iaddr:$A),
+ (DFLOADf32 ixaddr:$A),
VSFRC)), 0))>;
}
@@ -2921,4 +3127,49 @@ let AddedComplexity = 400 in {
(VMRGOW (COPY_TO_REGCLASS (MTVSRDD AnyExts.D, AnyExts.B), VSRC),
(COPY_TO_REGCLASS (MTVSRDD AnyExts.C, AnyExts.A), VSRC))>;
}
+  // P9 Altivec instructions that can be used to build vectors.
+  // They are added to PPCInstrVSX.td rather than PPCAltivecVSX.td so they can
+  // compete with the complexity of the existing build vector patterns here.
+ let Predicates = [HasP9Altivec, IsLittleEndian] in {
+ def : Pat<(v2i64 (build_vector WordToDWord.LE_A0, WordToDWord.LE_A1)),
+ (v2i64 (VEXTSW2D $A))>;
+ def : Pat<(v2i64 (build_vector HWordToDWord.LE_A0, HWordToDWord.LE_A1)),
+ (v2i64 (VEXTSH2D $A))>;
+ def : Pat<(v4i32 (build_vector HWordToWord.LE_A0, HWordToWord.LE_A1,
+ HWordToWord.LE_A2, HWordToWord.LE_A3)),
+ (v4i32 (VEXTSH2W $A))>;
+ def : Pat<(v4i32 (build_vector ByteToWord.LE_A0, ByteToWord.LE_A1,
+ ByteToWord.LE_A2, ByteToWord.LE_A3)),
+ (v4i32 (VEXTSB2W $A))>;
+ def : Pat<(v2i64 (build_vector ByteToDWord.LE_A0, ByteToDWord.LE_A1)),
+ (v2i64 (VEXTSB2D $A))>;
+ }
+
+ let Predicates = [HasP9Altivec, IsBigEndian] in {
+ def : Pat<(v2i64 (build_vector WordToDWord.BE_A0, WordToDWord.BE_A1)),
+ (v2i64 (VEXTSW2D $A))>;
+ def : Pat<(v2i64 (build_vector HWordToDWord.BE_A0, HWordToDWord.BE_A1)),
+ (v2i64 (VEXTSH2D $A))>;
+ def : Pat<(v4i32 (build_vector HWordToWord.BE_A0, HWordToWord.BE_A1,
+ HWordToWord.BE_A2, HWordToWord.BE_A3)),
+ (v4i32 (VEXTSH2W $A))>;
+ def : Pat<(v4i32 (build_vector ByteToWord.BE_A0, ByteToWord.BE_A1,
+ ByteToWord.BE_A2, ByteToWord.BE_A3)),
+ (v4i32 (VEXTSB2W $A))>;
+ def : Pat<(v2i64 (build_vector ByteToDWord.BE_A0, ByteToDWord.BE_A1)),
+ (v2i64 (VEXTSB2D $A))>;
+ }
+
+ let Predicates = [HasP9Altivec] in {
+ def: Pat<(v2i64 (PPCSExtVElems v16i8:$A)),
+ (v2i64 (VEXTSB2D $A))>;
+ def: Pat<(v2i64 (PPCSExtVElems v8i16:$A)),
+ (v2i64 (VEXTSH2D $A))>;
+ def: Pat<(v2i64 (PPCSExtVElems v4i32:$A)),
+ (v2i64 (VEXTSW2D $A))>;
+ def: Pat<(v4i32 (PPCSExtVElems v16i8:$A)),
+ (v4i32 (VEXTSB2W $A))>;
+ def: Pat<(v4i32 (PPCSExtVElems v8i16:$A)),
+ (v4i32 (VEXTSH2W $A))>;
+ }
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp b/contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
index 2c3e755..a349fa1 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
@@ -39,6 +39,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
@@ -72,9 +73,10 @@ namespace {
public:
static char ID; // Pass ID, replacement for typeid
- PPCLoopPreIncPrep() : FunctionPass(ID), TM(nullptr) {
+ PPCLoopPreIncPrep() : FunctionPass(ID) {
initializePPCLoopPreIncPrepPass(*PassRegistry::getPassRegistry());
}
+
PPCLoopPreIncPrep(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) {
initializePPCLoopPreIncPrepPass(*PassRegistry::getPassRegistry());
}
@@ -93,7 +95,7 @@ namespace {
bool rotateLoop(Loop *L);
private:
- PPCTargetMachine *TM;
+ PPCTargetMachine *TM = nullptr;
DominatorTree *DT;
LoopInfo *LI;
ScalarEvolution *SE;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
index e527b01..b310493 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#include "PPC.h"
#include "MCTargetDesc/PPCMCExpr.h"
+#include "PPC.h"
#include "PPCSubtarget.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
@@ -148,7 +148,7 @@ void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
MCOperand MCOp;
switch (MO.getType()) {
default:
- MI->dump();
+ MI->print(errs());
llvm_unreachable("unknown operand type");
case MachineOperand::MO_Register:
assert(!MO.getSubReg() && "Subregs should be eliminated!");
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
index 2413af3..ff5f17c 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -19,9 +19,9 @@
//
//===---------------------------------------------------------------------===//
-#include "PPCInstrInfo.h"
#include "PPC.h"
#include "PPCInstrBuilder.h"
+#include "PPCInstrInfo.h"
#include "PPCTargetMachine.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -147,9 +147,9 @@ bool PPCMIPeephole::simplifyCode(void) {
<< "Optimizing load-and-splat/splat "
"to load-and-splat/copy: ");
DEBUG(MI.dump());
- BuildMI(MBB, &MI, MI.getDebugLoc(),
- TII->get(PPC::COPY), MI.getOperand(0).getReg())
- .addOperand(MI.getOperand(1));
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
+ MI.getOperand(0).getReg())
+ .add(MI.getOperand(1));
ToErase = &MI;
Simplified = true;
}
@@ -169,9 +169,9 @@ bool PPCMIPeephole::simplifyCode(void) {
<< "Optimizing splat/swap or splat/splat "
"to splat/copy: ");
DEBUG(MI.dump());
- BuildMI(MBB, &MI, MI.getDebugLoc(),
- TII->get(PPC::COPY), MI.getOperand(0).getReg())
- .addOperand(MI.getOperand(1));
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
+ MI.getOperand(0).getReg())
+ .add(MI.getOperand(1));
ToErase = &MI;
Simplified = true;
}
@@ -194,9 +194,9 @@ bool PPCMIPeephole::simplifyCode(void) {
else if (Immed == 2 && FeedImmed == 2 && FeedReg1 == FeedReg2) {
DEBUG(dbgs() << "Optimizing swap/swap => copy: ");
DEBUG(MI.dump());
- BuildMI(MBB, &MI, MI.getDebugLoc(),
- TII->get(PPC::COPY), MI.getOperand(0).getReg())
- .addOperand(DefMI->getOperand(1));
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
+ MI.getOperand(0).getReg())
+ .add(DefMI->getOperand(1));
ToErase = &MI;
Simplified = true;
}
@@ -251,7 +251,7 @@ bool PPCMIPeephole::simplifyCode(void) {
DEBUG(MI.dump());
BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
MI.getOperand(0).getReg())
- .addOperand(MI.getOperand(OpNo));
+ .add(MI.getOperand(OpNo));
ToErase = &MI;
Simplified = true;
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
index 9d91e31..bc2d9a0 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
@@ -8,14 +8,13 @@
//===----------------------------------------------------------------------===//
#include "PPCMachineFunctionInfo.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
-void PPCFunctionInfo::anchor() { }
+void PPCFunctionInfo::anchor() {}
MCSymbol *PPCFunctionInfo::getPICOffsetSymbol() const {
const DataLayout &DL = MF.getDataLayout();
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
index 4c29aa0..202e100 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -14,6 +14,7 @@
#ifndef LLVM_LIB_TARGET_POWERPC_PPCMACHINEFUNCTIONINFO_H
#define LLVM_LIB_TARGET_POWERPC_PPCMACHINEFUNCTIONINFO_H
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineFunction.h"
namespace llvm {
@@ -26,17 +27,17 @@ class PPCFunctionInfo : public MachineFunctionInfo {
/// FramePointerSaveIndex - Frame index of where the old frame pointer is
/// stored. Also used as an anchor for instructions that need to be altered
/// when using frame pointers (dyna_add, dyna_sub.)
- int FramePointerSaveIndex;
+ int FramePointerSaveIndex = 0;
/// ReturnAddrSaveIndex - Frame index of where the return address is stored.
///
- int ReturnAddrSaveIndex;
+ int ReturnAddrSaveIndex = 0;
/// Frame index where the old base pointer is stored.
- int BasePointerSaveIndex;
+ int BasePointerSaveIndex = 0;
/// Frame index where the old PIC base pointer is stored.
- int PICBasePointerSaveIndex;
+ int PICBasePointerSaveIndex = 0;
/// MustSaveLR - Indicates whether LR is defined (or clobbered) in the current
/// function. This is only valid after the initial scan of the function by
@@ -44,54 +45,58 @@ class PPCFunctionInfo : public MachineFunctionInfo {
bool MustSaveLR;
/// Does this function have any stack spills.
- bool HasSpills;
+ bool HasSpills = false;
/// Does this function spill using instructions with only r+r (not r+i)
/// forms.
- bool HasNonRISpills;
+ bool HasNonRISpills = false;
/// SpillsCR - Indicates whether CR is spilled in the current function.
- bool SpillsCR;
+ bool SpillsCR = false;
/// Indicates whether VRSAVE is spilled in the current function.
- bool SpillsVRSAVE;
+ bool SpillsVRSAVE = false;
/// LRStoreRequired - The bool indicates whether there is some explicit use of
/// the LR/LR8 stack slot that is not obvious from scanning the code. This
/// requires that the code generator produce a store of LR to the stack on
/// entry, even though LR may otherwise apparently not be used.
- bool LRStoreRequired;
+ bool LRStoreRequired = false;
/// This function makes use of the PPC64 ELF TOC base pointer (register r2).
- bool UsesTOCBasePtr;
+ bool UsesTOCBasePtr = false;
/// MinReservedArea - This is the frame size that is at least reserved in a
/// potential caller (parameter+linkage area).
- unsigned MinReservedArea;
+ unsigned MinReservedArea = 0;
/// TailCallSPDelta - Stack pointer delta used when tail calling. Maximum
/// amount the stack pointer is adjusted to make the frame bigger for tail
/// calls. Used for creating an area before the register spill area.
- int TailCallSPDelta;
+ int TailCallSPDelta = 0;
/// HasFastCall - Does this function contain a fast call. Used to determine
/// how the caller's stack pointer should be calculated (epilog/dynamicalloc).
- bool HasFastCall;
+ bool HasFastCall = false;
/// VarArgsFrameIndex - FrameIndex for start of varargs area.
- int VarArgsFrameIndex;
+ int VarArgsFrameIndex = 0;
+
/// VarArgsStackOffset - StackOffset for start of stack
/// arguments.
- int VarArgsStackOffset;
+
+ int VarArgsStackOffset = 0;
+
/// VarArgsNumGPR - Index of the first unused integer
/// register for parameter passing.
- unsigned VarArgsNumGPR;
+ unsigned VarArgsNumGPR = 0;
+
/// VarArgsNumFPR - Index of the first unused double
/// register for parameter passing.
- unsigned VarArgsNumFPR;
+ unsigned VarArgsNumFPR = 0;
/// CRSpillFrameIndex - FrameIndex for CR spill slot for 32-bit SVR4.
- int CRSpillFrameIndex;
+ int CRSpillFrameIndex = 0;
/// If any of CR[2-4] need to be saved in the prologue and restored in the
/// epilogue then they are added to this array. This is used for the
@@ -102,35 +107,14 @@ class PPCFunctionInfo : public MachineFunctionInfo {
MachineFunction &MF;
/// Whether this uses the PIC Base register or not.
- bool UsesPICBase;
+ bool UsesPICBase = false;
/// True if this function has a subset of CSRs that is handled explicitly via
/// copies
- bool IsSplitCSR;
+ bool IsSplitCSR = false;
public:
- explicit PPCFunctionInfo(MachineFunction &MF)
- : FramePointerSaveIndex(0),
- ReturnAddrSaveIndex(0),
- BasePointerSaveIndex(0),
- PICBasePointerSaveIndex(0),
- HasSpills(false),
- HasNonRISpills(false),
- SpillsCR(false),
- SpillsVRSAVE(false),
- LRStoreRequired(false),
- UsesTOCBasePtr(false),
- MinReservedArea(0),
- TailCallSPDelta(0),
- HasFastCall(false),
- VarArgsFrameIndex(0),
- VarArgsStackOffset(0),
- VarArgsNumGPR(0),
- VarArgsNumFPR(0),
- CRSpillFrameIndex(0),
- MF(MF),
- UsesPICBase(0),
- IsSplitCSR(false) {}
+ explicit PPCFunctionInfo(MachineFunction &MF) : MF(MF) {}
int getFramePointerSaveIndex() const { return FramePointerSaveIndex; }
void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; }
@@ -211,7 +195,6 @@ public:
MCSymbol *getTOCOffsetSymbol() const;
};
-} // end of namespace llvm
-
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_POWERPC_PPCMACHINEFUNCTIONINFO_H
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index e492014..9207165 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -209,89 +209,84 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
// The ZERO register is not really a register, but the representation of r0
// when used in instructions that treat r0 as the constant 0.
- Reserved.set(PPC::ZERO);
- Reserved.set(PPC::ZERO8);
+ markSuperRegs(Reserved, PPC::ZERO);
// The FP register is also not really a register, but is the representation
// of the frame pointer register used by ISD::FRAMEADDR.
- Reserved.set(PPC::FP);
- Reserved.set(PPC::FP8);
+ markSuperRegs(Reserved, PPC::FP);
// The BP register is also not really a register, but is the representation
// of the base pointer register used by setjmp.
- Reserved.set(PPC::BP);
- Reserved.set(PPC::BP8);
+ markSuperRegs(Reserved, PPC::BP);
// The counter registers must be reserved so that counter-based loops can
// be correctly formed (and the mtctr instructions are not DCE'd).
- Reserved.set(PPC::CTR);
- Reserved.set(PPC::CTR8);
+ markSuperRegs(Reserved, PPC::CTR);
+ markSuperRegs(Reserved, PPC::CTR8);
- Reserved.set(PPC::R1);
- Reserved.set(PPC::LR);
- Reserved.set(PPC::LR8);
- Reserved.set(PPC::RM);
+ markSuperRegs(Reserved, PPC::R1);
+ markSuperRegs(Reserved, PPC::LR);
+ markSuperRegs(Reserved, PPC::LR8);
+ markSuperRegs(Reserved, PPC::RM);
if (!Subtarget.isDarwinABI() || !Subtarget.hasAltivec())
- Reserved.set(PPC::VRSAVE);
+ markSuperRegs(Reserved, PPC::VRSAVE);
// The SVR4 ABI reserves r2 and r13
if (Subtarget.isSVR4ABI()) {
- Reserved.set(PPC::R2); // System-reserved register
- Reserved.set(PPC::R13); // Small Data Area pointer register
+ // We only reserve r2 if we need to use the TOC pointer. If we have no
+ // explicit uses of the TOC pointer (meaning we're a leaf function with
+ // no constant-pool loads, etc.) and we have no potential uses inside an
+    // inline asm block, then we can treat r2 as an ordinary callee-saved
+ // register.
+ const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ if (!TM.isPPC64() || FuncInfo->usesTOCBasePtr() || MF.hasInlineAsm())
+ markSuperRegs(Reserved, PPC::R2); // System-reserved register
+ markSuperRegs(Reserved, PPC::R13); // Small Data Area pointer register
}
// On PPC64, r13 is the thread pointer. Never allocate this register.
- if (TM.isPPC64()) {
- Reserved.set(PPC::R13);
-
- Reserved.set(PPC::X1);
- Reserved.set(PPC::X13);
-
- if (TFI->needsFP(MF))
- Reserved.set(PPC::X31);
-
- if (hasBasePointer(MF))
- Reserved.set(PPC::X30);
-
- // The 64-bit SVR4 ABI reserves r2 for the TOC pointer.
- if (Subtarget.isSVR4ABI()) {
- // We only reserve r2 if we need to use the TOC pointer. If we have no
- // explicit uses of the TOC pointer (meaning we're a leaf function with
- // no constant-pool loads, etc.) and we have no potential uses inside an
- // inline asm block, then we can treat r2 has an ordinary callee-saved
- // register.
- const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
- if (FuncInfo->usesTOCBasePtr() || MF.hasInlineAsm())
- Reserved.set(PPC::X2);
- else
- Reserved.reset(PPC::R2);
- }
- }
+ if (TM.isPPC64())
+ markSuperRegs(Reserved, PPC::R13);
if (TFI->needsFP(MF))
- Reserved.set(PPC::R31);
+ markSuperRegs(Reserved, PPC::R31);
bool IsPositionIndependent = TM.isPositionIndependent();
if (hasBasePointer(MF)) {
if (Subtarget.isSVR4ABI() && !TM.isPPC64() && IsPositionIndependent)
- Reserved.set(PPC::R29);
+ markSuperRegs(Reserved, PPC::R29);
else
- Reserved.set(PPC::R30);
+ markSuperRegs(Reserved, PPC::R30);
}
if (Subtarget.isSVR4ABI() && !TM.isPPC64() && IsPositionIndependent)
- Reserved.set(PPC::R30);
+ markSuperRegs(Reserved, PPC::R30);
// Reserve Altivec registers when Altivec is unavailable.
if (!Subtarget.hasAltivec())
for (TargetRegisterClass::iterator I = PPC::VRRCRegClass.begin(),
IE = PPC::VRRCRegClass.end(); I != IE; ++I)
- Reserved.set(*I);
+ markSuperRegs(Reserved, *I);
+ assert(checkAllSuperRegsMarked(Reserved));
return Reserved;
}
+bool PPCRegisterInfo::isCallerPreservedPhysReg(unsigned PhysReg,
+ const MachineFunction &MF) const {
+ assert(TargetRegisterInfo::isPhysicalRegister(PhysReg));
+ if (TM.isELFv2ABI() && PhysReg == PPC::X2) {
+ // X2 is guaranteed to be preserved within a function if it is reserved.
+ // The reason it's reserved is that it's the TOC pointer (and the function
+ // uses the TOC). In functions where it isn't reserved (i.e. leaf functions
+ // with no TOC access), we can't claim that it is preserved.
+ return (getReservedRegs(MF).test(PPC::X2));
+ } else {
+ return false;
+ }
+}
+
unsigned PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const {
const PPCFrameLowering *TFI = getFrameLowering(MF);
@@ -394,9 +389,14 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const {
unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
if (MaxAlign < TargetAlign && isInt<16>(FrameSize)) {
- BuildMI(MBB, II, dl, TII.get(PPC::ADDI), Reg)
- .addReg(PPC::R31)
- .addImm(FrameSize);
+ if (LP64)
+ BuildMI(MBB, II, dl, TII.get(PPC::ADDI8), Reg)
+ .addReg(PPC::X31)
+ .addImm(FrameSize);
+ else
+ BuildMI(MBB, II, dl, TII.get(PPC::ADDI), Reg)
+ .addReg(PPC::R31)
+ .addImm(FrameSize);
} else if (LP64) {
BuildMI(MBB, II, dl, TII.get(PPC::LD), Reg)
.addImm(0)
@@ -483,8 +483,10 @@ void PPCRegisterInfo::lowerDynamicAreaOffset(
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
unsigned maxCallFrameSize = MFI.getMaxCallFrameSize();
+ bool is64Bit = TM.isPPC64();
DebugLoc dl = MI.getDebugLoc();
- BuildMI(MBB, II, dl, TII.get(PPC::LI), MI.getOperand(0).getReg())
+ BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LI8 : PPC::LI),
+ MI.getOperand(0).getReg())
.addImm(maxCallFrameSize);
MBB.erase(II);
}
@@ -752,19 +754,31 @@ bool PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
return false;
}
-// Figure out if the offset in the instruction must be a multiple of 4.
-// This is true for instructions like "STD".
-static bool usesIXAddr(const MachineInstr &MI) {
+// If the offset must be a multiple of some value, return what that value is.
+static unsigned offsetMinAlign(const MachineInstr &MI) {
unsigned OpC = MI.getOpcode();
switch (OpC) {
default:
- return false;
+ return 1;
case PPC::LWA:
case PPC::LWA_32:
case PPC::LD:
+ case PPC::LDU:
case PPC::STD:
- return true;
+ case PPC::STDU:
+ case PPC::DFLOADf32:
+ case PPC::DFLOADf64:
+ case PPC::DFSTOREf32:
+ case PPC::DFSTOREf64:
+ case PPC::LXSD:
+ case PPC::LXSSP:
+ case PPC::STXSD:
+ case PPC::STXSSP:
+ return 4;
+ case PPC::LXV:
+ case PPC::STXV:
+ return 16;
}
}
@@ -850,9 +864,6 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.getOperand(FIOperandNum).ChangeToRegister(
FrameIndex < 0 ? getBaseRegister(MF) : getFrameRegister(MF), false);
- // Figure out if the offset in the instruction is shifted right two bits.
- bool isIXAddr = usesIXAddr(MI);
-
// If the instruction is not present in ImmToIdxMap, then it has no immediate
// form (and must be r+r).
bool noImmForm = !MI.isInlineAsm() && OpC != TargetOpcode::STACKMAP &&
@@ -881,7 +892,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// happen in invalid code.
assert(OpC != PPC::DBG_VALUE &&
"This should be handled in a target-independent way");
- if (!noImmForm && ((isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0)) ||
+ if (!noImmForm && ((isInt<16>(Offset) &&
+ ((Offset % offsetMinAlign(MI)) == 0)) ||
OpC == TargetOpcode::STACKMAP ||
OpC == TargetOpcode::PATCHPOINT)) {
MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset);
@@ -1074,5 +1086,5 @@ bool PPCRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
return MI->getOpcode() == PPC::DBG_VALUE || // DBG_VALUE is always Reg+Imm
MI->getOpcode() == TargetOpcode::STACKMAP ||
MI->getOpcode() == TargetOpcode::PATCHPOINT ||
- (isInt<16>(Offset) && (!usesIXAddr(*MI) || (Offset & 3) == 0));
+ (isInt<16>(Offset) && (Offset % offsetMinAlign(*MI)) == 0);
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
index 4a96327..0bbb71f 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -83,6 +83,7 @@ public:
void adjustStackMapLiveOutMask(uint32_t *Mask) const override;
BitVector getReservedRegs(const MachineFunction &MF) const override;
+ bool isCallerPreservedPhysReg(unsigned PhysReg, const MachineFunction &MF) const override;
/// We require the register scavenger.
bool requiresRegisterScavenging(const MachineFunction &MF) const override {
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td
index 8e52da5..79963dd 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td
@@ -377,7 +377,7 @@ def P8Itineraries : ProcessorItineraries<
InstrStage<1, [P8_FPU1, P8_FPU2]>],
[7, 1, 1]>,
InstrItinData<IIC_VecPerm , [InstrStage<1, [P8_DU1, P8_DU2], 0>,
- InstrStage<1, [P8_FPU2, P8_FPU2]>],
+ InstrStage<1, [P8_FPU1, P8_FPU2]>],
[3, 1, 1]>
]>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP9.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP9.td
index a9c1bd7..a01995a 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP9.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP9.td
@@ -260,8 +260,8 @@ let SchedModel = P9Model in {
// ***************** Defining Itinerary Class Resources *****************
- def : ItinRW<[P9_DFU_76C, IP_EXEC_1C, DISP_1C, DISP_1C], [IIC_IntSimple,
- IIC_IntGeneral]>;
+ def : ItinRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C],
+ [IIC_IntSimple, IIC_IntGeneral]>;
def : ItinRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
[IIC_IntISEL, IIC_IntRotate, IIC_IntShift]>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
index e8a87e7..ccf0f80 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -220,8 +220,8 @@ bool PPCSubtarget::enableSubRegLiveness() const {
return UseSubRegLiveness;
}
-unsigned char PPCSubtarget::classifyGlobalReference(
- const GlobalValue *GV) const {
+unsigned char
+PPCSubtarget::classifyGlobalReference(const GlobalValue *GV) const {
// Note that currently we don't generate non-pic references.
// If a caller wants that, this will have to be updated.
@@ -229,23 +229,9 @@ unsigned char PPCSubtarget::classifyGlobalReference(
if (TM.getCodeModel() == CodeModel::Large)
return PPCII::MO_PIC_FLAG | PPCII::MO_NLP_FLAG;
- unsigned char flags = PPCII::MO_PIC_FLAG;
-
- // Only if the relocation mode is PIC do we have to worry about
- // interposition. In all other cases we can use a slightly looser standard to
- // decide how to access the symbol.
- if (TM.getRelocationModel() == Reloc::PIC_) {
- // If it's local, or it's non-default, it can't be interposed.
- if (!GV->hasLocalLinkage() &&
- GV->hasDefaultVisibility()) {
- flags |= PPCII::MO_NLP_FLAG;
- }
- return flags;
- }
-
- if (GV->isStrongDefinitionForLinker())
- return flags;
- return flags | PPCII::MO_NLP_FLAG;
+ if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
+ return PPCII::MO_PIC_FLAG;
+ return PPCII::MO_PIC_FLAG | PPCII::MO_NLP_FLAG;
}
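The rewritten logic defers the interposition question to shouldAssumeDSOLocal: a dso-local symbol can be addressed directly, while everything else also carries MO_NLP_FLAG and is reached through an extra load. A hedged, caller-side illustration (variable names invented for the example):
// Illustration only: consuming the returned flags.
unsigned char Flags = Subtarget.classifyGlobalReference(GV);
bool NeedsIndirection = (Flags & PPCII::MO_NLP_FLAG) != 0; // non-local access
bool IsPIC = (Flags & PPCII::MO_PIC_FLAG) != 0;            // always set here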
bool PPCSubtarget::isELFv2ABI() const { return TM.isELFv2ABI(); }
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h
index 7fd9079..90d11f4 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -272,6 +272,13 @@ public:
return 16;
}
+
+ // DarwinABI has a 224-byte red zone. PPC32 SVR4ABI (non-DarwinABI) has no
+ // red zone, and PPC64 SVR4ABI has a 288-byte red zone.
+ unsigned getRedZoneSize() const {
+ return isDarwinABI() ? 224 : (isPPC64() ? 288 : 0);
+ }
+
bool hasHTM() const { return HasHTM; }
bool hasFusion() const { return HasFusion; }
bool hasFloat128() const { return HasFloat128; }
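A likely consumer of getRedZoneSize is frame lowering (PPCFrameLowering.cpp also changes in this import): a leaf function whose frame fits inside the red zone can skip the stack-pointer adjustment. A sketch under that assumption; the helper name is hypothetical:
// Hypothetical helper, not taken from the patch:
static bool fitsInRedZone(const PPCSubtarget &ST, uint64_t FrameSize) {
  return FrameSize <= ST.getRedZoneSize(); // 224 Darwin, 288 PPC64 ELF, else 0
}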
@@ -298,7 +305,9 @@ public:
bool isSVR4ABI() const { return !isDarwinABI(); }
bool isELFv2ABI() const;
- bool enableEarlyIfConversion() const override { return hasISEL(); }
+ /// Originally, this function returned hasISEL(). Now we always enable it,
+ /// but may expand the ISEL instruction later.
+ bool enableEarlyIfConversion() const override { return true; }
// Scheduling customization.
bool enableMachineScheduler() const override;
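Always answering true here pairs with the new PPCExpandISEL pass registered in PPCTargetMachine.cpp below: early if-conversion may freely form ISEL, and a subtarget without the instruction gets it rewritten into a branch sequence late. An assumed shape of that expansion (labels are illustrative):
//   Dest = ISEL TrueReg, FalseReg, CRBit    ; before PPCExpandISEL
// becomes, when the subtarget lacks isel:
//   bc   CRBit, L_true
//   Dest = COPY FalseReg
//   b    L_done
// L_true:
//   Dest = COPY TrueReg
// L_done: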
@@ -316,6 +325,8 @@ public:
/// classifyGlobalReference - Classify a global variable reference for the
/// current subtarget according to how we should reference it.
unsigned char classifyGlobalReference(const GlobalValue *GV) const;
+
+ bool isXRaySupported() const override { return IsPPC64 && IsLittleEndian; }
};
} // End llvm namespace
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index 0c1260a..5f8085f 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -21,9 +21,9 @@
//
//===----------------------------------------------------------------------===//
-#include "PPCInstrInfo.h"
#include "PPC.h"
#include "PPCInstrBuilder.h"
+#include "PPCInstrInfo.h"
#include "PPCTargetMachine.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -52,6 +52,7 @@ namespace {
protected:
bool processBlock(MachineBasicBlock &MBB) {
bool Changed = false;
+ bool NeedFence = true;
bool Is64Bit = MBB.getParent()->getSubtarget<PPCSubtarget>().isPPC64();
for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
@@ -62,6 +63,16 @@ protected:
MI.getOpcode() != PPC::ADDItlsldLADDR &&
MI.getOpcode() != PPC::ADDItlsgdLADDR32 &&
MI.getOpcode() != PPC::ADDItlsldLADDR32) {
+
+ // Although we create ADJCALLSTACKDOWN and ADJCALLSTACKUP
+ // as scheduling fences, we skip creating them when we already
+ // sit inside an existing ADJCALLSTACKDOWN/UP pair, to avoid nesting,
+ // which causes a verification error with -verify-machineinstrs.
+ if (MI.getOpcode() == PPC::ADJCALLSTACKDOWN)
+ NeedFence = false;
+ else if (MI.getOpcode() == PPC::ADJCALLSTACKUP)
+ NeedFence = true;
+
++I;
continue;
}
@@ -96,10 +107,15 @@ protected:
break;
}
- // Don't really need to save data to the stack - the clobbered
+ // We create ADJCALLSTACKDOWN and ADJCALLSTACKUP around _tls_get_addr
+ // as a scheduling fence so the call cannot be scheduled before the
+ // mflr in the prologue, which would clobber the address in LR (PR25839).
+ // We don't really need to save data to the stack - the clobbered
// registers are already saved when the SDNode (e.g. PPCaddiTlsgdLAddr)
// gets translated to the pseudo instruction (e.g. ADDItlsgdLADDR).
- BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKDOWN)).addImm(0);
+ if (NeedFence)
+ BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKDOWN)).addImm(0)
+ .addImm(0);
// Expand into two ops built prior to the existing instruction.
MachineInstr *Addi = BuildMI(MBB, I, DL, TII->get(Opc1), GPR3)
@@ -115,7 +131,8 @@ protected:
.addReg(GPR3));
Call->addOperand(MI.getOperand(3));
- BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKUP)).addImm(0).addImm(0);
+ if (NeedFence)
+ BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKUP)).addImm(0).addImm(0);
BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), OutReg)
.addReg(GPR3);
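Taken together, the hunks give the pseudo expansion roughly this shape (sketch of the ordering only; the exact operands come from the code above):
//   ADJCALLSTACKDOWN 0, 0        // only when NeedFence
//   GPR3 = Opc1 ...              // addi: address of the TLS GOT entry
//   GPR3 = Opc2 ...              // call sequence ending in _tls_get_addr
//   ADJCALLSTACKUP 0, 0          // only when NeedFence
//   OutReg = COPY GPR3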
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp
index 7c53a56..17345b6 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp
@@ -61,8 +61,8 @@
//
//===----------------------------------------------------------------------===//
-#include "PPC.h"
#include "MCTargetDesc/PPCPredicates.h"
+#include "PPC.h"
#include "PPCInstrBuilder.h"
#include "PPCInstrInfo.h"
#include "PPCMachineFunctionInfo.h"
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index 91b1d24..fe092cc 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -12,20 +12,32 @@
//===----------------------------------------------------------------------===//
#include "PPCTargetMachine.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "PPC.h"
+#include "PPCSubtarget.h"
#include "PPCTargetObjectFile.h"
#include "PPCTargetTransformInfo.h"
-#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/LegacyPassManager.h"
-#include "llvm/MC/MCStreamer.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Scalar.h"
+#include <cassert>
+#include <memory>
+#include <string>
+
using namespace llvm;
static cl::
@@ -74,12 +86,14 @@ EnableMachineCombinerPass("ppc-machine-combiner",
extern "C" void LLVMInitializePowerPCTarget() {
// Register the targets
- RegisterTargetMachine<PPC32TargetMachine> A(getThePPC32Target());
- RegisterTargetMachine<PPC64TargetMachine> B(getThePPC64Target());
- RegisterTargetMachine<PPC64TargetMachine> C(getThePPC64LETarget());
+ RegisterTargetMachine<PPCTargetMachine> A(getThePPC32Target());
+ RegisterTargetMachine<PPCTargetMachine> B(getThePPC64Target());
+ RegisterTargetMachine<PPCTargetMachine> C(getThePPC64LETarget());
PassRegistry &PR = *PassRegistry::getPassRegistry();
initializePPCBoolRetToIntPass(PR);
+ initializePPCExpandISELPass(PR);
+ initializePPCTLSDynamicCallPass(PR);
}
/// Return the datalayout string of a subtarget.
@@ -149,9 +163,9 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
// If it isn't a Mach-O file then it's going to be a linux ELF
// object file.
if (TT.isOSDarwin())
- return make_unique<TargetLoweringObjectFileMachO>();
+ return llvm::make_unique<TargetLoweringObjectFileMachO>();
- return make_unique<PPC64LinuxTargetObjectFile>();
+ return llvm::make_unique<PPC64LinuxTargetObjectFile>();
}
static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT,
@@ -164,32 +178,34 @@ static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT,
assert(Options.MCOptions.getABIName().empty() &&
"Unknown target-abi option!");
- if (!TT.isMacOSX()) {
- switch (TT.getArch()) {
- case Triple::ppc64le:
- return PPCTargetMachine::PPC_ABI_ELFv2;
- case Triple::ppc64:
- return PPCTargetMachine::PPC_ABI_ELFv1;
- default:
- // Fallthrough.
- ;
- }
+ if (TT.isMacOSX())
+ return PPCTargetMachine::PPC_ABI_UNKNOWN;
+
+ switch (TT.getArch()) {
+ case Triple::ppc64le:
+ return PPCTargetMachine::PPC_ABI_ELFv2;
+ case Triple::ppc64:
+ return PPCTargetMachine::PPC_ABI_ELFv1;
+ default:
+ return PPCTargetMachine::PPC_ABI_UNKNOWN;
}
- return PPCTargetMachine::PPC_ABI_UNKNOWN;
}
static Reloc::Model getEffectiveRelocModel(const Triple &TT,
Optional<Reloc::Model> RM) {
- if (!RM.hasValue()) {
- if (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le) {
- if (!TT.isOSBinFormatMachO() && !TT.isMacOSX())
- return Reloc::PIC_;
- }
- if (TT.isOSDarwin())
- return Reloc::DynamicNoPIC;
- return Reloc::Static;
- }
- return *RM;
+ if (RM.hasValue())
+ return *RM;
+
+ // Darwin defaults to dynamic-no-pic.
+ if (TT.isOSDarwin())
+ return Reloc::DynamicNoPIC;
+
+ // Non-darwin 64-bit platforms are PIC by default.
+ if (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le)
+ return Reloc::PIC_;
+
+ // 32-bit is static by default.
+ return Reloc::Static;
}
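For concreteness, the defaults this now yields when no relocation model is requested, read straight off the branches above:
//   powerpc64le-unknown-linux-gnu -> Reloc::PIC_
//   powerpc64-unknown-linux-gnu   -> Reloc::PIC_
//   powerpc-apple-darwin          -> Reloc::DynamicNoPIC
//   powerpc-unknown-linux-gnu     -> Reloc::Static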
// The FeatureString here is a little subtle. We are modifying the feature
@@ -205,33 +221,11 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, const Triple &TT,
computeFSAdditions(FS, OL, TT), Options,
getEffectiveRelocModel(TT, RM), CM, OL),
TLOF(createTLOF(getTargetTriple())),
- TargetABI(computeTargetABI(TT, Options)),
- Subtarget(TargetTriple, CPU, computeFSAdditions(FS, OL, TT), *this) {
-
+ TargetABI(computeTargetABI(TT, Options)) {
initAsmInfo();
}
-PPCTargetMachine::~PPCTargetMachine() {}
-
-void PPC32TargetMachine::anchor() { }
-
-PPC32TargetMachine::PPC32TargetMachine(const Target &T, const Triple &TT,
- StringRef CPU, StringRef FS,
- const TargetOptions &Options,
- Optional<Reloc::Model> RM,
- CodeModel::Model CM,
- CodeGenOpt::Level OL)
- : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
-
-void PPC64TargetMachine::anchor() { }
-
-PPC64TargetMachine::PPC64TargetMachine(const Target &T, const Triple &TT,
- StringRef CPU, StringRef FS,
- const TargetOptions &Options,
- Optional<Reloc::Model> RM,
- CodeModel::Model CM,
- CodeGenOpt::Level OL)
- : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
+PPCTargetMachine::~PPCTargetMachine() = default;
const PPCSubtarget *
PPCTargetMachine::getSubtargetImpl(const Function &F) const {
@@ -281,10 +275,11 @@ PPCTargetMachine::getSubtargetImpl(const Function &F) const {
//===----------------------------------------------------------------------===//
namespace {
+
/// PPC Code Generator Pass Configuration Options.
class PPCPassConfig : public TargetPassConfig {
public:
- PPCPassConfig(PPCTargetMachine *TM, PassManagerBase &PM)
+ PPCPassConfig(PPCTargetMachine &TM, PassManagerBase &PM)
: TargetPassConfig(TM, PM) {}
PPCTargetMachine &getPPCTargetMachine() const {
@@ -300,16 +295,17 @@ public:
void addPreSched2() override;
void addPreEmitPass() override;
};
-} // namespace
+
+} // end anonymous namespace
TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) {
- return new PPCPassConfig(this, PM);
+ return new PPCPassConfig(*this, PM);
}
void PPCPassConfig::addIRPasses() {
if (TM->getOptLevel() != CodeGenOpt::None)
addPass(createPPCBoolRetToIntPass());
- addPass(createAtomicExpandPass(&getPPCTargetMachine()));
+ addPass(createAtomicExpandPass());
// For the BG/Q (or if explicitly requested), add explicit data prefetch
// intrinsics.
@@ -341,7 +337,7 @@ bool PPCPassConfig::addPreISel() {
addPass(createPPCLoopPreIncPrepPass(getPPCTargetMachine()));
if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
- addPass(createPPCCTRLoops(getPPCTargetMachine()));
+ addPass(createPPCCTRLoops());
return false;
}
@@ -357,7 +353,7 @@ bool PPCPassConfig::addILPOpts() {
bool PPCPassConfig::addInstSelector() {
// Install an instruction selector.
- addPass(createPPCISelDag(getPPCTargetMachine()));
+ addPass(createPPCISelDag(getPPCTargetMachine(), getOptLevel()));
#ifndef NDEBUG
if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
@@ -393,7 +389,7 @@ void PPCPassConfig::addPreRegAlloc() {
// FIXME: We probably don't need to run these for -fPIE.
if (getPPCTargetMachine().isPositionIndependent()) {
// FIXME: LiveVariables should not be necessary here!
- // PPCTLSDYnamicCallPass uses LiveIntervals which previously dependet on
+ // PPCTLSDynamicCallPass uses LiveIntervals which previously depended on
// LiveVariables. This (unnecessary) dependency has been removed now,
// however a stage-2 clang build fails without LiveVariables computed here.
addPass(&LiveVariablesID, false);
@@ -416,6 +412,8 @@ void PPCPassConfig::addPreSched2() {
}
void PPCPassConfig::addPreEmitPass() {
+ addPass(createPPCExpandISELPass());
+
if (getOptLevel() != CodeGenOpt::None)
addPass(createPPCEarlyReturnPass(), false);
// Must run branch selection immediately preceding the asm printer.
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h
index 59b4f1e..be70550 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h
@@ -23,13 +23,12 @@ namespace llvm {
/// Common code between 32-bit and 64-bit PowerPC targets.
///
-class PPCTargetMachine : public LLVMTargetMachine {
+class PPCTargetMachine final : public LLVMTargetMachine {
public:
enum PPCABI { PPC_ABI_UNKNOWN, PPC_ABI_ELFv1, PPC_ABI_ELFv2 };
private:
std::unique_ptr<TargetLoweringObjectFile> TLOF;
PPCABI TargetABI;
- PPCSubtarget Subtarget;
mutable StringMap<std::unique_ptr<PPCSubtarget>> SubtargetMap;
@@ -42,6 +41,9 @@ public:
~PPCTargetMachine() override;
const PPCSubtarget *getSubtargetImpl(const Function &F) const override;
+ // The no-argument getSubtargetImpl, while it exists on some targets, is
+ // deprecated and should not be used.
+ const PPCSubtarget *getSubtargetImpl() const = delete;
// Pass Pipeline Configuration
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
@@ -56,30 +58,11 @@ public:
const Triple &TT = getTargetTriple();
return (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le);
};
-};
-
-/// PowerPC 32-bit target machine.
-///
-class PPC32TargetMachine : public PPCTargetMachine {
- virtual void anchor();
-public:
- PPC32TargetMachine(const Target &T, const Triple &TT, StringRef CPU,
- StringRef FS, const TargetOptions &Options,
- Optional<Reloc::Model> RM, CodeModel::Model CM,
- CodeGenOpt::Level OL);
-};
-/// PowerPC 64-bit target machine.
-///
-class PPC64TargetMachine : public PPCTargetMachine {
- virtual void anchor();
-public:
- PPC64TargetMachine(const Target &T, const Triple &TT, StringRef CPU,
- StringRef FS, const TargetOptions &Options,
- Optional<Reloc::Model> RM, CodeModel::Model CM,
- CodeGenOpt::Level OL);
+ bool isMachineVerifierClean() const override {
+ return false;
+ }
};
-
} // end namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetStreamer.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetStreamer.h
index dbe7617..310fea9 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetStreamer.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetStreamer.h
@@ -1,4 +1,4 @@
-//===-- PPCTargetStreamer.h - PPC Target Streamer --s-----------*- C++ -*--===//
+//===- PPCTargetStreamer.h - PPC Target Streamer ----------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -10,18 +10,26 @@
#ifndef LLVM_LIB_TARGET_POWERPC_PPCTARGETSTREAMER_H
#define LLVM_LIB_TARGET_POWERPC_PPCTARGETSTREAMER_H
+#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCStreamer.h"
namespace llvm {
+
+class MCExpr;
+class MCSymbol;
+class MCSymbolELF;
+
class PPCTargetStreamer : public MCTargetStreamer {
public:
PPCTargetStreamer(MCStreamer &S);
~PPCTargetStreamer() override;
+
virtual void emitTCEntry(const MCSymbol &S) = 0;
virtual void emitMachine(StringRef CPU) = 0;
virtual void emitAbiVersion(int AbiVersion) = 0;
virtual void emitLocalEntry(MCSymbolELF *S, const MCExpr *LocalOffset) = 0;
};
-}
-#endif
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_POWERPC_PPCTARGETSTREAMER_H
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index f94d1ea..6110706 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -189,7 +189,7 @@ int PPCTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
return PPCTTIImpl::getIntImmCost(Imm, Ty);
}
-void PPCTTIImpl::getUnrollingPreferences(Loop *L,
+void PPCTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP) {
if (ST->getDarwinDirective() == PPC::DIR_A2) {
// The A2 is in-order with a deep pipeline, and concatenation unrolling
@@ -201,7 +201,7 @@ void PPCTTIImpl::getUnrollingPreferences(Loop *L,
UP.AllowExpensiveTripCount = true;
}
- BaseT::getUnrollingPreferences(L, UP);
+ BaseT::getUnrollingPreferences(L, SE, UP);
}
bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) {
@@ -215,6 +215,11 @@ bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) {
return LoopHasReductions;
}
+bool PPCTTIImpl::expandMemCmp(Instruction *I, unsigned &MaxLoadSize) {
+ MaxLoadSize = 8;
+ return true;
+}
+
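Returning true opts PPC into inline memcmp expansion, with MaxLoadSize = 8 capping the widest comparison load at a doubleword. Roughly, as an assumed IR-level picture:
//   memcmp(a, b, 8) == 0
// can be expanded to
//   %x  = load i64, i64* %a
//   %y  = load i64, i64* %b
//   %eq = icmp eq i64 %x, %y
// rather than a libcall, using loads no wider than MaxLoadSize bytes.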
bool PPCTTIImpl::enableInterleavedAccessVectorization() {
return true;
}
@@ -225,7 +230,7 @@ unsigned PPCTTIImpl::getNumberOfRegisters(bool Vector) {
return ST->hasVSX() ? 64 : 32;
}
-unsigned PPCTTIImpl::getRegisterBitWidth(bool Vector) {
+unsigned PPCTTIImpl::getRegisterBitWidth(bool Vector) const {
if (Vector) {
if (ST->hasQPX()) return 256;
if (ST->hasAltivec()) return 128;
@@ -239,9 +244,18 @@ unsigned PPCTTIImpl::getRegisterBitWidth(bool Vector) {
}
unsigned PPCTTIImpl::getCacheLineSize() {
- // This is currently only used for the data prefetch pass which is only
- // enabled for BG/Q by default.
- return CacheLineSize;
+ // Check first if the user specified a custom line size.
+ if (CacheLineSize.getNumOccurrences() > 0)
+ return CacheLineSize;
+
+ // On P7, P8 or P9 we have a cache line size of 128.
+ unsigned Directive = ST->getDarwinDirective();
+ if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8 ||
+ Directive == PPC::DIR_PWR9)
+ return 128;
+
+ // On other processors return a default of 64 bytes.
+ return 64;
}
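getNumOccurrences() counts explicit command-line settings, so the cl::init default no longer shadows the CPU-based choice. Condensed sketch; "<flag>" stands in for the real option name declared earlier in this file:
static cl::opt<unsigned> CacheLineSize("<flag>", cl::init(64));
unsigned Size;
if (CacheLineSize.getNumOccurrences() > 0)
  Size = CacheLineSize;       // explicit user override wins
else
  Size = IsPwr789 ? 128 : 64; // IsPwr789: stand-in for the directive check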
unsigned PPCTTIImpl::getPrefetchDistance() {
@@ -302,14 +316,16 @@ int PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
return LT.first;
}
-int PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
+int PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ const Instruction *I) {
assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
return BaseT::getCastInstrCost(Opcode, Dst, Src);
}
-int PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) {
- return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
+int PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ const Instruction *I) {
+ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
}
int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
@@ -352,7 +368,7 @@ int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
}
int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace) {
+ unsigned AddressSpace, const Instruction *I) {
// Legalize the type.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
@@ -401,6 +417,10 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
if (IsVSXType || (ST->hasVSX() && IsAltivecType))
return Cost;
+ // Newer PPC supports unaligned memory access.
+ if (TLI->allowsMisalignedMemoryAccesses(LT.second, 0))
+ return Cost;
+
// PPC in general does not support unaligned loads and stores. They'll need
// to be decomposed based on the alignment factor.
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index 30ee281..99ca639 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -52,7 +52,8 @@ public:
Type *Ty);
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
- void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);
+ void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::UnrollingPreferences &UP);
/// @}
@@ -60,9 +61,10 @@ public:
/// @{
bool enableAggressiveInterleaving(bool LoopHasReductions);
+ bool expandMemCmp(Instruction *I, unsigned &MaxLoadSize);
bool enableInterleavedAccessVectorization();
unsigned getNumberOfRegisters(bool Vector);
- unsigned getRegisterBitWidth(bool Vector);
+ unsigned getRegisterBitWidth(bool Vector) const;
unsigned getCacheLineSize();
unsigned getPrefetchDistance();
unsigned getMaxInterleaveFactor(unsigned VF);
@@ -74,11 +76,13 @@ public:
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
ArrayRef<const Value *> Args = ArrayRef<const Value *>());
int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
- int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
- int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
+ int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ const Instruction *I = nullptr);
+ int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ const Instruction *I = nullptr);
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace);
+ unsigned AddressSpace, const Instruction *I = nullptr);
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
unsigned Factor,
ArrayRef<unsigned> Indices,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp b/contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp
index 3b5d8f0..93fe323 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp
@@ -13,8 +13,8 @@
//
//===----------------------------------------------------------------------===//
-#include "PPC.h"
#include "MCTargetDesc/PPCPredicates.h"
+#include "PPC.h"
#include "PPCHazardRecognizers.h"
#include "PPCInstrBuilder.h"
#include "PPCInstrInfo.h"
@@ -112,7 +112,7 @@ protected:
TII->get(TargetOpcode::SUBREG_TO_REG), NewVReg)
.addImm(1) // add 1, not 0, because there is no implicit clearing
// of the high bits.
- .addOperand(SrcMO)
+ .add(SrcMO)
.addImm(PPC::sub_64);
// The source of the original copy is now the new virtual register.
@@ -132,7 +132,7 @@ protected:
unsigned NewVReg = MRI.createVirtualRegister(DstRC);
BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY),
NewVReg)
- .addOperand(SrcMO);
+ .add(SrcMO);
// Transform the original copy into a subregister extraction copy.
SrcMO.setReg(NewVReg);
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
index f6d20ce..a57484e 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
@@ -12,10 +12,10 @@
//
//===----------------------------------------------------------------------===//
-#include "PPCInstrInfo.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPC.h"
#include "PPCInstrBuilder.h"
+#include "PPCInstrInfo.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/STLExtras.h"
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
index 8197285..7d34efd 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
@@ -42,9 +42,9 @@
//
//===---------------------------------------------------------------------===//
-#include "PPCInstrInfo.h"
#include "PPC.h"
#include "PPCInstrBuilder.h"
+#include "PPCInstrInfo.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/EquivalenceClasses.h"
@@ -195,8 +195,10 @@ public:
return false;
// If we don't have VSX on the subtarget, don't do anything.
+ // Also, on POWER9 the load and store ops preserve element order, so
+ // the swaps are not required.
const PPCSubtarget &STI = MF.getSubtarget<PPCSubtarget>();
- if (!STI.hasVSX())
+ if (!STI.hasVSX() || !STI.needsSwapsForVSXMemOps())
return false;
bool Changed = false;
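Background for the new bailout, from standard little-endian VSX codegen rather than this patch: pre-POWER9 LE vector accesses use lxvd2x/stxvd2x, which load the doublewords in the wrong order and are therefore paired with an xxswapd that this pass tries to eliminate; POWER9's lxv/stxv preserve element order, so there is nothing to remove.
//   pre-P9 LE load:  lxvd2x vs0, 0, r3
//                    xxswapd vs0, vs0    ; the swap this pass removes
//   POWER9 load:     lxv    vs0, 0(r3)   ; element order already correct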
@@ -522,7 +524,7 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
if (RelevantFunction) {
DEBUG(dbgs() << "Swap vector when first built\n\n");
- dumpSwapVector();
+ DEBUG(dumpSwapVector());
}
return RelevantFunction;
@@ -731,7 +733,7 @@ void PPCVSXSwapRemoval::recordUnoptimizableWebs() {
}
DEBUG(dbgs() << "Swap vector after web analysis:\n\n");
- dumpSwapVector();
+ DEBUG(dumpSwapVector());
}
// Walk the swap vector entries looking for swaps fed by permuting loads
@@ -936,9 +938,9 @@ bool PPCVSXSwapRemoval::removeSwaps() {
Changed = true;
MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
MachineBasicBlock *MBB = MI->getParent();
- BuildMI(*MBB, MI, MI->getDebugLoc(),
- TII->get(TargetOpcode::COPY), MI->getOperand(0).getReg())
- .addOperand(MI->getOperand(1));
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY),
+ MI->getOperand(0).getReg())
+ .add(MI->getOperand(1));
DEBUG(dbgs() << format("Replaced %d with copy: ",
SwapVector[EntryIdx].VSEId));
@@ -951,77 +953,78 @@ bool PPCVSXSwapRemoval::removeSwaps() {
return Changed;
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// For debug purposes, dump the contents of the swap vector.
-void PPCVSXSwapRemoval::dumpSwapVector() {
+LLVM_DUMP_METHOD void PPCVSXSwapRemoval::dumpSwapVector() {
for (unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
int ID = SwapVector[EntryIdx].VSEId;
- DEBUG(dbgs() << format("%6d", ID));
- DEBUG(dbgs() << format("%6d", EC->getLeaderValue(ID)));
- DEBUG(dbgs() << format(" BB#%3d", MI->getParent()->getNumber()));
- DEBUG(dbgs() << format(" %14s ",
- TII->getName(MI->getOpcode()).str().c_str()));
+ dbgs() << format("%6d", ID);
+ dbgs() << format("%6d", EC->getLeaderValue(ID));
+ dbgs() << format(" BB#%3d", MI->getParent()->getNumber());
+ dbgs() << format(" %14s ", TII->getName(MI->getOpcode()).str().c_str());
if (SwapVector[EntryIdx].IsLoad)
- DEBUG(dbgs() << "load ");
+ dbgs() << "load ";
if (SwapVector[EntryIdx].IsStore)
- DEBUG(dbgs() << "store ");
+ dbgs() << "store ";
if (SwapVector[EntryIdx].IsSwap)
- DEBUG(dbgs() << "swap ");
+ dbgs() << "swap ";
if (SwapVector[EntryIdx].MentionsPhysVR)
- DEBUG(dbgs() << "physreg ");
+ dbgs() << "physreg ";
if (SwapVector[EntryIdx].MentionsPartialVR)
- DEBUG(dbgs() << "partialreg ");
+ dbgs() << "partialreg ";
if (SwapVector[EntryIdx].IsSwappable) {
- DEBUG(dbgs() << "swappable ");
+ dbgs() << "swappable ";
switch(SwapVector[EntryIdx].SpecialHandling) {
default:
- DEBUG(dbgs() << "special:**unknown**");
+ dbgs() << "special:**unknown**";
break;
case SH_NONE:
break;
case SH_EXTRACT:
- DEBUG(dbgs() << "special:extract ");
+ dbgs() << "special:extract ";
break;
case SH_INSERT:
- DEBUG(dbgs() << "special:insert ");
+ dbgs() << "special:insert ";
break;
case SH_NOSWAP_LD:
- DEBUG(dbgs() << "special:load ");
+ dbgs() << "special:load ";
break;
case SH_NOSWAP_ST:
- DEBUG(dbgs() << "special:store ");
+ dbgs() << "special:store ";
break;
case SH_SPLAT:
- DEBUG(dbgs() << "special:splat ");
+ dbgs() << "special:splat ";
break;
case SH_XXPERMDI:
- DEBUG(dbgs() << "special:xxpermdi ");
+ dbgs() << "special:xxpermdi ";
break;
case SH_COPYWIDEN:
- DEBUG(dbgs() << "special:copywiden ");
+ dbgs() << "special:copywiden ";
break;
}
}
if (SwapVector[EntryIdx].WebRejected)
- DEBUG(dbgs() << "rejected ");
+ dbgs() << "rejected ";
if (SwapVector[EntryIdx].WillRemove)
- DEBUG(dbgs() << "remove ");
+ dbgs() << "remove ";
- DEBUG(dbgs() << "\n");
+ dbgs() << "\n";
// For no-asserts builds.
(void)MI;
(void)ID;
}
- DEBUG(dbgs() << "\n");
+ dbgs() << "\n";
}
+#endif
} // end anonymous namespace