Diffstat (limited to 'contrib/llvm/lib/Target/SystemZ')
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp  475
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp  323
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp  23
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h  3
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp  49
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp  4
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h  7
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp  114
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp  81
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h  38
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/README.txt  65
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZ.h  42
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZ.td  7
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp  136
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.td  4
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp  2
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp  471
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp  108
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h  27
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp  603
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp  1523
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h  139
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td  320
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td  1242
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp  907
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h  150
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td  1226
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp  462
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.cpp  116
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.h  15
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp  17
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h  12
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZOperands.td  153
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZOperators.td  208
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZPatterns.td  99
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZProcessors.td  46
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp  37
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h  16
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td  71
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp  293
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h  80
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp  163
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp  15
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h  22
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp  50
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h  4
46 files changed, 8153 insertions, 1815 deletions
diff --git a/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
index c7725a1..763f40c 100644
--- a/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
@@ -8,6 +8,8 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/SystemZMCTargetDesc.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
@@ -28,21 +30,29 @@ static bool inRange(const MCExpr *Expr, int64_t MinValue, int64_t MaxValue) {
}
namespace {
+enum RegisterKind {
+ GR32Reg,
+ GRH32Reg,
+ GR64Reg,
+ GR128Reg,
+ ADDR32Reg,
+ ADDR64Reg,
+ FP32Reg,
+ FP64Reg,
+ FP128Reg
+};
+
+enum MemoryKind {
+ BDMem,
+ BDXMem,
+ BDLMem
+};
+
class SystemZOperand : public MCParsedAsmOperand {
public:
- enum RegisterKind {
- GR32Reg,
- GR64Reg,
- GR128Reg,
- ADDR32Reg,
- ADDR64Reg,
- FP32Reg,
- FP64Reg,
- FP128Reg
- };
-
private:
enum OperandKind {
+ KindInvalid,
KindToken,
KindReg,
KindAccessReg,
@@ -59,7 +69,15 @@ private:
unsigned Length;
};
- // LLVM register Num, which has kind Kind.
+ // LLVM register Num, which has kind Kind. In some ways it might be
+ // easier for this class to have a register bank (general, floating-point
+ // or access) and a raw register number (0-15). This would postpone the
+ // interpretation of the operand to the add*() methods and avoid the need
+ // for context-dependent parsing. However, we do things the current way
+ // because of the virtual getReg() method, which needs to distinguish
+ // between (say) %r0 used as a single register and %r0 used as a pair.
+ // Context-dependent parsing can also give us slightly better error
+ // messages when invalid pairs like %r1 are used.
struct RegOp {
RegisterKind Kind;
unsigned Num;
@@ -67,12 +85,15 @@ private:
// Base + Disp + Index, where Base and Index are LLVM registers or 0.
// RegKind says what type the registers have (ADDR32Reg or ADDR64Reg).
+ // Length is the operand length for D(L,B)-style operands, otherwise
+ // it is null.
struct MemOp {
unsigned Base : 8;
unsigned Index : 8;
unsigned RegKind : 8;
unsigned Unused : 8;
const MCExpr *Disp;
+ const MCExpr *Length;
};
union {
@@ -99,6 +120,9 @@ private:
public:
// Create particular kinds of operand.
+ static SystemZOperand *createInvalid(SMLoc StartLoc, SMLoc EndLoc) {
+ return new SystemZOperand(KindInvalid, StartLoc, EndLoc);
+ }
static SystemZOperand *createToken(StringRef Str, SMLoc Loc) {
SystemZOperand *Op = new SystemZOperand(KindToken, Loc, Loc);
Op->Token.Data = Str.data();
@@ -126,12 +150,14 @@ public:
}
static SystemZOperand *createMem(RegisterKind RegKind, unsigned Base,
const MCExpr *Disp, unsigned Index,
- SMLoc StartLoc, SMLoc EndLoc) {
+ const MCExpr *Length, SMLoc StartLoc,
+ SMLoc EndLoc) {
SystemZOperand *Op = new SystemZOperand(KindMem, StartLoc, EndLoc);
Op->Mem.RegKind = RegKind;
Op->Mem.Base = Base;
Op->Mem.Index = Index;
Op->Mem.Disp = Disp;
+ Op->Mem.Length = Length;
return Op;
}
@@ -178,16 +204,20 @@ public:
virtual bool isMem() const LLVM_OVERRIDE {
return Kind == KindMem;
}
- bool isMem(RegisterKind RegKind, bool HasIndex) const {
+ bool isMem(RegisterKind RegKind, MemoryKind MemKind) const {
return (Kind == KindMem &&
Mem.RegKind == RegKind &&
- (HasIndex || !Mem.Index));
+ (MemKind == BDXMem || !Mem.Index) &&
+ (MemKind == BDLMem) == (Mem.Length != 0));
+ }
+ bool isMemDisp12(RegisterKind RegKind, MemoryKind MemKind) const {
+ return isMem(RegKind, MemKind) && inRange(Mem.Disp, 0, 0xfff);
}
- bool isMemDisp12(RegisterKind RegKind, bool HasIndex) const {
- return isMem(RegKind, HasIndex) && inRange(Mem.Disp, 0, 0xfff);
+ bool isMemDisp20(RegisterKind RegKind, MemoryKind MemKind) const {
+ return isMem(RegKind, MemKind) && inRange(Mem.Disp, -524288, 524287);
}
- bool isMemDisp20(RegisterKind RegKind, bool HasIndex) const {
- return isMem(RegKind, HasIndex) && inRange(Mem.Disp, -524288, 524287);
+ bool isMemDisp12Len8(RegisterKind RegKind) const {
+ return isMemDisp12(RegKind, BDLMem) && inRange(Mem.Length, 1, 0x100);
}
// Override MCParsedAsmOperand.
@@ -223,9 +253,18 @@ public:
addExpr(Inst, Mem.Disp);
Inst.addOperand(MCOperand::CreateReg(Mem.Index));
}
+ void addBDLAddrOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 3 && "Invalid number of operands");
+ assert(Kind == KindMem && "Invalid operand type");
+ Inst.addOperand(MCOperand::CreateReg(Mem.Base));
+ addExpr(Inst, Mem.Disp);
+ addExpr(Inst, Mem.Length);
+ }
// Used by the TableGen code to check for particular operand types.
bool isGR32() const { return isReg(GR32Reg); }
+ bool isGRH32() const { return isReg(GRH32Reg); }
+ bool isGRX32() const { return false; }
bool isGR64() const { return isReg(GR64Reg); }
bool isGR128() const { return isReg(GR128Reg); }
bool isADDR32() const { return isReg(ADDR32Reg); }
@@ -234,12 +273,13 @@ public:
bool isFP32() const { return isReg(FP32Reg); }
bool isFP64() const { return isReg(FP64Reg); }
bool isFP128() const { return isReg(FP128Reg); }
- bool isBDAddr32Disp12() const { return isMemDisp12(ADDR32Reg, false); }
- bool isBDAddr32Disp20() const { return isMemDisp20(ADDR32Reg, false); }
- bool isBDAddr64Disp12() const { return isMemDisp12(ADDR64Reg, false); }
- bool isBDAddr64Disp20() const { return isMemDisp20(ADDR64Reg, false); }
- bool isBDXAddr64Disp12() const { return isMemDisp12(ADDR64Reg, true); }
- bool isBDXAddr64Disp20() const { return isMemDisp20(ADDR64Reg, true); }
+ bool isBDAddr32Disp12() const { return isMemDisp12(ADDR32Reg, BDMem); }
+ bool isBDAddr32Disp20() const { return isMemDisp20(ADDR32Reg, BDMem); }
+ bool isBDAddr64Disp12() const { return isMemDisp12(ADDR64Reg, BDMem); }
+ bool isBDAddr64Disp20() const { return isMemDisp20(ADDR64Reg, BDMem); }
+ bool isBDXAddr64Disp12() const { return isMemDisp12(ADDR64Reg, BDXMem); }
+ bool isBDXAddr64Disp20() const { return isMemDisp20(ADDR64Reg, BDXMem); }
+ bool isBDLAddr64Disp12Len8() const { return isMemDisp12Len8(ADDR64Reg); }
bool isU4Imm() const { return isImm(0, 15); }
bool isU6Imm() const { return isImm(0, 63); }
bool isU8Imm() const { return isImm(0, 255); }
@@ -250,46 +290,6 @@ public:
bool isS32Imm() const { return isImm(-(1LL << 31), (1LL << 31) - 1); }
};
-// Maps of asm register numbers to LLVM register numbers, with 0 indicating
-// an invalid register. We don't use register class directly because that
-// specifies the allocation order.
-static const unsigned GR32Regs[] = {
- SystemZ::R0W, SystemZ::R1W, SystemZ::R2W, SystemZ::R3W,
- SystemZ::R4W, SystemZ::R5W, SystemZ::R6W, SystemZ::R7W,
- SystemZ::R8W, SystemZ::R9W, SystemZ::R10W, SystemZ::R11W,
- SystemZ::R12W, SystemZ::R13W, SystemZ::R14W, SystemZ::R15W
-};
-static const unsigned GR64Regs[] = {
- SystemZ::R0D, SystemZ::R1D, SystemZ::R2D, SystemZ::R3D,
- SystemZ::R4D, SystemZ::R5D, SystemZ::R6D, SystemZ::R7D,
- SystemZ::R8D, SystemZ::R9D, SystemZ::R10D, SystemZ::R11D,
- SystemZ::R12D, SystemZ::R13D, SystemZ::R14D, SystemZ::R15D
-};
-static const unsigned GR128Regs[] = {
- SystemZ::R0Q, 0, SystemZ::R2Q, 0,
- SystemZ::R4Q, 0, SystemZ::R6Q, 0,
- SystemZ::R8Q, 0, SystemZ::R10Q, 0,
- SystemZ::R12Q, 0, SystemZ::R14Q, 0
-};
-static const unsigned FP32Regs[] = {
- SystemZ::F0S, SystemZ::F1S, SystemZ::F2S, SystemZ::F3S,
- SystemZ::F4S, SystemZ::F5S, SystemZ::F6S, SystemZ::F7S,
- SystemZ::F8S, SystemZ::F9S, SystemZ::F10S, SystemZ::F11S,
- SystemZ::F12S, SystemZ::F13S, SystemZ::F14S, SystemZ::F15S
-};
-static const unsigned FP64Regs[] = {
- SystemZ::F0D, SystemZ::F1D, SystemZ::F2D, SystemZ::F3D,
- SystemZ::F4D, SystemZ::F5D, SystemZ::F6D, SystemZ::F7D,
- SystemZ::F8D, SystemZ::F9D, SystemZ::F10D, SystemZ::F11D,
- SystemZ::F12D, SystemZ::F13D, SystemZ::F14D, SystemZ::F15D
-};
-static const unsigned FP128Regs[] = {
- SystemZ::F0Q, SystemZ::F1Q, 0, 0,
- SystemZ::F4Q, SystemZ::F5Q, 0, 0,
- SystemZ::F8Q, SystemZ::F9Q, 0, 0,
- SystemZ::F12Q, SystemZ::F13Q, 0, 0
-};
-
class SystemZAsmParser : public MCTargetAsmParser {
#define GET_ASSEMBLER_HEADER
#include "SystemZGenAsmMatcher.inc"
@@ -297,35 +297,42 @@ class SystemZAsmParser : public MCTargetAsmParser {
private:
MCSubtargetInfo &STI;
MCAsmParser &Parser;
+ enum RegisterGroup {
+ RegGR,
+ RegFP,
+ RegAccess
+ };
struct Register {
- char Prefix;
- unsigned Number;
+ RegisterGroup Group;
+ unsigned Num;
SMLoc StartLoc, EndLoc;
};
bool parseRegister(Register &Reg);
- OperandMatchResultTy
- parseRegister(Register &Reg, char Prefix, const unsigned *Regs,
- bool IsAddress = false);
+ bool parseRegister(Register &Reg, RegisterGroup Group, const unsigned *Regs,
+ bool IsAddress = false);
OperandMatchResultTy
parseRegister(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- char Prefix, const unsigned *Regs,
- SystemZOperand::RegisterKind Kind,
- bool IsAddress = false);
+ RegisterGroup Group, const unsigned *Regs, RegisterKind Kind);
+
+ bool parseAddress(unsigned &Base, const MCExpr *&Disp,
+ unsigned &Index, const MCExpr *&Length,
+ const unsigned *Regs, RegisterKind RegKind);
OperandMatchResultTy
parseAddress(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- const unsigned *Regs, SystemZOperand::RegisterKind RegKind,
- bool HasIndex);
+ const unsigned *Regs, RegisterKind RegKind,
+ MemoryKind MemKind);
bool parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
StringRef Mnemonic);
public:
- SystemZAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser)
- : MCTargetAsmParser(), STI(sti), Parser(parser) {
+ SystemZAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser,
+ const MCInstrInfo &MII)
+ : MCTargetAsmParser(), STI(sti), Parser(parser) {
MCAsmParserExtension::Initialize(Parser);
// Initialize the set of available features.
@@ -349,25 +356,31 @@ public:
// Used by the TableGen code to parse particular operand types.
OperandMatchResultTy
parseGR32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- return parseRegister(Operands, 'r', GR32Regs, SystemZOperand::GR32Reg);
+ return parseRegister(Operands, RegGR, SystemZMC::GR32Regs, GR32Reg);
+ }
+ OperandMatchResultTy
+ parseGRH32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ return parseRegister(Operands, RegGR, SystemZMC::GRH32Regs, GRH32Reg);
+ }
+ OperandMatchResultTy
+ parseGRX32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ llvm_unreachable("GRX32 should only be used for pseudo instructions");
}
OperandMatchResultTy
parseGR64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- return parseRegister(Operands, 'r', GR64Regs, SystemZOperand::GR64Reg);
+ return parseRegister(Operands, RegGR, SystemZMC::GR64Regs, GR64Reg);
}
OperandMatchResultTy
parseGR128(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- return parseRegister(Operands, 'r', GR128Regs, SystemZOperand::GR128Reg);
+ return parseRegister(Operands, RegGR, SystemZMC::GR128Regs, GR128Reg);
}
OperandMatchResultTy
parseADDR32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- return parseRegister(Operands, 'r', GR32Regs, SystemZOperand::ADDR32Reg,
- true);
+ return parseRegister(Operands, RegGR, SystemZMC::GR32Regs, ADDR32Reg);
}
OperandMatchResultTy
parseADDR64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- return parseRegister(Operands, 'r', GR64Regs, SystemZOperand::ADDR64Reg,
- true);
+ return parseRegister(Operands, RegGR, SystemZMC::GR64Regs, ADDR64Reg);
}
OperandMatchResultTy
parseADDR128(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
@@ -375,30 +388,45 @@ public:
}
OperandMatchResultTy
parseFP32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- return parseRegister(Operands, 'f', FP32Regs, SystemZOperand::FP32Reg);
+ return parseRegister(Operands, RegFP, SystemZMC::FP32Regs, FP32Reg);
}
OperandMatchResultTy
parseFP64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- return parseRegister(Operands, 'f', FP64Regs, SystemZOperand::FP64Reg);
+ return parseRegister(Operands, RegFP, SystemZMC::FP64Regs, FP64Reg);
}
OperandMatchResultTy
parseFP128(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- return parseRegister(Operands, 'f', FP128Regs, SystemZOperand::FP128Reg);
+ return parseRegister(Operands, RegFP, SystemZMC::FP128Regs, FP128Reg);
}
OperandMatchResultTy
parseBDAddr32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- return parseAddress(Operands, GR32Regs, SystemZOperand::ADDR32Reg, false);
+ return parseAddress(Operands, SystemZMC::GR32Regs, ADDR32Reg, BDMem);
}
OperandMatchResultTy
parseBDAddr64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- return parseAddress(Operands, GR64Regs, SystemZOperand::ADDR64Reg, false);
+ return parseAddress(Operands, SystemZMC::GR64Regs, ADDR64Reg, BDMem);
}
OperandMatchResultTy
parseBDXAddr64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- return parseAddress(Operands, GR64Regs, SystemZOperand::ADDR64Reg, true);
+ return parseAddress(Operands, SystemZMC::GR64Regs, ADDR64Reg, BDXMem);
+ }
+ OperandMatchResultTy
+ parseBDLAddr64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ return parseAddress(Operands, SystemZMC::GR64Regs, ADDR64Reg, BDLMem);
}
OperandMatchResultTy
parseAccessReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ OperandMatchResultTy
+ parsePCRel(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ int64_t MinVal, int64_t MaxVal);
+ OperandMatchResultTy
+ parsePCRel16(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ return parsePCRel(Operands, -(1LL << 16), (1LL << 16) - 1);
+ }
+ OperandMatchResultTy
+ parsePCRel32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ return parsePCRel(Operands, -(1LL << 32), (1LL << 32) - 1);
+ }
};
}
@@ -417,122 +445,160 @@ bool SystemZAsmParser::parseRegister(Register &Reg) {
// Eat the % prefix.
if (Parser.getTok().isNot(AsmToken::Percent))
- return true;
+ return Error(Parser.getTok().getLoc(), "register expected");
Parser.Lex();
// Expect a register name.
if (Parser.getTok().isNot(AsmToken::Identifier))
- return true;
+ return Error(Reg.StartLoc, "invalid register");
- // Check the prefix.
+ // Check that there's a prefix.
StringRef Name = Parser.getTok().getString();
if (Name.size() < 2)
- return true;
- Reg.Prefix = Name[0];
+ return Error(Reg.StartLoc, "invalid register");
+ char Prefix = Name[0];
// Treat the rest of the register name as a register number.
- if (Name.substr(1).getAsInteger(10, Reg.Number))
- return true;
+ if (Name.substr(1).getAsInteger(10, Reg.Num))
+ return Error(Reg.StartLoc, "invalid register");
+
+ // Look for valid combinations of prefix and number.
+ if (Prefix == 'r' && Reg.Num < 16)
+ Reg.Group = RegGR;
+ else if (Prefix == 'f' && Reg.Num < 16)
+ Reg.Group = RegFP;
+ else if (Prefix == 'a' && Reg.Num < 16)
+ Reg.Group = RegAccess;
+ else
+ return Error(Reg.StartLoc, "invalid register");
Reg.EndLoc = Parser.getTok().getLoc();
Parser.Lex();
return false;
}
-// Parse a register with prefix Prefix and convert it to LLVM numbering.
-// Regs maps asm register numbers to LLVM register numbers, with zero
-// entries indicating an invalid register. IsAddress says whether the
-// register appears in an address context.
-SystemZAsmParser::OperandMatchResultTy
-SystemZAsmParser::parseRegister(Register &Reg, char Prefix,
- const unsigned *Regs, bool IsAddress) {
+// Parse a register of group Group. If Regs is nonnull, use it to map
+// the raw register number to LLVM numbering, with zero entries indicating
+// an invalid register. IsAddress says whether the register appears in an
+// address context.
+bool SystemZAsmParser::parseRegister(Register &Reg, RegisterGroup Group,
+ const unsigned *Regs, bool IsAddress) {
if (parseRegister(Reg))
- return MatchOperand_NoMatch;
- if (Reg.Prefix != Prefix || Reg.Number > 15 || Regs[Reg.Number] == 0) {
- Error(Reg.StartLoc, "invalid register");
- return MatchOperand_ParseFail;
- }
- if (Reg.Number == 0 && IsAddress) {
- Error(Reg.StartLoc, "%r0 used in an address");
- return MatchOperand_ParseFail;
- }
- Reg.Number = Regs[Reg.Number];
- return MatchOperand_Success;
+ return true;
+ if (Reg.Group != Group)
+ return Error(Reg.StartLoc, "invalid operand for instruction");
+ if (Regs && Regs[Reg.Num] == 0)
+ return Error(Reg.StartLoc, "invalid register pair");
+ if (Reg.Num == 0 && IsAddress)
+ return Error(Reg.StartLoc, "%r0 used in an address");
+ if (Regs)
+ Reg.Num = Regs[Reg.Num];
+ return false;
}
-// Parse a register and add it to Operands. Prefix is 'r' for GPRs,
-// 'f' for FPRs, etc. Regs maps asm register numbers to LLVM register numbers,
-// with zero entries indicating an invalid register. Kind is the type of
-// register represented by Regs and IsAddress says whether the register is
-// being parsed in an address context, meaning that %r0 evaluates as 0.
+// Parse a register and add it to Operands. The other arguments are as above.
SystemZAsmParser::OperandMatchResultTy
SystemZAsmParser::parseRegister(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- char Prefix, const unsigned *Regs,
- SystemZOperand::RegisterKind Kind,
- bool IsAddress) {
+ RegisterGroup Group, const unsigned *Regs,
+ RegisterKind Kind) {
+ if (Parser.getTok().isNot(AsmToken::Percent))
+ return MatchOperand_NoMatch;
+
Register Reg;
- OperandMatchResultTy Result = parseRegister(Reg, Prefix, Regs, IsAddress);
- if (Result == MatchOperand_Success)
- Operands.push_back(SystemZOperand::createReg(Kind, Reg.Number,
- Reg.StartLoc, Reg.EndLoc));
- return Result;
-}
+ bool IsAddress = (Kind == ADDR32Reg || Kind == ADDR64Reg);
+ if (parseRegister(Reg, Group, Regs, IsAddress))
+ return MatchOperand_ParseFail;
-// Parse a memory operand and add it to Operands. Regs maps asm register
-// numbers to LLVM address registers and RegKind says what kind of address
-// register we're using (ADDR32Reg or ADDR64Reg). HasIndex says whether
-// the address allows index registers.
-SystemZAsmParser::OperandMatchResultTy
-SystemZAsmParser::parseAddress(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- const unsigned *Regs,
- SystemZOperand::RegisterKind RegKind,
- bool HasIndex) {
- SMLoc StartLoc = Parser.getTok().getLoc();
+ Operands.push_back(SystemZOperand::createReg(Kind, Reg.Num,
+ Reg.StartLoc, Reg.EndLoc));
+ return MatchOperand_Success;
+}
+// Parse a memory operand into Base, Disp, Index and Length.
+// Regs maps asm register numbers to LLVM register numbers and RegKind
+// says what kind of address register we're using (ADDR32Reg or ADDR64Reg).
+bool SystemZAsmParser::parseAddress(unsigned &Base, const MCExpr *&Disp,
+ unsigned &Index, const MCExpr *&Length,
+ const unsigned *Regs,
+ RegisterKind RegKind) {
// Parse the displacement, which must always be present.
- const MCExpr *Disp;
if (getParser().parseExpression(Disp))
- return MatchOperand_NoMatch;
+ return true;
// Parse the optional base and index.
- unsigned Index = 0;
- unsigned Base = 0;
+ Index = 0;
+ Base = 0;
+ Length = 0;
if (getLexer().is(AsmToken::LParen)) {
Parser.Lex();
- // Parse the first register.
- Register Reg;
- OperandMatchResultTy Result = parseRegister(Reg, 'r', GR64Regs, true);
- if (Result != MatchOperand_Success)
- return Result;
+ if (getLexer().is(AsmToken::Percent)) {
+ // Parse the first register and decide whether it's a base or an index.
+ Register Reg;
+ if (parseRegister(Reg, RegGR, Regs, RegKind))
+ return true;
+ if (getLexer().is(AsmToken::Comma))
+ Index = Reg.Num;
+ else
+ Base = Reg.Num;
+ } else {
+ // Parse the length.
+ if (getParser().parseExpression(Length))
+ return true;
+ }
- // Check whether there's a second register. If so, the one that we
- // just parsed was the index.
+ // Check whether there's a second register. It's the base if so.
if (getLexer().is(AsmToken::Comma)) {
Parser.Lex();
-
- if (!HasIndex) {
- Error(Reg.StartLoc, "invalid use of indexed addressing");
- return MatchOperand_ParseFail;
- }
-
- Index = Reg.Number;
- Result = parseRegister(Reg, 'r', GR64Regs, true);
- if (Result != MatchOperand_Success)
- return Result;
+ Register Reg;
+ if (parseRegister(Reg, RegGR, Regs, RegKind))
+ return true;
+ Base = Reg.Num;
}
- Base = Reg.Number;
// Consume the closing bracket.
if (getLexer().isNot(AsmToken::RParen))
- return MatchOperand_NoMatch;
+ return Error(Parser.getTok().getLoc(), "unexpected token in address");
Parser.Lex();
}
+ return false;
+}
+
+// Parse a memory operand and add it to Operands. The other arguments
+// are as above.
+SystemZAsmParser::OperandMatchResultTy
+SystemZAsmParser::parseAddress(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ const unsigned *Regs, RegisterKind RegKind,
+ MemoryKind MemKind) {
+ SMLoc StartLoc = Parser.getTok().getLoc();
+ unsigned Base, Index;
+ const MCExpr *Disp;
+ const MCExpr *Length;
+ if (parseAddress(Base, Disp, Index, Length, Regs, RegKind))
+ return MatchOperand_ParseFail;
+
+  if (Index && MemKind != BDXMem) {
+    Error(StartLoc, "invalid use of indexed addressing");
+    return MatchOperand_ParseFail;
+  }
+
+  if (Length && MemKind != BDLMem) {
+    Error(StartLoc, "invalid use of length addressing");
+    return MatchOperand_ParseFail;
+  }
+
+  if (!Length && MemKind == BDLMem) {
+    Error(StartLoc, "missing length in address");
+    return MatchOperand_ParseFail;
+  }
SMLoc EndLoc =
SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
Operands.push_back(SystemZOperand::createMem(RegKind, Base, Disp, Index,
- StartLoc, EndLoc));
+ Length, StartLoc, EndLoc));
return MatchOperand_Success;
}
@@ -544,13 +610,14 @@ bool SystemZAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
SMLoc &EndLoc) {
Register Reg;
if (parseRegister(Reg))
- return Error(Reg.StartLoc, "register expected");
- if (Reg.Prefix == 'r' && Reg.Number < 16)
- RegNo = GR64Regs[Reg.Number];
- else if (Reg.Prefix == 'f' && Reg.Number < 16)
- RegNo = FP64Regs[Reg.Number];
+ return true;
+ if (Reg.Group == RegGR)
+ RegNo = SystemZMC::GR64Regs[Reg.Num];
+ else if (Reg.Group == RegFP)
+ RegNo = SystemZMC::FP64Regs[Reg.Num];
else
- return Error(Reg.StartLoc, "invalid register");
+ // FIXME: Access registers aren't modelled as LLVM registers yet.
+ return Error(Reg.StartLoc, "invalid operand for instruction");
StartLoc = Reg.StartLoc;
EndLoc = Reg.EndLoc;
return false;
@@ -604,15 +671,33 @@ parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
if (ResTy == MatchOperand_ParseFail)
return true;
- // The only other type of operand is an immediate.
- const MCExpr *Expr;
+ // Check for a register. All real register operands should have used
+ // a context-dependent parse routine, which gives the required register
+ // class. The code is here to mop up other cases, like those where
+ // the instruction isn't recognized.
+ if (Parser.getTok().is(AsmToken::Percent)) {
+ Register Reg;
+ if (parseRegister(Reg))
+ return true;
+ Operands.push_back(SystemZOperand::createInvalid(Reg.StartLoc, Reg.EndLoc));
+ return false;
+ }
+
+ // The only other type of operand is an immediate or address. As above,
+ // real address operands should have used a context-dependent parse routine,
+ // so we treat any plain expression as an immediate.
SMLoc StartLoc = Parser.getTok().getLoc();
- if (getParser().parseExpression(Expr))
+ unsigned Base, Index;
+ const MCExpr *Expr, *Length;
+ if (parseAddress(Base, Expr, Index, Length, SystemZMC::GR64Regs, ADDR64Reg))
return true;
SMLoc EndLoc =
SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
- Operands.push_back(SystemZOperand::createImm(Expr, StartLoc, EndLoc));
+ if (Base || Index || Length)
+ Operands.push_back(SystemZOperand::createInvalid(StartLoc, EndLoc));
+ else
+ Operands.push_back(SystemZOperand::createImm(Expr, StartLoc, EndLoc));
return false;
}
@@ -671,15 +756,47 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
SystemZAsmParser::OperandMatchResultTy SystemZAsmParser::
parseAccessReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- Register Reg;
- if (parseRegister(Reg))
+ if (Parser.getTok().isNot(AsmToken::Percent))
return MatchOperand_NoMatch;
- if (Reg.Prefix != 'a' || Reg.Number > 15) {
- Error(Reg.StartLoc, "invalid register");
+
+ Register Reg;
+ if (parseRegister(Reg, RegAccess, 0))
return MatchOperand_ParseFail;
+
+ Operands.push_back(SystemZOperand::createAccessReg(Reg.Num,
+ Reg.StartLoc,
+ Reg.EndLoc));
+ return MatchOperand_Success;
+}
+
+SystemZAsmParser::OperandMatchResultTy SystemZAsmParser::
+parsePCRel(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ int64_t MinVal, int64_t MaxVal) {
+ MCContext &Ctx = getContext();
+ MCStreamer &Out = getStreamer();
+ const MCExpr *Expr;
+ SMLoc StartLoc = Parser.getTok().getLoc();
+ if (getParser().parseExpression(Expr))
+ return MatchOperand_NoMatch;
+
+ // For consistency with the GNU assembler, treat immediates as offsets
+ // from ".".
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) {
+ int64_t Value = CE->getValue();
+ if ((Value & 1) || Value < MinVal || Value > MaxVal) {
+ Error(StartLoc, "offset out of range");
+ return MatchOperand_ParseFail;
+ }
+ MCSymbol *Sym = Ctx.CreateTempSymbol();
+ Out.EmitLabel(Sym);
+ const MCExpr *Base = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None,
+ Ctx);
+ Expr = Value == 0 ? Base : MCBinaryExpr::CreateAdd(Base, Expr, Ctx);
}
- Operands.push_back(SystemZOperand::createAccessReg(Reg.Number,
- Reg.StartLoc, Reg.EndLoc));
+
+ SMLoc EndLoc =
+ SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ Operands.push_back(SystemZOperand::createImm(Expr, StartLoc, EndLoc));
return MatchOperand_Success;
}
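
As an aside, the three MemoryKind values introduced in this patch correspond
to the following assembler syntaxes (illustrative examples, not taken from
the change itself):

    l    %r1, 100(%r5)          # BDMem:  Disp=100, Base=%r5
    l    %r1, 100(%r3,%r5)      # BDXMem: Disp=100, Index=%r3, Base=%r5
    mvc  100(8,%r5), 200(%r6)   # BDLMem: Disp=100, Length=8, Base=%r5

After the opening "(", parseAddress() treats a "%" token as a register (an
index if a comma follows, a base otherwise) and anything else as a length
expression, which is how D(L,B) operands are distinguished from D(X,B) ones.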
diff --git a/contrib/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/contrib/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
new file mode 100644
index 0000000..fc3c38d
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
@@ -0,0 +1,323 @@
+//===-- SystemZDisassembler.cpp - Disassembler for SystemZ ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZ.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCFixedLenDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+typedef MCDisassembler::DecodeStatus DecodeStatus;
+
+namespace {
+class SystemZDisassembler : public MCDisassembler {
+public:
+ SystemZDisassembler(const MCSubtargetInfo &STI)
+ : MCDisassembler(STI) {}
+ virtual ~SystemZDisassembler() {}
+
+ // Override MCDisassembler.
+ virtual DecodeStatus getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream,
+ raw_ostream &cStream) const LLVM_OVERRIDE;
+};
+} // end anonymous namespace
+
+static MCDisassembler *createSystemZDisassembler(const Target &T,
+ const MCSubtargetInfo &STI) {
+ return new SystemZDisassembler(STI);
+}
+
+extern "C" void LLVMInitializeSystemZDisassembler() {
+ // Register the disassembler.
+ TargetRegistry::RegisterMCDisassembler(TheSystemZTarget,
+ createSystemZDisassembler);
+}
+
+static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo,
+ const unsigned *Regs) {
+ assert(RegNo < 16 && "Invalid register");
+ RegNo = Regs[RegNo];
+ if (RegNo == 0)
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateReg(RegNo));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeGR32BitRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, SystemZMC::GR32Regs);
+}
+
+static DecodeStatus DecodeGRH32BitRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, SystemZMC::GRH32Regs);
+}
+
+static DecodeStatus DecodeGR64BitRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, SystemZMC::GR64Regs);
+}
+
+static DecodeStatus DecodeGR128BitRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, SystemZMC::GR128Regs);
+}
+
+static DecodeStatus DecodeADDR64BitRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, SystemZMC::GR64Regs);
+}
+
+static DecodeStatus DecodeFP32BitRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, SystemZMC::FP32Regs);
+}
+
+static DecodeStatus DecodeFP64BitRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, SystemZMC::FP64Regs);
+}
+
+static DecodeStatus DecodeFP128BitRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, SystemZMC::FP128Regs);
+}
+
+template<unsigned N>
+static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm) {
+ assert(isUInt<N>(Imm) && "Invalid immediate");
+ Inst.addOperand(MCOperand::CreateImm(Imm));
+ return MCDisassembler::Success;
+}
+
+template<unsigned N>
+static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm) {
+ assert(isUInt<N>(Imm) && "Invalid immediate");
+ Inst.addOperand(MCOperand::CreateImm(SignExtend64<N>(Imm)));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus decodeAccessRegOperand(MCInst &Inst, uint64_t Imm,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeUImmOperand<4>(Inst, Imm);
+}
+
+static DecodeStatus decodeU4ImmOperand(MCInst &Inst, uint64_t Imm,
+ uint64_t Address, const void *Decoder) {
+ return decodeUImmOperand<4>(Inst, Imm);
+}
+
+static DecodeStatus decodeU6ImmOperand(MCInst &Inst, uint64_t Imm,
+ uint64_t Address, const void *Decoder) {
+ return decodeUImmOperand<6>(Inst, Imm);
+}
+
+static DecodeStatus decodeU8ImmOperand(MCInst &Inst, uint64_t Imm,
+ uint64_t Address, const void *Decoder) {
+ return decodeUImmOperand<8>(Inst, Imm);
+}
+
+static DecodeStatus decodeU16ImmOperand(MCInst &Inst, uint64_t Imm,
+ uint64_t Address, const void *Decoder) {
+ return decodeUImmOperand<16>(Inst, Imm);
+}
+
+static DecodeStatus decodeU32ImmOperand(MCInst &Inst, uint64_t Imm,
+ uint64_t Address, const void *Decoder) {
+ return decodeUImmOperand<32>(Inst, Imm);
+}
+
+static DecodeStatus decodeS8ImmOperand(MCInst &Inst, uint64_t Imm,
+ uint64_t Address, const void *Decoder) {
+ return decodeSImmOperand<8>(Inst, Imm);
+}
+
+static DecodeStatus decodeS16ImmOperand(MCInst &Inst, uint64_t Imm,
+ uint64_t Address, const void *Decoder) {
+ return decodeSImmOperand<16>(Inst, Imm);
+}
+
+static DecodeStatus decodeS32ImmOperand(MCInst &Inst, uint64_t Imm,
+ uint64_t Address, const void *Decoder) {
+ return decodeSImmOperand<32>(Inst, Imm);
+}
+
+template<unsigned N>
+static DecodeStatus decodePCDBLOperand(MCInst &Inst, uint64_t Imm,
+ uint64_t Address) {
+ assert(isUInt<N>(Imm) && "Invalid PC-relative offset");
+ Inst.addOperand(MCOperand::CreateImm(SignExtend64<N>(Imm) * 2 + Address));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus decodePC16DBLOperand(MCInst &Inst, uint64_t Imm,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodePCDBLOperand<16>(Inst, Imm, Address);
+}
+
+static DecodeStatus decodePC32DBLOperand(MCInst &Inst, uint64_t Imm,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodePCDBLOperand<32>(Inst, Imm, Address);
+}
+
+static DecodeStatus decodeBDAddr12Operand(MCInst &Inst, uint64_t Field,
+ const unsigned *Regs) {
+ uint64_t Base = Field >> 12;
+ uint64_t Disp = Field & 0xfff;
+ assert(Base < 16 && "Invalid BDAddr12");
+ Inst.addOperand(MCOperand::CreateReg(Base == 0 ? 0 : Regs[Base]));
+ Inst.addOperand(MCOperand::CreateImm(Disp));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus decodeBDAddr20Operand(MCInst &Inst, uint64_t Field,
+ const unsigned *Regs) {
+ uint64_t Base = Field >> 20;
+ uint64_t Disp = ((Field << 12) & 0xff000) | ((Field >> 8) & 0xfff);
+ assert(Base < 16 && "Invalid BDAddr20");
+ Inst.addOperand(MCOperand::CreateReg(Base == 0 ? 0 : Regs[Base]));
+ Inst.addOperand(MCOperand::CreateImm(SignExtend64<20>(Disp)));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus decodeBDXAddr12Operand(MCInst &Inst, uint64_t Field,
+ const unsigned *Regs) {
+ uint64_t Index = Field >> 16;
+ uint64_t Base = (Field >> 12) & 0xf;
+ uint64_t Disp = Field & 0xfff;
+ assert(Index < 16 && "Invalid BDXAddr12");
+ Inst.addOperand(MCOperand::CreateReg(Base == 0 ? 0 : Regs[Base]));
+ Inst.addOperand(MCOperand::CreateImm(Disp));
+ Inst.addOperand(MCOperand::CreateReg(Index == 0 ? 0 : Regs[Index]));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus decodeBDXAddr20Operand(MCInst &Inst, uint64_t Field,
+ const unsigned *Regs) {
+ uint64_t Index = Field >> 24;
+ uint64_t Base = (Field >> 20) & 0xf;
+ uint64_t Disp = ((Field & 0xfff00) >> 8) | ((Field & 0xff) << 12);
+ assert(Index < 16 && "Invalid BDXAddr20");
+ Inst.addOperand(MCOperand::CreateReg(Base == 0 ? 0 : Regs[Base]));
+ Inst.addOperand(MCOperand::CreateImm(SignExtend64<20>(Disp)));
+ Inst.addOperand(MCOperand::CreateReg(Index == 0 ? 0 : Regs[Index]));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus decodeBDLAddr12Len8Operand(MCInst &Inst, uint64_t Field,
+ const unsigned *Regs) {
+ uint64_t Length = Field >> 16;
+ uint64_t Base = (Field >> 12) & 0xf;
+ uint64_t Disp = Field & 0xfff;
+ assert(Length < 256 && "Invalid BDLAddr12Len8");
+ Inst.addOperand(MCOperand::CreateReg(Base == 0 ? 0 : Regs[Base]));
+ Inst.addOperand(MCOperand::CreateImm(Disp));
+ Inst.addOperand(MCOperand::CreateImm(Length + 1));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus decodeBDAddr32Disp12Operand(MCInst &Inst, uint64_t Field,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeBDAddr12Operand(Inst, Field, SystemZMC::GR32Regs);
+}
+
+static DecodeStatus decodeBDAddr32Disp20Operand(MCInst &Inst, uint64_t Field,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeBDAddr20Operand(Inst, Field, SystemZMC::GR32Regs);
+}
+
+static DecodeStatus decodeBDAddr64Disp12Operand(MCInst &Inst, uint64_t Field,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeBDAddr12Operand(Inst, Field, SystemZMC::GR64Regs);
+}
+
+static DecodeStatus decodeBDAddr64Disp20Operand(MCInst &Inst, uint64_t Field,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeBDAddr20Operand(Inst, Field, SystemZMC::GR64Regs);
+}
+
+static DecodeStatus decodeBDXAddr64Disp12Operand(MCInst &Inst, uint64_t Field,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeBDXAddr12Operand(Inst, Field, SystemZMC::GR64Regs);
+}
+
+static DecodeStatus decodeBDXAddr64Disp20Operand(MCInst &Inst, uint64_t Field,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeBDXAddr20Operand(Inst, Field, SystemZMC::GR64Regs);
+}
+
+static DecodeStatus decodeBDLAddr64Disp12Len8Operand(MCInst &Inst,
+ uint64_t Field,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeBDLAddr12Len8Operand(Inst, Field, SystemZMC::GR64Regs);
+}
+
+#include "SystemZGenDisassemblerTables.inc"
+
+DecodeStatus SystemZDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
+ const MemoryObject &Region,
+ uint64_t Address,
+ raw_ostream &os,
+ raw_ostream &cs) const {
+ // Get the first two bytes of the instruction.
+ uint8_t Bytes[6];
+ Size = 0;
+ if (Region.readBytes(Address, 2, Bytes) == -1)
+ return MCDisassembler::Fail;
+
+ // The top 2 bits of the first byte specify the size.
+ const uint8_t *Table;
+ if (Bytes[0] < 0x40) {
+ Size = 2;
+ Table = DecoderTable16;
+ } else if (Bytes[0] < 0xc0) {
+ Size = 4;
+ Table = DecoderTable32;
+ } else {
+ Size = 6;
+ Table = DecoderTable48;
+ }
+
+ // Read any remaining bytes.
+ if (Size > 2 && Region.readBytes(Address + 2, Size - 2, Bytes + 2) == -1)
+ return MCDisassembler::Fail;
+
+ // Construct the instruction.
+ uint64_t Inst = 0;
+ for (uint64_t I = 0; I < Size; ++I)
+ Inst = (Inst << 8) | Bytes[I];
+
+ return decodeInstruction(Table, MI, Inst, Address, this, STI);
+}
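
The size logic in getInstruction() above follows the z/Architecture rule that
the top two bits of the first opcode byte determine the instruction length.
A minimal standalone sketch of the same rule (hypothetical helper, not part
of the patch):

    #include <cstdint>

    // 00xxxxxx -> 2 bytes, 01xxxxxx/10xxxxxx -> 4 bytes, 11xxxxxx -> 6 bytes.
    static unsigned instructionLength(uint8_t FirstByte) {
      if (FirstByte < 0x40)
        return 2;
      if (FirstByte < 0xc0)
        return 4;
      return 6;
    }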
diff --git a/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp b/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
index d73cf49..e1e64d3 100644
--- a/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
@@ -114,10 +114,14 @@ void SystemZInstPrinter::printAccessRegOperand(const MCInst *MI, int OpNum,
O << "%a" << (unsigned int)Value;
}
-void SystemZInstPrinter::printCallOperand(const MCInst *MI, int OpNum,
- raw_ostream &O) {
- printOperand(MI, OpNum, O);
- O << "@PLT";
+void SystemZInstPrinter::printPCRelOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+ if (MO.isImm()) {
+ O << "0x";
+ O.write_hex(MO.getImm());
+ } else
+ O << *MO.getExpr();
}
void SystemZInstPrinter::printOperand(const MCInst *MI, int OpNum,
@@ -138,6 +142,17 @@ void SystemZInstPrinter::printBDXAddrOperand(const MCInst *MI, int OpNum,
MI->getOperand(OpNum + 2).getReg(), O);
}
+void SystemZInstPrinter::printBDLAddrOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ unsigned Base = MI->getOperand(OpNum).getReg();
+ uint64_t Disp = MI->getOperand(OpNum + 1).getImm();
+ uint64_t Length = MI->getOperand(OpNum + 2).getImm();
+ O << Disp << '(' << Length;
+ if (Base)
+ O << ",%" << getRegisterName(Base);
+ O << ')';
+}
+
void SystemZInstPrinter::printCond4Operand(const MCInst *MI, int OpNum,
raw_ostream &O) {
static const char *const CondNames[] = {
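
For reference, the new printBDLAddrOperand() above renders a length operand
in the same D(L,B) form that the assembler-parser change accepts; e.g. a BDL
operand with Disp=100, Length=8 and Base=%r5 is printed as:

    100(8,%r5)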
diff --git a/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h b/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
index b82e79d..734ecf0 100644
--- a/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
+++ b/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
@@ -48,6 +48,7 @@ private:
void printOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printBDAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printBDXAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printBDLAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printU4ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printU6ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printS8ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
@@ -56,7 +57,7 @@ private:
void printU16ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printS32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printU32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
- void printCallOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printPCRelOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printAccessRegOperand(const MCInst *MI, int OpNum, raw_ostream &O);
// Print the mnemonic for a condition-code mask ("ne", "lh", etc.)
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
index e901c6c..26a8fae 100644
--- a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
@@ -35,16 +35,6 @@ static uint64_t extractBitsForFixup(MCFixupKind Kind, uint64_t Value) {
llvm_unreachable("Unknown fixup kind!");
}
-// If Opcode can be relaxed, return the relaxed form, otherwise return 0.
-static unsigned getRelaxedOpcode(unsigned Opcode) {
- switch (Opcode) {
- case SystemZ::BRC: return SystemZ::BRCL;
- case SystemZ::J: return SystemZ::JG;
- case SystemZ::BRAS: return SystemZ::BRASL;
- }
- return 0;
-}
-
namespace {
class SystemZMCAsmBackend : public MCAsmBackend {
uint8_t OSABI;
@@ -60,14 +50,20 @@ public:
LLVM_OVERRIDE;
virtual void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
uint64_t Value) const LLVM_OVERRIDE;
- virtual bool mayNeedRelaxation(const MCInst &Inst) const LLVM_OVERRIDE;
+ virtual bool mayNeedRelaxation(const MCInst &Inst) const LLVM_OVERRIDE {
+ return false;
+ }
virtual bool fixupNeedsRelaxation(const MCFixup &Fixup,
uint64_t Value,
const MCRelaxableFragment *Fragment,
const MCAsmLayout &Layout) const
- LLVM_OVERRIDE;
+ LLVM_OVERRIDE {
+ return false;
+ }
virtual void relaxInstruction(const MCInst &Inst,
- MCInst &Res) const LLVM_OVERRIDE;
+ MCInst &Res) const LLVM_OVERRIDE {
+ llvm_unreachable("SystemZ does do not have assembler relaxation");
+ }
virtual bool writeNopData(uint64_t Count,
MCObjectWriter *OW) const LLVM_OVERRIDE;
virtual MCObjectWriter *createObjectWriter(raw_ostream &OS) const
@@ -115,28 +111,6 @@ void SystemZMCAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
}
}
-bool SystemZMCAsmBackend::mayNeedRelaxation(const MCInst &Inst) const {
- return getRelaxedOpcode(Inst.getOpcode()) != 0;
-}
-
-bool
-SystemZMCAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
- uint64_t Value,
- const MCRelaxableFragment *Fragment,
- const MCAsmLayout &Layout) const {
- // At the moment we just need to relax 16-bit fields to wider fields.
- Value = extractBitsForFixup(Fixup.getKind(), Value);
- return (int16_t)Value != (int64_t)Value;
-}
-
-void SystemZMCAsmBackend::relaxInstruction(const MCInst &Inst,
- MCInst &Res) const {
- unsigned Opcode = getRelaxedOpcode(Inst.getOpcode());
- assert(Opcode && "Unexpected insn to relax");
- Res = Inst;
- Res.setOpcode(Opcode);
-}
-
bool SystemZMCAsmBackend::writeNopData(uint64_t Count,
MCObjectWriter *OW) const {
for (uint64_t I = 0; I != Count; ++I)
@@ -144,8 +118,9 @@ bool SystemZMCAsmBackend::writeNopData(uint64_t Count,
return true;
}
-MCAsmBackend *llvm::createSystemZMCAsmBackend(const Target &T, StringRef TT,
- StringRef CPU) {
+MCAsmBackend *llvm::createSystemZMCAsmBackend(const Target &T,
+ const MCRegisterInfo &MRI,
+ StringRef TT, StringRef CPU) {
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS());
return new SystemZMCAsmBackend(OSABI);
}
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
index c96a0d4..965c41e 100644
--- a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
@@ -13,16 +13,14 @@
using namespace llvm;
-SystemZMCAsmInfo::SystemZMCAsmInfo(const Target &T, StringRef TT) {
+SystemZMCAsmInfo::SystemZMCAsmInfo(StringRef TT) {
PointerSize = 8;
CalleeSaveStackSlotSize = 8;
IsLittleEndian = false;
CommentString = "#";
- PCSymbol = ".";
GlobalPrefix = "";
PrivateGlobalPrefix = ".L";
- WeakRefDirective = "\t.weak\t";
ZeroDirective = "\t.space\t";
Data64bitsDirective = "\t.quad\t";
UsesELFSectionDirectiveForBSS = true;
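
In practice these settings mean, for example, that an 8-byte zero fill and an
8-byte constant are emitted with the following directives (illustrative
assembler output, not part of the patch):

    .space  8
    .quad   0x123456789abcdef0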
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h
index bac1bca..b9ac92a 100644
--- a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h
@@ -10,16 +10,15 @@
#ifndef SystemZTARGETASMINFO_H
#define SystemZTARGETASMINFO_H
-#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAsmInfoELF.h"
#include "llvm/Support/Compiler.h"
namespace llvm {
-class Target;
class StringRef;
-class SystemZMCAsmInfo : public MCAsmInfo {
+class SystemZMCAsmInfo : public MCAsmInfoELF {
public:
- explicit SystemZMCAsmInfo(const Target &T, StringRef TT);
+ explicit SystemZMCAsmInfo(StringRef TT);
// Override MCAsmInfo.
virtual const MCSection *getNonexecutableStackSection(MCContext &Ctx) const
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
index ea2250f..f07ea7b 100644
--- a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
@@ -45,33 +45,40 @@ private:
// Called by the TableGen code to get the binary encoding of operand
// MO in MI. Fixups is the list of fixups against MI.
- unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups) const;
+ // Called by the TableGen code to get the binary encoding of an address.
+ // The index or length, if any, is encoded first, followed by the base,
+ // followed by the displacement. In a 20-bit displacement,
+ // the low 12 bits are encoded before the high 8 bits.
+ uint64_t getBDAddr12Encoding(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ uint64_t getBDAddr20Encoding(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ uint64_t getBDXAddr12Encoding(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ uint64_t getBDXAddr20Encoding(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ uint64_t getBDLAddr12Len8Encoding(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
// Operand OpNum of MI needs a PC-relative fixup of kind Kind at
// Offset bytes from the start of MI. Add the fixup to Fixups
// and return the in-place addend, which since we're a RELA target
// is always 0.
- unsigned getPCRelEncoding(const MCInst &MI, unsigned int OpNum,
+ uint64_t getPCRelEncoding(const MCInst &MI, unsigned OpNum,
SmallVectorImpl<MCFixup> &Fixups,
unsigned Kind, int64_t Offset) const;
- unsigned getPC16DBLEncoding(const MCInst &MI, unsigned int OpNum,
+ uint64_t getPC16DBLEncoding(const MCInst &MI, unsigned OpNum,
SmallVectorImpl<MCFixup> &Fixups) const {
return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PC16DBL, 2);
}
- unsigned getPC32DBLEncoding(const MCInst &MI, unsigned int OpNum,
+ uint64_t getPC32DBLEncoding(const MCInst &MI, unsigned OpNum,
SmallVectorImpl<MCFixup> &Fixups) const {
return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PC32DBL, 2);
}
- unsigned getPLT16DBLEncoding(const MCInst &MI, unsigned int OpNum,
- SmallVectorImpl<MCFixup> &Fixups) const {
- return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PLT16DBL, 2);
- }
- unsigned getPLT32DBLEncoding(const MCInst &MI, unsigned int OpNum,
- SmallVectorImpl<MCFixup> &Fixups) const {
- return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PLT32DBL, 2);
- }
};
}
@@ -95,34 +102,83 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
}
}
-unsigned SystemZMCCodeEmitter::
+uint64_t SystemZMCCodeEmitter::
getMachineOpValue(const MCInst &MI, const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups) const {
if (MO.isReg())
- return Ctx.getRegisterInfo().getEncodingValue(MO.getReg());
+ return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg());
if (MO.isImm())
- return static_cast<unsigned>(MO.getImm());
+ return static_cast<uint64_t>(MO.getImm());
llvm_unreachable("Unexpected operand type!");
}
-unsigned
-SystemZMCCodeEmitter::getPCRelEncoding(const MCInst &MI, unsigned int OpNum,
+uint64_t SystemZMCCodeEmitter::
+getBDAddr12Encoding(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups);
+ uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups);
+ assert(isUInt<4>(Base) && isUInt<12>(Disp));
+ return (Base << 12) | Disp;
+}
+
+uint64_t SystemZMCCodeEmitter::
+getBDAddr20Encoding(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups);
+ uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups);
+ assert(isUInt<4>(Base) && isInt<20>(Disp));
+ return (Base << 20) | ((Disp & 0xfff) << 8) | ((Disp & 0xff000) >> 12);
+}
+
+uint64_t SystemZMCCodeEmitter::
+getBDXAddr12Encoding(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups);
+ uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups);
+ uint64_t Index = getMachineOpValue(MI, MI.getOperand(OpNum + 2), Fixups);
+ assert(isUInt<4>(Base) && isUInt<12>(Disp) && isUInt<4>(Index));
+ return (Index << 16) | (Base << 12) | Disp;
+}
+
+uint64_t SystemZMCCodeEmitter::
+getBDXAddr20Encoding(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups);
+ uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups);
+ uint64_t Index = getMachineOpValue(MI, MI.getOperand(OpNum + 2), Fixups);
+ assert(isUInt<4>(Base) && isInt<20>(Disp) && isUInt<4>(Index));
+ return (Index << 24) | (Base << 20) | ((Disp & 0xfff) << 8)
+ | ((Disp & 0xff000) >> 12);
+}
+
+uint64_t SystemZMCCodeEmitter::
+getBDLAddr12Len8Encoding(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups);
+ uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups);
+ uint64_t Len = getMachineOpValue(MI, MI.getOperand(OpNum + 2), Fixups) - 1;
+ assert(isUInt<4>(Base) && isUInt<12>(Disp) && isUInt<8>(Len));
+ return (Len << 16) | (Base << 12) | Disp;
+}
+
+uint64_t
+SystemZMCCodeEmitter::getPCRelEncoding(const MCInst &MI, unsigned OpNum,
SmallVectorImpl<MCFixup> &Fixups,
unsigned Kind, int64_t Offset) const {
const MCOperand &MO = MI.getOperand(OpNum);
- // For compatibility with the GNU assembler, treat constant operands as
- // unadjusted PC-relative offsets.
+ const MCExpr *Expr;
if (MO.isImm())
- return MO.getImm() / 2;
-
- const MCExpr *Expr = MO.getExpr();
- if (Offset) {
- // The operand value is relative to the start of MI, but the fixup
- // is relative to the operand field itself, which is Offset bytes
- // into MI. Add Offset to the relocation value to cancel out
- // this difference.
- const MCExpr *OffsetExpr = MCConstantExpr::Create(Offset, Ctx);
- Expr = MCBinaryExpr::CreateAdd(Expr, OffsetExpr, Ctx);
+ Expr = MCConstantExpr::Create(MO.getImm() + Offset, Ctx);
+ else {
+ Expr = MO.getExpr();
+ if (Offset) {
+ // The operand value is relative to the start of MI, but the fixup
+ // is relative to the operand field itself, which is Offset bytes
+ // into MI. Add Offset to the relocation value to cancel out
+ // this difference.
+ const MCExpr *OffsetExpr = MCConstantExpr::Create(Offset, Ctx);
+ Expr = MCBinaryExpr::CreateAdd(Expr, OffsetExpr, Ctx);
+ }
}
Fixups.push_back(MCFixup::Create(Offset, Expr, (MCFixupKind)Kind));
return 0;
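
To make the 20-bit displacement layout concrete: getBDAddr20Encoding() above
emits the base nibble, then the low 12 bits of the displacement (DL), then
the high 8 bits (DH). A hypothetical check, assuming that same bit layout:

    #include <cassert>
    #include <cstdint>

    // Mirror of the B/DL/DH split used by getBDAddr20Encoding().
    static uint64_t encodeBD20(uint64_t Base, int64_t Disp) {
      return (Base << 20) | ((Disp & 0xfff) << 8) | ((Disp & 0xff000) >> 12);
    }

    int main() {
      // Base %r15, Disp 0x12345: field is B=0xf, DL=0x345, DH=0x12.
      assert(encodeBD20(0xf, 0x12345) == 0xf34512);
      return 0;
    }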
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
index 49a7f47..9e1296b 100644
--- a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
@@ -27,11 +27,80 @@
using namespace llvm;
-static MCAsmInfo *createSystemZMCAsmInfo(const Target &T, StringRef TT) {
- MCAsmInfo *MAI = new SystemZMCAsmInfo(T, TT);
- MachineLocation FPDst(MachineLocation::VirtualFP);
- MachineLocation FPSrc(SystemZ::R15D, -SystemZMC::CFAOffsetFromInitialSP);
- MAI->addInitialFrameState(0, FPDst, FPSrc);
+const unsigned SystemZMC::GR32Regs[16] = {
+ SystemZ::R0L, SystemZ::R1L, SystemZ::R2L, SystemZ::R3L,
+ SystemZ::R4L, SystemZ::R5L, SystemZ::R6L, SystemZ::R7L,
+ SystemZ::R8L, SystemZ::R9L, SystemZ::R10L, SystemZ::R11L,
+ SystemZ::R12L, SystemZ::R13L, SystemZ::R14L, SystemZ::R15L
+};
+
+const unsigned SystemZMC::GRH32Regs[16] = {
+ SystemZ::R0H, SystemZ::R1H, SystemZ::R2H, SystemZ::R3H,
+ SystemZ::R4H, SystemZ::R5H, SystemZ::R6H, SystemZ::R7H,
+ SystemZ::R8H, SystemZ::R9H, SystemZ::R10H, SystemZ::R11H,
+ SystemZ::R12H, SystemZ::R13H, SystemZ::R14H, SystemZ::R15H
+};
+
+const unsigned SystemZMC::GR64Regs[16] = {
+ SystemZ::R0D, SystemZ::R1D, SystemZ::R2D, SystemZ::R3D,
+ SystemZ::R4D, SystemZ::R5D, SystemZ::R6D, SystemZ::R7D,
+ SystemZ::R8D, SystemZ::R9D, SystemZ::R10D, SystemZ::R11D,
+ SystemZ::R12D, SystemZ::R13D, SystemZ::R14D, SystemZ::R15D
+};
+
+const unsigned SystemZMC::GR128Regs[16] = {
+ SystemZ::R0Q, 0, SystemZ::R2Q, 0,
+ SystemZ::R4Q, 0, SystemZ::R6Q, 0,
+ SystemZ::R8Q, 0, SystemZ::R10Q, 0,
+ SystemZ::R12Q, 0, SystemZ::R14Q, 0
+};
+
+const unsigned SystemZMC::FP32Regs[16] = {
+ SystemZ::F0S, SystemZ::F1S, SystemZ::F2S, SystemZ::F3S,
+ SystemZ::F4S, SystemZ::F5S, SystemZ::F6S, SystemZ::F7S,
+ SystemZ::F8S, SystemZ::F9S, SystemZ::F10S, SystemZ::F11S,
+ SystemZ::F12S, SystemZ::F13S, SystemZ::F14S, SystemZ::F15S
+};
+
+const unsigned SystemZMC::FP64Regs[16] = {
+ SystemZ::F0D, SystemZ::F1D, SystemZ::F2D, SystemZ::F3D,
+ SystemZ::F4D, SystemZ::F5D, SystemZ::F6D, SystemZ::F7D,
+ SystemZ::F8D, SystemZ::F9D, SystemZ::F10D, SystemZ::F11D,
+ SystemZ::F12D, SystemZ::F13D, SystemZ::F14D, SystemZ::F15D
+};
+
+const unsigned SystemZMC::FP128Regs[16] = {
+ SystemZ::F0Q, SystemZ::F1Q, 0, 0,
+ SystemZ::F4Q, SystemZ::F5Q, 0, 0,
+ SystemZ::F8Q, SystemZ::F9Q, 0, 0,
+ SystemZ::F12Q, SystemZ::F13Q, 0, 0
+};
+
+unsigned SystemZMC::getFirstReg(unsigned Reg) {
+ static unsigned Map[SystemZ::NUM_TARGET_REGS];
+ static bool Initialized = false;
+ if (!Initialized) {
+ for (unsigned I = 0; I < 16; ++I) {
+ Map[GR32Regs[I]] = I;
+ Map[GRH32Regs[I]] = I;
+ Map[GR64Regs[I]] = I;
+ Map[GR128Regs[I]] = I;
+ Map[FP32Regs[I]] = I;
+ Map[FP64Regs[I]] = I;
+ Map[FP128Regs[I]] = I;
+    }
+    Initialized = true;
+  }
+ assert(Reg < SystemZ::NUM_TARGET_REGS);
+ return Map[Reg];
+}
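
A self-contained toy version of this reverse map illustrates the round trip between architectural and LLVM register numbers. The enumerators and array sizes below are invented for the example; only the shape of the code mirrors getFirstReg:

#include <cassert>

// Made-up register IDs standing in for the SystemZ::* enumerators.
enum { R0L = 10, R1L = 11, R0D = 20, R1D = 21, NUM_REGS = 32 };
const unsigned GR32Regs[2] = { R0L, R1L };
const unsigned GR64Regs[2] = { R0D, R1D };

unsigned getFirstRegDemo(unsigned Reg) {
  static unsigned Map[NUM_REGS];
  static bool Initialized = false;
  if (!Initialized) {
    for (unsigned I = 0; I < 2; ++I) {
      Map[GR32Regs[I]] = I;
      Map[GR64Regs[I]] = I;
    }
    Initialized = true;
  }
  return Map[Reg];
}

int main() {
  assert(getFirstRegDemo(R1D) == 1);             // %r1 as a 64-bit register
  assert(GR32Regs[getFirstRegDemo(R1D)] == R1L); // the getRegAsGR32 idea
  return 0;
}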
+
+static MCAsmInfo *createSystemZMCAsmInfo(const MCRegisterInfo &MRI,
+ StringRef TT) {
+ MCAsmInfo *MAI = new SystemZMCAsmInfo(TT);
+ MCCFIInstruction Inst =
+ MCCFIInstruction::createDefCfa(0, MRI.getDwarfRegNum(SystemZ::R15D, true),
+ SystemZMC::CFAOffsetFromInitialSP);
+ MAI->addInitialFrameState(Inst);
return MAI;
}
@@ -118,7 +187,7 @@ static MCStreamer *createSystemZMCObjectStreamer(const Target &T, StringRef TT,
MCCodeEmitter *Emitter,
bool RelaxAll,
bool NoExecStack) {
- return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack);
+ return createELFStreamer(Ctx, 0, MAB, OS, Emitter, RelaxAll, NoExecStack);
}
extern "C" void LLVMInitializeSystemZTargetMC() {
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
index 229912f..97e325b 100644
--- a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
@@ -34,6 +34,39 @@ namespace SystemZMC {
// The offset of the DWARF CFA from the incoming stack pointer.
const int64_t CFAOffsetFromInitialSP = CallFrameSize;
+
+ // Maps of asm register numbers to LLVM register numbers, with 0 indicating
+ // an invalid register. In principle we could use 32-bit and 64-bit register
+ // classes directly, provided that we relegated the GPR allocation order
+ // in SystemZRegisterInfo.td to an AltOrder and left the default order
+ // as %r0-%r15. It seems better to provide the same interface for
+ // all classes though.
+ extern const unsigned GR32Regs[16];
+ extern const unsigned GRH32Regs[16];
+ extern const unsigned GR64Regs[16];
+ extern const unsigned GR128Regs[16];
+ extern const unsigned FP32Regs[16];
+ extern const unsigned FP64Regs[16];
+ extern const unsigned FP128Regs[16];
+
+ // Return the 0-based number of the first architectural register that
+ // contains the given LLVM register. E.g. R1D -> 1.
+ unsigned getFirstReg(unsigned Reg);
+
+ // Return the given register as a GR64.
+ inline unsigned getRegAsGR64(unsigned Reg) {
+ return GR64Regs[getFirstReg(Reg)];
+ }
+
+ // Return the given register as a low GR32.
+ inline unsigned getRegAsGR32(unsigned Reg) {
+ return GR32Regs[getFirstReg(Reg)];
+ }
+
+ // Return the given register as a high GR32.
+ inline unsigned getRegAsGRH32(unsigned Reg) {
+ return GRH32Regs[getFirstReg(Reg)];
+ }
}
MCCodeEmitter *createSystemZMCCodeEmitter(const MCInstrInfo &MCII,
@@ -41,8 +74,9 @@ MCCodeEmitter *createSystemZMCCodeEmitter(const MCInstrInfo &MCII,
const MCSubtargetInfo &STI,
MCContext &Ctx);
-MCAsmBackend *createSystemZMCAsmBackend(const Target &T, StringRef TT,
- StringRef CPU);
+MCAsmBackend *createSystemZMCAsmBackend(const Target &T,
+ const MCRegisterInfo &MRI,
+ StringRef TT, StringRef CPU);
MCObjectWriter *createSystemZObjectWriter(raw_ostream &OS, uint8_t OSABI);
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/SystemZ/README.txt b/contrib/llvm/lib/Target/SystemZ/README.txt
index d1f56a4..afa6cf0 100644
--- a/contrib/llvm/lib/Target/SystemZ/README.txt
+++ b/contrib/llvm/lib/Target/SystemZ/README.txt
@@ -29,7 +29,7 @@ to load 103. This seems to be a general target-independent problem.
--
-The tuning of the choice between Load Address (LA) and addition in
+The tuning of the choice between LOAD ADDRESS (LA) and addition in
SystemZISelDAGToDAG.cpp is suspect. It should be tweaked based on
performance measurements.
@@ -39,22 +39,35 @@ There is no scheduling support.
--
-We don't use the Branch on Count or Branch on Index families of instruction.
+We don't use the BRANCH ON INDEX instructions.
--
-We don't use the condition code results of anything except comparisons.
+We might want to use BRANCH ON CONDITION for conditional indirect calls
+and conditional returns.
-Implementing this may need something more finely grained than the z_cmp
-and z_ucmp that we have now. It might (or might not) also be useful to
-have a mask of "don't care" values in conditional branches. For example,
-integer comparisons never set CC to 3, so the bottom bit of the CC mask
-isn't particularly relevant. JNLH and JE are equally good for testing
-equality after an integer comparison, etc.
+--
+
+We don't use the TEST DATA CLASS instructions.
+
+--
+
+We could use the generic floating-point forms of LOAD COMPLEMENT,
+LOAD NEGATIVE and LOAD POSITIVE in cases where we don't need the
+condition codes. For example, we could use LCDFR instead of LCDBR.
--
-We don't optimize string and block memory operations.
+We only use MVC, XC and CLC for constant-length block operations.
+We could extend them to variable-length operations too,
+using EXECUTE RELATIVE LONG.
+
+MVCIN, MVCLE and CLCLE may be worthwhile too.
+
+--
+
+We don't use CUSE or the TRANSLATE family of instructions for string
+operations. The TRANSLATE ones are probably more difficult to exploit.
--
@@ -63,9 +76,21 @@ conventions require f128s to be returned by invisible reference.
--
-DAGCombiner can detect integer absolute, but there's not yet an associated
-ISD opcode. We could add one and implement it using Load Positive.
-Negated absolutes could use Load Negative.
+ADD LOGICAL WITH SIGNED IMMEDIATE could be useful when we need to
+produce a carry. SUBTRACT LOGICAL IMMEDIATE could be useful when we
+need to produce a borrow. (Note that there are no memory forms of
+ADD LOGICAL WITH CARRY and SUBTRACT LOGICAL WITH BORROW, so the high
+part of 128-bit memory operations would probably need to be done
+via a register.)
+
+--
+
+We don't use the halfword forms of LOAD REVERSED and STORE REVERSED
+(LRVH and STRVH).
+
+--
+
+We don't use ICM or STCM.
--
@@ -142,5 +167,15 @@ See CodeGen/SystemZ/alloca-01.ll for an example.
--
Atomic loads and stores use the default compare-and-swap based implementation.
-This is probably much too conservative in practice, and the overhead is
-especially bad for 8- and 16-bit accesses.
+This is much too conservative in practice, since the architecture guarantees
+that 1-, 2-, 4- and 8-byte loads and stores to aligned addresses are
+inherently atomic.
+
+--
+
+If needed, we can support 16-byte atomics using LPQ, STPQ and CSDG.
+
+--
+
+We might want to model all access registers and use them to spill
+32-bit values.
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZ.h b/contrib/llvm/lib/Target/SystemZ/SystemZ.h
index b811cbe..dcebbad 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZ.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZ.h
@@ -30,16 +30,51 @@ namespace llvm {
const unsigned CCMASK_3 = 1 << 0;
const unsigned CCMASK_ANY = CCMASK_0 | CCMASK_1 | CCMASK_2 | CCMASK_3;
- // Condition-code mask assignments for floating-point comparisons.
+ // Condition-code mask assignments for integer and floating-point
+ // comparisons.
const unsigned CCMASK_CMP_EQ = CCMASK_0;
const unsigned CCMASK_CMP_LT = CCMASK_1;
const unsigned CCMASK_CMP_GT = CCMASK_2;
- const unsigned CCMASK_CMP_UO = CCMASK_3;
const unsigned CCMASK_CMP_NE = CCMASK_CMP_LT | CCMASK_CMP_GT;
const unsigned CCMASK_CMP_LE = CCMASK_CMP_EQ | CCMASK_CMP_LT;
const unsigned CCMASK_CMP_GE = CCMASK_CMP_EQ | CCMASK_CMP_GT;
+
+ // Condition-code mask assignments for floating-point comparisons only.
+ const unsigned CCMASK_CMP_UO = CCMASK_3;
const unsigned CCMASK_CMP_O = CCMASK_ANY ^ CCMASK_CMP_UO;
+ // All condition-code values produced by comparisons.
+ const unsigned CCMASK_ICMP = CCMASK_0 | CCMASK_1 | CCMASK_2;
+ const unsigned CCMASK_FCMP = CCMASK_0 | CCMASK_1 | CCMASK_2 | CCMASK_3;
+
+ // Condition-code mask assignments for CS.
+ const unsigned CCMASK_CS_EQ = CCMASK_0;
+ const unsigned CCMASK_CS_NE = CCMASK_1;
+ const unsigned CCMASK_CS = CCMASK_0 | CCMASK_1;
+
+ // Condition-code mask assignments for a completed SRST loop.
+ const unsigned CCMASK_SRST_FOUND = CCMASK_1;
+ const unsigned CCMASK_SRST_NOTFOUND = CCMASK_2;
+ const unsigned CCMASK_SRST = CCMASK_1 | CCMASK_2;
+
+ // Condition-code mask assignments for TEST UNDER MASK.
+ const unsigned CCMASK_TM_ALL_0 = CCMASK_0;
+ const unsigned CCMASK_TM_MIXED_MSB_0 = CCMASK_1;
+ const unsigned CCMASK_TM_MIXED_MSB_1 = CCMASK_2;
+ const unsigned CCMASK_TM_ALL_1 = CCMASK_3;
+ const unsigned CCMASK_TM_SOME_0 = CCMASK_TM_ALL_1 ^ CCMASK_ANY;
+ const unsigned CCMASK_TM_SOME_1 = CCMASK_TM_ALL_0 ^ CCMASK_ANY;
+ const unsigned CCMASK_TM_MSB_0 = CCMASK_0 | CCMASK_1;
+ const unsigned CCMASK_TM_MSB_1 = CCMASK_2 | CCMASK_3;
+ const unsigned CCMASK_TM = CCMASK_ANY;
+
+ // The position of the low CC bit in an IPM result.
+ const unsigned IPM_CC = 28;
+
+ // Mask assignments for PFD.
+ const unsigned PFD_READ = 1;
+ const unsigned PFD_WRITE = 2;
+
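
A small sketch shows how these constants fit together: IPM places the condition code in bits 29:28 of its result, and each CC value selects one bit of a four-bit branch mask (the constants below restate values defined above; the loop itself is illustrative):

#include <cassert>
#include <cstdint>

int main() {
  const unsigned IPM_CC = 28;                    // as defined above
  const unsigned CCMASK_0 = 1u << 3, CCMASK_3 = 1u << 0;

  for (uint32_t CC = 0; CC < 4; ++CC) {
    uint32_t IPMResult = CC << IPM_CC;           // CC lives in bits 29:28
    assert(((IPMResult >> IPM_CC) & 3) == CC);

    uint32_t MaskBit = 1u << (3 - CC);           // the BRC mask bit for CC
    if (CC == 0) assert(MaskBit == CCMASK_0);
    if (CC == 3) assert(MaskBit == CCMASK_3);
  }
  return 0;
}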
// Return true if Val fits an LLILL operand.
static inline bool isImmLL(uint64_t Val) {
return (Val & ~0x000000000000ffffULL) == 0;
@@ -73,5 +108,8 @@ namespace llvm {
FunctionPass *createSystemZISelDag(SystemZTargetMachine &TM,
CodeGenOpt::Level OptLevel);
+ FunctionPass *createSystemZElimComparePass(SystemZTargetMachine &TM);
+ FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM);
+ FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM);
} // end namespace llvm;
#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZ.td b/contrib/llvm/lib/Target/SystemZ/SystemZ.td
index e03c32f..abf5c8e 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZ.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZ.td
@@ -14,13 +14,10 @@
include "llvm/Target/Target.td"
//===----------------------------------------------------------------------===//
-// SystemZ supported processors
+// SystemZ supported processors and features
//===----------------------------------------------------------------------===//
-class Proc<string Name, list<SubtargetFeature> Features>
- : Processor<Name, NoItineraries, Features>;
-
-def : Proc<"z10", []>;
+include "SystemZProcessors.td"
//===----------------------------------------------------------------------===//
// Register file description
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
index 1e15ab1..75cbda4 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -19,16 +19,142 @@
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/Mangler.h"
using namespace llvm;
+// Return an RI instruction like MI with opcode Opcode, but with the
+// GR64 register operands turned into GR32s.
+static MCInst lowerRILow(const MachineInstr *MI, unsigned Opcode) {
+ if (MI->isCompare())
+ return MCInstBuilder(Opcode)
+ .addReg(SystemZMC::getRegAsGR32(MI->getOperand(0).getReg()))
+ .addImm(MI->getOperand(1).getImm());
+ else
+ return MCInstBuilder(Opcode)
+ .addReg(SystemZMC::getRegAsGR32(MI->getOperand(0).getReg()))
+ .addReg(SystemZMC::getRegAsGR32(MI->getOperand(1).getReg()))
+ .addImm(MI->getOperand(2).getImm());
+}
+
+// Return an RI instruction like MI with opcode Opcode, but with the
+// GR64 register operands turned into GRH32s.
+static MCInst lowerRIHigh(const MachineInstr *MI, unsigned Opcode) {
+ if (MI->isCompare())
+ return MCInstBuilder(Opcode)
+ .addReg(SystemZMC::getRegAsGRH32(MI->getOperand(0).getReg()))
+ .addImm(MI->getOperand(1).getImm());
+ else
+ return MCInstBuilder(Opcode)
+ .addReg(SystemZMC::getRegAsGRH32(MI->getOperand(0).getReg()))
+ .addReg(SystemZMC::getRegAsGRH32(MI->getOperand(1).getReg()))
+ .addImm(MI->getOperand(2).getImm());
+}
+
+// Return an RI instruction like MI with opcode Opcode, but with the
+// R2 register turned into a GR64.
+static MCInst lowerRIEfLow(const MachineInstr *MI, unsigned Opcode) {
+ return MCInstBuilder(Opcode)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addReg(SystemZMC::getRegAsGR64(MI->getOperand(2).getReg()))
+ .addImm(MI->getOperand(3).getImm())
+ .addImm(MI->getOperand(4).getImm())
+ .addImm(MI->getOperand(5).getImm());
+}
+
void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
- SystemZMCInstLower Lower(Mang, MF->getContext(), *this);
+ SystemZMCInstLower Lower(MF->getContext(), *this);
MCInst LoweredMI;
- Lower.lower(MI, LoweredMI);
+ switch (MI->getOpcode()) {
+ case SystemZ::Return:
+ LoweredMI = MCInstBuilder(SystemZ::BR).addReg(SystemZ::R14D);
+ break;
+
+ case SystemZ::CallBRASL:
+ LoweredMI = MCInstBuilder(SystemZ::BRASL)
+ .addReg(SystemZ::R14D)
+ .addExpr(Lower.getExpr(MI->getOperand(0), MCSymbolRefExpr::VK_PLT));
+ break;
+
+ case SystemZ::CallBASR:
+ LoweredMI = MCInstBuilder(SystemZ::BASR)
+ .addReg(SystemZ::R14D)
+ .addReg(MI->getOperand(0).getReg());
+ break;
+
+ case SystemZ::CallJG:
+ LoweredMI = MCInstBuilder(SystemZ::JG)
+ .addExpr(Lower.getExpr(MI->getOperand(0), MCSymbolRefExpr::VK_PLT));
+ break;
+
+ case SystemZ::CallBR:
+ LoweredMI = MCInstBuilder(SystemZ::BR).addReg(SystemZ::R1D);
+ break;
+
+ case SystemZ::IILF64:
+ LoweredMI = MCInstBuilder(SystemZ::IILF)
+ .addReg(SystemZMC::getRegAsGR32(MI->getOperand(0).getReg()))
+ .addImm(MI->getOperand(2).getImm());
+ break;
+
+ case SystemZ::IIHF64:
+ LoweredMI = MCInstBuilder(SystemZ::IIHF)
+ .addReg(SystemZMC::getRegAsGRH32(MI->getOperand(0).getReg()))
+ .addImm(MI->getOperand(2).getImm());
+ break;
+
+ case SystemZ::RISBHH:
+ case SystemZ::RISBHL:
+ LoweredMI = lowerRIEfLow(MI, SystemZ::RISBHG);
+ break;
+
+ case SystemZ::RISBLH:
+ case SystemZ::RISBLL:
+ LoweredMI = lowerRIEfLow(MI, SystemZ::RISBLG);
+ break;
+
+#define LOWER_LOW(NAME) \
+ case SystemZ::NAME##64: LoweredMI = lowerRILow(MI, SystemZ::NAME); break
+
+ LOWER_LOW(IILL);
+ LOWER_LOW(IILH);
+ LOWER_LOW(TMLL);
+ LOWER_LOW(TMLH);
+ LOWER_LOW(NILL);
+ LOWER_LOW(NILH);
+ LOWER_LOW(NILF);
+ LOWER_LOW(OILL);
+ LOWER_LOW(OILH);
+ LOWER_LOW(OILF);
+ LOWER_LOW(XILF);
+
+#undef LOWER_LOW
+
+#define LOWER_HIGH(NAME) \
+ case SystemZ::NAME##64: LoweredMI = lowerRIHigh(MI, SystemZ::NAME); break
+
+ LOWER_HIGH(IIHL);
+ LOWER_HIGH(IIHH);
+ LOWER_HIGH(TMHL);
+ LOWER_HIGH(TMHH);
+ LOWER_HIGH(NIHL);
+ LOWER_HIGH(NIHH);
+ LOWER_HIGH(NIHF);
+ LOWER_HIGH(OIHL);
+ LOWER_HIGH(OIHH);
+ LOWER_HIGH(OIHF);
+ LOWER_HIGH(XIHF);
+
+#undef LOWER_HIGH
+
+ default:
+ Lower.lower(MI, LoweredMI);
+ break;
+ }
OutStreamer.EmitInstruction(LoweredMI);
}
@@ -48,7 +174,7 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
static_cast<SystemZConstantPoolValue*>(MCPV);
const MCExpr *Expr =
- MCSymbolRefExpr::Create(Mang->getSymbol(ZCPV->getGlobalValue()),
+ MCSymbolRefExpr::Create(getSymbol(ZCPV->getGlobalValue()),
getModifierVariantKind(ZCPV->getModifier()),
OutContext);
uint64_t Size = TM.getDataLayout()->getTypeAllocSize(ZCPV->getType());
@@ -66,7 +192,7 @@ bool SystemZAsmPrinter::PrintAsmOperand(const MachineInstr *MI,
return true;
OS << -int64_t(MI->getOperand(OpNo).getImm());
} else {
- SystemZMCInstLower Lower(Mang, MF->getContext(), *this);
+ SystemZMCInstLower Lower(MF->getContext(), *this);
MCOperand MO(Lower.lowerOperand(MI->getOperand(OpNo)));
SystemZInstPrinter::printOperand(MO, OS);
}
@@ -100,7 +226,7 @@ void SystemZAsmPrinter::EmitEndOfAsmFile(Module &M) {
for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
OutStreamer.EmitLabel(Stubs[i].first);
OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(),
- TD->getPointerSize(0), 0);
+ TD->getPointerSize(0));
}
Stubs.clear();
}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.td b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.td
index c2d727f..c4f641e 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.td
@@ -23,7 +23,7 @@ def RetCC_SystemZ : CallingConv<[
// call-clobbered argument registers available for code that doesn't
// care about the ABI. (R6 is an argument register too, but is
// call-saved and therefore not suitable for return values.)
- CCIfType<[i32], CCAssignToReg<[R2W, R3W, R4W, R5W]>>,
+ CCIfType<[i32], CCAssignToReg<[R2L, R3L, R4L, R5L]>>,
CCIfType<[i64], CCAssignToReg<[R2D, R3D, R4D, R5D]>>,
  // ABI-compliant code returns float and double in F0. Make the
@@ -53,7 +53,7 @@ def CC_SystemZ : CallingConv<[
// The first 5 integer arguments are passed in R2-R6. Note that R6
// is call-saved.
- CCIfType<[i32], CCAssignToReg<[R2W, R3W, R4W, R5W, R6W]>>,
+ CCIfType<[i32], CCAssignToReg<[R2L, R3L, R4L, R5L, R6L]>>,
CCIfType<[i64], CCAssignToReg<[R2D, R3D, R4D, R5D, R6D]>>,
// The first 4 float and double arguments are passed in even registers F0-F6.
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
index e9c4f6d..6c70811 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
@@ -39,7 +39,7 @@ unsigned SystemZConstantPoolValue::getRelocationInfo() const {
int SystemZConstantPoolValue::
getExistingMachineCPValue(MachineConstantPool *CP, unsigned Alignment) {
unsigned AlignMask = Alignment - 1;
- const std::vector<MachineConstantPoolEntry> Constants = CP->getConstants();
+ const std::vector<MachineConstantPoolEntry> &Constants = CP->getConstants();
for (unsigned I = 0, E = Constants.size(); I != E; ++I) {
if (Constants[I].isMachineConstantPoolEntry() &&
(Constants[I].getAlignment() & AlignMask) == 0) {
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
new file mode 100644
index 0000000..b8a77db
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
@@ -0,0 +1,471 @@
+//===-- SystemZElimCompare.cpp - Eliminate comparison instructions --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass:
+// (1) tries to remove compares if CC already contains the required information
+// (2) fuses compares and branches into COMPARE AND BRANCH instructions
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "systemz-elim-compare"
+
+#include "SystemZTargetMachine.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+STATISTIC(BranchOnCounts, "Number of branch-on-count instructions");
+STATISTIC(EliminatedComparisons, "Number of eliminated comparisons");
+STATISTIC(FusedComparisons, "Number of fused compare-and-branch instructions");
+
+namespace {
+ // Represents the references to a particular register in one or more
+ // instructions.
+ struct Reference {
+ Reference()
+ : Def(false), Use(false), IndirectDef(false), IndirectUse(false) {}
+
+ Reference &operator|=(const Reference &Other) {
+ Def |= Other.Def;
+ IndirectDef |= Other.IndirectDef;
+ Use |= Other.Use;
+ IndirectUse |= Other.IndirectUse;
+ return *this;
+ }
+
+ operator bool() const { return Def || Use; }
+
+ // True if the register is defined or used in some form, either directly or
+ // via a sub- or super-register.
+ bool Def;
+ bool Use;
+
+ // True if the register is defined or used indirectly, by a sub- or
+ // super-register.
+ bool IndirectDef;
+ bool IndirectUse;
+ };
+
+ class SystemZElimCompare : public MachineFunctionPass {
+ public:
+ static char ID;
+ SystemZElimCompare(const SystemZTargetMachine &tm)
+ : MachineFunctionPass(ID), TII(0), TRI(0) {}
+
+ virtual const char *getPassName() const {
+ return "SystemZ Comparison Elimination";
+ }
+
+ bool processBlock(MachineBasicBlock *MBB);
+ bool runOnMachineFunction(MachineFunction &F);
+
+ private:
+ Reference getRegReferences(MachineInstr *MI, unsigned Reg);
+ bool convertToBRCT(MachineInstr *MI, MachineInstr *Compare,
+ SmallVectorImpl<MachineInstr *> &CCUsers);
+ bool convertToLoadAndTest(MachineInstr *MI);
+ bool adjustCCMasksForInstr(MachineInstr *MI, MachineInstr *Compare,
+ SmallVectorImpl<MachineInstr *> &CCUsers);
+ bool optimizeCompareZero(MachineInstr *Compare,
+ SmallVectorImpl<MachineInstr *> &CCUsers);
+ bool fuseCompareAndBranch(MachineInstr *Compare,
+ SmallVectorImpl<MachineInstr *> &CCUsers);
+
+ const SystemZInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ };
+
+ char SystemZElimCompare::ID = 0;
+} // end of anonymous namespace
+
+FunctionPass *llvm::createSystemZElimComparePass(SystemZTargetMachine &TM) {
+ return new SystemZElimCompare(TM);
+}
+
+// Return true if CC is live out of MBB.
+static bool isCCLiveOut(MachineBasicBlock *MBB) {
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI)
+ if ((*SI)->isLiveIn(SystemZ::CC))
+ return true;
+ return false;
+}
+
+// Return true if any CC result of MI would reflect the value of subreg
+// SubReg of Reg.
+static bool resultTests(MachineInstr *MI, unsigned Reg, unsigned SubReg) {
+ if (MI->getNumOperands() > 0 &&
+ MI->getOperand(0).isReg() &&
+ MI->getOperand(0).isDef() &&
+ MI->getOperand(0).getReg() == Reg &&
+ MI->getOperand(0).getSubReg() == SubReg)
+ return true;
+
+ switch (MI->getOpcode()) {
+ case SystemZ::LR:
+ case SystemZ::LGR:
+ case SystemZ::LGFR:
+ case SystemZ::LTR:
+ case SystemZ::LTGR:
+ case SystemZ::LTGFR:
+ case SystemZ::LER:
+ case SystemZ::LDR:
+ case SystemZ::LXR:
+ case SystemZ::LTEBR:
+ case SystemZ::LTDBR:
+ case SystemZ::LTXBR:
+ if (MI->getOperand(1).getReg() == Reg &&
+ MI->getOperand(1).getSubReg() == SubReg)
+ return true;
+ }
+
+ return false;
+}
+
+// Describe the references to Reg in MI, including sub- and super-registers.
+Reference SystemZElimCompare::getRegReferences(MachineInstr *MI, unsigned Reg) {
+ Reference Ref;
+ for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = MI->getOperand(I);
+ if (MO.isReg()) {
+ if (unsigned MOReg = MO.getReg()) {
+ if (MOReg == Reg || TRI->regsOverlap(MOReg, Reg)) {
+ if (MO.isUse()) {
+ Ref.Use = true;
+ Ref.IndirectUse |= (MOReg != Reg);
+ }
+ if (MO.isDef()) {
+ Ref.Def = true;
+ Ref.IndirectDef |= (MOReg != Reg);
+ }
+ }
+ }
+ }
+ }
+ return Ref;
+}
+
+// Compare compares the result of MI against zero. If MI is an addition
+// of -1 and if CCUsers is a single branch on nonzero, eliminate the addition
+// and convert the branch to a BRCT(G). Return true on success.
+bool
+SystemZElimCompare::convertToBRCT(MachineInstr *MI, MachineInstr *Compare,
+ SmallVectorImpl<MachineInstr *> &CCUsers) {
+ // Check whether we have an addition of -1.
+ unsigned Opcode = MI->getOpcode();
+ unsigned BRCT;
+ if (Opcode == SystemZ::AHI)
+ BRCT = SystemZ::BRCT;
+ else if (Opcode == SystemZ::AGHI)
+ BRCT = SystemZ::BRCTG;
+ else
+ return false;
+ if (MI->getOperand(2).getImm() != -1)
+ return false;
+
+ // Check whether we have a single JLH.
+ if (CCUsers.size() != 1)
+ return false;
+ MachineInstr *Branch = CCUsers[0];
+ if (Branch->getOpcode() != SystemZ::BRC ||
+ Branch->getOperand(0).getImm() != SystemZ::CCMASK_ICMP ||
+ Branch->getOperand(1).getImm() != SystemZ::CCMASK_CMP_NE)
+ return false;
+
+ // We already know that there are no references to the register between
+ // MI and Compare. Make sure that there are also no references between
+ // Compare and Branch.
+ unsigned SrcReg = Compare->getOperand(0).getReg();
+ MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch;
+ for (++MBBI; MBBI != MBBE; ++MBBI)
+ if (getRegReferences(MBBI, SrcReg))
+ return false;
+
+ // The transformation is OK. Rebuild Branch as a BRCT(G).
+ MachineOperand Target(Branch->getOperand(2));
+ Branch->RemoveOperand(2);
+ Branch->RemoveOperand(1);
+ Branch->RemoveOperand(0);
+ Branch->setDesc(TII->get(BRCT));
+ MachineInstrBuilder(*Branch->getParent()->getParent(), Branch)
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1))
+ .addOperand(Target)
+ .addReg(SystemZ::CC, RegState::ImplicitDefine);
+ MI->removeFromParent();
+ return true;
+}
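
The equivalence behind this rewrite is easy to check on plain integers: adding -1 and branching on a nonzero result behaves exactly like one decrement-and-branch step. A behavioural sketch, not LLVM code (addCompareBranch and brct are invented names):

#include <cassert>

// "AHI/AGHI ..., -1" followed by a compare with 0 and BRC on CCMASK_CMP_NE.
static bool addCompareBranch(long &Counter) {
  Counter += -1;
  return Counter != 0;
}

// The fused BRCT/BRCTG form: decrement, then branch on nonzero.
static bool brct(long &Counter) {
  return --Counter != 0;
}

int main() {
  for (long Init = -3; Init < 5; ++Init) {
    long A = Init, B = Init;
    assert(addCompareBranch(A) == brct(B));
    assert(A == B);
  }
  return 0;
}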
+
+// If MI is a load instruction, try to convert it into a LOAD AND TEST.
+// Return true on success.
+bool SystemZElimCompare::convertToLoadAndTest(MachineInstr *MI) {
+ unsigned Opcode = TII->getLoadAndTest(MI->getOpcode());
+ if (!Opcode)
+ return false;
+
+ MI->setDesc(TII->get(Opcode));
+ MachineInstrBuilder(*MI->getParent()->getParent(), MI)
+ .addReg(SystemZ::CC, RegState::ImplicitDefine);
+ return true;
+}
+
+// The CC users in CCUsers are testing the result of a comparison of some
+// value X against zero and we know that any CC value produced by MI
+// would also reflect the value of X. Try to adjust CCUsers so that
+// they test the result of MI directly, returning true on success.
+// Leave everything unchanged on failure.
+bool SystemZElimCompare::
+adjustCCMasksForInstr(MachineInstr *MI, MachineInstr *Compare,
+ SmallVectorImpl<MachineInstr *> &CCUsers) {
+ int Opcode = MI->getOpcode();
+ const MCInstrDesc &Desc = TII->get(Opcode);
+ unsigned MIFlags = Desc.TSFlags;
+
+ // See which compare-style condition codes are available.
+ unsigned ReusableCCMask = SystemZII::getCompareZeroCCMask(MIFlags);
+
+ // For unsigned comparisons with zero, only equality makes sense.
+ unsigned CompareFlags = Compare->getDesc().TSFlags;
+ if (CompareFlags & SystemZII::IsLogical)
+ ReusableCCMask &= SystemZ::CCMASK_CMP_EQ;
+
+ if (ReusableCCMask == 0)
+ return false;
+
+ unsigned CCValues = SystemZII::getCCValues(MIFlags);
+ assert((ReusableCCMask & ~CCValues) == 0 && "Invalid CCValues");
+
+ // Now check whether these flags are enough for all users.
+ SmallVector<MachineOperand *, 4> AlterMasks;
+ for (unsigned int I = 0, E = CCUsers.size(); I != E; ++I) {
+ MachineInstr *MI = CCUsers[I];
+
+ // Fail if this isn't a use of CC that we understand.
+ unsigned Flags = MI->getDesc().TSFlags;
+ unsigned FirstOpNum;
+ if (Flags & SystemZII::CCMaskFirst)
+ FirstOpNum = 0;
+ else if (Flags & SystemZII::CCMaskLast)
+ FirstOpNum = MI->getNumExplicitOperands() - 2;
+ else
+ return false;
+
+ // Check whether the instruction predicate treats all CC values
+ // outside of ReusableCCMask in the same way. In that case it
+ // doesn't matter what those CC values mean.
+ unsigned CCValid = MI->getOperand(FirstOpNum).getImm();
+ unsigned CCMask = MI->getOperand(FirstOpNum + 1).getImm();
+ unsigned OutValid = ~ReusableCCMask & CCValid;
+ unsigned OutMask = ~ReusableCCMask & CCMask;
+ if (OutMask != 0 && OutMask != OutValid)
+ return false;
+
+ AlterMasks.push_back(&MI->getOperand(FirstOpNum));
+ AlterMasks.push_back(&MI->getOperand(FirstOpNum + 1));
+ }
+
+ // All users are OK. Adjust the masks for MI.
+ for (unsigned I = 0, E = AlterMasks.size(); I != E; I += 2) {
+ AlterMasks[I]->setImm(CCValues);
+ unsigned CCMask = AlterMasks[I + 1]->getImm();
+ if (CCMask & ~ReusableCCMask)
+ AlterMasks[I + 1]->setImm((CCMask & ReusableCCMask) |
+ (CCValues & ~ReusableCCMask));
+ }
+
+ // CC is now live after MI.
+ int CCDef = MI->findRegisterDefOperandIdx(SystemZ::CC, false, true, TRI);
+ assert(CCDef >= 0 && "Couldn't find CC set");
+ MI->getOperand(CCDef).setIsDead(false);
+
+ // Clear any intervening kills of CC.
+ MachineBasicBlock::iterator MBBI = MI, MBBE = Compare;
+ for (++MBBI; MBBI != MBBE; ++MBBI)
+ MBBI->clearRegisterKills(SystemZ::CC, TRI);
+
+ return true;
+}
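
A worked example of the OutValid/OutMask test, using hypothetical masks in the bit numbering of SystemZ.h (bit 3 = CC 0 down to bit 0 = CC 3):

#include <cassert>

int main() {
  // Suppose the replacement instruction can only distinguish CC 0
  // (equality) from everything else.
  unsigned ReusableCCMask = 0x8;                 // CC 0 only
  unsigned CCValid = 0xe;                        // integer compare: CC 0-2
  unsigned OutValid = ~ReusableCCMask & CCValid; // CC values we can't map

  // Branch on equality: no bits outside ReusableCCMask, so it is safe.
  assert((~ReusableCCMask & 0x8u) == 0);

  // Branch on inequality: all unmappable CC values are taken, also safe.
  assert((~ReusableCCMask & 0x6u) == OutValid);

  // Branch on CC 1 only: the user would distinguish CC values that the
  // replacement cannot produce meaningfully, so the rewrite is rejected.
  unsigned OutMask = ~ReusableCCMask & 0x4u;
  assert(OutMask != 0 && OutMask != OutValid);
  return 0;
}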
+
+// Return true if Compare is a comparison against zero.
+static bool isCompareZero(MachineInstr *Compare) {
+ switch (Compare->getOpcode()) {
+ case SystemZ::LTEBRCompare:
+ case SystemZ::LTDBRCompare:
+ case SystemZ::LTXBRCompare:
+ return true;
+
+ default:
+ return (Compare->getNumExplicitOperands() == 2 &&
+ Compare->getOperand(1).isImm() &&
+ Compare->getOperand(1).getImm() == 0);
+ }
+}
+
+// Try to optimize cases where comparison instruction Compare is testing
+// a value against zero. Return true on success, in which case Compare
+// should be deleted as dead. CCUsers is the list of instructions that
+// use the CC value produced by Compare.
+bool SystemZElimCompare::
+optimizeCompareZero(MachineInstr *Compare,
+ SmallVectorImpl<MachineInstr *> &CCUsers) {
+ if (!isCompareZero(Compare))
+ return false;
+
+ // Search back for CC results that are based on the first operand.
+ unsigned SrcReg = Compare->getOperand(0).getReg();
+ unsigned SrcSubReg = Compare->getOperand(0).getSubReg();
+ MachineBasicBlock *MBB = Compare->getParent();
+ MachineBasicBlock::iterator MBBI = Compare, MBBE = MBB->begin();
+ Reference CCRefs;
+ Reference SrcRefs;
+ while (MBBI != MBBE) {
+ --MBBI;
+ MachineInstr *MI = MBBI;
+ if (resultTests(MI, SrcReg, SrcSubReg)) {
+ // Try to remove both MI and Compare by converting a branch to BRCT(G).
+ // We don't care in this case whether CC is modified between MI and
+ // Compare.
+ if (!CCRefs.Use && !SrcRefs && convertToBRCT(MI, Compare, CCUsers)) {
+ BranchOnCounts += 1;
+ return true;
+ }
+ // Try to eliminate Compare by reusing a CC result from MI.
+ if ((!CCRefs && convertToLoadAndTest(MI)) ||
+ (!CCRefs.Def && adjustCCMasksForInstr(MI, Compare, CCUsers))) {
+ EliminatedComparisons += 1;
+ return true;
+ }
+ }
+ SrcRefs |= getRegReferences(MI, SrcReg);
+ if (SrcRefs.Def)
+ return false;
+ CCRefs |= getRegReferences(MI, SystemZ::CC);
+ if (CCRefs.Use && CCRefs.Def)
+ return false;
+ }
+ return false;
+}
+
+// Try to fuse comparison instruction Compare into a later branch.
+// Return true on success, in which case Compare is redundant and can
+// be deleted.
+bool SystemZElimCompare::
+fuseCompareAndBranch(MachineInstr *Compare,
+ SmallVectorImpl<MachineInstr *> &CCUsers) {
+ // See whether we have a comparison that can be fused.
+ unsigned FusedOpcode = TII->getCompareAndBranch(Compare->getOpcode(),
+ Compare);
+ if (!FusedOpcode)
+ return false;
+
+ // See whether we have a single branch with which to fuse.
+ if (CCUsers.size() != 1)
+ return false;
+ MachineInstr *Branch = CCUsers[0];
+ if (Branch->getOpcode() != SystemZ::BRC)
+ return false;
+
+ // Make sure that the operands are available at the branch.
+ unsigned SrcReg = Compare->getOperand(0).getReg();
+ unsigned SrcReg2 = (Compare->getOperand(1).isReg() ?
+ Compare->getOperand(1).getReg() : 0);
+ MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch;
+ for (++MBBI; MBBI != MBBE; ++MBBI)
+ if (MBBI->modifiesRegister(SrcReg, TRI) ||
+ (SrcReg2 && MBBI->modifiesRegister(SrcReg2, TRI)))
+ return false;
+
+ // Read the branch mask and target.
+ MachineOperand CCMask(MBBI->getOperand(1));
+ MachineOperand Target(MBBI->getOperand(2));
+ assert((CCMask.getImm() & ~SystemZ::CCMASK_ICMP) == 0 &&
+ "Invalid condition-code mask for integer comparison");
+
+ // Clear out all current operands.
+ int CCUse = MBBI->findRegisterUseOperandIdx(SystemZ::CC, false, TRI);
+ assert(CCUse >= 0 && "BRC must use CC");
+ Branch->RemoveOperand(CCUse);
+ Branch->RemoveOperand(2);
+ Branch->RemoveOperand(1);
+ Branch->RemoveOperand(0);
+
+ // Rebuild Branch as a fused compare and branch.
+ Branch->setDesc(TII->get(FusedOpcode));
+ MachineInstrBuilder(*Branch->getParent()->getParent(), Branch)
+ .addOperand(Compare->getOperand(0))
+ .addOperand(Compare->getOperand(1))
+ .addOperand(CCMask)
+ .addOperand(Target)
+ .addReg(SystemZ::CC, RegState::ImplicitDefine);
+
+ // Clear any intervening kills of SrcReg and SrcReg2.
+ MBBI = Compare;
+ for (++MBBI; MBBI != MBBE; ++MBBI) {
+ MBBI->clearRegisterKills(SrcReg, TRI);
+ if (SrcReg2)
+ MBBI->clearRegisterKills(SrcReg2, TRI);
+ }
+ FusedComparisons += 1;
+ return true;
+}
+
+// Process all comparison instructions in MBB. Return true if something
+// changed.
+bool SystemZElimCompare::processBlock(MachineBasicBlock *MBB) {
+ bool Changed = false;
+
+ // Walk backwards through the block looking for comparisons, recording
+ // all CC users as we go. The subroutines can delete Compare and
+ // instructions before it.
+ bool CompleteCCUsers = !isCCLiveOut(MBB);
+ SmallVector<MachineInstr *, 4> CCUsers;
+ MachineBasicBlock::iterator MBBI = MBB->end();
+ while (MBBI != MBB->begin()) {
+ MachineInstr *MI = --MBBI;
+ if (CompleteCCUsers &&
+ MI->isCompare() &&
+ (optimizeCompareZero(MI, CCUsers) ||
+ fuseCompareAndBranch(MI, CCUsers))) {
+ ++MBBI;
+ MI->removeFromParent();
+ Changed = true;
+ CCUsers.clear();
+ CompleteCCUsers = true;
+ continue;
+ }
+
+ Reference CCRefs(getRegReferences(MI, SystemZ::CC));
+ if (CCRefs.Def) {
+ CCUsers.clear();
+ CompleteCCUsers = !CCRefs.IndirectDef;
+ }
+ if (CompleteCCUsers && CCRefs.Use)
+ CCUsers.push_back(MI);
+ }
+ return Changed;
+}
+
+bool SystemZElimCompare::runOnMachineFunction(MachineFunction &F) {
+ TII = static_cast<const SystemZInstrInfo *>(F.getTarget().getInstrInfo());
+ TRI = &TII->getRegisterInfo();
+
+ bool Changed = false;
+ for (MachineFunction::iterator MFI = F.begin(), MFE = F.end();
+ MFI != MFE; ++MFI)
+ Changed |= processBlock(MFI);
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
index fda33de..acfb491 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -14,19 +14,15 @@
#include "SystemZTargetMachine.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/IR/Function.h"
using namespace llvm;
-SystemZFrameLowering::SystemZFrameLowering(const SystemZTargetMachine &tm,
- const SystemZSubtarget &sti)
- : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8,
- -SystemZMC::CallFrameSize),
- TM(tm),
- STI(sti) {
+namespace {
// The ABI-defined register save slots, relative to the incoming stack
// pointer.
- static const unsigned SpillOffsetTable[][2] = {
+ static const TargetFrameLowering::SpillSlot SpillOffsetTable[] = {
{ SystemZ::R2D, 0x10 },
{ SystemZ::R3D, 0x18 },
{ SystemZ::R4D, 0x20 },
@@ -46,11 +42,23 @@ SystemZFrameLowering::SystemZFrameLowering(const SystemZTargetMachine &tm,
{ SystemZ::F4D, 0x90 },
{ SystemZ::F6D, 0x98 }
};
+}
+SystemZFrameLowering::SystemZFrameLowering(const SystemZTargetMachine &tm,
+ const SystemZSubtarget &sti)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8,
+ -SystemZMC::CallFrameSize, 8),
+ TM(tm), STI(sti) {
// Create a mapping from register number to save slot offset.
RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS);
for (unsigned I = 0, E = array_lengthof(SpillOffsetTable); I != E; ++I)
- RegSpillOffsets[SpillOffsetTable[I][0]] = SpillOffsetTable[I][1];
+ RegSpillOffsets[SpillOffsetTable[I].Reg] = SpillOffsetTable[I].Offset;
+}
+
+const TargetFrameLowering::SpillSlot *
+SystemZFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
+ NumEntries = array_lengthof(SpillOffsetTable);
+ return SpillOffsetTable;
}
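
The table follows a regular pattern: GPR %rN (2 <= N <= 15) is saved at 0x10 + 8 * (N - 2) bytes from the incoming stack pointer, with the FPR slots following from 0x80. A sketch of the offsets (gprSaveOffset is an invented helper, derived from the entries above):

#include <cassert>

static unsigned gprSaveOffset(unsigned N) {
  assert(N >= 2 && N <= 15);
  return 0x10 + 8 * (N - 2);
}

int main() {
  assert(gprSaveOffset(2) == 0x10);   // matches { SystemZ::R2D, 0x10 }
  assert(gprSaveOffset(4) == 0x20);   // matches { SystemZ::R4D, 0x20 }
  assert(gprSaveOffset(15) == 0x78);  // last GPR slot, just below the FPRs
  return 0;
}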
void SystemZFrameLowering::
@@ -103,7 +111,7 @@ static void addSavedGPR(MachineBasicBlock &MBB, MachineInstrBuilder &MIB,
const SystemZTargetMachine &TM,
unsigned GPR64, bool IsImplicit) {
const SystemZRegisterInfo *RI = TM.getRegisterInfo();
- unsigned GPR32 = RI->getSubReg(GPR64, SystemZ::subreg_32bit);
+ unsigned GPR32 = RI->getSubReg(GPR64, SystemZ::subreg_l32);
bool IsLive = MBB.isLiveIn(GPR64) || MBB.isLiveIn(GPR32);
if (!IsLive || !IsImplicit) {
MIB.addReg(GPR64, getImplRegState(IsImplicit) | getKillRegState(!IsLive));
@@ -127,14 +135,12 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
// Scan the call-saved GPRs and find the bounds of the register spill area.
- unsigned SavedGPRFrameSize = 0;
unsigned LowGPR = 0;
unsigned HighGPR = SystemZ::R15D;
unsigned StartOffset = -1U;
for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
unsigned Reg = CSI[I].getReg();
if (SystemZ::GR64BitRegClass.contains(Reg)) {
- SavedGPRFrameSize += 8;
unsigned Offset = RegSpillOffsets[Reg];
assert(Offset && "Unexpected GPR save");
if (StartOffset > Offset) {
@@ -144,9 +150,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
}
}
- // Save information about the range and location of the call-saved
- // registers, for use by the epilogue inserter.
- ZFI->setSavedGPRFrameSize(SavedGPRFrameSize);
+ // Save the range of call-saved registers, for use by the epilogue inserter.
ZFI->setLowSavedGPR(LowGPR);
ZFI->setHighSavedGPR(HighGPR);
@@ -260,6 +264,22 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
return true;
}
+void SystemZFrameLowering::
+processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS) const {
+ MachineFrameInfo *MFFrame = MF.getFrameInfo();
+ uint64_t MaxReach = (MFFrame->estimateStackSize(MF) +
+ SystemZMC::CallFrameSize * 2);
+ if (!isUInt<12>(MaxReach)) {
+ // We may need register scavenging slots if some parts of the frame
+ // are outside the reach of an unsigned 12-bit displacement.
+ // Create 2 for the case where both addresses in an MVC are
+ // out of range.
+ RS->addScavengingFrameIndex(MFFrame->CreateStackObject(8, 8, false));
+ RS->addScavengingFrameIndex(MFFrame->CreateStackObject(8, 8, false));
+ }
+}
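
The isUInt<12> test corresponds to the unsigned 12-bit D field of short-displacement instructions, which reaches offsets 0 to 4095. A sketch of the bound; isUIntN below is a stand-in for llvm::isUInt from MathExtras.h:

#include <cassert>
#include <cstdint>

template <unsigned N> static bool isUIntN(uint64_t X) {
  return X < (uint64_t(1) << N);
}

int main() {
  assert(isUIntN<12>(4095));   // largest offset a 12-bit D field reaches
  assert(!isUIntN<12>(4096));  // beyond this, scavenging slots are needed
  return 0;
}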
+
// Emit instructions before MBBI (in MBB) to add NumBytes to Reg.
static void emitIncrement(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
@@ -283,7 +303,7 @@ static void emitIncrement(MachineBasicBlock &MBB,
}
MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII->get(Opcode), Reg)
.addReg(Reg).addImm(ThisVal);
- // The PSW implicit def is dead.
+ // The CC implicit def is dead.
MI->getOperand(3).setIsDead();
NumBytes -= ThisVal;
}
@@ -297,7 +317,7 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF) const {
SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
MachineBasicBlock::iterator MBBI = MBB.begin();
MachineModuleInfo &MMI = MF.getMMI();
- std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+ const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
const std::vector<CalleeSavedInfo> &CSI = MFFrame->getCalleeSavedInfo();
bool HasFP = hasFP(MF);
DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
@@ -321,9 +341,8 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF) const {
unsigned Reg = I->getReg();
if (SystemZ::GR64BitRegClass.contains(Reg)) {
int64_t Offset = SPOffsetFromCFA + RegSpillOffsets[Reg];
- MachineLocation StackSlot(MachineLocation::VirtualFP, Offset);
- MachineLocation RegValue(Reg);
- Moves.push_back(MachineMove(GPRSaveLabel, StackSlot, RegValue));
+ MMI.addFrameInst(MCCFIInstruction::createOffset(
+ GPRSaveLabel, MRI->getDwarfRegNum(Reg, true), Offset));
}
}
}
@@ -338,9 +357,8 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF) const {
MCSymbol *AdjustSPLabel = MMI.getContext().CreateTempSymbol();
BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::PROLOG_LABEL))
.addSym(AdjustSPLabel);
- MachineLocation FPDest(MachineLocation::VirtualFP);
- MachineLocation FPSrc(MachineLocation::VirtualFP, SPOffsetFromCFA + Delta);
- Moves.push_back(MachineMove(AdjustSPLabel, FPDest, FPSrc));
+ MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(
+ AdjustSPLabel, SPOffsetFromCFA + Delta));
SPOffsetFromCFA += Delta;
}
@@ -353,9 +371,9 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF) const {
MCSymbol *SetFPLabel = MMI.getContext().CreateTempSymbol();
BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::PROLOG_LABEL))
.addSym(SetFPLabel);
- MachineLocation HardFP(SystemZ::R11D);
- MachineLocation VirtualFP(MachineLocation::VirtualFP);
- Moves.push_back(MachineMove(SetFPLabel, HardFP, VirtualFP));
+ unsigned HardFP = MRI->getDwarfRegNum(SystemZ::R11D, true);
+ MMI.addFrameInst(
+ MCCFIInstruction::createDefCfaRegister(SetFPLabel, HardFP));
// Mark the FramePtr as live at the beginning of every block except
// the entry block. (We'll have marked R11 as live on entry when
@@ -381,12 +399,10 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF) const {
      // Add CFI for this save.
if (!FPRSaveLabel)
FPRSaveLabel = MMI.getContext().CreateTempSymbol();
- unsigned Reg = I->getReg();
+ unsigned Reg = MRI->getDwarfRegNum(I->getReg(), true);
int64_t Offset = getFrameIndexOffset(MF, I->getFrameIdx());
- MachineLocation Slot(MachineLocation::VirtualFP,
- SPOffsetFromCFA + Offset);
- MachineLocation RegValue(Reg);
- Moves.push_back(MachineMove(FPRSaveLabel, Slot, RegValue));
+ MMI.addFrameInst(MCCFIInstruction::createOffset(
+ FPRSaveLabel, Reg, SPOffsetFromCFA + Offset));
}
}
// Complete the CFI for the FPR saves, modelling them as taking effect
@@ -404,8 +420,7 @@ void SystemZFrameLowering::emitEpilogue(MachineFunction &MF,
SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
// Skip the return instruction.
- assert(MBBI->getOpcode() == SystemZ::RET &&
- "Can only insert epilogue into returning blocks");
+ assert(MBBI->isReturn() && "Can only insert epilogue into returning blocks");
uint64_t StackSize = getAllocatedStackSize(MF);
if (ZFI->getLowSavedGPR()) {
@@ -453,11 +468,6 @@ int SystemZFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
// offset is therefore negative.
int64_t Offset = (MFFrame->getObjectOffset(FI) +
MFFrame->getOffsetAdjustment());
- if (FI >= 0)
- // Non-fixed objects are allocated below the incoming stack pointer.
- // Account for the space at the top of the frame that we choose not
- // to allocate.
- Offset += getUnallocatedTopBytes(MF);
// Make the offset relative to the incoming stack pointer.
Offset -= getOffsetOfLocalArea();
@@ -469,23 +479,12 @@ int SystemZFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
}
uint64_t SystemZFrameLowering::
-getUnallocatedTopBytes(const MachineFunction &MF) const {
- return MF.getInfo<SystemZMachineFunctionInfo>()->getSavedGPRFrameSize();
-}
-
-uint64_t SystemZFrameLowering::
getAllocatedStackSize(const MachineFunction &MF) const {
const MachineFrameInfo *MFFrame = MF.getFrameInfo();
// Start with the size of the local variables and spill slots.
uint64_t StackSize = MFFrame->getStackSize();
- // Remove any bytes that we choose not to allocate.
- StackSize -= getUnallocatedTopBytes(MF);
-
- // Include space for an emergency spill slot, if one might be needed.
- StackSize += getEmergencySpillSlotSize(MF);
-
// We need to allocate the ABI-defined 160-byte base area whenever
// we allocate stack space for our own use and whenever we call another
// function.
@@ -495,19 +494,6 @@ getAllocatedStackSize(const MachineFunction &MF) const {
return StackSize;
}
-unsigned SystemZFrameLowering::
-getEmergencySpillSlotSize(const MachineFunction &MF) const {
- const MachineFrameInfo *MFFrame = MF.getFrameInfo();
- uint64_t MaxReach = MFFrame->getStackSize() + SystemZMC::CallFrameSize * 2;
- return isUInt<12>(MaxReach) ? 0 : 8;
-}
-
-unsigned SystemZFrameLowering::
-getEmergencySpillSlotOffset(const MachineFunction &MF) const {
- assert(getEmergencySpillSlotSize(MF) && "No emergency spill slot");
- return SystemZMC::CallFrameSize;
-}
-
bool
SystemZFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
// The ABI requires us to allocate 160 bytes of stack space for the callee,
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
index 5ca049c..9b0a1d5 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -29,7 +29,10 @@ public:
SystemZFrameLowering(const SystemZTargetMachine &tm,
const SystemZSubtarget &sti);
- // Override FrameLowering.
+ // Override TargetFrameLowering.
+ virtual bool isFPCloseToIncomingSP() const LLVM_OVERRIDE { return false; }
+ virtual const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries) const
+ LLVM_OVERRIDE;
virtual void
processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const LLVM_OVERRIDE;
@@ -45,6 +48,8 @@ public:
const std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI) const
LLVM_OVERRIDE;
+ virtual void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS) const;
virtual void emitPrologue(MachineFunction &MF) const LLVM_OVERRIDE;
virtual void emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const LLVM_OVERRIDE;
@@ -59,29 +64,9 @@ public:
MachineBasicBlock::iterator MI) const
LLVM_OVERRIDE;
- // The target-independent code automatically allocates save slots for
- // call-saved GPRs. However, we don't need those slots for SystemZ,
- // because the ABI sets aside GPR save slots in the caller-allocated part
- // of the frame. Since the target-independent code puts this unneeded
- // area at the top of the callee-allocated part of frame, we choose not
- // to allocate it and adjust the offsets accordingly. Return the
- // size of this unallocated area.
- // FIXME: seems a bit hackish.
- uint64_t getUnallocatedTopBytes(const MachineFunction &MF) const;
-
// Return the number of bytes in the callee-allocated part of the frame.
uint64_t getAllocatedStackSize(const MachineFunction &MF) const;
- // Return the number of frame bytes that should be reserved for
-  // an emergency spill slot, for use by the register scavenger.
-  // Return 0 if register scavenging won't be needed.
- unsigned getEmergencySpillSlotSize(const MachineFunction &MF) const;
-
- // Return the offset from the frame pointer of the emergency spill slot,
- // which always fits within a 12-bit unsigned displacement field.
- // Only valid if getEmergencySpillSlotSize(MF) returns nonzero.
- unsigned getEmergencySpillSlotOffset(const MachineFunction &MF) const;
-
// Return the byte offset from the incoming stack pointer of Reg's
// ABI-defined save slot. Return 0 if no slot is defined for Reg.
unsigned getRegSpillOffset(unsigned Reg) const {
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index 442f0c4..f4a2773 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "SystemZTargetMachine.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -91,44 +92,90 @@ struct SystemZAddressingMode {
}
};
+// Return a mask with Count low bits set.
+static uint64_t allOnes(unsigned int Count) {
+ return Count == 0 ? 0 : (uint64_t(1) << (Count - 1) << 1) - 1;
+}
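
The two-step shift is what keeps Count == 64 well-defined: shifting a 64-bit value by 64 in one step would be undefined behaviour in C++. A quick check of the same expression outside LLVM (allOnesDemo is a copy for illustration):

#include <cassert>
#include <cstdint>

static uint64_t allOnesDemo(unsigned Count) {
  return Count == 0 ? 0 : (uint64_t(1) << (Count - 1) << 1) - 1;
}

int main() {
  assert(allOnesDemo(0) == 0);
  assert(allOnesDemo(1) == 1);
  assert(allOnesDemo(12) == 0xfff);
  assert(allOnesDemo(64) == ~uint64_t(0)); // no UB, unlike (1 << 64) - 1
  return 0;
}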
+
+// Represents operands 2 to 5 of the ROTATE AND ... SELECTED BITS operation
+// given by Opcode. The operands are: Input (R2), Start (I3), End (I4) and
+// Rotate (I5). The combined operand value is effectively:
+//
+// (or (rotl Input, Rotate), ~Mask)
+//
+// for RNSBG and:
+//
+// (and (rotl Input, Rotate), Mask)
+//
+// otherwise. The output value has BitSize bits, although Input may be
+// narrower (in which case the upper bits are don't care).
+struct RxSBGOperands {
+ RxSBGOperands(unsigned Op, SDValue N)
+ : Opcode(Op), BitSize(N.getValueType().getSizeInBits()),
+ Mask(allOnes(BitSize)), Input(N), Start(64 - BitSize), End(63),
+ Rotate(0) {}
+
+ unsigned Opcode;
+ unsigned BitSize;
+ uint64_t Mask;
+ SDValue Input;
+ unsigned Start;
+ unsigned End;
+ unsigned Rotate;
+};
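
The combined operand value can be modelled directly. A sketch of the (and (rotl Input, Rotate), Mask) form with hypothetical field values; Start and End use the ISA's big-endian bit numbering (0 = most significant bit), and rotl64 is an invented helper:

#include <cassert>
#include <cstdint>

static uint64_t rotl64(uint64_t V, unsigned N) {
  return N == 0 ? V : (V << N) | (V >> (64 - N));
}

int main() {
  // Select bits 48..63 (the low 16 bits) after rotating left by 8.
  unsigned Start = 48, End = 63, Rotate = 8;
  uint64_t Mask = (~uint64_t(0) >> Start) & (~uint64_t(0) << (63 - End));
  assert(Mask == 0xffff);

  uint64_t Input = 0xff00000000000000ULL;  // top byte set
  uint64_t Result = rotl64(Input, Rotate) & Mask;
  assert(Result == 0xff);                  // rotated into the selected field
  return 0;
}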
+
class SystemZDAGToDAGISel : public SelectionDAGISel {
const SystemZTargetLowering &Lowering;
const SystemZSubtarget &Subtarget;
// Used by SystemZOperands.td to create integer constants.
- inline SDValue getImm(const SDNode *Node, uint64_t Imm) {
+ inline SDValue getImm(const SDNode *Node, uint64_t Imm) const {
return CurDAG->getTargetConstant(Imm, Node->getValueType(0));
}
+ const SystemZTargetMachine &getTargetMachine() const {
+ return static_cast<const SystemZTargetMachine &>(TM);
+ }
+
+ const SystemZInstrInfo *getInstrInfo() const {
+ return getTargetMachine().getInstrInfo();
+ }
+
// Try to fold more of the base or index of AM into AM, where IsBase
// selects between the base and index.
- bool expandAddress(SystemZAddressingMode &AM, bool IsBase);
+ bool expandAddress(SystemZAddressingMode &AM, bool IsBase) const;
// Try to describe N in AM, returning true on success.
- bool selectAddress(SDValue N, SystemZAddressingMode &AM);
+ bool selectAddress(SDValue N, SystemZAddressingMode &AM) const;
// Extract individual target operands from matched address AM.
void getAddressOperands(const SystemZAddressingMode &AM, EVT VT,
- SDValue &Base, SDValue &Disp);
+ SDValue &Base, SDValue &Disp) const;
void getAddressOperands(const SystemZAddressingMode &AM, EVT VT,
- SDValue &Base, SDValue &Disp, SDValue &Index);
+ SDValue &Base, SDValue &Disp, SDValue &Index) const;
// Try to match Addr as a FormBD address with displacement type DR.
// Return true on success, storing the base and displacement in
// Base and Disp respectively.
bool selectBDAddr(SystemZAddressingMode::DispRange DR, SDValue Addr,
- SDValue &Base, SDValue &Disp);
+ SDValue &Base, SDValue &Disp) const;
+
+ // Try to match Addr as a FormBDX address with displacement type DR.
+ // Return true on success and if the result had no index. Store the
+ // base and displacement in Base and Disp respectively.
+ bool selectMVIAddr(SystemZAddressingMode::DispRange DR, SDValue Addr,
+ SDValue &Base, SDValue &Disp) const;
// Try to match Addr as a FormBDX* address of form Form with
// displacement type DR. Return true on success, storing the base,
// displacement and index in Base, Disp and Index respectively.
bool selectBDXAddr(SystemZAddressingMode::AddrForm Form,
SystemZAddressingMode::DispRange DR, SDValue Addr,
- SDValue &Base, SDValue &Disp, SDValue &Index);
+ SDValue &Base, SDValue &Disp, SDValue &Index) const;
// PC-relative address matching routines used by SystemZOperands.td.
- bool selectPCRelAddress(SDValue Addr, SDValue &Target) {
- if (Addr.getOpcode() == SystemZISD::PCREL_WRAPPER) {
+ bool selectPCRelAddress(SDValue Addr, SDValue &Target) const {
+ if (SystemZISD::isPCREL(Addr.getOpcode())) {
Target = Addr.getOperand(0);
return true;
}
@@ -136,69 +183,104 @@ class SystemZDAGToDAGISel : public SelectionDAGISel {
}
// BD matching routines used by SystemZOperands.td.
- bool selectBDAddr12Only(SDValue Addr, SDValue &Base, SDValue &Disp) {
+ bool selectBDAddr12Only(SDValue Addr, SDValue &Base, SDValue &Disp) const {
return selectBDAddr(SystemZAddressingMode::Disp12Only, Addr, Base, Disp);
}
- bool selectBDAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp) {
+ bool selectBDAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp) const {
return selectBDAddr(SystemZAddressingMode::Disp12Pair, Addr, Base, Disp);
}
- bool selectBDAddr20Only(SDValue Addr, SDValue &Base, SDValue &Disp) {
+ bool selectBDAddr20Only(SDValue Addr, SDValue &Base, SDValue &Disp) const {
return selectBDAddr(SystemZAddressingMode::Disp20Only, Addr, Base, Disp);
}
- bool selectBDAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp) {
+ bool selectBDAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp) const {
return selectBDAddr(SystemZAddressingMode::Disp20Pair, Addr, Base, Disp);
}
+ // MVI matching routines used by SystemZOperands.td.
+ bool selectMVIAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp) const {
+ return selectMVIAddr(SystemZAddressingMode::Disp12Pair, Addr, Base, Disp);
+ }
+ bool selectMVIAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp) const {
+ return selectMVIAddr(SystemZAddressingMode::Disp20Pair, Addr, Base, Disp);
+ }
+
// BDX matching routines used by SystemZOperands.td.
bool selectBDXAddr12Only(SDValue Addr, SDValue &Base, SDValue &Disp,
- SDValue &Index) {
+ SDValue &Index) const {
return selectBDXAddr(SystemZAddressingMode::FormBDXNormal,
SystemZAddressingMode::Disp12Only,
Addr, Base, Disp, Index);
}
bool selectBDXAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp,
- SDValue &Index) {
+ SDValue &Index) const {
return selectBDXAddr(SystemZAddressingMode::FormBDXNormal,
SystemZAddressingMode::Disp12Pair,
Addr, Base, Disp, Index);
}
bool selectDynAlloc12Only(SDValue Addr, SDValue &Base, SDValue &Disp,
- SDValue &Index) {
+ SDValue &Index) const {
return selectBDXAddr(SystemZAddressingMode::FormBDXDynAlloc,
SystemZAddressingMode::Disp12Only,
Addr, Base, Disp, Index);
}
bool selectBDXAddr20Only(SDValue Addr, SDValue &Base, SDValue &Disp,
- SDValue &Index) {
+ SDValue &Index) const {
return selectBDXAddr(SystemZAddressingMode::FormBDXNormal,
SystemZAddressingMode::Disp20Only,
Addr, Base, Disp, Index);
}
bool selectBDXAddr20Only128(SDValue Addr, SDValue &Base, SDValue &Disp,
- SDValue &Index) {
+ SDValue &Index) const {
return selectBDXAddr(SystemZAddressingMode::FormBDXNormal,
SystemZAddressingMode::Disp20Only128,
Addr, Base, Disp, Index);
}
bool selectBDXAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp,
- SDValue &Index) {
+ SDValue &Index) const {
return selectBDXAddr(SystemZAddressingMode::FormBDXNormal,
SystemZAddressingMode::Disp20Pair,
Addr, Base, Disp, Index);
}
bool selectLAAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp,
- SDValue &Index) {
+ SDValue &Index) const {
return selectBDXAddr(SystemZAddressingMode::FormBDXLA,
SystemZAddressingMode::Disp12Pair,
Addr, Base, Disp, Index);
}
bool selectLAAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp,
- SDValue &Index) {
+ SDValue &Index) const {
return selectBDXAddr(SystemZAddressingMode::FormBDXLA,
SystemZAddressingMode::Disp20Pair,
Addr, Base, Disp, Index);
}
+ // Check whether (or Op (and X InsertMask)) is effectively an insertion
+ // of X into bits InsertMask of some Y != Op. Return true if so and
+ // set Op to that Y.
+ bool detectOrAndInsertion(SDValue &Op, uint64_t InsertMask) const;
+
+ // Try to update RxSBG so that only the bits of RxSBG.Input in Mask are used.
+ // Return true on success.
+ bool refineRxSBGMask(RxSBGOperands &RxSBG, uint64_t Mask) const;
+
+ // Try to fold some of RxSBG.Input into other fields of RxSBG.
+ // Return true on success.
+ bool expandRxSBG(RxSBGOperands &RxSBG) const;
+
+ // Return an undefined value of type VT.
+ SDValue getUNDEF(SDLoc DL, EVT VT) const;
+
+ // Convert N to VT, if it isn't already.
+ SDValue convertTo(SDLoc DL, EVT VT, SDValue N) const;
+
+ // Try to implement AND or shift node N using RISBG with the zero flag set.
+ // Return the selected node on success, otherwise return null.
+ SDNode *tryRISBGZero(SDNode *N);
+
+ // Try to use RISBG or Opcode to implement OR or XOR node N.
+ // Return the selected node on success, otherwise return null.
+ SDNode *tryRxSBG(SDNode *N, unsigned Opcode);
+
// If Op0 is null, then Node is a constant that can be loaded using:
//
// (Opcode UpperVal LowerVal)
@@ -209,6 +291,26 @@ class SystemZDAGToDAGISel : public SelectionDAGISel {
SDNode *splitLargeImmediate(unsigned Opcode, SDNode *Node, SDValue Op0,
uint64_t UpperVal, uint64_t LowerVal);
+ // Return true if Load and Store are loads and stores of the same size
+ // and are guaranteed not to overlap. Such operations can be implemented
+ // using block (SS-format) instructions.
+ //
+ // Partial overlap would lead to incorrect code, since the block operations
+ // are logically bytewise, even though they have a fast path for the
+ // non-overlapping case. We also need to avoid full overlap (i.e. two
+ // addresses that might be equal at run time) because although that case
+ // would be handled correctly, it might be implemented by millicode.
+ bool canUseBlockOperation(StoreSDNode *Store, LoadSDNode *Load) const;
+
+ // N is a (store (load Y), X) pattern. Return true if it can use an MVC
+ // from Y to X.
+ bool storeLoadCanUseMVC(SDNode *N) const;
+
+ // N is a (store (op (load A[0]), (load A[1])), X) pattern. Return true
+ // if A[1 - I] == X and if N can use a block operation like NC from A[I]
+ // to X.
+ bool storeLoadCanUseBlockBinary(SDNode *N, unsigned I) const;
+
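
The "logically bytewise" property is exactly why partial overlap has to be ruled out. A behavioural sketch of an MVC-style ascending copy (mvcLikeCopy models the logical semantics only, not how the hardware implements the fast path):

#include <cassert>
#include <cstring>

// A destination that overlaps the source from above smears bytes forward.
static void mvcLikeCopy(char *Dst, const char *Src, unsigned Len) {
  for (unsigned I = 0; I < Len; ++I)
    Dst[I] = Src[I];
}

int main() {
  char Buf[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  mvcLikeCopy(Buf + 1, Buf, 4);            // partially overlapping copy
  const char Expect[8] = {1, 1, 1, 1, 1, 6, 7, 8};
  assert(std::memcmp(Buf, Expect, sizeof(Buf)) == 0);
  return 0;
}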
public:
SystemZDAGToDAGISel(SystemZTargetMachine &TM, CodeGenOpt::Level OptLevel)
: SelectionDAGISel(TM, OptLevel),
@@ -294,9 +396,9 @@ static bool expandIndex(SystemZAddressingMode &AM, SDValue Base,
// The base or index of AM is equivalent to Op0 + Op1, where IsBase selects
// between the base and index. Try to fold Op1 into AM's displacement.
static bool expandDisp(SystemZAddressingMode &AM, bool IsBase,
- SDValue Op0, ConstantSDNode *Op1) {
+ SDValue Op0, uint64_t Op1) {
// First try adjusting the displacement.
- int64_t TestDisp = AM.Disp + Op1->getSExtValue();
+ int64_t TestDisp = AM.Disp + Op1;
if (selectDisp(AM.DR, TestDisp)) {
changeComponent(AM, IsBase, Op0);
AM.Disp = TestDisp;
@@ -309,7 +411,7 @@ static bool expandDisp(SystemZAddressingMode &AM, bool IsBase,
}
bool SystemZDAGToDAGISel::expandAddress(SystemZAddressingMode &AM,
- bool IsBase) {
+ bool IsBase) const {
SDValue N = IsBase ? AM.Base : AM.Index;
unsigned Opcode = N.getOpcode();
if (Opcode == ISD::TRUNCATE) {
@@ -329,13 +431,23 @@ bool SystemZDAGToDAGISel::expandAddress(SystemZAddressingMode &AM,
return expandAdjDynAlloc(AM, IsBase, Op0);
if (Op0Code == ISD::Constant)
- return expandDisp(AM, IsBase, Op1, cast<ConstantSDNode>(Op0));
+ return expandDisp(AM, IsBase, Op1,
+ cast<ConstantSDNode>(Op0)->getSExtValue());
if (Op1Code == ISD::Constant)
- return expandDisp(AM, IsBase, Op0, cast<ConstantSDNode>(Op1));
+ return expandDisp(AM, IsBase, Op0,
+ cast<ConstantSDNode>(Op1)->getSExtValue());
if (IsBase && expandIndex(AM, Op0, Op1))
return true;
}
+ if (Opcode == SystemZISD::PCREL_OFFSET) {
+ SDValue Full = N.getOperand(0);
+ SDValue Base = N.getOperand(1);
+ SDValue Anchor = Base.getOperand(0);
+ uint64_t Offset = (cast<GlobalAddressSDNode>(Full)->getOffset() -
+ cast<GlobalAddressSDNode>(Anchor)->getOffset());
+ return expandDisp(AM, IsBase, Base, Offset);
+ }
return false;
}
@@ -414,14 +526,15 @@ static bool shouldUseLA(SDNode *Base, int64_t Disp, SDNode *Index) {
// Return true if Addr is suitable for AM, updating AM if so.
bool SystemZDAGToDAGISel::selectAddress(SDValue Addr,
- SystemZAddressingMode &AM) {
+ SystemZAddressingMode &AM) const {
// Start out assuming that the address will need to be loaded separately,
// then try to extend it as much as we can.
AM.Base = Addr;
// First try treating the address as a constant.
if (Addr.getOpcode() == ISD::Constant &&
- expandDisp(AM, true, SDValue(), cast<ConstantSDNode>(Addr)))
+ expandDisp(AM, true, SDValue(),
+ cast<ConstantSDNode>(Addr)->getSExtValue()))
;
else
// Otherwise try expanding each component.
@@ -461,7 +574,7 @@ static void insertDAGNode(SelectionDAG *DAG, SDNode *Pos, SDValue N) {
void SystemZDAGToDAGISel::getAddressOperands(const SystemZAddressingMode &AM,
EVT VT, SDValue &Base,
- SDValue &Disp) {
+ SDValue &Disp) const {
Base = AM.Base;
if (!Base.getNode())
// Register 0 means "no base". This is mostly useful for shifts.
@@ -474,7 +587,7 @@ void SystemZDAGToDAGISel::getAddressOperands(const SystemZAddressingMode &AM,
// Truncate values from i64 to i32, for shifts.
assert(VT == MVT::i32 && Base.getValueType() == MVT::i64 &&
"Unexpected truncation");
- DebugLoc DL = Base.getDebugLoc();
+ SDLoc DL(Base);
SDValue Trunc = CurDAG->getNode(ISD::TRUNCATE, DL, VT, Base);
insertDAGNode(CurDAG, Base.getNode(), Trunc);
Base = Trunc;
@@ -486,7 +599,8 @@ void SystemZDAGToDAGISel::getAddressOperands(const SystemZAddressingMode &AM,
void SystemZDAGToDAGISel::getAddressOperands(const SystemZAddressingMode &AM,
EVT VT, SDValue &Base,
- SDValue &Disp, SDValue &Index) {
+ SDValue &Disp,
+ SDValue &Index) const {
getAddressOperands(AM, VT, Base, Disp);
Index = AM.Index;
@@ -497,7 +611,7 @@ void SystemZDAGToDAGISel::getAddressOperands(const SystemZAddressingMode &AM,
bool SystemZDAGToDAGISel::selectBDAddr(SystemZAddressingMode::DispRange DR,
SDValue Addr, SDValue &Base,
- SDValue &Disp) {
+ SDValue &Disp) const {
SystemZAddressingMode AM(SystemZAddressingMode::FormBD, DR);
if (!selectAddress(Addr, AM))
return false;
@@ -506,10 +620,21 @@ bool SystemZDAGToDAGISel::selectBDAddr(SystemZAddressingMode::DispRange DR,
return true;
}
+bool SystemZDAGToDAGISel::selectMVIAddr(SystemZAddressingMode::DispRange DR,
+ SDValue Addr, SDValue &Base,
+ SDValue &Disp) const {
+ SystemZAddressingMode AM(SystemZAddressingMode::FormBDXNormal, DR);
+ if (!selectAddress(Addr, AM) || AM.Index.getNode())
+ return false;
+
+ getAddressOperands(AM, Addr.getValueType(), Base, Disp);
+ return true;
+}
+
bool SystemZDAGToDAGISel::selectBDXAddr(SystemZAddressingMode::AddrForm Form,
SystemZAddressingMode::DispRange DR,
SDValue Addr, SDValue &Base,
- SDValue &Disp, SDValue &Index) {
+ SDValue &Disp, SDValue &Index) const {
SystemZAddressingMode AM(Form, DR);
if (!selectAddress(Addr, AM))
return false;
@@ -518,11 +643,317 @@ bool SystemZDAGToDAGISel::selectBDXAddr(SystemZAddressingMode::AddrForm Form,
return true;
}
+bool SystemZDAGToDAGISel::detectOrAndInsertion(SDValue &Op,
+ uint64_t InsertMask) const {
+ // We're only interested in cases where the insertion is into some operand
+ // of Op, rather than into Op itself. The only useful case is an AND.
+ if (Op.getOpcode() != ISD::AND)
+ return false;
+
+ // We need a constant mask.
+ ConstantSDNode *MaskNode =
+ dyn_cast<ConstantSDNode>(Op.getOperand(1).getNode());
+ if (!MaskNode)
+ return false;
+
+ // It's not an insertion of Op.getOperand(0) if the two masks overlap.
+ uint64_t AndMask = MaskNode->getZExtValue();
+ if (InsertMask & AndMask)
+ return false;
+
+ // It's only an insertion if all bits are covered or are known to be zero.
+ // The inner check covers all cases but is more expensive.
+ uint64_t Used = allOnes(Op.getValueType().getSizeInBits());
+ if (Used != (AndMask | InsertMask)) {
+ APInt KnownZero, KnownOne;
+ CurDAG->ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne);
+ if (Used != (AndMask | InsertMask | KnownZero.getZExtValue()))
+ return false;
+ }
+
+ Op = Op.getOperand(0);
+ return true;
+}
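
As a standalone illustration of the property this check relies on (not part of the patch): when AndMask and InsertMask are disjoint and together cover the word, the OR really does behave as "insert the InsertMask bits of X into Y". A minimal exhaustive check over 8-bit values, with hypothetical masks chosen just for the test:

    #include <cassert>

    int main() {
      // Any disjoint, covering mask pair behaves the same way.
      const unsigned InsertMask = 0x0f, AndMask = 0xf0;
      for (unsigned Y = 0; Y < 256; ++Y)
        for (unsigned X = 0; X < 256; ++X) {
          unsigned Or = (Y & AndMask) | (X & InsertMask);
          assert((Or & AndMask) == (Y & AndMask));       // Y outside the field
          assert((Or & InsertMask) == (X & InsertMask)); // X inside the field
        }
      return 0;
    }
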
+
+bool SystemZDAGToDAGISel::refineRxSBGMask(RxSBGOperands &RxSBG,
+ uint64_t Mask) const {
+ const SystemZInstrInfo *TII = getInstrInfo();
+ if (RxSBG.Rotate != 0)
+ Mask = (Mask << RxSBG.Rotate) | (Mask >> (64 - RxSBG.Rotate));
+ Mask &= RxSBG.Mask;
+ if (TII->isRxSBGMask(Mask, RxSBG.BitSize, RxSBG.Start, RxSBG.End)) {
+ RxSBG.Mask = Mask;
+ return true;
+ }
+ return false;
+}
+
+// Return true if any bits of (RxSBG.Input & Mask) are significant.
+static bool maskMatters(RxSBGOperands &RxSBG, uint64_t Mask) {
+ // Rotate the mask in the same way as RxSBG.Input is rotated.
+ if (RxSBG.Rotate != 0)
+ Mask = ((Mask << RxSBG.Rotate) | (Mask >> (64 - RxSBG.Rotate)));
+ return (Mask & RxSBG.Mask) != 0;
+}
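
Both refineRxSBGMask and maskMatters rotate a mask with (Mask << Rotate) | (Mask >> (64 - Rotate)), guarded by Rotate != 0 because a shift by 64 is undefined behaviour in C++. A minimal sketch of that rotation and the identity it provides (rotl64 is a hypothetical name; the patch open-codes it):

    #include <cassert>
    #include <cstdint>

    static uint64_t rotl64(uint64_t Mask, unsigned Rotate) {
      // The Rotate != 0 guard avoids the undefined "Mask >> 64".
      return Rotate ? (Mask << Rotate) | (Mask >> (64 - Rotate)) : Mask;
    }

    int main() {
      uint64_t Mask = 0x00000000ffff0000ULL;
      // Rotating left by N and then by 64-N round-trips the mask.
      assert(rotl64(rotl64(Mask, 12), 64 - 12) == Mask);
      // Bits wrap around rather than being lost.
      assert(rotl64(0x8000000000000001ULL, 1) == 0x3ULL);
      return 0;
    }
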
+
+bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) const {
+ SDValue N = RxSBG.Input;
+ unsigned Opcode = N.getOpcode();
+ switch (Opcode) {
+ case ISD::AND: {
+ if (RxSBG.Opcode == SystemZ::RNSBG)
+ return false;
+
+ ConstantSDNode *MaskNode =
+ dyn_cast<ConstantSDNode>(N.getOperand(1).getNode());
+ if (!MaskNode)
+ return false;
+
+ SDValue Input = N.getOperand(0);
+ uint64_t Mask = MaskNode->getZExtValue();
+ if (!refineRxSBGMask(RxSBG, Mask)) {
+ // If some bits of Input are already known zeros, those bits will have
+ // been removed from the mask. See if adding them back in makes the
+ // mask suitable.
+ APInt KnownZero, KnownOne;
+ CurDAG->ComputeMaskedBits(Input, KnownZero, KnownOne);
+ Mask |= KnownZero.getZExtValue();
+ if (!refineRxSBGMask(RxSBG, Mask))
+ return false;
+ }
+ RxSBG.Input = Input;
+ return true;
+ }
+
+ case ISD::OR: {
+ if (RxSBG.Opcode != SystemZ::RNSBG)
+ return false;
+
+ ConstantSDNode *MaskNode =
+ dyn_cast<ConstantSDNode>(N.getOperand(1).getNode());
+ if (!MaskNode)
+ return false;
+
+ SDValue Input = N.getOperand(0);
+ uint64_t Mask = ~MaskNode->getZExtValue();
+ if (!refineRxSBGMask(RxSBG, Mask)) {
+ // If some bits of Input are already known ones, those bits will have
+ // been removed from the mask. See if adding them back in makes the
+ // mask suitable.
+ APInt KnownZero, KnownOne;
+ CurDAG->ComputeMaskedBits(Input, KnownZero, KnownOne);
+ Mask &= ~KnownOne.getZExtValue();
+ if (!refineRxSBGMask(RxSBG, Mask))
+ return false;
+ }
+ RxSBG.Input = Input;
+ return true;
+ }
+
+ case ISD::ROTL: {
+ // Any 64-bit rotate left can be merged into the RxSBG.
+ if (RxSBG.BitSize != 64 || N.getValueType() != MVT::i64)
+ return false;
+ ConstantSDNode *CountNode
+ = dyn_cast<ConstantSDNode>(N.getOperand(1).getNode());
+ if (!CountNode)
+ return false;
+
+ RxSBG.Rotate = (RxSBG.Rotate + CountNode->getZExtValue()) & 63;
+ RxSBG.Input = N.getOperand(0);
+ return true;
+ }
+
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND: {
+ // Check that the extension bits are don't-care (i.e. are masked out
+ // by the final mask).
+ unsigned InnerBitSize = N.getOperand(0).getValueType().getSizeInBits();
+ if (maskMatters(RxSBG, allOnes(RxSBG.BitSize) - allOnes(InnerBitSize)))
+ return false;
+
+ RxSBG.Input = N.getOperand(0);
+ return true;
+ }
+
+ case ISD::SHL: {
+ ConstantSDNode *CountNode =
+ dyn_cast<ConstantSDNode>(N.getOperand(1).getNode());
+ if (!CountNode)
+ return false;
+
+ uint64_t Count = CountNode->getZExtValue();
+ unsigned BitSize = N.getValueType().getSizeInBits();
+ if (Count < 1 || Count >= BitSize)
+ return false;
+
+ if (RxSBG.Opcode == SystemZ::RNSBG) {
+ // Treat (shl X, count) as (rotl X, size-count) as long as the bottom
+ // count bits from RxSBG.Input are ignored.
+ if (maskMatters(RxSBG, allOnes(Count)))
+ return false;
+ } else {
+ // Treat (shl X, count) as (and (rotl X, count), ~0<<count).
+ if (!refineRxSBGMask(RxSBG, allOnes(BitSize - Count) << Count))
+ return false;
+ }
+
+ RxSBG.Rotate = (RxSBG.Rotate + Count) & 63;
+ RxSBG.Input = N.getOperand(0);
+ return true;
+ }
+
+ case ISD::SRL:
+ case ISD::SRA: {
+ ConstantSDNode *CountNode =
+ dyn_cast<ConstantSDNode>(N.getOperand(1).getNode());
+ if (!CountNode)
+ return false;
+
+ uint64_t Count = CountNode->getZExtValue();
+ unsigned BitSize = N.getValueType().getSizeInBits();
+ if (Count < 1 || Count >= BitSize)
+ return false;
+
+ if (RxSBG.Opcode == SystemZ::RNSBG || Opcode == ISD::SRA) {
+ // Treat (srl|sra X, count) as (rotl X, size-count) as long as the top
+ // count bits from RxSBG.Input are ignored.
+ if (maskMatters(RxSBG, allOnes(Count) << (BitSize - Count)))
+ return false;
+ } else {
+ // Treat (srl X, count) as (and (rotl X, size-count), ~0>>count),
+ // which is similar to SLL above.
+ if (!refineRxSBGMask(RxSBG, allOnes(BitSize - Count)))
+ return false;
+ }
+
+ RxSBG.Rotate = (RxSBG.Rotate - Count) & 63;
+ RxSBG.Input = N.getOperand(0);
+ return true;
+ }
+ default:
+ return false;
+ }
+}
+
+SDValue SystemZDAGToDAGISel::getUNDEF(SDLoc DL, EVT VT) const {
+ SDNode *N = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
+ return SDValue(N, 0);
+}
+
+SDValue SystemZDAGToDAGISel::convertTo(SDLoc DL, EVT VT, SDValue N) const {
+ if (N.getValueType() == MVT::i32 && VT == MVT::i64)
+ return CurDAG->getTargetInsertSubreg(SystemZ::subreg_l32,
+ DL, VT, getUNDEF(DL, MVT::i64), N);
+ if (N.getValueType() == MVT::i64 && VT == MVT::i32)
+ return CurDAG->getTargetExtractSubreg(SystemZ::subreg_l32, DL, VT, N);
+ assert(N.getValueType() == VT && "Unexpected value types");
+ return N;
+}
+
+SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ RxSBGOperands RISBG(SystemZ::RISBG, SDValue(N, 0));
+ unsigned Count = 0;
+ while (expandRxSBG(RISBG))
+ if (RISBG.Input.getOpcode() != ISD::ANY_EXTEND)
+ Count += 1;
+ if (Count == 0)
+ return 0;
+ if (Count == 1) {
+ // Prefer to use normal shift instructions over RISBG, since they can handle
+ // all cases and are sometimes shorter.
+ if (N->getOpcode() != ISD::AND)
+ return 0;
+
+ // Prefer register extensions like LLC over RISBG. Also prefer to start
+ // out with normal ANDs if one instruction would be enough. We can convert
+ // these ANDs into an RISBG later if a three-address instruction is useful.
+ if (VT == MVT::i32 ||
+ RISBG.Mask == 0xff ||
+ RISBG.Mask == 0xffff ||
+ SystemZ::isImmLF(~RISBG.Mask) ||
+ SystemZ::isImmHF(~RISBG.Mask)) {
+ // Force the new mask into the DAG, since it may include known-one bits.
+ ConstantSDNode *MaskN = cast<ConstantSDNode>(N->getOperand(1).getNode());
+ if (MaskN->getZExtValue() != RISBG.Mask) {
+ SDValue NewMask = CurDAG->getConstant(RISBG.Mask, VT);
+ N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), NewMask);
+ return SelectCode(N);
+ }
+ return 0;
+ }
+ }
+
+ unsigned Opcode = SystemZ::RISBG;
+ EVT OpcodeVT = MVT::i64;
+ if (VT == MVT::i32 && Subtarget.hasHighWord()) {
+ Opcode = SystemZ::RISBMux;
+ OpcodeVT = MVT::i32;
+ RISBG.Start &= 31;
+ RISBG.End &= 31;
+ }
+ SDValue Ops[5] = {
+ getUNDEF(SDLoc(N), OpcodeVT),
+ convertTo(SDLoc(N), OpcodeVT, RISBG.Input),
+ CurDAG->getTargetConstant(RISBG.Start, MVT::i32),
+ CurDAG->getTargetConstant(RISBG.End | 128, MVT::i32),
+ CurDAG->getTargetConstant(RISBG.Rotate, MVT::i32)
+ };
+ N = CurDAG->getMachineNode(Opcode, SDLoc(N), OpcodeVT, Ops);
+ return convertTo(SDLoc(N), VT, SDValue(N, 0)).getNode();
+}
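
A sketch of the instruction semantics tryRISBGZero is targeting, as I read the z/Architecture definition (an assumption here, not taken from the patch): bits are numbered 0 (MSB) through 63 (LSB), the Start..End selection may wrap around, and the 128 ORed into End above requests that the unselected bits be zeroed:

    #include <cassert>
    #include <cstdint>

    static uint64_t selectedBits(unsigned Start, unsigned End) {
      uint64_t FromStart = ~uint64_t(0) >> Start;    // bits Start..63
      uint64_t UpToEnd = ~uint64_t(0) << (63 - End); // bits 0..End
      return Start <= End ? (FromStart & UpToEnd) : (FromStart | UpToEnd);
    }

    static uint64_t rotl64(uint64_t V, unsigned N) {
      return N ? (V << N) | (V >> (64 - N)) : V;
    }

    // RISBG with the zero flag: rotate the input, keep the selected
    // bits, clear everything else.
    static uint64_t risbgZero(uint64_t Input, unsigned Start, unsigned End,
                              unsigned Rotate) {
      return rotl64(Input, Rotate) & selectedBits(Start, End);
    }

    int main() {
      uint64_t X = 0x123456789abcdef0ULL;
      // ((X >> 4) & 0xff) maps to: rotate left 60 (i.e. right 4) and
      // keep bits 56..63, zeroing the rest.
      assert(risbgZero(X, 56, 63, 60) == ((X >> 4) & 0xff));
      return 0;
    }
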
+
+SDNode *SystemZDAGToDAGISel::tryRxSBG(SDNode *N, unsigned Opcode) {
+ // Try treating each operand of N as the second operand of the RxSBG
+ // and see which goes deepest.
+ RxSBGOperands RxSBG[] = {
+ RxSBGOperands(Opcode, N->getOperand(0)),
+ RxSBGOperands(Opcode, N->getOperand(1))
+ };
+ unsigned Count[] = { 0, 0 };
+ for (unsigned I = 0; I < 2; ++I)
+ while (expandRxSBG(RxSBG[I]))
+ if (RxSBG[I].Input.getOpcode() != ISD::ANY_EXTEND)
+ Count[I] += 1;
+
+ // Do nothing if neither operand is suitable.
+ if (Count[0] == 0 && Count[1] == 0)
+ return 0;
+
+ // Pick the deepest second operand.
+ unsigned I = Count[0] > Count[1] ? 0 : 1;
+ SDValue Op0 = N->getOperand(I ^ 1);
+
+ // Prefer IC for character insertions from memory.
+ if (Opcode == SystemZ::ROSBG && (RxSBG[I].Mask & 0xff) == 0)
+ if (LoadSDNode *Load = dyn_cast<LoadSDNode>(Op0.getNode()))
+ if (Load->getMemoryVT() == MVT::i8)
+ return 0;
+
+ // See whether we can avoid an AND in the first operand by converting
+ // ROSBG to RISBG.
+ if (Opcode == SystemZ::ROSBG && detectOrAndInsertion(Op0, RxSBG[I].Mask))
+ Opcode = SystemZ::RISBG;
+
+ EVT VT = N->getValueType(0);
+ SDValue Ops[5] = {
+ convertTo(SDLoc(N), MVT::i64, Op0),
+ convertTo(SDLoc(N), MVT::i64, RxSBG[I].Input),
+ CurDAG->getTargetConstant(RxSBG[I].Start, MVT::i32),
+ CurDAG->getTargetConstant(RxSBG[I].End, MVT::i32),
+ CurDAG->getTargetConstant(RxSBG[I].Rotate, MVT::i32)
+ };
+ N = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i64, Ops);
+ return convertTo(SDLoc(N), VT, SDValue(N, 0)).getNode();
+}
+
SDNode *SystemZDAGToDAGISel::splitLargeImmediate(unsigned Opcode, SDNode *Node,
SDValue Op0, uint64_t UpperVal,
uint64_t LowerVal) {
EVT VT = Node->getValueType(0);
- DebugLoc DL = Node->getDebugLoc();
+ SDLoc DL(Node);
SDValue Upper = CurDAG->getConstant(UpperVal, VT);
if (Op0.getNode())
Upper = CurDAG->getNode(Opcode, DL, VT, Op0, Upper);
@@ -533,6 +964,64 @@ SDNode *SystemZDAGToDAGISel::splitLargeImmediate(unsigned Opcode, SDNode *Node,
return Or.getNode();
}
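
For the ISD::Constant case this amounts to materializing the two 32-bit halves separately and ORing them together, roughly an LLIHF followed by an OILF. A sketch with a hypothetical constant whose halves are both nonzero:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t Val = 0x0123456789abcdefULL;             // hypothetical constant
      uint64_t UpperVal = Val & 0xffffffff00000000ULL;  // loaded first
      uint64_t LowerVal = Val & 0x00000000ffffffffULL;  // ORed in second
      assert((UpperVal | LowerVal) == Val);
      return 0;
    }
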
+bool SystemZDAGToDAGISel::canUseBlockOperation(StoreSDNode *Store,
+ LoadSDNode *Load) const {
+ // Check that the two memory operands have the same size.
+ if (Load->getMemoryVT() != Store->getMemoryVT())
+ return false;
+
+ // Volatility stops an access from being decomposed.
+ if (Load->isVolatile() || Store->isVolatile())
+ return false;
+
+ // There's no chance of overlap if the load is invariant.
+ if (Load->isInvariant())
+ return true;
+
+ // Otherwise we need to check whether there's an alias.
+ const Value *V1 = Load->getSrcValue();
+ const Value *V2 = Store->getSrcValue();
+ if (!V1 || !V2)
+ return false;
+
+ // Reject equality.
+ uint64_t Size = Load->getMemoryVT().getStoreSize();
+ int64_t End1 = Load->getSrcValueOffset() + Size;
+ int64_t End2 = Store->getSrcValueOffset() + Size;
+ if (V1 == V2 && End1 == End2)
+ return false;
+
+ return !AA->alias(AliasAnalysis::Location(V1, End1, Load->getTBAAInfo()),
+ AliasAnalysis::Location(V2, End2, Store->getTBAAInfo()));
+}
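
The partial-overlap restriction is easiest to see with a bytewise model of a block copy (this snippet is illustrative only): copying in ascending order with the destination one byte above the source smears the first byte across the buffer instead of behaving like memmove:

    #include <cassert>
    #include <cstring>

    int main() {
      unsigned char Buf[8] = {1, 2, 3, 4, 5, 6, 7, 8};
      for (int I = 0; I < 7; ++I)  // MVC-style ascending bytewise copy
        Buf[I + 1] = Buf[I];       // of Buf[0..6] to Buf[1..7]
      unsigned char Smeared[8] = {1, 1, 1, 1, 1, 1, 1, 1};
      assert(std::memcmp(Buf, Smeared, 8) == 0);
      return 0;
    }
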
+
+bool SystemZDAGToDAGISel::storeLoadCanUseMVC(SDNode *N) const {
+ StoreSDNode *Store = cast<StoreSDNode>(N);
+ LoadSDNode *Load = cast<LoadSDNode>(Store->getValue());
+
+ // Prefer not to use MVC if either address can use ... RELATIVE LONG
+ // instructions.
+ uint64_t Size = Load->getMemoryVT().getStoreSize();
+ if (Size > 1 && Size <= 8) {
+ // Prefer LHRL, LRL and LGRL.
+ if (SystemZISD::isPCREL(Load->getBasePtr().getOpcode()))
+ return false;
+ // Prefer STHRL, STRL and STGRL.
+ if (SystemZISD::isPCREL(Store->getBasePtr().getOpcode()))
+ return false;
+ }
+
+ return canUseBlockOperation(Store, Load);
+}
+
+bool SystemZDAGToDAGISel::storeLoadCanUseBlockBinary(SDNode *N,
+ unsigned I) const {
+ StoreSDNode *StoreA = cast<StoreSDNode>(N);
+ LoadSDNode *LoadA = cast<LoadSDNode>(StoreA->getValue().getOperand(1 - I));
+ LoadSDNode *LoadB = cast<LoadSDNode>(StoreA->getValue().getOperand(I));
+ return !LoadA->isVolatile() && canUseBlockOperation(StoreA, LoadB);
+}
+
SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
// Dump information about the Node being selected
DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n");
@@ -545,12 +1034,21 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
}
unsigned Opcode = Node->getOpcode();
+ SDNode *ResNode = 0;
switch (Opcode) {
case ISD::OR:
+ if (Node->getOperand(1).getOpcode() != ISD::Constant)
+ ResNode = tryRxSBG(Node, SystemZ::ROSBG);
+ goto or_xor;
+
case ISD::XOR:
+ if (Node->getOperand(1).getOpcode() != ISD::Constant)
+ ResNode = tryRxSBG(Node, SystemZ::RXSBG);
+ // Fall through.
+ or_xor:
// If this is a 64-bit operation in which both 32-bit halves are nonzero,
// split the operation into two.
- if (Node->getValueType(0) == MVT::i64)
+ if (!ResNode && Node->getValueType(0) == MVT::i64)
if (ConstantSDNode *Op1 = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
uint64_t Val = Op1->getZExtValue();
if (!SystemZ::isImmLF(Val) && !SystemZ::isImmHF(Val))
@@ -559,6 +1057,17 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
}
break;
+ case ISD::AND:
+ if (Node->getOperand(1).getOpcode() != ISD::Constant)
+ ResNode = tryRxSBG(Node, SystemZ::RNSBG);
+ // Fall through.
+ case ISD::ROTL:
+ case ISD::SHL:
+ case ISD::SRL:
+ if (!ResNode)
+ ResNode = tryRISBGZero(Node);
+ break;
+
case ISD::Constant:
// If this is a 64-bit constant that is out of the range of LLILF,
// LLIHF and LGFI, split it into two 32-bit pieces.
@@ -583,10 +1092,32 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
}
}
break;
+
+ case SystemZISD::SELECT_CCMASK: {
+ SDValue Op0 = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ // Prefer to put any load first, so that it can be matched as a
+ // conditional load.
+ if (Op1.getOpcode() == ISD::LOAD && Op0.getOpcode() != ISD::LOAD) {
+ SDValue CCValid = Node->getOperand(2);
+ SDValue CCMask = Node->getOperand(3);
+ uint64_t ConstCCValid =
+ cast<ConstantSDNode>(CCValid.getNode())->getZExtValue();
+ uint64_t ConstCCMask =
+ cast<ConstantSDNode>(CCMask.getNode())->getZExtValue();
+ // Invert the condition.
+ CCMask = CurDAG->getConstant(ConstCCValid ^ ConstCCMask,
+ CCMask.getValueType());
+ SDValue Op4 = Node->getOperand(4);
+ Node = CurDAG->UpdateNodeOperands(Node, Op1, Op0, CCValid, CCMask, Op4);
+ }
+ break;
+ }
}
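
The SELECT_CCMASK rewrite above is the usual select identity, with the condition inverted by XORing the mask against CCValid. In scalar terms, a trivial sanity check:

    #include <cassert>

    int main() {
      // select(cond, a, b) == select(!cond, b, a)
      for (int Cond = 0; Cond < 2; ++Cond) {
        int A = 10, B = 20;
        assert((Cond ? A : B) == (!Cond ? B : A));
      }
      return 0;
    }
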
// Select the default instruction
- SDNode *ResNode = SelectCode(Node);
+ if (!ResNode)
+ ResNode = SelectCode(Node);
DEBUG(errs() << "=> ";
if (ResNode == NULL || ResNode == Node)
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index eb21b31..f6e1853 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -23,8 +23,23 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include <cctype>
+
using namespace llvm;
+namespace {
+// Represents a sequence for extracting a 0/1 value from an IPM result:
+// (((X ^ XORValue) + AddValue) >> Bit)
+struct IPMConversion {
+ IPMConversion(unsigned xorValue, int64_t addValue, unsigned bit)
+ : XORValue(xorValue), AddValue(addValue), Bit(bit) {}
+
+ int64_t XORValue;
+ int64_t AddValue;
+ unsigned Bit;
+};
+}
+
// Classify VT as either 32 or 64 bit.
static bool is32Bit(EVT VT) {
switch (VT.getSimpleVT().SimpleTy) {
@@ -51,7 +66,10 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm)
MVT PtrVT = getPointerTy();
// Set up the register classes.
- addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
+ if (Subtarget.hasHighWord())
+ addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
+ else
+ addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
@@ -67,7 +85,7 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm)
// TODO: It may be better to default to latency-oriented scheduling, however
// LLVM's current latency-oriented scheduler can't handle physreg definitions
- // such as SystemZ has with PSW, so set this to the register-pressure
+ // such as SystemZ has with CC, so set this to the register-pressure
// scheduler, because it can.
setSchedulingPreference(Sched::RegPressure);
@@ -83,8 +101,8 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm)
++I) {
MVT VT = MVT::SimpleValueType(I);
if (isTypeLegal(VT)) {
- // Expand SETCC(X, Y, COND) into SELECT_CC(X, Y, 1, 0, COND).
- setOperationAction(ISD::SETCC, VT, Expand);
+ // Lower SETCC into an IPM-based sequence.
+ setOperationAction(ISD::SETCC, VT, Custom);
// Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
setOperationAction(ISD::SELECT, VT, Expand);
@@ -128,9 +146,11 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm)
setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
setOperationAction(ISD::ROTR, VT, Expand);
- // Use *MUL_LOHI where possible and a wider multiplication otherwise.
+ // Use *MUL_LOHI where possible instead of MULH*.
setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::MULHU, VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, VT, Custom);
+ setOperationAction(ISD::UMUL_LOHI, VT, Custom);
// We have instructions for signed but not unsigned FP conversion.
setOperationAction(ISD::FP_TO_UINT, VT, Expand);
@@ -165,14 +185,6 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm)
// Give LowerOperation the chance to replace 64-bit ORs with subregs.
setOperationAction(ISD::OR, MVT::i64, Custom);
- // The architecture has 32-bit SMUL_LOHI and UMUL_LOHI (MR and MLR),
- // but they aren't really worth using. There is no 64-bit SMUL_LOHI,
- // but there is a 64-bit UMUL_LOHI: MLGR.
- setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
- setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
- setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
- setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom);
-
// FIXME: Can we support these natively?
setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
@@ -200,10 +212,8 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm)
setOperationAction(ISD::STACKSAVE, MVT::Other, Custom);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);
- // Expand these using getExceptionSelectorRegister() and
- // getExceptionPointerRegister().
- setOperationAction(ISD::EXCEPTIONADDR, PtrVT, Expand);
- setOperationAction(ISD::EHSELECTION, PtrVT, Expand);
+ // Handle prefetches with PFD or PFDRL.
+ setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
// Handle floating-point types.
for (unsigned I = MVT::FIRST_FP_VALUETYPE;
@@ -214,6 +224,15 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm)
// We can use FI for FRINT.
setOperationAction(ISD::FRINT, VT, Legal);
+ // We can use the extended form of FI for other rounding operations.
+ if (Subtarget.hasFPExtension()) {
+ setOperationAction(ISD::FNEARBYINT, VT, Legal);
+ setOperationAction(ISD::FFLOOR, VT, Legal);
+ setOperationAction(ISD::FCEIL, VT, Legal);
+ setOperationAction(ISD::FTRUNC, VT, Legal);
+ setOperationAction(ISD::FROUND, VT, Legal);
+ }
+
// No special instructions for these.
setOperationAction(ISD::FSIN, VT, Expand);
setOperationAction(ISD::FCOS, VT, Expand);
@@ -246,6 +265,43 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm)
setOperationAction(ISD::VASTART, MVT::Other, Custom);
setOperationAction(ISD::VACOPY, MVT::Other, Custom);
setOperationAction(ISD::VAEND, MVT::Other, Expand);
+
+ // We want to use MVC in preference to even a single load/store pair.
+ MaxStoresPerMemcpy = 0;
+ MaxStoresPerMemcpyOptSize = 0;
+
+ // The main memset sequence is a byte store followed by an MVC.
+ // Two STC or MV..I stores win over that, but the kind of fused stores
+ // generated by target-independent code don't when the byte value is
+ // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better
+ // than "STC;MVC". Handle the choice in target-specific code instead.
+ MaxStoresPerMemset = 0;
+ MaxStoresPerMemsetOptSize = 0;
+}
+
+EVT SystemZTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
+ if (!VT.isVector())
+ return MVT::i32;
+ return VT.changeVectorElementTypeToInteger();
+}
+
+bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+ VT = VT.getScalarType();
+
+ if (!VT.isSimple())
+ return false;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::f32:
+ case MVT::f64:
+ return true;
+ case MVT::f128:
+ return false;
+ default:
+ break;
+ }
+
+ return false;
}
bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
@@ -253,6 +309,47 @@ bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
return Imm.isZero() || Imm.isNegZero();
}
+bool SystemZTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
+ bool *Fast) const {
+ // Unaligned accesses should never be slower than the expanded version.
+ // We check specifically for aligned accesses in the few cases where
+ // they are required.
+ if (Fast)
+ *Fast = true;
+ return true;
+}
+
+bool SystemZTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ Type *Ty) const {
+ // Punt on globals for now, although they can be used in limited
+ // RELATIVE LONG cases.
+ if (AM.BaseGV)
+ return false;
+
+ // Require a 20-bit signed offset.
+ if (!isInt<20>(AM.BaseOffs))
+ return false;
+
+ // Indexing is OK but no scale factor can be applied.
+ return AM.Scale == 0 || AM.Scale == 1;
+}
+
+bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
+ if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
+ return false;
+ unsigned FromBits = FromType->getPrimitiveSizeInBits();
+ unsigned ToBits = ToType->getPrimitiveSizeInBits();
+ return FromBits > ToBits;
+}
+
+bool SystemZTargetLowering::isTruncateFree(EVT FromVT, EVT ToVT) const {
+ if (!FromVT.isInteger() || !ToVT.isInteger())
+ return false;
+ unsigned FromBits = FromVT.getSizeInBits();
+ unsigned ToBits = ToVT.getSizeInBits();
+ return FromBits > ToBits;
+}
+
//===----------------------------------------------------------------------===//
// Inline asm support
//===----------------------------------------------------------------------===//
@@ -264,6 +361,7 @@ SystemZTargetLowering::getConstraintType(const std::string &Constraint) const {
case 'a': // Address register
case 'd': // Data register (equivalent to 'r')
case 'f': // Floating-point register
+ case 'h': // High-part register
case 'r': // General-purpose register
return C_RegisterClass;
@@ -306,6 +404,7 @@ getSingleConstraintMatchWeight(AsmOperandInfo &info,
case 'a': // Address register
case 'd': // Data register (equivalent to 'r')
+ case 'h': // High-part register
case 'r': // General-purpose register
if (CallOperandVal->getType()->isIntegerTy())
weight = CW_Register;
@@ -349,8 +448,24 @@ getSingleConstraintMatchWeight(AsmOperandInfo &info,
return weight;
}
+// Parse a "{tNNN}" register constraint for which the register type "t"
+// has already been verified. RC is the class associated with "t" and
+// Map maps 0-based register numbers to LLVM register numbers.
+static std::pair<unsigned, const TargetRegisterClass *>
+parseRegisterNumber(const std::string &Constraint,
+ const TargetRegisterClass *RC, const unsigned *Map) {
+ assert(*(Constraint.end()-1) == '}' && "Missing '}'");
+ if (isdigit(Constraint[2])) {
+ std::string Suffix(Constraint.data() + 2, Constraint.size() - 2);
+ unsigned Index = atoi(Suffix.c_str());
+ if (Index < 16 && Map[Index])
+ return std::make_pair(Map[Index], RC);
+ }
+ return std::make_pair(0u, static_cast<TargetRegisterClass*>(0));
+}
+
std::pair<unsigned, const TargetRegisterClass *> SystemZTargetLowering::
-getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const {
+getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const {
if (Constraint.size() == 1) {
// GCC Constraint Letters
switch (Constraint[0]) {
@@ -370,6 +485,9 @@ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const {
return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);
+ case 'h': // High-part register (an LLVM extension)
+ return std::make_pair(0U, &SystemZ::GRH32BitRegClass);
+
case 'f': // Floating-point register
if (VT == MVT::f64)
return std::make_pair(0U, &SystemZ::FP64BitRegClass);
@@ -378,6 +496,32 @@ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const {
return std::make_pair(0U, &SystemZ::FP32BitRegClass);
}
}
+ if (Constraint[0] == '{') {
+ // We need to override the default register parsing for GPRs and FPRs
+ // because the interpretation depends on VT. The internal names of
+ // the registers are also different from the external names
+ // (F0D and F0S instead of F0, etc.).
+ if (Constraint[1] == 'r') {
+ if (VT == MVT::i32)
+ return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
+ SystemZMC::GR32Regs);
+ if (VT == MVT::i128)
+ return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
+ SystemZMC::GR128Regs);
+ return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
+ SystemZMC::GR64Regs);
+ }
+ if (Constraint[1] == 'f') {
+ if (VT == MVT::f32)
+ return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
+ SystemZMC::FP32Regs);
+ if (VT == MVT::f128)
+ return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
+ SystemZMC::FP128Regs);
+ return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
+ SystemZMC::FP64Regs);
+ }
+ }
return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}
@@ -433,10 +577,21 @@ LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
#include "SystemZGenCallingConv.inc"
+bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType,
+ Type *ToType) const {
+ return isTruncateFree(FromType, ToType);
+}
+
+bool SystemZTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
+ if (!CI->isTailCall())
+ return false;
+ return true;
+}
+
// Value is a value that has been passed to us in the location described by VA
// (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
// any loads onto Chain.
-static SDValue convertLocVTToValVT(SelectionDAG &DAG, DebugLoc DL,
+static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDLoc DL,
CCValAssign &VA, SDValue Chain,
SDValue Value) {
// If the argument has been promoted from a smaller type, insert an
@@ -461,7 +616,7 @@ static SDValue convertLocVTToValVT(SelectionDAG &DAG, DebugLoc DL,
// Value is a value of type VA.getValVT() that we need to copy into
// the location described by VA. Return a copy of Value converted to
// VA.getValVT(). The caller is responsible for handling indirect values.
-static SDValue convertValVTToLocVT(SelectionDAG &DAG, DebugLoc DL,
+static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDLoc DL,
CCValAssign &VA, SDValue Value) {
switch (VA.getLocInfo()) {
case CCValAssign::SExt:
@@ -480,7 +635,7 @@ static SDValue convertValVTToLocVT(SelectionDAG &DAG, DebugLoc DL,
SDValue SystemZTargetLowering::
LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc DL, SelectionDAG &DAG,
+ SDLoc DL, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -595,35 +750,56 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
return Chain;
}
+static bool canUseSiblingCall(CCState ArgCCInfo,
+ SmallVectorImpl<CCValAssign> &ArgLocs) {
+ // Punt if there are any indirect or stack arguments, or if the call
+ // needs the call-saved argument register R6.
+ for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
+ CCValAssign &VA = ArgLocs[I];
+ if (VA.getLocInfo() == CCValAssign::Indirect)
+ return false;
+ if (!VA.isRegLoc())
+ return false;
+ unsigned Reg = VA.getLocReg();
+ if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
+ return false;
+ }
+ return true;
+}
+
SDValue
SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
SelectionDAG &DAG = CLI.DAG;
- DebugLoc &DL = CLI.DL;
- SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
- SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
- SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
+ SDLoc &DL = CLI.DL;
+ SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
+ SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
+ SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
SDValue Chain = CLI.Chain;
SDValue Callee = CLI.Callee;
- bool &isTailCall = CLI.IsTailCall;
+ bool &IsTailCall = CLI.IsTailCall;
CallingConv::ID CallConv = CLI.CallConv;
bool IsVarArg = CLI.IsVarArg;
MachineFunction &MF = DAG.getMachineFunction();
EVT PtrVT = getPointerTy();
- // SystemZ target does not yet support tail call optimization.
- isTailCall = false;
-
// Analyze the operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState ArgCCInfo(CallConv, IsVarArg, MF, TM, ArgLocs, *DAG.getContext());
ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
+ // We don't support GuaranteedTailCallOpt, only automatically-detected
+ // sibling calls.
+ if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs))
+ IsTailCall = false;
+
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = ArgCCInfo.getNextStackOffset();
// Mark the start of the call.
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, PtrVT, true));
+ if (!IsTailCall)
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, PtrVT, true),
+ DL);
// Copy argument values to their designated locations.
SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass;
@@ -672,22 +848,27 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
&MemOpChains[0], MemOpChains.size());
- // Build a sequence of copy-to-reg nodes, chained and glued together.
- SDValue Glue;
- for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
- Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
- RegsToPass[I].second, Glue);
- Glue = Chain.getValue(1);
- }
-
// Accept direct calls by converting symbolic call addresses to the
- // associated Target* opcodes.
+ // associated Target* opcodes. Force %r1 to be used for indirect
+ // tail calls.
+ SDValue Glue;
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
} else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
+ } else if (IsTailCall) {
+ Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
+ Glue = Chain.getValue(1);
+ Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
+ }
+
+ // Build a sequence of copy-to-reg nodes, chained and glued together.
+ for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
+ Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
+ RegsToPass[I].second, Glue);
+ Glue = Chain.getValue(1);
}
// The first call operand is the chain and the second is the target address.
@@ -707,6 +888,8 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Emit the call.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ if (IsTailCall)
+ return DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, &Ops[0], Ops.size());
Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, &Ops[0], Ops.size());
Glue = Chain.getValue(1);
@@ -714,7 +897,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
Chain = DAG.getCALLSEQ_END(Chain,
DAG.getConstant(NumBytes, PtrVT, true),
DAG.getConstant(0, PtrVT, true),
- Glue);
+ Glue, DL);
Glue = Chain.getValue(1);
// Assign locations to each value returned by this call.
@@ -745,7 +928,7 @@ SystemZTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool IsVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
- DebugLoc DL, SelectionDAG &DAG) const {
+ SDLoc DL, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
// Assign locations to each returned value.
@@ -815,6 +998,96 @@ static unsigned CCMaskForCondCode(ISD::CondCode CC) {
#undef CONV
}
+// Return a sequence for getting a 1 from an IPM result when CC has a
+// value in CCMask and a 0 when CC has a value in CCValid & ~CCMask.
+// The handling of CC values outside CCValid doesn't matter.
+static IPMConversion getIPMConversion(unsigned CCValid, unsigned CCMask) {
+ // Deal with cases where the result can be taken directly from a bit
+ // of the IPM result.
+ if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_3)))
+ return IPMConversion(0, 0, SystemZ::IPM_CC);
+ if (CCMask == (CCValid & (SystemZ::CCMASK_2 | SystemZ::CCMASK_3)))
+ return IPMConversion(0, 0, SystemZ::IPM_CC + 1);
+
+ // Deal with cases where we can add a value to force the sign bit
+ // to contain the right value. Putting the bit in 31 means we can
+ // use SRL rather than RISBG(L), and also makes it easier to get a
+ // 0/-1 value, so it has priority over the other tests below.
+ //
+ // These sequences rely on the fact that the upper two bits of the
+ // IPM result are zero.
+ uint64_t TopBit = uint64_t(1) << 31;
+ if (CCMask == (CCValid & SystemZ::CCMASK_0))
+ return IPMConversion(0, -(1 << SystemZ::IPM_CC), 31);
+ if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_1)))
+ return IPMConversion(0, -(2 << SystemZ::IPM_CC), 31);
+ if (CCMask == (CCValid & (SystemZ::CCMASK_0
+ | SystemZ::CCMASK_1
+ | SystemZ::CCMASK_2)))
+ return IPMConversion(0, -(3 << SystemZ::IPM_CC), 31);
+ if (CCMask == (CCValid & SystemZ::CCMASK_3))
+ return IPMConversion(0, TopBit - (3 << SystemZ::IPM_CC), 31);
+ if (CCMask == (CCValid & (SystemZ::CCMASK_1
+ | SystemZ::CCMASK_2
+ | SystemZ::CCMASK_3)))
+ return IPMConversion(0, TopBit - (1 << SystemZ::IPM_CC), 31);
+
+ // Next try inverting the value and testing a bit. 0/1 could be
+ // handled this way too, but we dealt with that case above.
+ if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_2)))
+ return IPMConversion(-1, 0, SystemZ::IPM_CC);
+
+ // Handle cases where adding a value forces a non-sign bit to contain
+ // the right value.
+ if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_2)))
+ return IPMConversion(0, 1 << SystemZ::IPM_CC, SystemZ::IPM_CC + 1);
+ if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_3)))
+ return IPMConversion(0, -(1 << SystemZ::IPM_CC), SystemZ::IPM_CC + 1);
+
+ // The remaining cases are 1, 2, 0/1/3 and 0/2/3. All of these can
+ // be done by inverting the low CC bit and applying one of the
+ // sign-based extractions above.
+ if (CCMask == (CCValid & SystemZ::CCMASK_1))
+ return IPMConversion(1 << SystemZ::IPM_CC, -(1 << SystemZ::IPM_CC), 31);
+ if (CCMask == (CCValid & SystemZ::CCMASK_2))
+ return IPMConversion(1 << SystemZ::IPM_CC,
+ TopBit - (3 << SystemZ::IPM_CC), 31);
+ if (CCMask == (CCValid & (SystemZ::CCMASK_0
+ | SystemZ::CCMASK_1
+ | SystemZ::CCMASK_3)))
+ return IPMConversion(1 << SystemZ::IPM_CC, -(3 << SystemZ::IPM_CC), 31);
+ if (CCMask == (CCValid & (SystemZ::CCMASK_0
+ | SystemZ::CCMASK_2
+ | SystemZ::CCMASK_3)))
+ return IPMConversion(1 << SystemZ::IPM_CC,
+ TopBit - (1 << SystemZ::IPM_CC), 31);
+
+ llvm_unreachable("Unexpected CC combination");
+}
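
A quick standalone check of one of these sequences, assuming IPM leaves CC in bits 28-29 of a 32-bit result (i.e. SystemZ::IPM_CC == 28 in this tree): the CCMASK_0 conversion IPMConversion(0, -(1 << IPM_CC), 31) yields 1 exactly when CC is 0:

    #include <cassert>
    #include <cstdint>

    int main() {
      const unsigned IPM_CC = 28;  // assumed bit position of CC
      for (uint32_t CC = 0; CC < 4; ++CC) {
        uint32_t IPM = CC << IPM_CC;  // upper two bits zero, as noted above
        // (((IPM ^ 0) + AddValue) >> 31) with AddValue = -(1 << IPM_CC):
        uint32_t Result = (IPM + uint32_t(-(1 << IPM_CC))) >> 31;
        assert(Result == (CC == 0 ? 1u : 0u));
      }
      return 0;
    }
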
+
+// If a comparison described by IsUnsigned, CCMask, CmpOp0 and CmpOp1
+// can be converted to a comparison against zero, adjust the operands
+// as necessary.
+static void adjustZeroCmp(SelectionDAG &DAG, bool &IsUnsigned,
+ SDValue &CmpOp0, SDValue &CmpOp1,
+ unsigned &CCMask) {
+ if (IsUnsigned)
+ return;
+
+ ConstantSDNode *ConstOp1 = dyn_cast<ConstantSDNode>(CmpOp1.getNode());
+ if (!ConstOp1)
+ return;
+
+ int64_t Value = ConstOp1->getSExtValue();
+ if ((Value == -1 && CCMask == SystemZ::CCMASK_CMP_GT) ||
+ (Value == -1 && CCMask == SystemZ::CCMASK_CMP_LE) ||
+ (Value == 1 && CCMask == SystemZ::CCMASK_CMP_LT) ||
+ (Value == 1 && CCMask == SystemZ::CCMASK_CMP_GE)) {
+ CCMask ^= SystemZ::CCMASK_CMP_EQ;
+ CmpOp1 = DAG.getConstant(0, CmpOp1.getValueType());
+ }
+}
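
The rewrite is sound because the CC masks are bit sets, so XORing in CCMASK_CMP_EQ turns GT into GE and LT into LE. The underlying integer identities, spelled out as a sketch:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int64_t X = -4; X <= 4; ++X) {
        assert((X > -1) == (X >= 0));   // GT -1  ->  GE 0
        assert((X <= -1) == (X < 0));   // LE -1  ->  LT 0
        assert((X < 1) == (X <= 0));    // LT 1   ->  LE 0
        assert((X >= 1) == (X > 0));    // GE 1   ->  GT 0
      }
      return 0;
    }
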
+
// If a comparison described by IsUnsigned, CCMask, CmpOp0 and CmpOp1
// is suitable for CLI(Y), CHHSI or CLHHSI, adjust the operands as necessary.
static void adjustSubwordCmp(SelectionDAG &DAG, bool &IsUnsigned,
@@ -840,7 +1113,7 @@ static void adjustSubwordCmp(SelectionDAG &DAG, bool &IsUnsigned,
uint64_t Mask = (1 << NumBits) - 1;
if (Load->getExtensionType() == ISD::SEXTLOAD) {
int64_t SignedValue = Constant->getSExtValue();
- if (uint64_t(SignedValue) + (1 << (NumBits - 1)) > Mask)
+ if (uint64_t(SignedValue) + (1ULL << (NumBits - 1)) > Mask)
return;
// Unsigned comparison between two sign-extended values is equivalent
// to unsigned comparison between two zero-extended values.
@@ -859,7 +1132,7 @@ static void adjustSubwordCmp(SelectionDAG &DAG, bool &IsUnsigned,
if (Value == 0 && CCMask == SystemZ::CCMASK_CMP_LT)
// Test whether the high bit of the byte is set.
Value = 127, CCMask = SystemZ::CCMASK_CMP_GT, IsUnsigned = true;
- else if (SignedValue == -1 && CCMask == SystemZ::CCMASK_CMP_GT)
+ else if (Value == 0 && CCMask == SystemZ::CCMASK_CMP_GE)
// Test whether the high bit of the byte is clear.
Value = 128, CCMask = SystemZ::CCMASK_CMP_LT, IsUnsigned = true;
else
@@ -879,7 +1152,7 @@ static void adjustSubwordCmp(SelectionDAG &DAG, bool &IsUnsigned,
ISD::LoadExtType ExtType = IsUnsigned ? ISD::ZEXTLOAD : ISD::SEXTLOAD;
if (CmpOp0.getValueType() != MVT::i32 ||
Load->getExtensionType() != ExtType)
- CmpOp0 = DAG.getExtLoad(ExtType, Load->getDebugLoc(), MVT::i32,
+ CmpOp0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32,
Load->getChain(), Load->getBasePtr(),
Load->getPointerInfo(), Load->getMemoryVT(),
Load->isVolatile(), Load->isNonTemporal(),
@@ -891,67 +1164,309 @@ static void adjustSubwordCmp(SelectionDAG &DAG, bool &IsUnsigned,
CmpOp1 = DAG.getConstant(Value, MVT::i32);
}
-// Return true if a comparison described by CCMask, CmpOp0 and CmpOp1
-// is an equality comparison that is better implemented using unsigned
-// rather than signed comparison instructions.
-static bool preferUnsignedComparison(SelectionDAG &DAG, SDValue CmpOp0,
- SDValue CmpOp1, unsigned CCMask) {
- // The test must be for equality or inequality.
- if (CCMask != SystemZ::CCMASK_CMP_EQ && CCMask != SystemZ::CCMASK_CMP_NE)
+// Return true if Op is either an unextended load, or a load suitable
+// for integer register-memory comparisons of type ICmpType.
+static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
+ LoadSDNode *Load = dyn_cast<LoadSDNode>(Op.getNode());
+ if (Load) {
+ // There are no instructions to compare a register with a memory byte.
+ if (Load->getMemoryVT() == MVT::i8)
+ return false;
+ // Otherwise decide on extension type.
+ switch (Load->getExtensionType()) {
+ case ISD::NON_EXTLOAD:
+ return true;
+ case ISD::SEXTLOAD:
+ return ICmpType != SystemZICMP::UnsignedOnly;
+ case ISD::ZEXTLOAD:
+ return ICmpType != SystemZICMP::SignedOnly;
+ default:
+ break;
+ }
+ }
+ return false;
+}
+
+// Return true if it is better to swap comparison operands Op0 and Op1.
+// ICmpType is the type of an integer comparison.
+static bool shouldSwapCmpOperands(SDValue Op0, SDValue Op1,
+ unsigned ICmpType) {
+ // Leave f128 comparisons alone, since they have no memory forms.
+ if (Op0.getValueType() == MVT::f128)
return false;
- if (CmpOp1.getOpcode() == ISD::Constant) {
- uint64_t Value = cast<ConstantSDNode>(CmpOp1)->getSExtValue();
+ // Always keep a floating-point constant second, since comparisons with
+ // zero can use LOAD TEST and comparisons with other constants make a
+ // natural memory operand.
+ if (isa<ConstantFPSDNode>(Op1))
+ return false;
- // If we're comparing with memory, prefer unsigned comparisons for
- // values that are in the unsigned 16-bit range but not the signed
- // 16-bit range. We want to use CLFHSI and CLGHSI.
- if (CmpOp0.hasOneUse() &&
- ISD::isNormalLoad(CmpOp0.getNode()) &&
- (Value >= 32768 && Value < 65536))
- return true;
+ // Never swap comparisons with zero since there are many ways to optimize
+ // those later.
+ ConstantSDNode *COp1 = dyn_cast<ConstantSDNode>(Op1);
+ if (COp1 && COp1->getZExtValue() == 0)
+ return false;
- // Use unsigned comparisons for values that are in the CLGFI range
- // but not in the CGFI range.
- if (CmpOp0.getValueType() == MVT::i64 && (Value >> 31) == 1)
+ // Look for cases where Op0 is a single-use load and Op1 isn't.
+ // In that case we generally prefer the memory to be second.
+ if ((isNaturalMemoryOperand(Op0, ICmpType) && Op0.hasOneUse()) &&
+ !(isNaturalMemoryOperand(Op1, ICmpType) && Op1.hasOneUse())) {
+ // The only exceptions are when the second operand is a constant and
+ // we can use things like CHHSI.
+ if (!COp1)
return true;
+ // The unsigned memory-immediate instructions can handle 16-bit
+ // unsigned integers.
+ if (ICmpType != SystemZICMP::SignedOnly &&
+ isUInt<16>(COp1->getZExtValue()))
+ return false;
+ // The signed memory-immediate instructions can handle 16-bit
+ // signed integers.
+ if (ICmpType != SystemZICMP::UnsignedOnly &&
+ isInt<16>(COp1->getSExtValue()))
+ return false;
+ return true;
+ }
+ return false;
+}
+
+// Return true if shift operation N has an in-range constant shift value.
+// Store it in ShiftVal if so.
+static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
+ ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (!Shift)
+ return false;
+ uint64_t Amount = Shift->getZExtValue();
+ if (Amount >= N.getValueType().getSizeInBits())
return false;
+
+ ShiftVal = Amount;
+ return true;
+}
+
+// Check whether an AND with Mask is suitable for a TEST UNDER MASK
+// instruction and whether the CC value is descriptive enough to handle
+// a comparison of type Opcode between the AND result and CmpVal.
+// CCMask says which comparison result is being tested and BitSize is
+// the number of bits in the operands. If TEST UNDER MASK can be used,
+// return the corresponding CC mask, otherwise return 0.
+static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
+ uint64_t Mask, uint64_t CmpVal,
+ unsigned ICmpType) {
+ assert(Mask != 0 && "ANDs with zero should have been removed by now");
+
+ // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
+ if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
+ !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
+ return 0;
+
+ // Work out the masks for the lowest and highest bits.
+ unsigned HighShift = 63 - countLeadingZeros(Mask);
+ uint64_t High = uint64_t(1) << HighShift;
+ uint64_t Low = uint64_t(1) << countTrailingZeros(Mask);
+
+ // Signed ordered comparisons are effectively unsigned if the sign
+ // bit is dropped.
+ bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);
+
+ // Check for equality comparisons with 0, or the equivalent.
+ if (CmpVal == 0) {
+ if (CCMask == SystemZ::CCMASK_CMP_EQ)
+ return SystemZ::CCMASK_TM_ALL_0;
+ if (CCMask == SystemZ::CCMASK_CMP_NE)
+ return SystemZ::CCMASK_TM_SOME_1;
+ }
+ if (EffectivelyUnsigned && CmpVal <= Low) {
+ if (CCMask == SystemZ::CCMASK_CMP_LT)
+ return SystemZ::CCMASK_TM_ALL_0;
+ if (CCMask == SystemZ::CCMASK_CMP_GE)
+ return SystemZ::CCMASK_TM_SOME_1;
+ }
+ if (EffectivelyUnsigned && CmpVal < Low) {
+ if (CCMask == SystemZ::CCMASK_CMP_LE)
+ return SystemZ::CCMASK_TM_ALL_0;
+ if (CCMask == SystemZ::CCMASK_CMP_GT)
+ return SystemZ::CCMASK_TM_SOME_1;
}
- // Prefer CL for zero-extended loads.
- if (CmpOp1.getOpcode() == ISD::ZERO_EXTEND ||
- ISD::isZEXTLoad(CmpOp1.getNode()))
- return true;
+ // Check for equality comparisons with the mask, or the equivalent.
+ if (CmpVal == Mask) {
+ if (CCMask == SystemZ::CCMASK_CMP_EQ)
+ return SystemZ::CCMASK_TM_ALL_1;
+ if (CCMask == SystemZ::CCMASK_CMP_NE)
+ return SystemZ::CCMASK_TM_SOME_0;
+ }
+ if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
+ if (CCMask == SystemZ::CCMASK_CMP_GT)
+ return SystemZ::CCMASK_TM_ALL_1;
+ if (CCMask == SystemZ::CCMASK_CMP_LE)
+ return SystemZ::CCMASK_TM_SOME_0;
+ }
+ if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
+ if (CCMask == SystemZ::CCMASK_CMP_GE)
+ return SystemZ::CCMASK_TM_ALL_1;
+ if (CCMask == SystemZ::CCMASK_CMP_LT)
+ return SystemZ::CCMASK_TM_SOME_0;
+ }
- // ...and for "in-register" zero extensions.
- if (CmpOp1.getOpcode() == ISD::AND && CmpOp1.getValueType() == MVT::i64) {
- SDValue Mask = CmpOp1.getOperand(1);
- if (Mask.getOpcode() == ISD::Constant &&
- cast<ConstantSDNode>(Mask)->getZExtValue() == 0xffffffff)
- return true;
+ // Check for ordered comparisons with the top bit.
+ if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
+ if (CCMask == SystemZ::CCMASK_CMP_LE)
+ return SystemZ::CCMASK_TM_MSB_0;
+ if (CCMask == SystemZ::CCMASK_CMP_GT)
+ return SystemZ::CCMASK_TM_MSB_1;
+ }
+ if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
+ if (CCMask == SystemZ::CCMASK_CMP_LT)
+ return SystemZ::CCMASK_TM_MSB_0;
+ if (CCMask == SystemZ::CCMASK_CMP_GE)
+ return SystemZ::CCMASK_TM_MSB_1;
}
- return false;
+ // If there are just two bits, we can do equality checks for Low and High
+ // as well.
+ if (Mask == Low + High) {
+ if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
+ return SystemZ::CCMASK_TM_MIXED_MSB_0;
+ if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
+ return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY;
+ if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
+ return SystemZ::CCMASK_TM_MIXED_MSB_1;
+ if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
+ return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY;
+ }
+
+ // Looks like we've exhausted our options.
+ return 0;
+}
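
One of the less obvious mappings is worth a brute-force check (standalone sketch, with a hypothetical mask): for an effectively unsigned comparison with 0 < CmpVal <= Low, "(X & Mask) < CmpVal" holds exactly when every masked bit is zero, which is what CCMASK_TM_ALL_0 tests:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t Mask = 0x00f0, Low = 0x0010;  // Low = lowest mask bit
      for (uint32_t CmpVal = 1; CmpVal <= Low; ++CmpVal)
        for (uint32_t X = 0; X < 0x10000; ++X)
          // Nonzero masked values are at least Low, so the comparison
          // can only succeed when the masked bits are all zero.
          assert(((X & Mask) < CmpVal) == ((X & Mask) == 0));
      return 0;
    }
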
+
+// See whether the comparison (Opcode CmpOp0, CmpOp1, ICmpType) can be
+// implemented as a TEST UNDER MASK instruction when the condition being
+// tested is as described by CCValid and CCMask. Update the arguments
+// with the TM version if so.
+static void adjustForTestUnderMask(SelectionDAG &DAG, unsigned &Opcode,
+ SDValue &CmpOp0, SDValue &CmpOp1,
+ unsigned &CCValid, unsigned &CCMask,
+ unsigned &ICmpType) {
+ // Check that we have a comparison with a constant.
+ ConstantSDNode *ConstCmpOp1 = dyn_cast<ConstantSDNode>(CmpOp1);
+ if (!ConstCmpOp1)
+ return;
+ uint64_t CmpVal = ConstCmpOp1->getZExtValue();
+
+ // Check whether the nonconstant input is an AND with a constant mask.
+ if (CmpOp0.getOpcode() != ISD::AND)
+ return;
+ SDValue AndOp0 = CmpOp0.getOperand(0);
+ SDValue AndOp1 = CmpOp0.getOperand(1);
+ ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(AndOp1.getNode());
+ if (!Mask)
+ return;
+ uint64_t MaskVal = Mask->getZExtValue();
+
+ // Check whether the combination of mask, comparison value and comparison
+ // type are suitable.
+ unsigned BitSize = CmpOp0.getValueType().getSizeInBits();
+ unsigned NewCCMask, ShiftVal;
+ if (ICmpType != SystemZICMP::SignedOnly &&
+ AndOp0.getOpcode() == ISD::SHL &&
+ isSimpleShift(AndOp0, ShiftVal) &&
+ (NewCCMask = getTestUnderMaskCond(BitSize, CCMask, MaskVal >> ShiftVal,
+ CmpVal >> ShiftVal,
+ SystemZICMP::Any))) {
+ AndOp0 = AndOp0.getOperand(0);
+ AndOp1 = DAG.getConstant(MaskVal >> ShiftVal, AndOp0.getValueType());
+ } else if (ICmpType != SystemZICMP::SignedOnly &&
+ AndOp0.getOpcode() == ISD::SRL &&
+ isSimpleShift(AndOp0, ShiftVal) &&
+ (NewCCMask = getTestUnderMaskCond(BitSize, CCMask,
+ MaskVal << ShiftVal,
+ CmpVal << ShiftVal,
+ SystemZICMP::UnsignedOnly))) {
+ AndOp0 = AndOp0.getOperand(0);
+ AndOp1 = DAG.getConstant(MaskVal << ShiftVal, AndOp0.getValueType());
+ } else {
+ NewCCMask = getTestUnderMaskCond(BitSize, CCMask, MaskVal, CmpVal,
+ ICmpType);
+ if (!NewCCMask)
+ return;
+ }
+
+ // Go ahead and make the change.
+ Opcode = SystemZISD::TM;
+ CmpOp0 = AndOp0;
+ CmpOp1 = AndOp1;
+ ICmpType = (bool(NewCCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
+ bool(NewCCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
+ CCValid = SystemZ::CCMASK_TM;
+ CCMask = NewCCMask;
}
-// Return a target node that compares CmpOp0 and CmpOp1. Set CCMask to the
-// 4-bit condition-code mask for CC.
-static SDValue emitCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
- ISD::CondCode CC, unsigned &CCMask) {
+// Return a target node that compares CmpOp0 with CmpOp1 and stores a
+// 2-bit result in CC. Set CCValid to the CCMASK_* of all possible
+// 2-bit results and CCMask to the subset of those results that are
+// associated with Cond.
+static SDValue emitCmp(const SystemZTargetMachine &TM, SelectionDAG &DAG,
+ SDLoc DL, SDValue CmpOp0, SDValue CmpOp1,
+ ISD::CondCode Cond, unsigned &CCValid,
+ unsigned &CCMask) {
bool IsUnsigned = false;
- CCMask = CCMaskForCondCode(CC);
- if (!CmpOp0.getValueType().isFloatingPoint()) {
+ CCMask = CCMaskForCondCode(Cond);
+ unsigned Opcode, ICmpType = 0;
+ if (CmpOp0.getValueType().isFloatingPoint()) {
+ CCValid = SystemZ::CCMASK_FCMP;
+ Opcode = SystemZISD::FCMP;
+ } else {
IsUnsigned = CCMask & SystemZ::CCMASK_CMP_UO;
- CCMask &= ~SystemZ::CCMASK_CMP_UO;
+ CCValid = SystemZ::CCMASK_ICMP;
+ CCMask &= CCValid;
+ adjustZeroCmp(DAG, IsUnsigned, CmpOp0, CmpOp1, CCMask);
adjustSubwordCmp(DAG, IsUnsigned, CmpOp0, CmpOp1, CCMask);
- if (preferUnsignedComparison(DAG, CmpOp0, CmpOp1, CCMask))
- IsUnsigned = true;
+ Opcode = SystemZISD::ICMP;
+ // Choose the type of comparison. Equality and inequality tests can
+ // use either signed or unsigned comparisons. The choice also doesn't
+ // matter if both sign bits are known to be clear. In those cases we
+ // want to give the main isel code the freedom to choose whichever
+ // form fits best.
+ if (CCMask == SystemZ::CCMASK_CMP_EQ ||
+ CCMask == SystemZ::CCMASK_CMP_NE ||
+ (DAG.SignBitIsZero(CmpOp0) && DAG.SignBitIsZero(CmpOp1)))
+ ICmpType = SystemZICMP::Any;
+ else if (IsUnsigned)
+ ICmpType = SystemZICMP::UnsignedOnly;
+ else
+ ICmpType = SystemZICMP::SignedOnly;
}
- DebugLoc DL = CmpOp0.getDebugLoc();
- return DAG.getNode((IsUnsigned ? SystemZISD::UCMP : SystemZISD::CMP),
- DL, MVT::Glue, CmpOp0, CmpOp1);
+ if (shouldSwapCmpOperands(CmpOp0, CmpOp1, ICmpType)) {
+ std::swap(CmpOp0, CmpOp1);
+ CCMask = ((CCMask & SystemZ::CCMASK_CMP_EQ) |
+ (CCMask & SystemZ::CCMASK_CMP_GT ? SystemZ::CCMASK_CMP_LT : 0) |
+ (CCMask & SystemZ::CCMASK_CMP_LT ? SystemZ::CCMASK_CMP_GT : 0) |
+ (CCMask & SystemZ::CCMASK_CMP_UO));
+ }
+
+ adjustForTestUnderMask(DAG, Opcode, CmpOp0, CmpOp1, CCValid, CCMask,
+ ICmpType);
+ if (Opcode == SystemZISD::ICMP || Opcode == SystemZISD::TM)
+ return DAG.getNode(Opcode, DL, MVT::Glue, CmpOp0, CmpOp1,
+ DAG.getConstant(ICmpType, MVT::i32));
+ return DAG.getNode(Opcode, DL, MVT::Glue, CmpOp0, CmpOp1);
+}
+
+// Implement a 32-bit *MUL_LOHI operation by extending both operands to
+// 64 bits. Extend is the extension type to use. Store the high part
+// in Hi and the low part in Lo.
+static void lowerMUL_LOHI32(SelectionDAG &DAG, SDLoc DL,
+ unsigned Extend, SDValue Op0, SDValue Op1,
+ SDValue &Hi, SDValue &Lo) {
+ Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
+ Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
+ SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
+ Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul, DAG.getConstant(32, MVT::i64));
+ Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
+ Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
}
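
In plain C++ the same decomposition looks like this (unsigned case shown; the signed case would sign-extend both operands instead):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t Op0 = 0xdeadbeef, Op1 = 0x12345678;
      uint64_t Mul = uint64_t(Op0) * uint64_t(Op1);  // extend, multiply once
      uint32_t Hi = uint32_t(Mul >> 32);             // high part
      uint32_t Lo = uint32_t(Mul);                   // low part
      assert(((uint64_t(Hi) << 32) | Lo) == Mul);
      return 0;
    }
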
// Lower a binary operation that produces two VT results, one in each
@@ -959,7 +1474,7 @@ static SDValue emitCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
// Extend extends Op0 to a GR128, and Opcode performs the GR128 operation
// on the extended Op0 and (unextended) Op1. Store the even register result
// in Even and the odd register result in Odd.
-static void lowerGR128Binary(SelectionDAG &DAG, DebugLoc DL, EVT VT,
+static void lowerGR128Binary(SelectionDAG &DAG, SDLoc DL, EVT VT,
unsigned Extend, unsigned Opcode,
SDValue Op0, SDValue Op1,
SDValue &Even, SDValue &Odd) {
@@ -967,14 +1482,38 @@ static void lowerGR128Binary(SelectionDAG &DAG, DebugLoc DL, EVT VT,
SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped,
SDValue(In128, 0), Op1);
bool Is32Bit = is32Bit(VT);
- SDValue SubReg0 = DAG.getTargetConstant(SystemZ::even128(Is32Bit), VT);
- SDValue SubReg1 = DAG.getTargetConstant(SystemZ::odd128(Is32Bit), VT);
- SDNode *Reg0 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
- VT, Result, SubReg0);
- SDNode *Reg1 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
- VT, Result, SubReg1);
- Even = SDValue(Reg0, 0);
- Odd = SDValue(Reg1, 0);
+ Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
+ Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
+}
+
+SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue CmpOp0 = Op.getOperand(0);
+ SDValue CmpOp1 = Op.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ SDLoc DL(Op);
+
+ unsigned CCValid, CCMask;
+ SDValue Glue = emitCmp(TM, DAG, DL, CmpOp0, CmpOp1, CC, CCValid, CCMask);
+
+ IPMConversion Conversion = getIPMConversion(CCValid, CCMask);
+ SDValue Result = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue);
+
+ if (Conversion.XORValue)
+ Result = DAG.getNode(ISD::XOR, DL, MVT::i32, Result,
+ DAG.getConstant(Conversion.XORValue, MVT::i32));
+
+ if (Conversion.AddValue)
+ Result = DAG.getNode(ISD::ADD, DL, MVT::i32, Result,
+ DAG.getConstant(Conversion.AddValue, MVT::i32));
+
+ // The SHR/AND sequence should get optimized to an RISBG.
+ Result = DAG.getNode(ISD::SRL, DL, MVT::i32, Result,
+ DAG.getConstant(Conversion.Bit, MVT::i32));
+ if (Conversion.Bit != 31)
+ Result = DAG.getNode(ISD::AND, DL, MVT::i32, Result,
+ DAG.getConstant(1, MVT::i32));
+ return Result;
}
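
The conversion pipeline above, modelled on host scalars (a sketch only; the concrete XORValue/AddValue/Bit triple is whatever getIPMConversion chooses for the given CCValid/CCMask):

    #include <cstdint>

    // IPM leaves CC in bits 29-28.  The optional XOR and ADD massage the
    // two CC bits so that the wanted boolean lands in a single bit
    // position, which the shift (plus AND, unless bit 31 already
    // suffices) then extracts.
    static uint32_t setccFromCC(uint32_t CC, uint32_t XORValue,
                                uint32_t AddValue, unsigned Bit) {
      uint32_t R = CC << 28;     // IPM
      R ^= XORValue;             // if Conversion.XORValue
      R += AddValue;             // if Conversion.AddValue
      R >>= Bit;                 // SRL (folds into RISBG with the AND)
      return Bit != 31 ? (R & 1) : R;
    }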
SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
@@ -983,12 +1522,13 @@ SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue CmpOp0 = Op.getOperand(2);
SDValue CmpOp1 = Op.getOperand(3);
SDValue Dest = Op.getOperand(4);
- DebugLoc DL = Op.getDebugLoc();
+ SDLoc DL(Op);
- unsigned CCMask;
- SDValue Flags = emitCmp(DAG, CmpOp0, CmpOp1, CC, CCMask);
+ unsigned CCValid, CCMask;
+ SDValue Flags = emitCmp(TM, DAG, DL, CmpOp0, CmpOp1, CC, CCValid, CCMask);
return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(),
- Chain, DAG.getConstant(CCMask, MVT::i32), Dest, Flags);
+ Chain, DAG.getConstant(CCValid, MVT::i32),
+ DAG.getConstant(CCMask, MVT::i32), Dest, Flags);
}
SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
@@ -998,14 +1538,15 @@ SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
SDValue TrueOp = Op.getOperand(2);
SDValue FalseOp = Op.getOperand(3);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
- DebugLoc DL = Op.getDebugLoc();
+ SDLoc DL(Op);
- unsigned CCMask;
- SDValue Flags = emitCmp(DAG, CmpOp0, CmpOp1, CC, CCMask);
+ unsigned CCValid, CCMask;
+ SDValue Flags = emitCmp(TM, DAG, DL, CmpOp0, CmpOp1, CC, CCValid, CCMask);
- SmallVector<SDValue, 4> Ops;
+ SmallVector<SDValue, 5> Ops;
Ops.push_back(TrueOp);
Ops.push_back(FalseOp);
+ Ops.push_back(DAG.getConstant(CCValid, MVT::i32));
Ops.push_back(DAG.getConstant(CCMask, MVT::i32));
Ops.push_back(Flags);
@@ -1015,7 +1556,7 @@ SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
SelectionDAG &DAG) const {
- DebugLoc DL = Node->getDebugLoc();
+ SDLoc DL(Node);
const GlobalValue *GV = Node->getGlobal();
int64_t Offset = Node->getOffset();
EVT PtrVT = getPointerTy();
@@ -1024,18 +1565,18 @@ SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
SDValue Result;
if (Subtarget.isPC32DBLSymbol(GV, RM, CM)) {
- // Make sure that the offset is aligned to a halfword. If it isn't,
- // create an "anchor" at the previous 12-bit boundary.
- // FIXME check whether there is a better way of handling this.
- if (Offset & 1) {
- Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
- Offset & ~uint64_t(0xfff));
- Offset &= 0xfff;
- } else {
- Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Offset);
+ // Assign anchors at 1<<12 byte boundaries.
+ uint64_t Anchor = Offset & ~uint64_t(0xfff);
+ Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
+ Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
+
+ // The offset can be folded into the address if it is aligned to a halfword.
+ Offset -= Anchor;
+ if (Offset != 0 && (Offset & 1) == 0) {
+ SDValue Full = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
+ Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
Offset = 0;
}
- Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
} else {
Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
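
A worked example of the anchoring scheme above, with hypothetical offsets:

    Offset = 0x1234  ->  Anchor = 0x1000; the residue 0x234 is even, so
                         the full address GV+0x1234 is wrapped in a
                         PCREL_OFFSET with GV+0x1000 as the anchor and
                         the remaining Offset becomes 0.
    Offset = 0x1235  ->  Anchor = 0x1000; the residue 0x235 is odd,
                         which LARL cannot encode (its immediate is in
                         halfwords), so 0x235 is left to be added
                         separately afterwards.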
@@ -1054,7 +1595,7 @@ SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
SelectionDAG &DAG) const {
- DebugLoc DL = Node->getDebugLoc();
+ SDLoc DL(Node);
const GlobalValue *GV = Node->getGlobal();
EVT PtrVT = getPointerTy();
TLSModel::Model model = TM.getTLSModel(GV);
@@ -1093,7 +1634,7 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
SelectionDAG &DAG) const {
- DebugLoc DL = Node->getDebugLoc();
+ SDLoc DL(Node);
const BlockAddress *BA = Node->getBlockAddress();
int64_t Offset = Node->getOffset();
EVT PtrVT = getPointerTy();
@@ -1105,7 +1646,7 @@ SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
SelectionDAG &DAG) const {
- DebugLoc DL = JT->getDebugLoc();
+ SDLoc DL(JT);
EVT PtrVT = getPointerTy();
SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
@@ -1115,7 +1656,7 @@ SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
SelectionDAG &DAG) const {
- DebugLoc DL = CP->getDebugLoc();
+ SDLoc DL(CP);
EVT PtrVT = getPointerTy();
SDValue Result;
@@ -1132,29 +1673,38 @@ SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
SelectionDAG &DAG) const {
- DebugLoc DL = Op.getDebugLoc();
+ SDLoc DL(Op);
SDValue In = Op.getOperand(0);
EVT InVT = In.getValueType();
EVT ResVT = Op.getValueType();
- SDValue SubReg32 = DAG.getTargetConstant(SystemZ::subreg_32bit, MVT::i64);
- SDValue Shift32 = DAG.getConstant(32, MVT::i64);
if (InVT == MVT::i32 && ResVT == MVT::f32) {
- SDValue In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
- SDValue Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, In64, Shift32);
- SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Shift);
- SDNode *Out = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
- MVT::f32, Out64, SubReg32);
- return SDValue(Out, 0);
+ SDValue In64;
+ if (Subtarget.hasHighWord()) {
+ SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
+ MVT::i64);
+ In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
+ MVT::i64, SDValue(U64, 0), In);
+ } else {
+ In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
+ In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
+ DAG.getConstant(32, MVT::i64));
+ }
+ SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
+ return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
+ DL, MVT::f32, Out64);
}
if (InVT == MVT::f32 && ResVT == MVT::i32) {
SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
- SDNode *In64 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
- MVT::f64, SDValue(U64, 0), In, SubReg32);
- SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, SDValue(In64, 0));
- SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64, Shift32);
- SDValue Out = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
- return Out;
+ SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
+ MVT::f64, SDValue(U64, 0), In);
+ SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
+ if (Subtarget.hasHighWord())
+ return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
+ MVT::i32, Out64);
+ SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
+ DAG.getConstant(32, MVT::i64));
+ return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
}
llvm_unreachable("Unexpected bitcast combination");
}
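
The non-high-word i32 -> f32 path, modelled on host values (a sketch only; on SystemZ an FP32 value lives in the high half of the corresponding 64-bit FPR, which is what subreg_h32 names):

    #include <cstdint>
    #include <cstring>

    static float bitcastI32ToF32(uint32_t In) {
      uint64_t In64 = (uint64_t)In << 32;      // ANY_EXTEND + SHL 32
      uint32_t High = (uint32_t)(In64 >> 32);  // extract subreg_h32
      float F;
      std::memcpy(&F, &High, sizeof F);        // the BITCAST itself
      return F;
    }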
@@ -1169,7 +1719,7 @@ SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
SDValue Chain = Op.getOperand(0);
SDValue Addr = Op.getOperand(1);
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
- DebugLoc DL = Op.getDebugLoc();
+ SDLoc DL(Op);
// The initial values of each field.
const unsigned NumFields = 4;
@@ -1203,7 +1753,7 @@ SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
SDValue SrcPtr = Op.getOperand(2);
const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
- DebugLoc DL = Op.getDebugLoc();
+ SDLoc DL(Op);
return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32),
/*Align*/8, /*isVolatile*/false, /*AlwaysInline*/false,
@@ -1214,7 +1764,7 @@ SDValue SystemZTargetLowering::
lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
SDValue Size = Op.getOperand(1);
- DebugLoc DL = Op.getDebugLoc();
+ SDLoc DL(Op);
unsigned SPReg = getStackPointerRegisterToSaveRestore();
@@ -1237,18 +1787,64 @@ lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
return DAG.getMergeValues(Ops, 2, DL);
}
-SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
+SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
- DebugLoc DL = Op.getDebugLoc();
- assert(!is32Bit(VT) && "Only support 64-bit UMUL_LOHI");
+ SDLoc DL(Op);
+ SDValue Ops[2];
+ if (is32Bit(VT))
+ // Just do a normal 64-bit multiplication and extract the results.
+ // We define this so that it can be used for constant division.
+ lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
+ Op.getOperand(1), Ops[1], Ops[0]);
+ else {
+ // Do a full 128-bit multiplication based on UMUL_LOHI64:
+ //
+ // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
+ //
+ // but using the fact that the upper halves are either all zeros
+ // or all ones:
+ //
+ // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
+ //
+ // and grouping the two correction terms together, since computing them
+ // is cheaper than the multiplication:
+ //
+ // (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
+ SDValue C63 = DAG.getConstant(63, MVT::i64);
+ SDValue LL = Op.getOperand(0);
+ SDValue RL = Op.getOperand(1);
+ SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
+ SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
+ // UMUL_LOHI64 returns the low result in the odd register and the high
+ // result in the even register. SMUL_LOHI is defined to return the
+ // low half first, so the results are in reverse order.
+ lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64,
+ LL, RL, Ops[1], Ops[0]);
+ SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
+ SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
+ SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
+ Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
+ }
+ return DAG.getMergeValues(Ops, 2, DL);
+}
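
The sign-correction identity used above can be checked on host integers; here it is scaled down to 32 bits (a self-contained sketch, names ours):

    #include <cassert>
    #include <cstdint>

    // Signed high half == unsigned high half - ((lh & rl) + (ll & rh)),
    // where lh/rh are the all-zeros or all-ones sign masks.
    static uint32_t smulHi32(int32_t L, int32_t R) {
      uint32_t LL = (uint32_t)L, RL = (uint32_t)R;
      uint32_t LH = (uint32_t)(L >> 31);  // SRA: 0 or all ones
      uint32_t RH = (uint32_t)(R >> 31);
      uint32_t UHi = (uint32_t)(((uint64_t)LL * RL) >> 32);  // UMUL high
      return UHi - ((LH & RL) + (LL & RH));                  // correction
    }

    int main() {
      const int32_t V[] = { 0, 1, -1, 2, -7, 123456789, -987654321,
                            INT32_MAX, INT32_MIN };
      for (int32_t A : V)
        for (int32_t B : V)
          assert((int32_t)smulHi32(A, B) ==
                 (int32_t)(((int64_t)A * B) >> 32));
      return 0;
    }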
- // UMUL_LOHI64 returns the low result in the odd register and the high
- // result in the even register. UMUL_LOHI is defined to return the
- // low half first, so the results are in reverse order.
+SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ SDLoc DL(Op);
SDValue Ops[2];
- lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64,
- Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
+ if (is32Bit(VT))
+ // Just do a normal 64-bit multiplication and extract the results.
+ // We define this so that it can be used for constant division.
+ lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
+ Op.getOperand(1), Ops[1], Ops[0]);
+ else
+ // UMUL_LOHI64 returns the low result in the odd register and the high
+ // result in the even register. UMUL_LOHI is defined to return the
+ // low half first, so the results are in reverse order.
+ lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64,
+ Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
return DAG.getMergeValues(Ops, 2, DL);
}
@@ -1257,19 +1853,24 @@ SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
EVT VT = Op.getValueType();
- DebugLoc DL = Op.getDebugLoc();
+ SDLoc DL(Op);
+ unsigned Opcode;
// We use DSGF for 32-bit division.
if (is32Bit(VT)) {
Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
- Op1 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op1);
- }
+ Opcode = SystemZISD::SDIVREM32;
+ } else if (DAG.ComputeNumSignBits(Op1) > 32) {
+ Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
+ Opcode = SystemZISD::SDIVREM32;
+ } else
+ Opcode = SystemZISD::SDIVREM64;
// DSG(F) takes a 64-bit dividend, so the even register in the GR128
// input is "don't care". The instruction returns the remainder in
// the even register and the quotient in the odd register.
SDValue Ops[2];
- lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::SDIVREM64,
+ lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, Opcode,
Op0, Op1, Ops[1], Ops[0]);
return DAG.getMergeValues(Ops, 2, DL);
}
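
Stated on plain values, the divisor test reads as follows (a sketch; the in-tree check works on known sign bits rather than a constant):

    // A 64-bit SDIVREM can still use the 32-bit-divisor form DSGF when
    // the divisor is provably a sign-extended 32-bit value, which is
    // what ComputeNumSignBits(Op1) > 32 establishes.
    static bool divisorFitsDSGF(int64_t Divisor) {
      return Divisor == (int64_t)(int32_t)Divisor;
    }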
@@ -1277,7 +1878,7 @@ SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
- DebugLoc DL = Op.getDebugLoc();
+ SDLoc DL(Op);
// DL(G) uses a double-width dividend, so we need to clear the even
// register in the GR128 input. The instruction returns the remainder
@@ -1332,22 +1933,20 @@ SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
// high 32 bits and just masks out low bits. We can skip it if so.
if (HighOp.getOpcode() == ISD::AND &&
HighOp.getOperand(1).getOpcode() == ISD::Constant) {
- ConstantSDNode *MaskNode = cast<ConstantSDNode>(HighOp.getOperand(1));
- uint64_t Mask = MaskNode->getZExtValue() | Masks[High];
- if ((Mask >> 32) == 0xffffffff)
- HighOp = HighOp.getOperand(0);
+ SDValue HighOp0 = HighOp.getOperand(0);
+ uint64_t Mask = cast<ConstantSDNode>(HighOp.getOperand(1))->getZExtValue();
+ if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
+ HighOp = HighOp0;
}
// Take advantage of the fact that all GR32 operations only change the
// low 32 bits by truncating Low to an i32 and inserting it directly
// using a subreg. The interesting cases are those where the truncation
// can be folded.
- DebugLoc DL = Op.getDebugLoc();
+ SDLoc DL(Op);
SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
- SDValue SubReg32 = DAG.getTargetConstant(SystemZ::subreg_32bit, MVT::i64);
- SDNode *Result = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
- MVT::i64, HighOp, Low32, SubReg32);
- return SDValue(Result, 0);
+ return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
+ MVT::i64, HighOp, Low32);
}
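
On values, the combination reduces to the following (a host-side sketch, valid under the high/low operand split that the earlier checks establish):

    // HighOp contributes only bits 32-63 and LowOp only bits 0-31, so
    // the OR is just "insert the truncated low operand into the high
    // one" -- exactly what the INSERT_SUBREG of subreg_l32 expresses.
    static uint64_t orHighLow(uint64_t HighOp, uint64_t LowOp) {
      return (HighOp & ~(uint64_t)0xffffffff) | (uint32_t)LowOp;
    }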
// Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the first
@@ -1368,7 +1967,7 @@ SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
SDValue Addr = Node->getBasePtr();
SDValue Src2 = Node->getVal();
MachineMemOperand *MMO = Node->getMemOperand();
- DebugLoc DL = Node->getDebugLoc();
+ SDLoc DL(Node);
EVT PtrVT = Addr.getValueType();
// Convert atomic subtracts of constants into additions.
@@ -1442,7 +2041,7 @@ SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
SDValue CmpVal = Node->getOperand(2);
SDValue SwapVal = Node->getOperand(3);
MachineMemOperand *MMO = Node->getMemOperand();
- DebugLoc DL = Node->getDebugLoc();
+ SDLoc DL(Node);
EVT PtrVT = Addr.getValueType();
// Get the address of the containing word.
@@ -1474,7 +2073,7 @@ SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
- return DAG.getCopyFromReg(Op.getOperand(0), Op.getDebugLoc(),
+ return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
SystemZ::R15D, Op.getValueType());
}
@@ -1482,10 +2081,30 @@ SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
- return DAG.getCopyToReg(Op.getOperand(0), Op.getDebugLoc(),
+ return DAG.getCopyToReg(Op.getOperand(0), SDLoc(Op),
SystemZ::R15D, Op.getOperand(1));
}
+SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
+ SelectionDAG &DAG) const {
+ bool IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
+ if (!IsData)
+ // Just preserve the chain.
+ return Op.getOperand(0);
+
+ bool IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+ unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
+ MemIntrinsicSDNode *Node = cast<MemIntrinsicSDNode>(Op.getNode());
+ SDValue Ops[] = {
+ Op.getOperand(0),
+ DAG.getConstant(Code, MVT::i32),
+ Op.getOperand(1)
+ };
+ return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, SDLoc(Op),
+ Node->getVTList(), Ops, array_lengthof(Ops),
+ Node->getMemoryVT(), Node->getMemOperand());
+}
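
With this hook in place, a source-level data prefetch should now select PFD; for example (a sketch, exact addressing aside):

    // Clang lowers __builtin_prefetch to ISD::PREFETCH; rw=1 asks for a
    // store prefetch, which the code above maps to PFD code 2
    // (PFD_WRITE).
    void warmLine(char *P) {
      __builtin_prefetch(P, /*rw=*/1, /*locality=*/3);
    }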
+
SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
@@ -1493,6 +2112,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
return lowerBR_CC(Op, DAG);
case ISD::SELECT_CC:
return lowerSELECT_CC(Op, DAG);
+ case ISD::SETCC:
+ return lowerSETCC(Op, DAG);
case ISD::GlobalAddress:
return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
case ISD::GlobalTLSAddress:
@@ -1511,6 +2132,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
return lowerVACOPY(Op, DAG);
case ISD::DYNAMIC_STACKALLOC:
return lowerDYNAMIC_STACKALLOC(Op, DAG);
+ case ISD::SMUL_LOHI:
+ return lowerSMUL_LOHI(Op, DAG);
case ISD::UMUL_LOHI:
return lowerUMUL_LOHI(Op, DAG);
case ISD::SDIVREM:
@@ -1547,6 +2170,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
return lowerSTACKSAVE(Op, DAG);
case ISD::STACKRESTORE:
return lowerSTACKRESTORE(Op, DAG);
+ case ISD::PREFETCH:
+ return lowerPREFETCH(Op, DAG);
default:
llvm_unreachable("Unexpected node to lower");
}
@@ -1557,9 +2182,12 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
OPCODE(RET_FLAG);
OPCODE(CALL);
+ OPCODE(SIBCALL);
OPCODE(PCREL_WRAPPER);
- OPCODE(CMP);
- OPCODE(UCMP);
+ OPCODE(PCREL_OFFSET);
+ OPCODE(ICMP);
+ OPCODE(FCMP);
+ OPCODE(TM);
OPCODE(BR_CCMASK);
OPCODE(SELECT_CCMASK);
OPCODE(ADJDYNALLOC);
@@ -1568,6 +2196,20 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(SDIVREM64);
OPCODE(UDIVREM32);
OPCODE(UDIVREM64);
+ OPCODE(MVC);
+ OPCODE(MVC_LOOP);
+ OPCODE(NC);
+ OPCODE(NC_LOOP);
+ OPCODE(OC);
+ OPCODE(OC_LOOP);
+ OPCODE(XC);
+ OPCODE(XC_LOOP);
+ OPCODE(CLC);
+ OPCODE(CLC_LOOP);
+ OPCODE(STRCMP);
+ OPCODE(STPCPY);
+ OPCODE(SEARCH_STRING);
+ OPCODE(IPM);
OPCODE(ATOMIC_SWAPW);
OPCODE(ATOMIC_LOADW_ADD);
OPCODE(ATOMIC_LOADW_SUB);
@@ -1580,6 +2222,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(ATOMIC_LOADW_UMIN);
OPCODE(ATOMIC_LOADW_UMAX);
OPCODE(ATOMIC_CMP_SWAPW);
+ OPCODE(PREFETCH);
}
return NULL;
#undef OPCODE
@@ -1609,6 +2252,31 @@ static MachineBasicBlock *splitBlockAfter(MachineInstr *MI,
return NewMBB;
}
+// Split MBB before MI and return the new block (the one that contains MI).
+static MachineBasicBlock *splitBlockBefore(MachineInstr *MI,
+ MachineBasicBlock *MBB) {
+ MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
+ NewMBB->splice(NewMBB->begin(), MBB, MI, MBB->end());
+ NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
+ return NewMBB;
+}
+
+// Force base value Base into a register before MI. Return the register.
+static unsigned forceReg(MachineInstr *MI, MachineOperand &Base,
+ const SystemZInstrInfo *TII) {
+ if (Base.isReg())
+ return Base.getReg();
+
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineFunction &MF = *MBB->getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(SystemZ::LA), Reg)
+ .addOperand(Base).addImm(0).addReg(0);
+ return Reg;
+}
+
// Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
MachineBasicBlock *
SystemZTargetLowering::emitSelect(MachineInstr *MI,
@@ -1618,21 +2286,20 @@ SystemZTargetLowering::emitSelect(MachineInstr *MI,
unsigned DestReg = MI->getOperand(0).getReg();
unsigned TrueReg = MI->getOperand(1).getReg();
unsigned FalseReg = MI->getOperand(2).getReg();
- unsigned CCMask = MI->getOperand(3).getImm();
+ unsigned CCValid = MI->getOperand(3).getImm();
+ unsigned CCMask = MI->getOperand(4).getImm();
DebugLoc DL = MI->getDebugLoc();
MachineBasicBlock *StartMBB = MBB;
- MachineBasicBlock *JoinMBB = splitBlockAfter(MI, MBB);
+ MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB);
MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);
// StartMBB:
- // ...
- // TrueVal = ...
- // cmpTY ccX, r1, r2
- // jCC JoinMBB
+ // BRC CCMask, JoinMBB
// # fallthrough to FalseMBB
MBB = StartMBB;
- BuildMI(MBB, DL, TII->get(SystemZ::BRCL)).addImm(CCMask).addMBB(JoinMBB);
+ BuildMI(MBB, DL, TII->get(SystemZ::BRC))
+ .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
MBB->addSuccessor(JoinMBB);
MBB->addSuccessor(FalseMBB);
@@ -1645,7 +2312,7 @@ SystemZTargetLowering::emitSelect(MachineInstr *MI,
// %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ]
// ...
MBB = JoinMBB;
- BuildMI(*MBB, MBB->begin(), DL, TII->get(SystemZ::PHI), DestReg)
+ BuildMI(*MBB, MI, DL, TII->get(SystemZ::PHI), DestReg)
.addReg(TrueReg).addMBB(StartMBB)
.addReg(FalseReg).addMBB(FalseMBB);
@@ -1653,6 +2320,69 @@ SystemZTargetLowering::emitSelect(MachineInstr *MI,
return JoinMBB;
}
+// Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI.
+// StoreOpcode is the store to use and Invert says whether the store should
+// happen when the condition is false rather than true. If a STORE ON
+// CONDITION is available, STOCOpcode is its opcode, otherwise it is 0.
+MachineBasicBlock *
+SystemZTargetLowering::emitCondStore(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ unsigned StoreOpcode, unsigned STOCOpcode,
+ bool Invert) const {
+ const SystemZInstrInfo *TII = TM.getInstrInfo();
+
+ unsigned SrcReg = MI->getOperand(0).getReg();
+ MachineOperand Base = MI->getOperand(1);
+ int64_t Disp = MI->getOperand(2).getImm();
+ unsigned IndexReg = MI->getOperand(3).getReg();
+ unsigned CCValid = MI->getOperand(4).getImm();
+ unsigned CCMask = MI->getOperand(5).getImm();
+ DebugLoc DL = MI->getDebugLoc();
+
+ StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp);
+
+ // Use STOCOpcode if possible. We could use different store patterns in
+ // order to avoid matching the index register, but the performance trade-offs
+ // might be more complicated in that case.
+ if (STOCOpcode && !IndexReg && TM.getSubtargetImpl()->hasLoadStoreOnCond()) {
+ if (Invert)
+ CCMask ^= CCValid;
+ BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
+ .addReg(SrcReg).addOperand(Base).addImm(Disp)
+ .addImm(CCValid).addImm(CCMask);
+ MI->eraseFromParent();
+ return MBB;
+ }
+
+ // Get the condition needed to branch around the store.
+ if (!Invert)
+ CCMask ^= CCValid;
+
+ MachineBasicBlock *StartMBB = MBB;
+ MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB);
+ MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);
+
+ // StartMBB:
+ // BRC CCMask, JoinMBB
+ // # fallthrough to FalseMBB
+ MBB = StartMBB;
+ BuildMI(MBB, DL, TII->get(SystemZ::BRC))
+ .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
+ MBB->addSuccessor(JoinMBB);
+ MBB->addSuccessor(FalseMBB);
+
+ // FalseMBB:
+ // store %SrcReg, %Disp(%Index,%Base)
+ // # fallthrough to JoinMBB
+ MBB = FalseMBB;
+ BuildMI(MBB, DL, TII->get(StoreOpcode))
+ .addReg(SrcReg).addOperand(Base).addImm(Disp).addReg(IndexReg);
+ MBB->addSuccessor(JoinMBB);
+
+ MI->eraseFromParent();
+ return JoinMBB;
+}
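
A worked example of the mask flip, using the mask values from SystemZ.h: for a store conditional on "equal", CCValid = CCMASK_ICMP = 0xE and CCMask = CCMASK_CMP_EQ = 0x8. Branching around the store needs the complement within the valid bits, 0x8 ^ 0xE = 0x6 = CCMASK_CMP_NE. STOC takes the store condition directly, which is why the XOR is applied there only when Invert is set.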
+
// Implement EmitInstrWithCustomInserter for pseudo ATOMIC_LOAD{,W}_*
// or ATOMIC_SWAP{,W} instruction MI. BinOpcode is the instruction that
// performs the binary operation elided by "*", or 0 for ATOMIC_SWAP{,W}.
@@ -1669,7 +2399,6 @@ SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI,
const SystemZInstrInfo *TII = TM.getInstrInfo();
MachineFunction &MF = *MBB->getParent();
MachineRegisterInfo &MRI = MF.getRegInfo();
- unsigned MaskNE = CCMaskForCondCode(ISD::SETNE);
bool IsSubWord = (BitSize < 32);
// Extract the operands. Base can be a register or a frame index.
@@ -1706,7 +2435,7 @@ SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI,
// Insert a basic block for the main loop.
MachineBasicBlock *StartMBB = MBB;
- MachineBasicBlock *DoneMBB = splitBlockAfter(MI, MBB);
+ MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
// StartMBB:
@@ -1740,11 +2469,11 @@ SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI,
.addReg(RotatedOldVal).addOperand(Src2);
if (BitSize < 32)
// XILF with the upper BitSize bits set.
- BuildMI(MBB, DL, TII->get(SystemZ::XILF32), RotatedNewVal)
+ BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
.addReg(Tmp).addImm(uint32_t(~0 << (32 - BitSize)));
else if (BitSize == 32)
// XILF with every bit set.
- BuildMI(MBB, DL, TII->get(SystemZ::XILF32), RotatedNewVal)
+ BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
.addReg(Tmp).addImm(~uint32_t(0));
else {
// Use LCGR and add -1 to the result, which is more compact than
@@ -1769,7 +2498,8 @@ SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI,
.addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
.addReg(OldVal).addReg(NewVal).addOperand(Base).addImm(Disp);
- BuildMI(MBB, DL, TII->get(SystemZ::BRCL)).addImm(MaskNE).addMBB(LoopMBB);
+ BuildMI(MBB, DL, TII->get(SystemZ::BRC))
+ .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
MBB->addSuccessor(LoopMBB);
MBB->addSuccessor(DoneMBB);
@@ -1792,7 +2522,6 @@ SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI,
const SystemZInstrInfo *TII = TM.getInstrInfo();
MachineFunction &MF = *MBB->getParent();
MachineRegisterInfo &MRI = MF.getRegInfo();
- unsigned MaskNE = CCMaskForCondCode(ISD::SETNE);
bool IsSubWord = (BitSize < 32);
// Extract the operands. Base can be a register or a frame index.
@@ -1828,7 +2557,7 @@ SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI,
// Insert 3 basic blocks for the loop.
MachineBasicBlock *StartMBB = MBB;
- MachineBasicBlock *DoneMBB = splitBlockAfter(MI, MBB);
+ MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
MachineBasicBlock *UseAltMBB = emitBlockAfter(LoopMBB);
MachineBasicBlock *UpdateMBB = emitBlockAfter(UseAltMBB);
@@ -1846,7 +2575,7 @@ SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI,
// %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ]
// %RotatedOldVal = RLL %OldVal, 0(%BitShift)
// CompareOpcode %RotatedOldVal, %Src2
- // BRCL KeepOldMask, UpdateMBB
+ // BRC KeepOldMask, UpdateMBB
MBB = LoopMBB;
BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
.addReg(OrigVal).addMBB(StartMBB)
@@ -1856,8 +2585,8 @@ SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI,
.addReg(OldVal).addReg(BitShift).addImm(0);
BuildMI(MBB, DL, TII->get(CompareOpcode))
.addReg(RotatedOldVal).addReg(Src2);
- BuildMI(MBB, DL, TII->get(SystemZ::BRCL))
- .addImm(KeepOldMask).addMBB(UpdateMBB);
+ BuildMI(MBB, DL, TII->get(SystemZ::BRC))
+ .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB);
MBB->addSuccessor(UpdateMBB);
MBB->addSuccessor(UseAltMBB);
@@ -1887,7 +2616,8 @@ SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI,
.addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
.addReg(OldVal).addReg(NewVal).addOperand(Base).addImm(Disp);
- BuildMI(MBB, DL, TII->get(SystemZ::BRCL)).addImm(MaskNE).addMBB(LoopMBB);
+ BuildMI(MBB, DL, TII->get(SystemZ::BRC))
+ .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
MBB->addSuccessor(LoopMBB);
MBB->addSuccessor(DoneMBB);
@@ -1903,7 +2633,6 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI,
const SystemZInstrInfo *TII = TM.getInstrInfo();
MachineFunction &MF = *MBB->getParent();
MachineRegisterInfo &MRI = MF.getRegInfo();
- unsigned MaskNE = CCMaskForCondCode(ISD::SETNE);
// Extract the operands. Base can be a register or a frame index.
unsigned Dest = MI->getOperand(0).getReg();
@@ -1935,7 +2664,7 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI,
// Insert 2 basic blocks for the loop.
MachineBasicBlock *StartMBB = MBB;
- MachineBasicBlock *DoneMBB = splitBlockAfter(MI, MBB);
+ MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
MachineBasicBlock *SetMBB = emitBlockAfter(LoopMBB);
@@ -1978,7 +2707,9 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI,
.addReg(CmpVal).addReg(Dest).addImm(32).addImm(63 - BitSize).addImm(0);
BuildMI(MBB, DL, TII->get(SystemZ::CR))
.addReg(Dest).addReg(RetryCmpVal);
- BuildMI(MBB, DL, TII->get(SystemZ::BRCL)).addImm(MaskNE).addMBB(DoneMBB);
+ BuildMI(MBB, DL, TII->get(SystemZ::BRC))
+ .addImm(SystemZ::CCMASK_ICMP)
+ .addImm(SystemZ::CCMASK_CMP_NE).addMBB(DoneMBB);
MBB->addSuccessor(DoneMBB);
MBB->addSuccessor(SetMBB);
@@ -1998,7 +2729,8 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI,
.addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize);
BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal)
.addReg(OldVal).addReg(StoreVal).addOperand(Base).addImm(Disp);
- BuildMI(MBB, DL, TII->get(SystemZ::BRCL)).addImm(MaskNE).addMBB(LoopMBB);
+ BuildMI(MBB, DL, TII->get(SystemZ::BRC))
+ .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
MBB->addSuccessor(LoopMBB);
MBB->addSuccessor(DoneMBB);
@@ -2008,8 +2740,8 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI,
// Emit an extension from a GR32 or GR64 to a GR128. ClearEven is true
// if the high register of the GR128 value must be cleared or false if
-// it's "don't care". SubReg is subreg_odd32 when extending a GR32
-// and subreg_odd when extending a GR64.
+// it's "don't care". SubReg is subreg_l32 when extending a GR32
+// and subreg_l64 when extending a GR64.
MachineBasicBlock *
SystemZTargetLowering::emitExt128(MachineInstr *MI,
MachineBasicBlock *MBB,
@@ -2031,7 +2763,7 @@ SystemZTargetLowering::emitExt128(MachineInstr *MI,
BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64)
.addImm(0);
BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128)
- .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_high);
+ .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_h64);
In128 = NewIn128;
}
BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
@@ -2041,9 +2773,238 @@ SystemZTargetLowering::emitExt128(MachineInstr *MI,
return MBB;
}
+MachineBasicBlock *
+SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ unsigned Opcode) const {
+ const SystemZInstrInfo *TII = TM.getInstrInfo();
+ MachineFunction &MF = *MBB->getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ DebugLoc DL = MI->getDebugLoc();
+
+ MachineOperand DestBase = earlyUseOperand(MI->getOperand(0));
+ uint64_t DestDisp = MI->getOperand(1).getImm();
+ MachineOperand SrcBase = earlyUseOperand(MI->getOperand(2));
+ uint64_t SrcDisp = MI->getOperand(3).getImm();
+ uint64_t Length = MI->getOperand(4).getImm();
+
+ // When generating more than one CLC, all but the last will need to
+ // branch to the end when a difference is found.
+ MachineBasicBlock *EndMBB = (Length > 256 && Opcode == SystemZ::CLC ?
+ splitBlockAfter(MI, MBB) : 0);
+
+ // Check for the loop form, in which operand 5 is the trip count.
+ if (MI->getNumExplicitOperands() > 5) {
+ bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);
+
+ uint64_t StartCountReg = MI->getOperand(5).getReg();
+ uint64_t StartSrcReg = forceReg(MI, SrcBase, TII);
+ uint64_t StartDestReg = (HaveSingleBase ? StartSrcReg :
+ forceReg(MI, DestBase, TII));
+
+ const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
+ uint64_t ThisSrcReg = MRI.createVirtualRegister(RC);
+ uint64_t ThisDestReg = (HaveSingleBase ? ThisSrcReg :
+ MRI.createVirtualRegister(RC));
+ uint64_t NextSrcReg = MRI.createVirtualRegister(RC);
+ uint64_t NextDestReg = (HaveSingleBase ? NextSrcReg :
+ MRI.createVirtualRegister(RC));
+
+ RC = &SystemZ::GR64BitRegClass;
+ uint64_t ThisCountReg = MRI.createVirtualRegister(RC);
+ uint64_t NextCountReg = MRI.createVirtualRegister(RC);
+
+ MachineBasicBlock *StartMBB = MBB;
+ MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
+ MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
+ MachineBasicBlock *NextMBB = (EndMBB ? emitBlockAfter(LoopMBB) : LoopMBB);
+
+ // StartMBB:
+ // # fall through to LoopMBB
+ MBB->addSuccessor(LoopMBB);
+
+ // LoopMBB:
+ // %ThisDestReg = phi [ %StartDestReg, StartMBB ],
+ // [ %NextDestReg, NextMBB ]
+ // %ThisSrcReg = phi [ %StartSrcReg, StartMBB ],
+ // [ %NextSrcReg, NextMBB ]
+ // %ThisCountReg = phi [ %StartCountReg, StartMBB ],
+ // [ %NextCountReg, NextMBB ]
+ // ( PFD 2, 768+DestDisp(%ThisDestReg) )
+ // Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg)
+ // ( JLH EndMBB )
+ //
+ // The prefetch is used only for MVC. The JLH is used only for CLC.
+ MBB = LoopMBB;
+
+ BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg)
+ .addReg(StartDestReg).addMBB(StartMBB)
+ .addReg(NextDestReg).addMBB(NextMBB);
+ if (!HaveSingleBase)
+ BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg)
+ .addReg(StartSrcReg).addMBB(StartMBB)
+ .addReg(NextSrcReg).addMBB(NextMBB);
+ BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg)
+ .addReg(StartCountReg).addMBB(StartMBB)
+ .addReg(NextCountReg).addMBB(NextMBB);
+ if (Opcode == SystemZ::MVC)
+ BuildMI(MBB, DL, TII->get(SystemZ::PFD))
+ .addImm(SystemZ::PFD_WRITE)
+ .addReg(ThisDestReg).addImm(DestDisp + 768).addReg(0);
+ BuildMI(MBB, DL, TII->get(Opcode))
+ .addReg(ThisDestReg).addImm(DestDisp).addImm(256)
+ .addReg(ThisSrcReg).addImm(SrcDisp);
+ if (EndMBB) {
+ BuildMI(MBB, DL, TII->get(SystemZ::BRC))
+ .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
+ .addMBB(EndMBB);
+ MBB->addSuccessor(EndMBB);
+ MBB->addSuccessor(NextMBB);
+ }
+
+ // NextMBB:
+ // %NextDestReg = LA 256(%ThisDestReg)
+ // %NextSrcReg = LA 256(%ThisSrcReg)
+ // %NextCountReg = AGHI %ThisCountReg, -1
+ // CGHI %NextCountReg, 0
+ // JLH LoopMBB
+ // # fall through to DoneMBB
+ //
+ // The AGHI, CGHI and JLH should be converted to BRCTG by later passes.
+ MBB = NextMBB;
+
+ BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg)
+ .addReg(ThisDestReg).addImm(256).addReg(0);
+ if (!HaveSingleBase)
+ BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg)
+ .addReg(ThisSrcReg).addImm(256).addReg(0);
+ BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg)
+ .addReg(ThisCountReg).addImm(-1);
+ BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
+ .addReg(NextCountReg).addImm(0);
+ BuildMI(MBB, DL, TII->get(SystemZ::BRC))
+ .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
+ .addMBB(LoopMBB);
+ MBB->addSuccessor(LoopMBB);
+ MBB->addSuccessor(DoneMBB);
+
+ DestBase = MachineOperand::CreateReg(NextDestReg, false);
+ SrcBase = MachineOperand::CreateReg(NextSrcReg, false);
+ Length &= 255;
+ MBB = DoneMBB;
+ }
+ // Handle any remaining bytes with straight-line code.
+ while (Length > 0) {
+ uint64_t ThisLength = std::min(Length, uint64_t(256));
+ // The previous iteration might have created out-of-range displacements.
+ // Apply them using LAY if so.
+ if (!isUInt<12>(DestDisp)) {
+ unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(SystemZ::LAY), Reg)
+ .addOperand(DestBase).addImm(DestDisp).addReg(0);
+ DestBase = MachineOperand::CreateReg(Reg, false);
+ DestDisp = 0;
+ }
+ if (!isUInt<12>(SrcDisp)) {
+ unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(SystemZ::LAY), Reg)
+ .addOperand(SrcBase).addImm(SrcDisp).addReg(0);
+ SrcBase = MachineOperand::CreateReg(Reg, false);
+ SrcDisp = 0;
+ }
+ BuildMI(*MBB, MI, DL, TII->get(Opcode))
+ .addOperand(DestBase).addImm(DestDisp).addImm(ThisLength)
+ .addOperand(SrcBase).addImm(SrcDisp);
+ DestDisp += ThisLength;
+ SrcDisp += ThisLength;
+ Length -= ThisLength;
+ // If there's another CLC to go, branch to the end if a difference
+ // was found.
+ if (EndMBB && Length > 0) {
+ MachineBasicBlock *NextMBB = splitBlockBefore(MI, MBB);
+ BuildMI(MBB, DL, TII->get(SystemZ::BRC))
+ .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
+ .addMBB(EndMBB);
+ MBB->addSuccessor(EndMBB);
+ MBB->addSuccessor(NextMBB);
+ MBB = NextMBB;
+ }
+ }
+ if (EndMBB) {
+ MBB->addSuccessor(EndMBB);
+ MBB = EndMBB;
+ MBB->addLiveIn(SystemZ::CC);
+ }
+
+ MI->eraseFromParent();
+ return MBB;
+}
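
The code emitted for the loop form has this shape, for Length = 256*N + R (a pseudo-C sketch; mvc() stands for one MVC of at most 256 bytes):

    for (uint64_t I = N; I != 0; --I) {   // LoopMBB + NextMBB
      prefetch_write(Dest + 768);         // PFD 2 (MVC only)
      mvc(Dest, Src, 256);                // or NC/OC/XC/CLC
      Dest += 256;                        // LA 256(%ThisDestReg)
      Src += 256;                         // CLC also exits early to
    }                                     // EndMBB on inequality
    if (R > 0)
      mvc(Dest, Src, R);                  // straight-line tail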
+
+// Decompose string pseudo-instruction MI into a loop that continually performs
+// Opcode until CC != 3.
+MachineBasicBlock *
+SystemZTargetLowering::emitStringWrapper(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ unsigned Opcode) const {
+ const SystemZInstrInfo *TII = TM.getInstrInfo();
+ MachineFunction &MF = *MBB->getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ DebugLoc DL = MI->getDebugLoc();
+
+ uint64_t End1Reg = MI->getOperand(0).getReg();
+ uint64_t Start1Reg = MI->getOperand(1).getReg();
+ uint64_t Start2Reg = MI->getOperand(2).getReg();
+ uint64_t CharReg = MI->getOperand(3).getReg();
+
+ const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass;
+ uint64_t This1Reg = MRI.createVirtualRegister(RC);
+ uint64_t This2Reg = MRI.createVirtualRegister(RC);
+ uint64_t End2Reg = MRI.createVirtualRegister(RC);
+
+ MachineBasicBlock *StartMBB = MBB;
+ MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
+ MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
+
+ // StartMBB:
+ // # fall through to LoopMBB
+ MBB->addSuccessor(LoopMBB);
+
+ // LoopMBB:
+ // %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ]
+ // %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ]
+ // R0L = %CharReg
+ // %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L
+ // JO LoopMBB
+ // # fall through to DoneMBB
+ //
+ // The load of R0L can be hoisted by post-RA LICM.
+ MBB = LoopMBB;
+
+ BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg)
+ .addReg(Start1Reg).addMBB(StartMBB)
+ .addReg(End1Reg).addMBB(LoopMBB);
+ BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg)
+ .addReg(Start2Reg).addMBB(StartMBB)
+ .addReg(End2Reg).addMBB(LoopMBB);
+ BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0L).addReg(CharReg);
+ BuildMI(MBB, DL, TII->get(Opcode))
+ .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define)
+ .addReg(This1Reg).addReg(This2Reg);
+ BuildMI(MBB, DL, TII->get(SystemZ::BRC))
+ .addImm(SystemZ::CCMASK_ANY).addImm(SystemZ::CCMASK_3).addMBB(LoopMBB);
+ MBB->addSuccessor(LoopMBB);
+ MBB->addSuccessor(DoneMBB);
+
+ DoneMBB->addLiveIn(SystemZ::CC);
+
+ MI->eraseFromParent();
+ return DoneMBB;
+}
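
The emitted string loop has this shape (a pseudo-C sketch): CC==3 means the instruction stopped at its 256-byte unit boundary without producing an answer, so it is simply reissued:

    r0l = CharReg;              // terminator / search character
    do
      cc = op(&End1, &End2);    // CLST, MVST or SRST; advances both
    while (cc == 3);            // JO LoopMBB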
+
MachineBasicBlock *SystemZTargetLowering::
EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
switch (MI->getOpcode()) {
+ case SystemZ::Select32Mux:
case SystemZ::Select32:
case SystemZ::SelectF32:
case SystemZ::Select64:
@@ -2051,12 +3012,45 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
case SystemZ::SelectF128:
return emitSelect(MI, MBB);
+ case SystemZ::CondStore8Mux:
+ return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false);
+ case SystemZ::CondStore8MuxInv:
+ return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true);
+ case SystemZ::CondStore16Mux:
+ return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false);
+ case SystemZ::CondStore16MuxInv:
+ return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true);
+ case SystemZ::CondStore8:
+ return emitCondStore(MI, MBB, SystemZ::STC, 0, false);
+ case SystemZ::CondStore8Inv:
+ return emitCondStore(MI, MBB, SystemZ::STC, 0, true);
+ case SystemZ::CondStore16:
+ return emitCondStore(MI, MBB, SystemZ::STH, 0, false);
+ case SystemZ::CondStore16Inv:
+ return emitCondStore(MI, MBB, SystemZ::STH, 0, true);
+ case SystemZ::CondStore32:
+ return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, false);
+ case SystemZ::CondStore32Inv:
+ return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, true);
+ case SystemZ::CondStore64:
+ return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, false);
+ case SystemZ::CondStore64Inv:
+ return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, true);
+ case SystemZ::CondStoreF32:
+ return emitCondStore(MI, MBB, SystemZ::STE, 0, false);
+ case SystemZ::CondStoreF32Inv:
+ return emitCondStore(MI, MBB, SystemZ::STE, 0, true);
+ case SystemZ::CondStoreF64:
+ return emitCondStore(MI, MBB, SystemZ::STD, 0, false);
+ case SystemZ::CondStoreF64Inv:
+ return emitCondStore(MI, MBB, SystemZ::STD, 0, true);
+
case SystemZ::AEXT128_64:
- return emitExt128(MI, MBB, false, SystemZ::subreg_low);
+ return emitExt128(MI, MBB, false, SystemZ::subreg_l64);
case SystemZ::ZEXT128_32:
- return emitExt128(MI, MBB, true, SystemZ::subreg_low32);
+ return emitExt128(MI, MBB, true, SystemZ::subreg_l32);
case SystemZ::ZEXT128_64:
- return emitExt128(MI, MBB, true, SystemZ::subreg_low);
+ return emitExt128(MI, MBB, true, SystemZ::subreg_l64);
case SystemZ::ATOMIC_SWAPW:
return emitAtomicLoadBinary(MI, MBB, 0, 0);
@@ -2092,98 +3086,98 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
case SystemZ::ATOMIC_LOADW_NR:
return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0);
case SystemZ::ATOMIC_LOADW_NILH:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH32, 0);
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 0);
case SystemZ::ATOMIC_LOAD_NR:
return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32);
- case SystemZ::ATOMIC_LOAD_NILL32:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL32, 32);
- case SystemZ::ATOMIC_LOAD_NILH32:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH32, 32);
- case SystemZ::ATOMIC_LOAD_NILF32:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF32, 32);
- case SystemZ::ATOMIC_LOAD_NGR:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64);
case SystemZ::ATOMIC_LOAD_NILL:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 64);
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 32);
case SystemZ::ATOMIC_LOAD_NILH:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 64);
- case SystemZ::ATOMIC_LOAD_NIHL:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL, 64);
- case SystemZ::ATOMIC_LOAD_NIHH:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH, 64);
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 32);
case SystemZ::ATOMIC_LOAD_NILF:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 64);
- case SystemZ::ATOMIC_LOAD_NIHF:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF, 64);
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 32);
+ case SystemZ::ATOMIC_LOAD_NGR:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64);
+ case SystemZ::ATOMIC_LOAD_NILL64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL64, 64);
+ case SystemZ::ATOMIC_LOAD_NILH64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH64, 64);
+ case SystemZ::ATOMIC_LOAD_NIHL64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL64, 64);
+ case SystemZ::ATOMIC_LOAD_NIHH64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH64, 64);
+ case SystemZ::ATOMIC_LOAD_NILF64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF64, 64);
+ case SystemZ::ATOMIC_LOAD_NIHF64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF64, 64);
case SystemZ::ATOMIC_LOADW_OR:
return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 0);
case SystemZ::ATOMIC_LOADW_OILH:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH32, 0);
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 0);
case SystemZ::ATOMIC_LOAD_OR:
return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 32);
- case SystemZ::ATOMIC_LOAD_OILL32:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL32, 32);
- case SystemZ::ATOMIC_LOAD_OILH32:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH32, 32);
- case SystemZ::ATOMIC_LOAD_OILF32:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF32, 32);
- case SystemZ::ATOMIC_LOAD_OGR:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::OGR, 64);
case SystemZ::ATOMIC_LOAD_OILL:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL, 64);
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL, 32);
case SystemZ::ATOMIC_LOAD_OILH:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 64);
- case SystemZ::ATOMIC_LOAD_OIHL:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHL, 64);
- case SystemZ::ATOMIC_LOAD_OIHH:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHH, 64);
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 32);
case SystemZ::ATOMIC_LOAD_OILF:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF, 64);
- case SystemZ::ATOMIC_LOAD_OIHF:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHF, 64);
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF, 32);
+ case SystemZ::ATOMIC_LOAD_OGR:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OGR, 64);
+ case SystemZ::ATOMIC_LOAD_OILL64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL64, 64);
+ case SystemZ::ATOMIC_LOAD_OILH64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH64, 64);
+ case SystemZ::ATOMIC_LOAD_OIHL64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHL64, 64);
+ case SystemZ::ATOMIC_LOAD_OIHH64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHH64, 64);
+ case SystemZ::ATOMIC_LOAD_OILF64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF64, 64);
+ case SystemZ::ATOMIC_LOAD_OIHF64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHF64, 64);
case SystemZ::ATOMIC_LOADW_XR:
return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 0);
case SystemZ::ATOMIC_LOADW_XILF:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF32, 0);
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 0);
case SystemZ::ATOMIC_LOAD_XR:
return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 32);
- case SystemZ::ATOMIC_LOAD_XILF32:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF32, 32);
+ case SystemZ::ATOMIC_LOAD_XILF:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 32);
case SystemZ::ATOMIC_LOAD_XGR:
return emitAtomicLoadBinary(MI, MBB, SystemZ::XGR, 64);
- case SystemZ::ATOMIC_LOAD_XILF:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 64);
- case SystemZ::ATOMIC_LOAD_XIHF:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::XIHF, 64);
+ case SystemZ::ATOMIC_LOAD_XILF64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF64, 64);
+ case SystemZ::ATOMIC_LOAD_XIHF64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::XIHF64, 64);
case SystemZ::ATOMIC_LOADW_NRi:
return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0, true);
case SystemZ::ATOMIC_LOADW_NILHi:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH32, 0, true);
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 0, true);
case SystemZ::ATOMIC_LOAD_NRi:
return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32, true);
- case SystemZ::ATOMIC_LOAD_NILL32i:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL32, 32, true);
- case SystemZ::ATOMIC_LOAD_NILH32i:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH32, 32, true);
- case SystemZ::ATOMIC_LOAD_NILF32i:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF32, 32, true);
- case SystemZ::ATOMIC_LOAD_NGRi:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64, true);
case SystemZ::ATOMIC_LOAD_NILLi:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 64, true);
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 32, true);
case SystemZ::ATOMIC_LOAD_NILHi:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 64, true);
- case SystemZ::ATOMIC_LOAD_NIHLi:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL, 64, true);
- case SystemZ::ATOMIC_LOAD_NIHHi:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH, 64, true);
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 32, true);
case SystemZ::ATOMIC_LOAD_NILFi:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 64, true);
- case SystemZ::ATOMIC_LOAD_NIHFi:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF, 64, true);
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 32, true);
+ case SystemZ::ATOMIC_LOAD_NGRi:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64, true);
+ case SystemZ::ATOMIC_LOAD_NILL64i:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL64, 64, true);
+ case SystemZ::ATOMIC_LOAD_NILH64i:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH64, 64, true);
+ case SystemZ::ATOMIC_LOAD_NIHL64i:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL64, 64, true);
+ case SystemZ::ATOMIC_LOAD_NIHH64i:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH64, 64, true);
+ case SystemZ::ATOMIC_LOAD_NILF64i:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF64, 64, true);
+ case SystemZ::ATOMIC_LOAD_NIHF64i:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF64, 64, true);
case SystemZ::ATOMIC_LOADW_MIN:
return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
@@ -2227,6 +3221,27 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
case SystemZ::ATOMIC_CMP_SWAPW:
return emitAtomicCmpSwapW(MI, MBB);
+ case SystemZ::MVCSequence:
+ case SystemZ::MVCLoop:
+ return emitMemMemWrapper(MI, MBB, SystemZ::MVC);
+ case SystemZ::NCSequence:
+ case SystemZ::NCLoop:
+ return emitMemMemWrapper(MI, MBB, SystemZ::NC);
+ case SystemZ::OCSequence:
+ case SystemZ::OCLoop:
+ return emitMemMemWrapper(MI, MBB, SystemZ::OC);
+ case SystemZ::XCSequence:
+ case SystemZ::XCLoop:
+ return emitMemMemWrapper(MI, MBB, SystemZ::XC);
+ case SystemZ::CLCSequence:
+ case SystemZ::CLCLoop:
+ return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
+ case SystemZ::CLSTLoop:
+ return emitStringWrapper(MI, MBB, SystemZ::CLST);
+ case SystemZ::MVSTLoop:
+ return emitStringWrapper(MI, MBB, SystemZ::MVST);
+ case SystemZ::SRSTLoop:
+ return emitStringWrapper(MI, MBB, SystemZ::SRST);
default:
llvm_unreachable("Unexpected instr type to insert");
}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index eea820c..c6dcca6 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -16,6 +16,7 @@
#define LLVM_TARGET_SystemZ_ISELLOWERING_H
#include "SystemZ.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLowering.h"
@@ -31,17 +32,32 @@ namespace SystemZISD {
// is the target address. The arguments start at operand 2.
// There is an optional glue operand at the end.
CALL,
+ SIBCALL,
// Wraps a TargetGlobalAddress that should be loaded using PC-relative
// accesses (LARL). Operand 0 is the address.
PCREL_WRAPPER,
- // Signed integer and floating-point comparisons. The operands are the
- // two values to compare.
- CMP,
+ // Used in cases where an offset is applied to a TargetGlobalAddress.
+ // Operand 0 is the full TargetGlobalAddress and operand 1 is a
+ // PCREL_WRAPPER for an anchor point. This is used so that we can
+ // cheaply refer to either the full address or the anchor point
+ // as a register base.
+ PCREL_OFFSET,
- // Likewise unsigned integer comparison.
- UCMP,
+ // Integer comparisons. There are three operands: the two values
+ // to compare, and an integer of type SystemZICMP.
+ ICMP,
+
+ // Floating-point comparisons. The two operands are the values to compare.
+ FCMP,
+
+ // Test under mask. The first operand is ANDed with the second operand
+ // and the condition codes are set on the result. The third operand is
+ // a boolean that is true if the condition codes need to distinguish
+ // between CCMASK_TM_MIXED_MSB_0 and CCMASK_TM_MIXED_MSB_1 (which the
+ // register forms do but the memory forms don't).
+ TM,
// Branches if a condition is true. Operand 0 is the chain operand;
// operand 1 is the 4-bit condition-code mask, with bit N in
@@ -67,10 +83,55 @@ namespace SystemZISD {
// first input operands are GR128s. The trailing numbers are the
// widths of the second operand in bits.
UMUL_LOHI64,
+ SDIVREM32,
SDIVREM64,
UDIVREM32,
UDIVREM64,
+ // Use a series of MVCs to copy bytes from one memory location to another.
+ // The operands are:
+ // - the target address
+ // - the source address
+ // - the constant length
+ //
+ // This isn't a memory opcode because we'd need to attach two
+ // MachineMemOperands rather than one.
+ MVC,
+
+ // Like MVC, but implemented as a loop that handles X*256 bytes
+ // followed by straight-line code to handle the rest (if any).
+ // The value of X is passed as an additional operand.
+ MVC_LOOP,
+
+ // Similar to MVC and MVC_LOOP, but for logic operations (AND, OR, XOR).
+ NC,
+ NC_LOOP,
+ OC,
+ OC_LOOP,
+ XC,
+ XC_LOOP,
+
+ // Use CLC to compare two blocks of memory, with the same comments
+ // as for MVC and MVC_LOOP.
+ CLC,
+ CLC_LOOP,
+
+ // Use an MVST-based sequence to implement stpcpy().
+ STPCPY,
+
+ // Use a CLST-based sequence to implement strcmp(). The two input operands
+ // are the addresses of the strings to compare.
+ STRCMP,
+
+ // Use an SRST-based sequence to search a block of memory. The first
+ // operand is the end address, the second is the start, and the third
+ // is the character to search for. CC is set to 1 on success and 2
+ // on failure.
+ SEARCH_STRING,
+
+ // Store the CC value in bits 29 and 28 of an integer.
+ IPM,
+
// Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or
// ATOMIC_LOAD_<op>.
//
@@ -102,7 +163,27 @@ namespace SystemZISD {
// operand into the high bits
// Operand 4: the negative of operand 2, for rotating the other way
// Operand 5: the width of the field in bits (8 or 16)
- ATOMIC_CMP_SWAPW
+ ATOMIC_CMP_SWAPW,
+
+ // Prefetch from the second operand using the 4-bit control code in
+ // the first operand. The code is 1 for a load prefetch and 2 for
+ // a store prefetch.
+ PREFETCH
+ };
+
+ // Return true if OPCODE is some kind of PC-relative address.
+ inline bool isPCREL(unsigned Opcode) {
+ return Opcode == PCREL_WRAPPER || Opcode == PCREL_OFFSET;
+ }
+}
+
+namespace SystemZICMP {
+ // Describes whether an integer comparison needs to be signed or unsigned,
+ // or whether either type is OK.
+ enum {
+ Any,
+ UnsignedOnly,
+ SignedOnly
};
}
@@ -117,17 +198,19 @@ public:
virtual MVT getScalarShiftAmountTy(EVT LHSTy) const LLVM_OVERRIDE {
return MVT::i32;
}
- virtual EVT getSetCCResultType(EVT VT) const {
- return MVT::i32;
- }
- virtual bool isFMAFasterThanMulAndAdd(EVT) const LLVM_OVERRIDE {
- return true;
- }
- virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
+ virtual EVT getSetCCResultType(LLVMContext &, EVT) const LLVM_OVERRIDE;
+ virtual bool isFMAFasterThanFMulAndFAdd(EVT VT) const LLVM_OVERRIDE;
+ virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const LLVM_OVERRIDE;
+ virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const
+ LLVM_OVERRIDE;
+ virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const
+ LLVM_OVERRIDE;
+ virtual bool isTruncateFree(Type *, Type *) const LLVM_OVERRIDE;
+ virtual bool isTruncateFree(EVT, EVT) const LLVM_OVERRIDE;
virtual const char *getTargetNodeName(unsigned Opcode) const LLVM_OVERRIDE;
virtual std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const std::string &Constraint,
- EVT VT) const LLVM_OVERRIDE;
+ MVT VT) const LLVM_OVERRIDE;
virtual TargetLowering::ConstraintType
getConstraintType(const std::string &Constraint) const LLVM_OVERRIDE;
virtual TargetLowering::ConstraintWeight
@@ -143,11 +226,13 @@ public:
MachineBasicBlock *BB) const LLVM_OVERRIDE;
virtual SDValue LowerOperation(SDValue Op,
SelectionDAG &DAG) const LLVM_OVERRIDE;
+ virtual bool allowTruncateForTailCall(Type *, Type *) const LLVM_OVERRIDE;
+ virtual bool mayBeEmittedAsTailCall(CallInst *CI) const LLVM_OVERRIDE;
virtual SDValue
LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc DL, SelectionDAG &DAG,
+ SDLoc DL, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const LLVM_OVERRIDE;
virtual SDValue
LowerCall(CallLoweringInfo &CLI,
@@ -158,13 +243,14 @@ public:
CallingConv::ID CallConv, bool IsVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
- DebugLoc DL, SelectionDAG &DAG) const LLVM_OVERRIDE;
+ SDLoc DL, SelectionDAG &DAG) const LLVM_OVERRIDE;
private:
const SystemZSubtarget &Subtarget;
const SystemZTargetMachine &TM;
// Implement LowerOperation for individual opcodes.
+ SDValue lowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerGlobalAddress(GlobalAddressSDNode *Node,
@@ -178,6 +264,7 @@ private:
SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSDIVREM(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
@@ -188,10 +275,24 @@ private:
SDValue lowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const;
+
+ // If the last instruction before MBBI in MBB was some form of COMPARE,
+ // try to replace it with a COMPARE AND BRANCH just before MBBI.
+ // CCMask and Target are the BRC-like operands for the branch.
+ // Return true if the change was made.
+ bool convertPrevCompareToBranch(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned CCMask,
+ MachineBasicBlock *Target) const;
// Implement EmitInstrWithCustomInserter for individual operation types.
MachineBasicBlock *emitSelect(MachineInstr *MI,
MachineBasicBlock *BB) const;
+ MachineBasicBlock *emitCondStore(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned StoreOpcode, unsigned STOCOpcode,
+ bool Invert) const;
MachineBasicBlock *emitExt128(MachineInstr *MI,
MachineBasicBlock *MBB,
bool ClearEven, unsigned SubReg) const;
@@ -206,6 +307,12 @@ private:
unsigned BitSize) const;
MachineBasicBlock *emitAtomicCmpSwapW(MachineInstr *MI,
MachineBasicBlock *BB) const;
+ MachineBasicBlock *emitMemMemWrapper(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Opcode) const;
+ MachineBasicBlock *emitStringWrapper(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Opcode) const;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td
index 7c9f0e6..6080046 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
-// Control-flow instructions
+// Select instructions
//===----------------------------------------------------------------------===//
// C's ?: operator for floating-point operands.
@@ -16,6 +16,11 @@ def SelectF32 : SelectWrapper<FP32>;
def SelectF64 : SelectWrapper<FP64>;
def SelectF128 : SelectWrapper<FP128>;
+defm CondStoreF32 : CondStores<FP32, nonvolatile_store,
+ nonvolatile_load, bdxaddr20only>;
+defm CondStoreF64 : CondStores<FP64, nonvolatile_store,
+ nonvolatile_load, bdxaddr20only>;
+
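[Editorial note: a hedged model of what the CondStores wiring above expresses: a store that fires only when the current condition code matches the requested 4-bit mask (CC 0 is the most significant mask bit, as in SystemZ::CCMASK_*). Function and variable names are illustrative only.]

    #include <cstdio>

    // Conditional-store semantics: store only if CC is selected by CCMask.
    static void condStore(float *Ptr, float Val, unsigned CC, unsigned CCMask) {
      if ((CCMask >> (3 - CC)) & 1) // mask bit 8 = CC 0, ... bit 1 = CC 3
        *Ptr = Val;
    }

    int main() {
      float X = 1.0f;
      condStore(&X, 2.0f, 0, 0x8); // mask 8 selects CC 0, so the store fires
      std::printf("%g\n", X);      // prints 2
      return 0;
    }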
//===----------------------------------------------------------------------===//
// Move instructions
//===----------------------------------------------------------------------===//
@@ -29,57 +34,69 @@ let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
// Moves between two floating-point registers.
let neverHasSideEffects = 1 in {
- def LER : UnaryRR <"ler", 0x38, null_frag, FP32, FP32>;
- def LDR : UnaryRR <"ldr", 0x28, null_frag, FP64, FP64>;
- def LXR : UnaryRRE<"lxr", 0xB365, null_frag, FP128, FP128>;
+ def LER : UnaryRR <"le", 0x38, null_frag, FP32, FP32>;
+ def LDR : UnaryRR <"ld", 0x28, null_frag, FP64, FP64>;
+ def LXR : UnaryRRE<"lx", 0xB365, null_frag, FP128, FP128>;
+}
+
+// Moves between two floating-point registers that also set the condition
+// codes.
+let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
+ defm LTEBR : LoadAndTestRRE<"lteb", 0xB302, FP32>;
+ defm LTDBR : LoadAndTestRRE<"ltdb", 0xB312, FP64>;
+ defm LTXBR : LoadAndTestRRE<"ltxb", 0xB342, FP128>;
}
+def : CompareZeroFP<LTEBRCompare, FP32>;
+def : CompareZeroFP<LTDBRCompare, FP64>;
+def : CompareZeroFP<LTXBRCompare, FP128>;
// Moves between 64-bit integer and floating-point registers.
-def LGDR : UnaryRRE<"lgdr", 0xB3CD, bitconvert, GR64, FP64>;
-def LDGR : UnaryRRE<"ldgr", 0xB3C1, bitconvert, FP64, GR64>;
+def LGDR : UnaryRRE<"lgd", 0xB3CD, bitconvert, GR64, FP64>;
+def LDGR : UnaryRRE<"ldg", 0xB3C1, bitconvert, FP64, GR64>;
// fcopysign with an FP32 result.
let isCodeGenOnly = 1 in {
- def CPSDRss : BinaryRevRRF<"cpsdr", 0xB372, fcopysign, FP32, FP32>;
- def CPSDRsd : BinaryRevRRF<"cpsdr", 0xB372, fcopysign, FP32, FP64>;
+ def CPSDRss : BinaryRRF<"cpsd", 0xB372, fcopysign, FP32, FP32>;
+ def CPSDRsd : BinaryRRF<"cpsd", 0xB372, fcopysign, FP32, FP64>;
}
-// The sign of an FP128 is in the high register. Give the CPSDRsd
-// operands in R1, R2, R3 order.
+// The sign of an FP128 is in the high register.
def : Pat<(fcopysign FP32:$src1, FP128:$src2),
- (CPSDRsd (EXTRACT_SUBREG FP128:$src2, subreg_high), FP32:$src1)>;
+ (CPSDRsd FP32:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>;
// fcopysign with an FP64 result.
let isCodeGenOnly = 1 in
- def CPSDRds : BinaryRevRRF<"cpsdr", 0xB372, fcopysign, FP64, FP32>;
-def CPSDRdd : BinaryRevRRF<"cpsdr", 0xB372, fcopysign, FP64, FP64>;
+ def CPSDRds : BinaryRRF<"cpsd", 0xB372, fcopysign, FP64, FP32>;
+def CPSDRdd : BinaryRRF<"cpsd", 0xB372, fcopysign, FP64, FP64>;
-// The sign of an FP128 is in the high register. Give the CPSDRdd
-// operands in R1, R2, R3 order.
+// The sign of an FP128 is in the high register.
def : Pat<(fcopysign FP64:$src1, FP128:$src2),
- (CPSDRdd (EXTRACT_SUBREG FP128:$src2, subreg_high), FP64:$src1)>;
+ (CPSDRdd FP64:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>;
// fcopysign with an FP128 result. Use "upper" as the high half and leave
// the low half as-is.
class CopySign128<RegisterOperand cls, dag upper>
: Pat<(fcopysign FP128:$src1, cls:$src2),
- (INSERT_SUBREG FP128:$src1, upper, subreg_high)>;
+ (INSERT_SUBREG FP128:$src1, upper, subreg_h64)>;
-// Give the CPSDR* operands in R1, R2, R3 order.
-def : CopySign128<FP32, (CPSDRds FP32:$src2,
- (EXTRACT_SUBREG FP128:$src1, subreg_high))>;
-def : CopySign128<FP64, (CPSDRdd FP64:$src2,
- (EXTRACT_SUBREG FP128:$src1, subreg_high))>;
-def : CopySign128<FP128, (CPSDRdd (EXTRACT_SUBREG FP128:$src2, subreg_high),
- (EXTRACT_SUBREG FP128:$src1, subreg_high))>;
+def : CopySign128<FP32, (CPSDRds (EXTRACT_SUBREG FP128:$src1, subreg_h64),
+ FP32:$src2)>;
+def : CopySign128<FP64, (CPSDRdd (EXTRACT_SUBREG FP128:$src1, subreg_h64),
+ FP64:$src2)>;
+def : CopySign128<FP128, (CPSDRdd (EXTRACT_SUBREG FP128:$src1, subreg_h64),
+ (EXTRACT_SUBREG FP128:$src2, subreg_h64))>;
+
+defm LoadStoreF32 : MVCLoadStore<load, f32, MVCSequence, 4>;
+defm LoadStoreF64 : MVCLoadStore<load, f64, MVCSequence, 8>;
+defm LoadStoreF128 : MVCLoadStore<load, f128, MVCSequence, 16>;
//===----------------------------------------------------------------------===//
// Load instructions
//===----------------------------------------------------------------------===//
let canFoldAsLoad = 1, SimpleBDXLoad = 1 in {
- defm LE : UnaryRXPair<"le", 0x78, 0xED64, load, FP32>;
- defm LD : UnaryRXPair<"ld", 0x68, 0xED65, load, FP64>;
+ defm LE : UnaryRXPair<"le", 0x78, 0xED64, load, FP32, 4>;
+ defm LD : UnaryRXPair<"ld", 0x68, 0xED65, load, FP64, 8>;
// These instructions are split after register allocation, so we don't
// want a custom inserter.
@@ -94,8 +111,8 @@ let canFoldAsLoad = 1, SimpleBDXLoad = 1 in {
//===----------------------------------------------------------------------===//
let SimpleBDXStore = 1 in {
- defm STE : StoreRXPair<"ste", 0x70, 0xED66, store, FP32>;
- defm STD : StoreRXPair<"std", 0x60, 0xED67, store, FP64>;
+ defm STE : StoreRXPair<"ste", 0x70, 0xED66, store, FP32, 4>;
+ defm STD : StoreRXPair<"std", 0x60, 0xED67, store, FP64, 8>;
// These instructions are split after register allocation, so we don't
// want a custom inserter.
@@ -112,201 +129,232 @@ let SimpleBDXStore = 1 in {
// Convert floating-point values to narrower representations, rounding
// according to the current mode. The destination of LEXBR and LDXBR
// is a 128-bit value, but only the first register of the pair is used.
-def LEDBR : UnaryRRE<"ledbr", 0xB344, fround, FP32, FP64>;
-def LEXBR : UnaryRRE<"lexbr", 0xB346, null_frag, FP128, FP128>;
-def LDXBR : UnaryRRE<"ldxbr", 0xB345, null_frag, FP128, FP128>;
+def LEDBR : UnaryRRE<"ledb", 0xB344, fround, FP32, FP64>;
+def LEXBR : UnaryRRE<"lexb", 0xB346, null_frag, FP128, FP128>;
+def LDXBR : UnaryRRE<"ldxb", 0xB345, null_frag, FP128, FP128>;
def : Pat<(f32 (fround FP128:$src)),
- (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_32bit)>;
+ (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_hh32)>;
def : Pat<(f64 (fround FP128:$src)),
- (EXTRACT_SUBREG (LDXBR FP128:$src), subreg_high)>;
+ (EXTRACT_SUBREG (LDXBR FP128:$src), subreg_h64)>;
// Extend register floating-point values to wider representations.
-def LDEBR : UnaryRRE<"ldebr", 0xB304, fextend, FP64, FP32>;
-def LXEBR : UnaryRRE<"lxebr", 0xB306, fextend, FP128, FP32>;
-def LXDBR : UnaryRRE<"lxdbr", 0xB305, fextend, FP128, FP64>;
+def LDEBR : UnaryRRE<"ldeb", 0xB304, fextend, FP64, FP32>;
+def LXEBR : UnaryRRE<"lxeb", 0xB306, fextend, FP128, FP32>;
+def LXDBR : UnaryRRE<"lxdb", 0xB305, fextend, FP128, FP64>;
// Extend memory floating-point values to wider representations.
-def LDEB : UnaryRXE<"ldeb", 0xED04, extloadf32, FP64>;
-def LXEB : UnaryRXE<"lxeb", 0xED06, extloadf32, FP128>;
-def LXDB : UnaryRXE<"lxdb", 0xED05, extloadf64, FP128>;
+def LDEB : UnaryRXE<"ldeb", 0xED04, extloadf32, FP64, 4>;
+def LXEB : UnaryRXE<"lxeb", 0xED06, extloadf32, FP128, 4>;
+def LXDB : UnaryRXE<"lxdb", 0xED05, extloadf64, FP128, 8>;
// Convert a signed integer register value to a floating-point one.
-let Defs = [PSW] in {
- def CEFBR : UnaryRRE<"cefbr", 0xB394, sint_to_fp, FP32, GR32>;
- def CDFBR : UnaryRRE<"cdfbr", 0xB395, sint_to_fp, FP64, GR32>;
- def CXFBR : UnaryRRE<"cxfbr", 0xB396, sint_to_fp, FP128, GR32>;
-
- def CEGBR : UnaryRRE<"cegbr", 0xB3A4, sint_to_fp, FP32, GR64>;
- def CDGBR : UnaryRRE<"cdgbr", 0xB3A5, sint_to_fp, FP64, GR64>;
- def CXGBR : UnaryRRE<"cxgbr", 0xB3A6, sint_to_fp, FP128, GR64>;
-}
+def CEFBR : UnaryRRE<"cefb", 0xB394, sint_to_fp, FP32, GR32>;
+def CDFBR : UnaryRRE<"cdfb", 0xB395, sint_to_fp, FP64, GR32>;
+def CXFBR : UnaryRRE<"cxfb", 0xB396, sint_to_fp, FP128, GR32>;
+
+def CEGBR : UnaryRRE<"cegb", 0xB3A4, sint_to_fp, FP32, GR64>;
+def CDGBR : UnaryRRE<"cdgb", 0xB3A5, sint_to_fp, FP64, GR64>;
+def CXGBR : UnaryRRE<"cxgb", 0xB3A6, sint_to_fp, FP128, GR64>;
// Convert a floating-point register value to a signed integer value,
// with the second operand (modifier M3) specifying the rounding mode.
-let Defs = [PSW] in {
- def CFEBR : UnaryRRF<"cfebr", 0xB398, GR32, FP32>;
- def CFDBR : UnaryRRF<"cfdbr", 0xB399, GR32, FP64>;
- def CFXBR : UnaryRRF<"cfxbr", 0xB39A, GR32, FP128>;
-
- def CGEBR : UnaryRRF<"cgebr", 0xB3A8, GR64, FP32>;
- def CGDBR : UnaryRRF<"cgdbr", 0xB3A9, GR64, FP64>;
- def CGXBR : UnaryRRF<"cgxbr", 0xB3AA, GR64, FP128>;
+let Defs = [CC] in {
+ def CFEBR : UnaryRRF<"cfeb", 0xB398, GR32, FP32>;
+ def CFDBR : UnaryRRF<"cfdb", 0xB399, GR32, FP64>;
+ def CFXBR : UnaryRRF<"cfxb", 0xB39A, GR32, FP128>;
+
+ def CGEBR : UnaryRRF<"cgeb", 0xB3A8, GR64, FP32>;
+ def CGDBR : UnaryRRF<"cgdb", 0xB3A9, GR64, FP64>;
+ def CGXBR : UnaryRRF<"cgxb", 0xB3AA, GR64, FP128>;
}
// fp_to_sint always rounds towards zero, which is modifier value 5.
-def : Pat<(i32 (fp_to_sint FP32:$src)), (CFEBR FP32:$src, 5)>;
-def : Pat<(i32 (fp_to_sint FP64:$src)), (CFDBR FP64:$src, 5)>;
-def : Pat<(i32 (fp_to_sint FP128:$src)), (CFXBR FP128:$src, 5)>;
+def : Pat<(i32 (fp_to_sint FP32:$src)), (CFEBR 5, FP32:$src)>;
+def : Pat<(i32 (fp_to_sint FP64:$src)), (CFDBR 5, FP64:$src)>;
+def : Pat<(i32 (fp_to_sint FP128:$src)), (CFXBR 5, FP128:$src)>;
-def : Pat<(i64 (fp_to_sint FP32:$src)), (CGEBR FP32:$src, 5)>;
-def : Pat<(i64 (fp_to_sint FP64:$src)), (CGDBR FP64:$src, 5)>;
-def : Pat<(i64 (fp_to_sint FP128:$src)), (CGXBR FP128:$src, 5)>;
+def : Pat<(i64 (fp_to_sint FP32:$src)), (CGEBR 5, FP32:$src)>;
+def : Pat<(i64 (fp_to_sint FP64:$src)), (CGDBR 5, FP64:$src)>;
+def : Pat<(i64 (fp_to_sint FP128:$src)), (CGXBR 5, FP128:$src)>;
//===----------------------------------------------------------------------===//
// Unary arithmetic
//===----------------------------------------------------------------------===//
// Negation (Load Complement).
-let Defs = [PSW] in {
- def LCEBR : UnaryRRE<"lcebr", 0xB303, fneg, FP32, FP32>;
- def LCDBR : UnaryRRE<"lcdbr", 0xB313, fneg, FP64, FP64>;
- def LCXBR : UnaryRRE<"lcxbr", 0xB343, fneg, FP128, FP128>;
+let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
+ def LCEBR : UnaryRRE<"lceb", 0xB303, fneg, FP32, FP32>;
+ def LCDBR : UnaryRRE<"lcdb", 0xB313, fneg, FP64, FP64>;
+ def LCXBR : UnaryRRE<"lcxb", 0xB343, fneg, FP128, FP128>;
}
// Absolute value (Load Positive).
-let Defs = [PSW] in {
- def LPEBR : UnaryRRE<"lpebr", 0xB300, fabs, FP32, FP32>;
- def LPDBR : UnaryRRE<"lpdbr", 0xB310, fabs, FP64, FP64>;
- def LPXBR : UnaryRRE<"lpxbr", 0xB340, fabs, FP128, FP128>;
+let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
+ def LPEBR : UnaryRRE<"lpeb", 0xB300, fabs, FP32, FP32>;
+ def LPDBR : UnaryRRE<"lpdb", 0xB310, fabs, FP64, FP64>;
+ def LPXBR : UnaryRRE<"lpxb", 0xB340, fabs, FP128, FP128>;
}
// Negative absolute value (Load Negative).
-let Defs = [PSW] in {
- def LNEBR : UnaryRRE<"lnebr", 0xB301, fnabs, FP32, FP32>;
- def LNDBR : UnaryRRE<"lndbr", 0xB311, fnabs, FP64, FP64>;
- def LNXBR : UnaryRRE<"lnxbr", 0xB341, fnabs, FP128, FP128>;
+let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
+ def LNEBR : UnaryRRE<"lneb", 0xB301, fnabs, FP32, FP32>;
+ def LNDBR : UnaryRRE<"lndb", 0xB311, fnabs, FP64, FP64>;
+ def LNXBR : UnaryRRE<"lnxb", 0xB341, fnabs, FP128, FP128>;
}
// Square root.
-def SQEBR : UnaryRRE<"sqebr", 0xB314, fsqrt, FP32, FP32>;
-def SQDBR : UnaryRRE<"sqdbr", 0xB315, fsqrt, FP64, FP64>;
-def SQXBR : UnaryRRE<"sqxbr", 0xB316, fsqrt, FP128, FP128>;
+def SQEBR : UnaryRRE<"sqeb", 0xB314, fsqrt, FP32, FP32>;
+def SQDBR : UnaryRRE<"sqdb", 0xB315, fsqrt, FP64, FP64>;
+def SQXBR : UnaryRRE<"sqxb", 0xB316, fsqrt, FP128, FP128>;
-def SQEB : UnaryRXE<"sqeb", 0xED14, loadu<fsqrt>, FP32>;
-def SQDB : UnaryRXE<"sqdb", 0xED15, loadu<fsqrt>, FP64>;
+def SQEB : UnaryRXE<"sqeb", 0xED14, loadu<fsqrt>, FP32, 4>;
+def SQDB : UnaryRXE<"sqdb", 0xED15, loadu<fsqrt>, FP64, 8>;
// Round to an integer, with the second operand (modifier M3) specifying
-// the rounding mode.
-//
-// These forms always check for inexact conditions. z196 added versions
-// that allow this to be suppressed (as for fnearbyint), but we don't yet
-// support -march=z196.
-let Defs = [PSW] in {
- def FIEBR : UnaryRRF<"fiebr", 0xB357, FP32, FP32>;
- def FIDBR : UnaryRRF<"fidbr", 0xB35F, FP64, FP64>;
- def FIXBR : UnaryRRF<"fixbr", 0xB347, FP128, FP128>;
-}
+// the rounding mode. These forms always check for inexact conditions.
+def FIEBR : UnaryRRF<"fieb", 0xB357, FP32, FP32>;
+def FIDBR : UnaryRRF<"fidb", 0xB35F, FP64, FP64>;
+def FIXBR : UnaryRRF<"fixb", 0xB347, FP128, FP128>;
+
+// Extended forms of the previous three instructions. M4 can be set to 4
+// to suppress detection of inexact conditions.
+def FIEBRA : UnaryRRF4<"fiebra", 0xB357, FP32, FP32>,
+ Requires<[FeatureFPExtension]>;
+def FIDBRA : UnaryRRF4<"fidbra", 0xB35F, FP64, FP64>,
+ Requires<[FeatureFPExtension]>;
+def FIXBRA : UnaryRRF4<"fixbra", 0xB347, FP128, FP128>,
+ Requires<[FeatureFPExtension]>;
// frint rounds according to the current mode (modifier 0) and detects
// inexact conditions.
-def : Pat<(frint FP32:$src), (FIEBR FP32:$src, 0)>;
-def : Pat<(frint FP64:$src), (FIDBR FP64:$src, 0)>;
-def : Pat<(frint FP128:$src), (FIXBR FP128:$src, 0)>;
+def : Pat<(frint FP32:$src), (FIEBR 0, FP32:$src)>;
+def : Pat<(frint FP64:$src), (FIDBR 0, FP64:$src)>;
+def : Pat<(frint FP128:$src), (FIXBR 0, FP128:$src)>;
+
+let Predicates = [FeatureFPExtension] in {
+ // fnearbyint is like frint but does not detect inexact conditions.
+ def : Pat<(fnearbyint FP32:$src), (FIEBRA 0, FP32:$src, 4)>;
+ def : Pat<(fnearbyint FP64:$src), (FIDBRA 0, FP64:$src, 4)>;
+ def : Pat<(fnearbyint FP128:$src), (FIXBRA 0, FP128:$src, 4)>;
+
+ // floor is no longer allowed to raise an inexact condition,
+ // so restrict it to the cases where the condition can be suppressed.
+ // Mode 7 is round towards -inf.
+ def : Pat<(ffloor FP32:$src), (FIEBRA 7, FP32:$src, 4)>;
+ def : Pat<(ffloor FP64:$src), (FIDBRA 7, FP64:$src, 4)>;
+ def : Pat<(ffloor FP128:$src), (FIXBRA 7, FP128:$src, 4)>;
+
+ // Same idea for ceil, where mode 6 is round towards +inf.
+ def : Pat<(fceil FP32:$src), (FIEBRA 6, FP32:$src, 4)>;
+ def : Pat<(fceil FP64:$src), (FIDBRA 6, FP64:$src, 4)>;
+ def : Pat<(fceil FP128:$src), (FIXBRA 6, FP128:$src, 4)>;
+
+ // Same idea for trunc, where mode 5 is round towards zero.
+ def : Pat<(ftrunc FP32:$src), (FIEBRA 5, FP32:$src, 4)>;
+ def : Pat<(ftrunc FP64:$src), (FIDBRA 5, FP64:$src, 4)>;
+ def : Pat<(ftrunc FP128:$src), (FIXBRA 5, FP128:$src, 4)>;
+
+ // Same idea for round, where mode 1 is round towards nearest with
+ // ties away from zero.
+ def : Pat<(frnd FP32:$src), (FIEBRA 1, FP32:$src, 4)>;
+ def : Pat<(frnd FP64:$src), (FIDBRA 1, FP64:$src, 4)>;
+ def : Pat<(frnd FP128:$src), (FIXBRA 1, FP128:$src, 4)>;
+}
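[Editorial note: the rounding-mode patterns above are easy to lose in the diff, so here is the M3/M4 assignment they encode, collected into a runnable reference table. The values are taken directly from the patterns in this hunk; M4 = 4 suppresses the inexact condition, and plain frint uses the older FI*BR form, which has no M4 operand.]

    #include <cstdio>

    // M3 rounding modes used by the FI*BR/FI*BRA patterns in this hunk.
    struct ModeEntry { const char *Node; int M3; int M4; };

    int main() {
      const ModeEntry Table[] = {
        {"frint",      0, 0}, // current mode, inexact detected (FI*BR, no M4)
        {"fnearbyint", 0, 4}, // current mode, inexact suppressed
        {"ffloor",     7, 4}, // round towards -inf
        {"fceil",      6, 4}, // round towards +inf
        {"ftrunc",     5, 4}, // round towards zero
        {"frnd",       1, 4}, // nearest, ties away from zero
      };
      for (const ModeEntry &E : Table)
        std::printf("%-10s M3=%d M4=%d\n", E.Node, E.M3, E.M4);
      return 0;
    }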
//===----------------------------------------------------------------------===//
// Binary arithmetic
//===----------------------------------------------------------------------===//
// Addition.
-let Defs = [PSW] in {
+let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
let isCommutable = 1 in {
- def AEBR : BinaryRRE<"aebr", 0xB30A, fadd, FP32, FP32>;
- def ADBR : BinaryRRE<"adbr", 0xB31A, fadd, FP64, FP64>;
- def AXBR : BinaryRRE<"axbr", 0xB34A, fadd, FP128, FP128>;
+ def AEBR : BinaryRRE<"aeb", 0xB30A, fadd, FP32, FP32>;
+ def ADBR : BinaryRRE<"adb", 0xB31A, fadd, FP64, FP64>;
+ def AXBR : BinaryRRE<"axb", 0xB34A, fadd, FP128, FP128>;
}
- def AEB : BinaryRXE<"aeb", 0xED0A, fadd, FP32, load>;
- def ADB : BinaryRXE<"adb", 0xED1A, fadd, FP64, load>;
+ def AEB : BinaryRXE<"aeb", 0xED0A, fadd, FP32, load, 4>;
+ def ADB : BinaryRXE<"adb", 0xED1A, fadd, FP64, load, 8>;
}
// Subtraction.
-let Defs = [PSW] in {
- def SEBR : BinaryRRE<"sebr", 0xB30B, fsub, FP32, FP32>;
- def SDBR : BinaryRRE<"sdbr", 0xB31B, fsub, FP64, FP64>;
- def SXBR : BinaryRRE<"sxbr", 0xB34B, fsub, FP128, FP128>;
+let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
+ def SEBR : BinaryRRE<"seb", 0xB30B, fsub, FP32, FP32>;
+ def SDBR : BinaryRRE<"sdb", 0xB31B, fsub, FP64, FP64>;
+ def SXBR : BinaryRRE<"sxb", 0xB34B, fsub, FP128, FP128>;
- def SEB : BinaryRXE<"seb", 0xED0B, fsub, FP32, load>;
- def SDB : BinaryRXE<"sdb", 0xED1B, fsub, FP64, load>;
+ def SEB : BinaryRXE<"seb", 0xED0B, fsub, FP32, load, 4>;
+ def SDB : BinaryRXE<"sdb", 0xED1B, fsub, FP64, load, 8>;
}
// Multiplication.
let isCommutable = 1 in {
- def MEEBR : BinaryRRE<"meebr", 0xB317, fmul, FP32, FP32>;
- def MDBR : BinaryRRE<"mdbr", 0xB31C, fmul, FP64, FP64>;
- def MXBR : BinaryRRE<"mxbr", 0xB34C, fmul, FP128, FP128>;
+ def MEEBR : BinaryRRE<"meeb", 0xB317, fmul, FP32, FP32>;
+ def MDBR : BinaryRRE<"mdb", 0xB31C, fmul, FP64, FP64>;
+ def MXBR : BinaryRRE<"mxb", 0xB34C, fmul, FP128, FP128>;
}
-def MEEB : BinaryRXE<"meeb", 0xED17, fmul, FP32, load>;
-def MDB : BinaryRXE<"mdb", 0xED1C, fmul, FP64, load>;
+def MEEB : BinaryRXE<"meeb", 0xED17, fmul, FP32, load, 4>;
+def MDB : BinaryRXE<"mdb", 0xED1C, fmul, FP64, load, 8>;
// f64 multiplication of two FP32 registers.
-def MDEBR : BinaryRRE<"mdebr", 0xB30C, null_frag, FP64, FP32>;
+def MDEBR : BinaryRRE<"mdeb", 0xB30C, null_frag, FP64, FP32>;
def : Pat<(fmul (f64 (fextend FP32:$src1)), (f64 (fextend FP32:$src2))),
(MDEBR (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
- FP32:$src1, subreg_32bit), FP32:$src2)>;
+ FP32:$src1, subreg_h32), FP32:$src2)>;
// f64 multiplication of an FP32 register and an f32 memory.
-def MDEB : BinaryRXE<"mdeb", 0xED0C, null_frag, FP64, load>;
+def MDEB : BinaryRXE<"mdeb", 0xED0C, null_frag, FP64, load, 4>;
def : Pat<(fmul (f64 (fextend FP32:$src1)),
(f64 (extloadf32 bdxaddr12only:$addr))),
- (MDEB (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_32bit),
+ (MDEB (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_h32),
bdxaddr12only:$addr)>;
// f128 multiplication of two FP64 registers.
-def MXDBR : BinaryRRE<"mxdbr", 0xB307, null_frag, FP128, FP64>;
+def MXDBR : BinaryRRE<"mxdb", 0xB307, null_frag, FP128, FP64>;
def : Pat<(fmul (f128 (fextend FP64:$src1)), (f128 (fextend FP64:$src2))),
(MXDBR (INSERT_SUBREG (f128 (IMPLICIT_DEF)),
- FP64:$src1, subreg_high), FP64:$src2)>;
+ FP64:$src1, subreg_h64), FP64:$src2)>;
// f128 multiplication of an FP64 register and an f64 memory.
-def MXDB : BinaryRXE<"mxdb", 0xED07, null_frag, FP128, load>;
+def MXDB : BinaryRXE<"mxdb", 0xED07, null_frag, FP128, load, 8>;
def : Pat<(fmul (f128 (fextend FP64:$src1)),
(f128 (extloadf64 bdxaddr12only:$addr))),
- (MXDB (INSERT_SUBREG (f128 (IMPLICIT_DEF)), FP64:$src1, subreg_high),
+ (MXDB (INSERT_SUBREG (f128 (IMPLICIT_DEF)), FP64:$src1, subreg_h64),
bdxaddr12only:$addr)>;
// Fused multiply-add.
-def MAEBR : TernaryRRD<"maebr", 0xB30E, z_fma, FP32>;
-def MADBR : TernaryRRD<"madbr", 0xB31E, z_fma, FP64>;
+def MAEBR : TernaryRRD<"maeb", 0xB30E, z_fma, FP32>;
+def MADBR : TernaryRRD<"madb", 0xB31E, z_fma, FP64>;
-def MAEB : TernaryRXF<"maeb", 0xED0E, z_fma, FP32, load>;
-def MADB : TernaryRXF<"madb", 0xED1E, z_fma, FP64, load>;
+def MAEB : TernaryRXF<"maeb", 0xED0E, z_fma, FP32, load, 4>;
+def MADB : TernaryRXF<"madb", 0xED1E, z_fma, FP64, load, 8>;
// Fused multiply-subtract.
-def MSEBR : TernaryRRD<"msebr", 0xB30F, z_fms, FP32>;
-def MSDBR : TernaryRRD<"msdbr", 0xB31F, z_fms, FP64>;
+def MSEBR : TernaryRRD<"mseb", 0xB30F, z_fms, FP32>;
+def MSDBR : TernaryRRD<"msdb", 0xB31F, z_fms, FP64>;
-def MSEB : TernaryRXF<"mseb", 0xED0F, z_fms, FP32, load>;
-def MSDB : TernaryRXF<"msdb", 0xED1F, z_fms, FP64, load>;
+def MSEB : TernaryRXF<"mseb", 0xED0F, z_fms, FP32, load, 4>;
+def MSDB : TernaryRXF<"msdb", 0xED1F, z_fms, FP64, load, 8>;
// Division.
-def DEBR : BinaryRRE<"debr", 0xB30D, fdiv, FP32, FP32>;
-def DDBR : BinaryRRE<"ddbr", 0xB31D, fdiv, FP64, FP64>;
-def DXBR : BinaryRRE<"dxbr", 0xB34D, fdiv, FP128, FP128>;
+def DEBR : BinaryRRE<"deb", 0xB30D, fdiv, FP32, FP32>;
+def DDBR : BinaryRRE<"ddb", 0xB31D, fdiv, FP64, FP64>;
+def DXBR : BinaryRRE<"dxb", 0xB34D, fdiv, FP128, FP128>;
-def DEB : BinaryRXE<"deb", 0xED0D, fdiv, FP32, load>;
-def DDB : BinaryRXE<"ddb", 0xED1D, fdiv, FP64, load>;
+def DEB : BinaryRXE<"deb", 0xED0D, fdiv, FP32, load, 4>;
+def DDB : BinaryRXE<"ddb", 0xED1D, fdiv, FP64, load, 8>;
//===----------------------------------------------------------------------===//
// Comparisons
//===----------------------------------------------------------------------===//
-let Defs = [PSW] in {
- def CEBR : CompareRRE<"cebr", 0xB309, z_cmp, FP32, FP32>;
- def CDBR : CompareRRE<"cdbr", 0xB319, z_cmp, FP64, FP64>;
- def CXBR : CompareRRE<"cxbr", 0xB349, z_cmp, FP128, FP128>;
+let Defs = [CC], CCValues = 0xF in {
+ def CEBR : CompareRRE<"ceb", 0xB309, z_fcmp, FP32, FP32>;
+ def CDBR : CompareRRE<"cdb", 0xB319, z_fcmp, FP64, FP64>;
+ def CXBR : CompareRRE<"cxb", 0xB349, z_fcmp, FP128, FP128>;
- def CEB : CompareRXE<"ceb", 0xED09, z_cmp, FP32, load>;
- def CDB : CompareRXE<"cdb", 0xED19, z_cmp, FP64, load>;
+ def CEB : CompareRXE<"ceb", 0xED09, z_fcmp, FP32, load, 4>;
+ def CDB : CompareRXE<"cdb", 0xED19, z_fcmp, FP64, load, 8>;
}
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
index b32b7eb..a8efe16 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -21,12 +21,24 @@ class InstSystemZ<int size, dag outs, dag ins, string asmstr,
let Pattern = pattern;
let AsmString = asmstr;
- // Used to identify a group of related instructions, such as ST and STY.
- string Function = "";
-
- // "12" for an instruction that has a ...Y equivalent, "20" for that
- // ...Y equivalent.
- string PairType = "none";
+ // Some instructions come in pairs, one having a 12-bit displacement
+ // and the other having a 20-bit displacement. Both instructions in
+ // the pair have the same DispKey and their DispSizes are "12" and "20"
+ // respectively.
+ string DispKey = "";
+ string DispSize = "none";
+
+ // Many register-based <INSN>R instructions have a memory-based <INSN>
+ // counterpart. OpKey uniquely identifies <INSN>, while OpType is
+ // "reg" for <INSN>R and "mem" for <INSN>.
+ string OpKey = "";
+ string OpType = "none";
+
+ // Many distinct-operands instructions have older 2-operand equivalents.
+ // NumOpsKey uniquely identifies one of these 2-operand and 3-operand pairs,
+ // with NumOpsValue being "2" or "3" as appropriate.
+ string NumOpsKey = "";
+ string NumOpsValue = "none";
// True if this instruction is a simple D(X,B) load of a register
// (with no sign or zero extension).
@@ -46,11 +58,40 @@ class InstSystemZ<int size, dag outs, dag ins, string asmstr,
// operations.
bit Is128Bit = 0;
- let TSFlags{0} = SimpleBDXLoad;
- let TSFlags{1} = SimpleBDXStore;
- let TSFlags{2} = Has20BitOffset;
- let TSFlags{3} = HasIndex;
- let TSFlags{4} = Is128Bit;
+ // The access size of all memory operands in bytes, or 0 if not known.
+ bits<5> AccessBytes = 0;
+
+ // If the instruction sets CC to a useful value, this gives the mask
+ // of all possible CC results. The mask has the same form as
+ // SystemZ::CCMASK_*.
+ bits<4> CCValues = 0;
+
+ // The subset of CCValues that have the same meaning as they would after
+ // a comparison of the first operand against zero.
+ bits<4> CompareZeroCCMask = 0;
+
+ // True if the instruction is conditional and if the CC mask operand
+ // comes first (as for BRC, etc.).
+ bit CCMaskFirst = 0;
+
+ // Similar, but true if the CC mask operand comes last (as for LOC, etc.).
+ bit CCMaskLast = 0;
+
+ // True if the instruction is the "logical" rather than "arithmetic" form,
+ // in cases where a distinction exists.
+ bit IsLogical = 0;
+
+ let TSFlags{0} = SimpleBDXLoad;
+ let TSFlags{1} = SimpleBDXStore;
+ let TSFlags{2} = Has20BitOffset;
+ let TSFlags{3} = HasIndex;
+ let TSFlags{4} = Is128Bit;
+ let TSFlags{9-5} = AccessBytes;
+ let TSFlags{13-10} = CCValues;
+ let TSFlags{17-14} = CompareZeroCCMask;
+ let TSFlags{18} = CCMaskFirst;
+ let TSFlags{19} = CCMaskLast;
+ let TSFlags{20} = IsLogical;
}
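[Editorial note: a sketch of decoding the TSFlags layout defined above. The shift and mask constants mirror the bit assignments in this hunk exactly; the helper names are illustrative and not necessarily the ones the backend uses.]

    #include <cstdint>
    #include <cstdio>

    // Field accessors matching the TSFlags bit layout in this hunk.
    static unsigned accessBytes(uint64_t Flags)   { return (Flags >> 5) & 0x1F; }
    static unsigned ccValues(uint64_t Flags)      { return (Flags >> 10) & 0xF; }
    static unsigned compareZeroCC(uint64_t Flags) { return (Flags >> 14) & 0xF; }
    static bool     ccMaskFirst(uint64_t Flags)   { return (Flags >> 18) & 1; }
    static bool     ccMaskLast(uint64_t Flags)    { return (Flags >> 19) & 1; }
    static bool     isLogical(uint64_t Flags)     { return (Flags >> 20) & 1; }

    int main() {
      uint64_t Flags = (8u << 5) | (0xFu << 10); // 8-byte access, all CC values
      std::printf("bytes=%u cc=0x%X zero=0x%X first=%d last=%d logical=%d\n",
                  accessBytes(Flags), ccValues(Flags), compareZeroCC(Flags),
                  ccMaskFirst(Flags), ccMaskLast(Flags), isLogical(Flags));
      return 0;
    }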
//===----------------------------------------------------------------------===//
@@ -61,8 +102,8 @@ class InstSystemZ<int size, dag outs, dag ins, string asmstr,
// displacement.
def getDisp12Opcode : InstrMapping {
let FilterClass = "InstSystemZ";
- let RowFields = ["Function"];
- let ColFields = ["PairType"];
+ let RowFields = ["DispKey"];
+ let ColFields = ["DispSize"];
let KeyCol = ["20"];
let ValueCols = [["12"]];
}
@@ -70,37 +111,54 @@ def getDisp12Opcode : InstrMapping {
// Return the version of an instruction that has a signed 20-bit displacement.
def getDisp20Opcode : InstrMapping {
let FilterClass = "InstSystemZ";
- let RowFields = ["Function"];
- let ColFields = ["PairType"];
+ let RowFields = ["DispKey"];
+ let ColFields = ["DispSize"];
let KeyCol = ["12"];
let ValueCols = [["20"]];
}
+// Return the memory form of a register instruction.
+def getMemOpcode : InstrMapping {
+ let FilterClass = "InstSystemZ";
+ let RowFields = ["OpKey"];
+ let ColFields = ["OpType"];
+ let KeyCol = ["reg"];
+ let ValueCols = [["mem"]];
+}
+
+// Return the 3-operand form of a 2-operand instruction.
+def getThreeOperandOpcode : InstrMapping {
+ let FilterClass = "InstSystemZ";
+ let RowFields = ["NumOpsKey"];
+ let ColFields = ["NumOpsValue"];
+ let KeyCol = ["2"];
+ let ValueCols = [["3"]];
+}
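[Editorial note: each InstrMapping above has TableGen emit a lookup function over a row/column table: rows keyed by DispKey (or OpKey, NumOpsKey), columns by DispSize (or OpType, NumOpsValue). A conceptual model of that relationship, under the assumption that DispKey strings concatenate the mnemonic and register class (e.g. "st" plus GR32); the map contents are illustrative.]

    #include <cstdio>
    #include <map>
    #include <string>

    // Conceptual model of an InstrMapping table. TableGen generates the
    // real lookup; this only illustrates rows (DispKey) vs. columns
    // (DispSize), using the ST/STY pair as the example row.
    int main() {
      std::map<std::string, std::map<std::string, std::string>> Table = {
        {"stGR32", {{"12", "ST"}, {"20", "STY"}}},
      };
      // "Return the version of an instruction that has a signed 20-bit
      // displacement": find the row, then read the "20" column.
      std::printf("%s\n", Table["stGR32"]["20"].c_str()); // prints STY
      return 0;
    }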
+
//===----------------------------------------------------------------------===//
// Instruction formats
//===----------------------------------------------------------------------===//
//
// Formats are specified using operand field declarations of the form:
//
-// bits<4> Rn : register input or output for operand n
-// bits<m> In : immediate value of width m for operand n
-// bits<4> Bn : base register for address operand n
-// bits<m> Dn : displacement value of width m for address operand n
-// bits<4> Xn : index register for address operand n
-// bits<4> Mn : mode value for operand n
+// bits<4> Rn : register input or output for operand n
+// bits<m> In : immediate value of width m for operand n
+// bits<4> BDn : address operand n, which has a base and a displacement
+// bits<m> XBDn : address operand n, which has an index, a base and a
+// displacement
+// bits<4> Xn : index register for address operand n
+// bits<4> Mn : mode value for operand n
//
-// The operand numbers ("n" in the list above) follow the architecture manual,
-// but the fields are always declared in assembly order, so there are some
-// cases where operand "2" comes after operand "3". For address operands,
-// the base register field is declared first, followed by the displacement,
-// followed by the index (if any). This matches the bdaddr* and bdxaddr*
-// orders.
+// The operand numbers ("n" in the list above) follow the architecture manual.
+// Assembly operands sometimes have a different order; in particular, R3 is
+// often written between operands 1 and 2.
//
//===----------------------------------------------------------------------===//
class InstRI<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern>
: InstSystemZ<4, outs, ins, asmstr, pattern> {
field bits<32> Inst;
+ field bits<32> SoftFail = 0;
bits<4> R1;
bits<16> I2;
@@ -111,9 +169,64 @@ class InstRI<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern>
let Inst{15-0} = I2;
}
+class InstRIEb<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<4> R1;
+ bits<4> R2;
+ bits<4> M3;
+ bits<16> RI4;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = R1;
+ let Inst{35-32} = R2;
+ let Inst{31-16} = RI4;
+ let Inst{15-12} = M3;
+ let Inst{11-8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
+class InstRIEc<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<4> R1;
+ bits<8> I2;
+ bits<4> M3;
+ bits<16> RI4;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = R1;
+ let Inst{35-32} = M3;
+ let Inst{31-16} = RI4;
+ let Inst{15-8} = I2;
+ let Inst{7-0} = op{7-0};
+}
+
+class InstRIEd<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<4> R1;
+ bits<4> R3;
+ bits<16> I2;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = R1;
+ let Inst{35-32} = R3;
+ let Inst{31-16} = I2;
+ let Inst{15-8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
class InstRIEf<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
: InstSystemZ<6, outs, ins, asmstr, pattern> {
field bits<48> Inst;
+ field bits<48> SoftFail = 0;
bits<4> R1;
bits<4> R2;
@@ -133,6 +246,7 @@ class InstRIEf<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
class InstRIL<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern>
: InstSystemZ<6, outs, ins, asmstr, pattern> {
field bits<48> Inst;
+ field bits<48> SoftFail = 0;
bits<4> R1;
bits<32> I2;
@@ -146,6 +260,7 @@ class InstRIL<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern>
class InstRR<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
: InstSystemZ<2, outs, ins, asmstr, pattern> {
field bits<16> Inst;
+ field bits<16> SoftFail = 0;
bits<4> R1;
bits<4> R2;
@@ -158,6 +273,7 @@ class InstRR<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
class InstRRD<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
: InstSystemZ<4, outs, ins, asmstr, pattern> {
field bits<32> Inst;
+ field bits<32> SoftFail = 0;
bits<4> R1;
bits<4> R3;
@@ -173,6 +289,7 @@ class InstRRD<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
class InstRRE<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
: InstSystemZ<4, outs, ins, asmstr, pattern> {
field bits<32> Inst;
+ field bits<32> SoftFail = 0;
bits<4> R1;
bits<4> R2;
@@ -186,14 +303,16 @@ class InstRRE<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
class InstRRF<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
: InstSystemZ<4, outs, ins, asmstr, pattern> {
field bits<32> Inst;
+ field bits<32> SoftFail = 0;
bits<4> R1;
bits<4> R2;
bits<4> R3;
+ bits<4> R4;
let Inst{31-16} = op;
let Inst{15-12} = R3;
- let Inst{11-8} = 0;
+ let Inst{11-8} = R4;
let Inst{7-4} = R1;
let Inst{3-0} = R2;
}
@@ -201,17 +320,14 @@ class InstRRF<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
class InstRX<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
: InstSystemZ<4, outs, ins, asmstr, pattern> {
field bits<32> Inst;
+ field bits<32> SoftFail = 0;
bits<4> R1;
- bits<4> B2;
- bits<12> D2;
- bits<4> X2;
+ bits<20> XBD2;
let Inst{31-24} = op;
let Inst{23-20} = R1;
- let Inst{19-16} = X2;
- let Inst{15-12} = B2;
- let Inst{11-0} = D2;
+ let Inst{19-0} = XBD2;
let HasIndex = 1;
}
@@ -219,17 +335,14 @@ class InstRX<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
class InstRXE<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
: InstSystemZ<6, outs, ins, asmstr, pattern> {
field bits<48> Inst;
+ field bits<48> SoftFail = 0;
bits<4> R1;
- bits<4> B2;
- bits<12> D2;
- bits<4> X2;
+ bits<20> XBD2;
let Inst{47-40} = op{15-8};
let Inst{39-36} = R1;
- let Inst{35-32} = X2;
- let Inst{31-28} = B2;
- let Inst{27-16} = D2;
+ let Inst{35-16} = XBD2;
let Inst{15-8} = 0;
let Inst{7-0} = op{7-0};
@@ -239,18 +352,15 @@ class InstRXE<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
class InstRXF<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
: InstSystemZ<6, outs, ins, asmstr, pattern> {
field bits<48> Inst;
+ field bits<48> SoftFail = 0;
bits<4> R1;
bits<4> R3;
- bits<4> B2;
- bits<12> D2;
- bits<4> X2;
+ bits<20> XBD2;
let Inst{47-40} = op{15-8};
let Inst{39-36} = R3;
- let Inst{35-32} = X2;
- let Inst{31-28} = B2;
- let Inst{27-16} = D2;
+ let Inst{35-16} = XBD2;
let Inst{15-12} = R1;
let Inst{11-8} = 0;
let Inst{7-0} = op{7-0};
@@ -261,18 +371,14 @@ class InstRXF<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
class InstRXY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
: InstSystemZ<6, outs, ins, asmstr, pattern> {
field bits<48> Inst;
+ field bits<48> SoftFail = 0;
bits<4> R1;
- bits<4> B2;
- bits<20> D2;
- bits<4> X2;
+ bits<28> XBD2;
let Inst{47-40} = op{15-8};
let Inst{39-36} = R1;
- let Inst{35-32} = X2;
- let Inst{31-28} = B2;
- let Inst{27-16} = D2{11-0};
- let Inst{15-8} = D2{19-12};
+ let Inst{35-8} = XBD2;
let Inst{7-0} = op{7-0};
let Has20BitOffset = 1;
@@ -282,34 +388,31 @@ class InstRXY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
class InstRS<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
: InstSystemZ<4, outs, ins, asmstr, pattern> {
field bits<32> Inst;
+ field bits<32> SoftFail = 0;
bits<4> R1;
bits<4> R3;
- bits<4> B2;
- bits<12> D2;
+ bits<16> BD2;
let Inst{31-24} = op;
let Inst{23-20} = R1;
let Inst{19-16} = R3;
- let Inst{15-12} = B2;
- let Inst{11-0} = D2;
+ let Inst{15-0} = BD2;
}
class InstRSY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
: InstSystemZ<6, outs, ins, asmstr, pattern> {
field bits<48> Inst;
+ field bits<48> SoftFail = 0;
bits<4> R1;
bits<4> R3;
- bits<4> B2;
- bits<20> D2;
+ bits<24> BD2;
let Inst{47-40} = op{15-8};
let Inst{39-36} = R1;
let Inst{35-32} = R3;
- let Inst{31-28} = B2;
- let Inst{27-16} = D2{11-0};
- let Inst{15-8} = D2{19-12};
+ let Inst{31-8} = BD2;
let Inst{7-0} = op{7-0};
let Has20BitOffset = 1;
@@ -318,60 +421,77 @@ class InstRSY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
class InstSI<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
: InstSystemZ<4, outs, ins, asmstr, pattern> {
field bits<32> Inst;
+ field bits<32> SoftFail = 0;
- bits<4> B1;
- bits<12> D1;
+ bits<16> BD1;
bits<8> I2;
let Inst{31-24} = op;
let Inst{23-16} = I2;
- let Inst{15-12} = B1;
- let Inst{11-0} = D1;
+ let Inst{15-0} = BD1;
}
class InstSIL<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
: InstSystemZ<6, outs, ins, asmstr, pattern> {
field bits<48> Inst;
+ field bits<48> SoftFail = 0;
- bits<4> B1;
- bits<12> D1;
+ bits<16> BD1;
bits<16> I2;
let Inst{47-32} = op;
- let Inst{31-28} = B1;
- let Inst{27-16} = D1;
+ let Inst{31-16} = BD1;
let Inst{15-0} = I2;
}
class InstSIY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
: InstSystemZ<6, outs, ins, asmstr, pattern> {
field bits<48> Inst;
+ field bits<48> SoftFail = 0;
- bits<4> B1;
- bits<20> D1;
+ bits<24> BD1;
bits<8> I2;
let Inst{47-40} = op{15-8};
let Inst{39-32} = I2;
- let Inst{31-28} = B1;
- let Inst{27-16} = D1{11-0};
- let Inst{15-8} = D1{19-12};
+ let Inst{31-8} = BD1;
let Inst{7-0} = op{7-0};
let Has20BitOffset = 1;
}
+class InstSS<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<24> BDL1;
+ bits<16> BD2;
+
+ let Inst{47-40} = op;
+ let Inst{39-16} = BDL1;
+ let Inst{15-0} = BD2;
+}
+
//===----------------------------------------------------------------------===//
// Instruction definitions with semantics
//===----------------------------------------------------------------------===//
//
-// These classes have the form <Category><Format>, where <Format> is one
+// These classes have the form [Cond]<Category><Format>, where <Format> is one
// of the formats defined above and where <Category> describes the inputs
-// and outputs. <Category> can be one of:
+// and outputs. "Cond" is used if the instruction is conditional,
+// in which case the 4-bit condition-code mask is added as a final operand.
+// <Category> can be one of:
//
// Inherent:
// One register output operand and no input operands.
//
+// BranchUnary:
+// One register output operand, one register input operand and
+// one branch displacement. The instruction stores a modified
+// form of the source register in the destination register and
+// branches on the result.
+//
// Store:
// One register or immediate input operand and one address input operand.
// The instruction stores the first operand to the address.
@@ -420,6 +540,10 @@ class InstSIY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
// One output operand and five input operands. The first two operands
// are registers and the other three are immediates.
//
+// Prefetch:
+// One 4-bit immediate operand and one address operand. The immediate
+// operand is 1 for a load prefetch and 2 for a store prefetch.
+//
// The format determines which input operands are tied to output operands,
// and also determines the shape of any address operand.
//
@@ -432,23 +556,32 @@ class InstSIY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
class InherentRRE<string mnemonic, bits<16> opcode, RegisterOperand cls,
dag src>
- : InstRRE<opcode, (outs cls:$dst), (ins),
- mnemonic#"\t$dst",
- [(set cls:$dst, src)]> {
+ : InstRRE<opcode, (outs cls:$R1), (ins),
+ mnemonic#"\t$R1",
+ [(set cls:$R1, src)]> {
let R2 = 0;
}
+class BranchUnaryRI<string mnemonic, bits<12> opcode, RegisterOperand cls>
+ : InstRI<opcode, (outs cls:$R1), (ins cls:$R1src, brtarget16:$I2),
+ mnemonic##"\t$R1, $I2", []> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+}
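[Editorial note: a hedged sketch of the BranchUnary semantics described in the category comment above: the instruction updates R1 and branches on the result. The decrement and non-zero test below model a branch-on-count style loop; the exact modification depends on the concrete opcode, which this class does not fix.]

    #include <cstdio>

    int main() {
      int R1 = 3;          // counter register
      int Iterations = 0;
      do {
        ++Iterations;      // loop body
        --R1;              // "stores a modified form of the source register"
      } while (R1 != 0);   // "branches on the result"
      std::printf("%d iterations\n", Iterations); // prints 3
      return 0;
    }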
+
class LoadMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls>
- : InstRSY<opcode, (outs cls:$dst1, cls:$dst2), (ins bdaddr20only:$addr),
- mnemonic#"\t$dst1, $dst2, $addr", []> {
+ : InstRSY<opcode, (outs cls:$R1, cls:$R3), (ins bdaddr20only:$BD2),
+ mnemonic#"\t$R1, $R3, $BD2", []> {
let mayLoad = 1;
}
class StoreRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator,
RegisterOperand cls>
- : InstRIL<opcode, (outs), (ins cls:$src, pcrel32:$addr),
- mnemonic#"\t$src, $addr",
- [(operator cls:$src, pcrel32:$addr)]> {
+ : InstRIL<opcode, (outs), (ins cls:$R1, pcrel32:$I2),
+ mnemonic#"\t$R1, $I2",
+ [(operator cls:$R1, pcrel32:$I2)]> {
let mayStore = 1;
// We want PC-relative addresses to be tried ahead of BD and BDX addresses.
// However, BDXs have two extra operands and are therefore 6 units more
@@ -457,105 +590,206 @@ class StoreRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator,
}
class StoreRX<string mnemonic, bits<8> opcode, SDPatternOperator operator,
- RegisterOperand cls, AddressingMode mode = bdxaddr12only>
- : InstRX<opcode, (outs), (ins cls:$src, mode:$addr),
- mnemonic#"\t$src, $addr",
- [(operator cls:$src, mode:$addr)]> {
+ RegisterOperand cls, bits<5> bytes,
+ AddressingMode mode = bdxaddr12only>
+ : InstRX<opcode, (outs), (ins cls:$R1, mode:$XBD2),
+ mnemonic#"\t$R1, $XBD2",
+ [(operator cls:$R1, mode:$XBD2)]> {
+ let OpKey = mnemonic ## cls;
+ let OpType = "mem";
let mayStore = 1;
+ let AccessBytes = bytes;
}
class StoreRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
- RegisterOperand cls, AddressingMode mode = bdxaddr20only>
- : InstRXY<opcode, (outs), (ins cls:$src, mode:$addr),
- mnemonic#"\t$src, $addr",
- [(operator cls:$src, mode:$addr)]> {
+ RegisterOperand cls, bits<5> bytes,
+ AddressingMode mode = bdxaddr20only>
+ : InstRXY<opcode, (outs), (ins cls:$R1, mode:$XBD2),
+ mnemonic#"\t$R1, $XBD2",
+ [(operator cls:$R1, mode:$XBD2)]> {
+ let OpKey = mnemonic ## cls;
+ let OpType = "mem";
let mayStore = 1;
+ let AccessBytes = bytes;
}
multiclass StoreRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode,
- SDPatternOperator operator, RegisterOperand cls> {
- let Function = mnemonic ## #cls in {
- let PairType = "12" in
- def "" : StoreRX<mnemonic, rxOpcode, operator, cls, bdxaddr12pair>;
- let PairType = "20" in
- def Y : StoreRXY<mnemonic#"y", rxyOpcode, operator, cls, bdxaddr20pair>;
+ SDPatternOperator operator, RegisterOperand cls,
+ bits<5> bytes> {
+ let DispKey = mnemonic ## #cls in {
+ let DispSize = "12" in
+ def "" : StoreRX<mnemonic, rxOpcode, operator, cls, bytes, bdxaddr12pair>;
+ let DispSize = "20" in
+ def Y : StoreRXY<mnemonic#"y", rxyOpcode, operator, cls, bytes,
+ bdxaddr20pair>;
}
}
class StoreMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls>
- : InstRSY<opcode, (outs), (ins cls:$from, cls:$to, bdaddr20only:$addr),
- mnemonic#"\t$from, $to, $addr", []> {
+ : InstRSY<opcode, (outs), (ins cls:$R1, cls:$R3, bdaddr20only:$BD2),
+ mnemonic#"\t$R1, $R3, $BD2", []> {
let mayStore = 1;
}
+// StoreSI* instructions are used to store an integer to memory, but the
+// addresses are more restricted than for normal stores. If we are in the
+// situation of having to force either the address into a register or the
+// constant into a register, it's usually better to do the latter.
+// We therefore match the address in the same way as a normal store and
+// only use the StoreSI* instruction if the matched address is suitable.
class StoreSI<string mnemonic, bits<8> opcode, SDPatternOperator operator,
- Immediate imm, AddressingMode mode = bdaddr12only>
- : InstSI<opcode, (outs), (ins mode:$addr, imm:$src),
- mnemonic#"\t$addr, $src",
- [(operator imm:$src, mode:$addr)]> {
+ Immediate imm>
+ : InstSI<opcode, (outs), (ins mviaddr12pair:$BD1, imm:$I2),
+ mnemonic#"\t$BD1, $I2",
+ [(operator imm:$I2, mviaddr12pair:$BD1)]> {
let mayStore = 1;
}
class StoreSIY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
- Immediate imm, AddressingMode mode = bdaddr20only>
- : InstSIY<opcode, (outs), (ins mode:$addr, imm:$src),
- mnemonic#"\t$addr, $src",
- [(operator imm:$src, mode:$addr)]> {
+ Immediate imm>
+ : InstSIY<opcode, (outs), (ins mviaddr20pair:$BD1, imm:$I2),
+ mnemonic#"\t$BD1, $I2",
+ [(operator imm:$I2, mviaddr20pair:$BD1)]> {
let mayStore = 1;
}
class StoreSIL<string mnemonic, bits<16> opcode, SDPatternOperator operator,
Immediate imm>
- : InstSIL<opcode, (outs), (ins bdaddr12only:$addr, imm:$src),
- mnemonic#"\t$addr, $src",
- [(operator imm:$src, bdaddr12only:$addr)]> {
+ : InstSIL<opcode, (outs), (ins mviaddr12pair:$BD1, imm:$I2),
+ mnemonic#"\t$BD1, $I2",
+ [(operator imm:$I2, mviaddr12pair:$BD1)]> {
let mayStore = 1;
}
multiclass StoreSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode,
SDPatternOperator operator, Immediate imm> {
- let Function = mnemonic in {
- let PairType = "12" in
- def "" : StoreSI<mnemonic, siOpcode, operator, imm, bdaddr12pair>;
- let PairType = "20" in
- def Y : StoreSIY<mnemonic#"y", siyOpcode, operator, imm, bdaddr20pair>;
+ let DispKey = mnemonic in {
+ let DispSize = "12" in
+ def "" : StoreSI<mnemonic, siOpcode, operator, imm>;
+ let DispSize = "20" in
+ def Y : StoreSIY<mnemonic#"y", siyOpcode, operator, imm>;
}
}
+class CondStoreRSY<string mnemonic, bits<16> opcode,
+ RegisterOperand cls, bits<5> bytes,
+ AddressingMode mode = bdaddr20only>
+ : InstRSY<opcode, (outs), (ins cls:$R1, mode:$BD2, cond4:$valid, cond4:$R3),
+ mnemonic#"$R3\t$R1, $BD2", []>,
+ Requires<[FeatureLoadStoreOnCond]> {
+ let mayStore = 1;
+ let AccessBytes = bytes;
+ let CCMaskLast = 1;
+}
+
+// Like CondStoreRSY, but used for the raw assembly form. The condition-code
+// mask is the third operand rather than being part of the mnemonic.
+class AsmCondStoreRSY<string mnemonic, bits<16> opcode,
+ RegisterOperand cls, bits<5> bytes,
+ AddressingMode mode = bdaddr20only>
+ : InstRSY<opcode, (outs), (ins cls:$R1, mode:$BD2, uimm8zx4:$R3),
+ mnemonic#"\t$R1, $BD2, $R3", []>,
+ Requires<[FeatureLoadStoreOnCond]> {
+ let mayStore = 1;
+ let AccessBytes = bytes;
+}
+
+// Like CondStoreRSY, but with a fixed CC mask.
+class FixedCondStoreRSY<string mnemonic, bits<16> opcode,
+ RegisterOperand cls, bits<4> ccmask, bits<5> bytes,
+ AddressingMode mode = bdaddr20only>
+ : InstRSY<opcode, (outs), (ins cls:$R1, mode:$BD2),
+ mnemonic#"\t$R1, $BD2", []>,
+ Requires<[FeatureLoadStoreOnCond]> {
+ let mayStore = 1;
+ let AccessBytes = bytes;
+ let R3 = ccmask;
+}
+
class UnaryRR<string mnemonic, bits<8> opcode, SDPatternOperator operator,
RegisterOperand cls1, RegisterOperand cls2>
- : InstRR<opcode, (outs cls1:$dst), (ins cls2:$src),
- mnemonic#"\t$dst, $src",
- [(set cls1:$dst, (operator cls2:$src))]>;
+ : InstRR<opcode, (outs cls1:$R1), (ins cls2:$R2),
+ mnemonic#"r\t$R1, $R2",
+ [(set cls1:$R1, (operator cls2:$R2))]> {
+ let OpKey = mnemonic ## cls1;
+ let OpType = "reg";
+}
class UnaryRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
RegisterOperand cls1, RegisterOperand cls2>
- : InstRRE<opcode, (outs cls1:$dst), (ins cls2:$src),
- mnemonic#"\t$dst, $src",
- [(set cls1:$dst, (operator cls2:$src))]>;
+ : InstRRE<opcode, (outs cls1:$R1), (ins cls2:$R2),
+ mnemonic#"r\t$R1, $R2",
+ [(set cls1:$R1, (operator cls2:$R2))]> {
+ let OpKey = mnemonic ## cls1;
+ let OpType = "reg";
+}
class UnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
RegisterOperand cls2>
- : InstRRF<opcode, (outs cls1:$dst), (ins cls2:$src, uimm8zx4:$mode),
- mnemonic#"\t$dst, $mode, $src", []>;
+ : InstRRF<opcode, (outs cls1:$R1), (ins uimm8zx4:$R3, cls2:$R2),
+ mnemonic#"r\t$R1, $R3, $R2", []> {
+ let OpKey = mnemonic ## cls1;
+ let OpType = "reg";
+ let R4 = 0;
+}
+
+class UnaryRRF4<string mnemonic, bits<16> opcode, RegisterOperand cls1,
+ RegisterOperand cls2>
+ : InstRRF<opcode, (outs cls1:$R1), (ins uimm8zx4:$R3, cls2:$R2, uimm8zx4:$R4),
+ mnemonic#"\t$R1, $R3, $R2, $R4", []>;
+
+// These instructions are generated by if conversion. The old value of R1
+// is added as an implicit use.
+class CondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
+ RegisterOperand cls2>
+ : InstRRF<opcode, (outs cls1:$R1), (ins cls2:$R2, cond4:$valid, cond4:$R3),
+ mnemonic#"r$R3\t$R1, $R2", []>,
+ Requires<[FeatureLoadStoreOnCond]> {
+ let CCMaskLast = 1;
+ let R4 = 0;
+}
+
+// Like CondUnaryRRF, but used for the raw assembly form. The condition-code
+// mask is the third operand rather than being part of the mnemonic.
+class AsmCondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
+ RegisterOperand cls2>
+ : InstRRF<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2, uimm8zx4:$R3),
+ mnemonic#"r\t$R1, $R2, $R3", []>,
+ Requires<[FeatureLoadStoreOnCond]> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+ let R4 = 0;
+}
+
+// Like CondUnaryRRF, but with a fixed CC mask.
+class FixedCondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
+ RegisterOperand cls2, bits<4> ccmask>
+ : InstRRF<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2),
+ mnemonic#"\t$R1, $R2", []>,
+ Requires<[FeatureLoadStoreOnCond]> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+ let R3 = ccmask;
+ let R4 = 0;
+}
class UnaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator,
RegisterOperand cls, Immediate imm>
- : InstRI<opcode, (outs cls:$dst), (ins imm:$src),
- mnemonic#"\t$dst, $src",
- [(set cls:$dst, (operator imm:$src))]>;
+ : InstRI<opcode, (outs cls:$R1), (ins imm:$I2),
+ mnemonic#"\t$R1, $I2",
+ [(set cls:$R1, (operator imm:$I2))]>;
class UnaryRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator,
RegisterOperand cls, Immediate imm>
- : InstRIL<opcode, (outs cls:$dst), (ins imm:$src),
- mnemonic#"\t$dst, $src",
- [(set cls:$dst, (operator imm:$src))]>;
+ : InstRIL<opcode, (outs cls:$R1), (ins imm:$I2),
+ mnemonic#"\t$R1, $I2",
+ [(set cls:$R1, (operator imm:$I2))]>;
class UnaryRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator,
RegisterOperand cls>
- : InstRIL<opcode, (outs cls:$dst), (ins pcrel32:$addr),
- mnemonic#"\t$dst, $addr",
- [(set cls:$dst, (operator pcrel32:$addr))]> {
+ : InstRIL<opcode, (outs cls:$R1), (ins pcrel32:$I2),
+ mnemonic#"\t$R1, $I2",
+ [(set cls:$R1, (operator pcrel32:$I2))]> {
let mayLoad = 1;
// We want PC-relative addresses to be tried ahead of BD and BDX addresses.
// However, BDXs have two extra operands and are therefore 6 units more
@@ -563,148 +797,267 @@ class UnaryRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator,
let AddedComplexity = 7;
}
+class CondUnaryRSY<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator, RegisterOperand cls,
+ bits<5> bytes, AddressingMode mode = bdaddr20only>
+ : InstRSY<opcode, (outs cls:$R1),
+ (ins cls:$R1src, mode:$BD2, cond4:$valid, cond4:$R3),
+ mnemonic#"$R3\t$R1, $BD2",
+ [(set cls:$R1,
+ (z_select_ccmask (load bdaddr20only:$BD2), cls:$R1src,
+ cond4:$valid, cond4:$R3))]>,
+ Requires<[FeatureLoadStoreOnCond]> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+ let mayLoad = 1;
+ let AccessBytes = bytes;
+ let CCMaskLast = 1;
+}
+
+// Like CondUnaryRSY, but used for the raw assembly form. The condition-code
+// mask is the third operand rather than being part of the mnemonic.
+class AsmCondUnaryRSY<string mnemonic, bits<16> opcode,
+ RegisterOperand cls, bits<5> bytes,
+ AddressingMode mode = bdaddr20only>
+ : InstRSY<opcode, (outs cls:$R1), (ins cls:$R1src, mode:$BD2, uimm8zx4:$R3),
+ mnemonic#"\t$R1, $BD2, $R3", []>,
+ Requires<[FeatureLoadStoreOnCond]> {
+ let mayLoad = 1;
+ let AccessBytes = bytes;
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+}
+
+// Like CondUnaryRSY, but with a fixed CC mask.
+class FixedCondUnaryRSY<string mnemonic, bits<16> opcode,
+ RegisterOperand cls, bits<4> ccmask, bits<5> bytes,
+ AddressingMode mode = bdaddr20only>
+ : InstRSY<opcode, (outs cls:$R1), (ins cls:$R1src, mode:$BD2),
+ mnemonic#"\t$R1, $BD2", []>,
+ Requires<[FeatureLoadStoreOnCond]> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+ let R3 = ccmask;
+ let mayLoad = 1;
+ let AccessBytes = bytes;
+}
+
class UnaryRX<string mnemonic, bits<8> opcode, SDPatternOperator operator,
- RegisterOperand cls, AddressingMode mode = bdxaddr12only>
- : InstRX<opcode, (outs cls:$dst), (ins mode:$addr),
- mnemonic#"\t$dst, $addr",
- [(set cls:$dst, (operator mode:$addr))]> {
+ RegisterOperand cls, bits<5> bytes,
+ AddressingMode mode = bdxaddr12only>
+ : InstRX<opcode, (outs cls:$R1), (ins mode:$XBD2),
+ mnemonic#"\t$R1, $XBD2",
+ [(set cls:$R1, (operator mode:$XBD2))]> {
+ let OpKey = mnemonic ## cls;
+ let OpType = "mem";
let mayLoad = 1;
+ let AccessBytes = bytes;
}
class UnaryRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
- RegisterOperand cls>
- : InstRXE<opcode, (outs cls:$dst), (ins bdxaddr12only:$addr),
- mnemonic#"\t$dst, $addr",
- [(set cls:$dst, (operator bdxaddr12only:$addr))]> {
+ RegisterOperand cls, bits<5> bytes>
+ : InstRXE<opcode, (outs cls:$R1), (ins bdxaddr12only:$XBD2),
+ mnemonic#"\t$R1, $XBD2",
+ [(set cls:$R1, (operator bdxaddr12only:$XBD2))]> {
+ let OpKey = mnemonic ## cls;
+ let OpType = "mem";
let mayLoad = 1;
+ let AccessBytes = bytes;
}
class UnaryRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
- RegisterOperand cls, AddressingMode mode = bdxaddr20only>
- : InstRXY<opcode, (outs cls:$dst), (ins mode:$addr),
- mnemonic#"\t$dst, $addr",
- [(set cls:$dst, (operator mode:$addr))]> {
+ RegisterOperand cls, bits<5> bytes,
+ AddressingMode mode = bdxaddr20only>
+ : InstRXY<opcode, (outs cls:$R1), (ins mode:$XBD2),
+ mnemonic#"\t$R1, $XBD2",
+ [(set cls:$R1, (operator mode:$XBD2))]> {
+ let OpKey = mnemonic ## cls;
+ let OpType = "mem";
let mayLoad = 1;
+ let AccessBytes = bytes;
}
multiclass UnaryRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode,
- SDPatternOperator operator, RegisterOperand cls> {
- let Function = mnemonic ## #cls in {
- let PairType = "12" in
- def "" : UnaryRX<mnemonic, rxOpcode, operator, cls, bdxaddr12pair>;
- let PairType = "20" in
- def Y : UnaryRXY<mnemonic#"y", rxyOpcode, operator, cls, bdxaddr20pair>;
+ SDPatternOperator operator, RegisterOperand cls,
+ bits<5> bytes> {
+ let DispKey = mnemonic ## #cls in {
+ let DispSize = "12" in
+ def "" : UnaryRX<mnemonic, rxOpcode, operator, cls, bytes, bdxaddr12pair>;
+ let DispSize = "20" in
+ def Y : UnaryRXY<mnemonic#"y", rxyOpcode, operator, cls, bytes,
+ bdxaddr20pair>;
}
}
class BinaryRR<string mnemonic, bits<8> opcode, SDPatternOperator operator,
RegisterOperand cls1, RegisterOperand cls2>
- : InstRR<opcode, (outs cls1:$dst), (ins cls1:$src1, cls2:$src2),
- mnemonic#"\t$dst, $src2",
- [(set cls1:$dst, (operator cls1:$src1, cls2:$src2))]> {
- let Constraints = "$src1 = $dst";
- let DisableEncoding = "$src1";
+ : InstRR<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2),
+ mnemonic#"r\t$R1, $R2",
+ [(set cls1:$R1, (operator cls1:$R1src, cls2:$R2))]> {
+ let OpKey = mnemonic ## cls1;
+ let OpType = "reg";
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
}
class BinaryRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
RegisterOperand cls1, RegisterOperand cls2>
- : InstRRE<opcode, (outs cls1:$dst), (ins cls1:$src1, cls2:$src2),
- mnemonic#"\t$dst, $src2",
- [(set cls1:$dst, (operator cls1:$src1, cls2:$src2))]> {
- let Constraints = "$src1 = $dst";
- let DisableEncoding = "$src1";
+ : InstRRE<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2),
+ mnemonic#"r\t$R1, $R2",
+ [(set cls1:$R1, (operator cls1:$R1src, cls2:$R2))]> {
+ let OpKey = mnemonic ## cls1;
+ let OpType = "reg";
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
}
-// Here the assembly and dag operands are in natural order,
-// but the first input operand maps to R3 and the second to R2.
-// This is used for "CPSDR R1, R3, R2", which is equivalent to
-// R1 = copysign (R3, R2).
-//
-// Direct uses of the instruction must pass operands in encoding order --
-// R1, R2, R3 -- so they must pass the source operands in reverse order.
-class BinaryRevRRF<string mnemonic, bits<16> opcode, SDPatternOperator operator,
- RegisterOperand cls1, RegisterOperand cls2>
- : InstRRF<opcode, (outs cls1:$dst), (ins cls2:$src2, cls1:$src1),
- mnemonic#"\t$dst, $src1, $src2",
- [(set cls1:$dst, (operator cls1:$src1, cls2:$src2))]>;
+class BinaryRRF<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : InstRRF<opcode, (outs cls1:$R1), (ins cls1:$R3, cls2:$R2),
+ mnemonic#"r\t$R1, $R3, $R2",
+ [(set cls1:$R1, (operator cls1:$R3, cls2:$R2))]> {
+ let OpKey = mnemonic ## cls1;
+ let OpType = "reg";
+ let R4 = 0;
+}
+
+class BinaryRRFK<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : InstRRF<opcode, (outs cls1:$R1), (ins cls1:$R2, cls2:$R3),
+ mnemonic#"rk\t$R1, $R2, $R3",
+ [(set cls1:$R1, (operator cls1:$R2, cls2:$R3))]> {
+ let R4 = 0;
+}
+
+multiclass BinaryRRAndK<string mnemonic, bits<8> opcode1, bits<16> opcode2,
+ SDPatternOperator operator, RegisterOperand cls1,
+ RegisterOperand cls2> {
+ let NumOpsKey = mnemonic in {
+ let NumOpsValue = "3" in
+ def K : BinaryRRFK<mnemonic, opcode2, null_frag, cls1, cls2>,
+ Requires<[FeatureDistinctOps]>;
+ let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in
+ def "" : BinaryRR<mnemonic, opcode1, operator, cls1, cls2>;
+ }
+}
+
+multiclass BinaryRREAndK<string mnemonic, bits<16> opcode1, bits<16> opcode2,
+ SDPatternOperator operator, RegisterOperand cls1,
+ RegisterOperand cls2> {
+ let NumOpsKey = mnemonic in {
+ let NumOpsValue = "3" in
+ def K : BinaryRRFK<mnemonic, opcode2, null_frag, cls1, cls2>,
+ Requires<[FeatureDistinctOps]>;
+ let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in
+ def "" : BinaryRRE<mnemonic, opcode1, operator, cls1, cls2>;
+ }
+}
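+
+// For example, 32-bit register addition would be declared along these
+// lines (a sketch; 0x1A and 0xB9F8 are the ISA encodings of AR and ARK):
+//
+//   defm AR : BinaryRRAndK<"a", 0x1A, 0xB9F8, add, GR32, GR32>;
+//
+// giving the two-operand AR plus, on targets with the distinct-operands
+// facility, the three-operand ARK.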
class BinaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator,
RegisterOperand cls, Immediate imm>
- : InstRI<opcode, (outs cls:$dst), (ins cls:$src1, imm:$src2),
- mnemonic#"\t$dst, $src2",
- [(set cls:$dst, (operator cls:$src1, imm:$src2))]> {
- let Constraints = "$src1 = $dst";
- let DisableEncoding = "$src1";
+ : InstRI<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
+ mnemonic#"\t$R1, $I2",
+ [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+}
+
+class BinaryRIE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ RegisterOperand cls, Immediate imm>
+ : InstRIEd<opcode, (outs cls:$R1), (ins cls:$R3, imm:$I2),
+ mnemonic#"\t$R1, $R3, $I2",
+ [(set cls:$R1, (operator cls:$R3, imm:$I2))]>;
+
+multiclass BinaryRIAndK<string mnemonic, bits<12> opcode1, bits<16> opcode2,
+ SDPatternOperator operator, RegisterOperand cls,
+ Immediate imm> {
+ let NumOpsKey = mnemonic in {
+ let NumOpsValue = "3" in
+ def K : BinaryRIE<mnemonic##"k", opcode2, null_frag, cls, imm>,
+ Requires<[FeatureDistinctOps]>;
+ let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in
+ def "" : BinaryRI<mnemonic, opcode1, operator, cls, imm>;
+ }
}
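+
+// For example (a sketch; imm32sx16 is the operand for signed 16-bit
+// immediates):
+//
+//   defm AHI : BinaryRIAndK<"ahi", 0xA7A, 0xECD8, add, GR32, imm32sx16>;
+//
+// defines AHI and, where distinct operands are available, AHIK.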
class BinaryRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator,
RegisterOperand cls, Immediate imm>
- : InstRIL<opcode, (outs cls:$dst), (ins cls:$src1, imm:$src2),
- mnemonic#"\t$dst, $src2",
- [(set cls:$dst, (operator cls:$src1, imm:$src2))]> {
- let Constraints = "$src1 = $dst";
- let DisableEncoding = "$src1";
+ : InstRIL<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
+ mnemonic#"\t$R1, $I2",
+ [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
}
class BinaryRX<string mnemonic, bits<8> opcode, SDPatternOperator operator,
- RegisterOperand cls, SDPatternOperator load,
+ RegisterOperand cls, SDPatternOperator load, bits<5> bytes,
AddressingMode mode = bdxaddr12only>
- : InstRX<opcode, (outs cls:$dst), (ins cls:$src1, mode:$src2),
- mnemonic#"\t$dst, $src2",
- [(set cls:$dst, (operator cls:$src1, (load mode:$src2)))]> {
- let Constraints = "$src1 = $dst";
- let DisableEncoding = "$src1";
+ : InstRX<opcode, (outs cls:$R1), (ins cls:$R1src, mode:$XBD2),
+ mnemonic#"\t$R1, $XBD2",
+ [(set cls:$R1, (operator cls:$R1src, (load mode:$XBD2)))]> {
+ let OpKey = mnemonic ## cls;
+ let OpType = "mem";
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
let mayLoad = 1;
+ let AccessBytes = bytes;
}
class BinaryRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
- RegisterOperand cls, SDPatternOperator load>
- : InstRXE<opcode, (outs cls:$dst), (ins cls:$src1, bdxaddr12only:$src2),
- mnemonic#"\t$dst, $src2",
- [(set cls:$dst, (operator cls:$src1,
- (load bdxaddr12only:$src2)))]> {
- let Constraints = "$src1 = $dst";
- let DisableEncoding = "$src1";
+ RegisterOperand cls, SDPatternOperator load, bits<5> bytes>
+ : InstRXE<opcode, (outs cls:$R1), (ins cls:$R1src, bdxaddr12only:$XBD2),
+ mnemonic#"\t$R1, $XBD2",
+ [(set cls:$R1, (operator cls:$R1src,
+ (load bdxaddr12only:$XBD2)))]> {
+ let OpKey = mnemonic ## cls;
+ let OpType = "mem";
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
let mayLoad = 1;
+ let AccessBytes = bytes;
}
class BinaryRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
- RegisterOperand cls, SDPatternOperator load,
+ RegisterOperand cls, SDPatternOperator load, bits<5> bytes,
AddressingMode mode = bdxaddr20only>
- : InstRXY<opcode, (outs cls:$dst), (ins cls:$src1, mode:$src2),
- mnemonic#"\t$dst, $src2",
- [(set cls:$dst, (operator cls:$src1, (load mode:$src2)))]> {
- let Constraints = "$src1 = $dst";
- let DisableEncoding = "$src1";
+ : InstRXY<opcode, (outs cls:$R1), (ins cls:$R1src, mode:$XBD2),
+ mnemonic#"\t$R1, $XBD2",
+ [(set cls:$R1, (operator cls:$R1src, (load mode:$XBD2)))]> {
+ let OpKey = mnemonic ## cls;
+ let OpType = "mem";
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
let mayLoad = 1;
+ let AccessBytes = bytes;
}
multiclass BinaryRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode,
SDPatternOperator operator, RegisterOperand cls,
- SDPatternOperator load> {
- let Function = mnemonic ## #cls in {
- let PairType = "12" in
- def "" : BinaryRX<mnemonic, rxOpcode, operator, cls, load, bdxaddr12pair>;
- let PairType = "20" in
- def Y : BinaryRXY<mnemonic#"y", rxyOpcode, operator, cls, load,
+ SDPatternOperator load, bits<5> bytes> {
+ let DispKey = mnemonic ## #cls in {
+ let DispSize = "12" in
+ def "" : BinaryRX<mnemonic, rxOpcode, operator, cls, load, bytes,
+ bdxaddr12pair>;
+ let DispSize = "20" in
+ def Y : BinaryRXY<mnemonic#"y", rxyOpcode, operator, cls, load, bytes,
bdxaddr20pair>;
}
}
class BinarySI<string mnemonic, bits<8> opcode, SDPatternOperator operator,
Operand imm, AddressingMode mode = bdaddr12only>
- : InstSI<opcode, (outs), (ins mode:$addr, imm:$src),
- mnemonic#"\t$addr, $src",
- [(store (operator (load mode:$addr), imm:$src), mode:$addr)]> {
+ : InstSI<opcode, (outs), (ins mode:$BD1, imm:$I2),
+ mnemonic#"\t$BD1, $I2",
+ [(store (operator (load mode:$BD1), imm:$I2), mode:$BD1)]> {
let mayLoad = 1;
let mayStore = 1;
}
class BinarySIY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
Operand imm, AddressingMode mode = bdaddr20only>
- : InstSIY<opcode, (outs), (ins mode:$addr, imm:$src),
- mnemonic#"\t$addr, $src",
- [(store (operator (load mode:$addr), imm:$src), mode:$addr)]> {
+ : InstSIY<opcode, (outs), (ins mode:$BD1, imm:$I2),
+ mnemonic#"\t$BD1, $I2",
+ [(store (operator (load mode:$BD1), imm:$I2), mode:$BD1)]> {
let mayLoad = 1;
let mayStore = 1;
}
@@ -712,59 +1065,83 @@ class BinarySIY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
multiclass BinarySIPair<string mnemonic, bits<8> siOpcode,
bits<16> siyOpcode, SDPatternOperator operator,
Operand imm> {
- let Function = mnemonic ## #cls in {
- let PairType = "12" in
+ let DispKey = mnemonic in {
+ let DispSize = "12" in
def "" : BinarySI<mnemonic, siOpcode, operator, imm, bdaddr12pair>;
- let PairType = "20" in
+ let DispSize = "20" in
def Y : BinarySIY<mnemonic#"y", siyOpcode, operator, imm, bdaddr20pair>;
}
}
class ShiftRS<string mnemonic, bits<8> opcode, SDPatternOperator operator,
- RegisterOperand cls, AddressingMode mode>
- : InstRS<opcode, (outs cls:$dst), (ins cls:$src1, mode:$src2),
- mnemonic#"\t$dst, $src2",
- [(set cls:$dst, (operator cls:$src1, mode:$src2))]> {
+ RegisterOperand cls>
+ : InstRS<opcode, (outs cls:$R1), (ins cls:$R1src, shift12only:$BD2),
+ mnemonic#"\t$R1, $BD2",
+ [(set cls:$R1, (operator cls:$R1src, shift12only:$BD2))]> {
let R3 = 0;
- let Constraints = "$src1 = $dst";
- let DisableEncoding = "$src1";
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
}
class ShiftRSY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
- RegisterOperand cls, AddressingMode mode>
- : InstRSY<opcode, (outs cls:$dst), (ins cls:$src1, mode:$src2),
- mnemonic#"\t$dst, $src1, $src2",
- [(set cls:$dst, (operator cls:$src1, mode:$src2))]>;
+ RegisterOperand cls>
+ : InstRSY<opcode, (outs cls:$R1), (ins cls:$R3, shift20only:$BD2),
+ mnemonic#"\t$R1, $R3, $BD2",
+ [(set cls:$R1, (operator cls:$R3, shift20only:$BD2))]>;
+
+multiclass ShiftRSAndK<string mnemonic, bits<8> opcode1, bits<16> opcode2,
+ SDPatternOperator operator, RegisterOperand cls> {
+ let NumOpsKey = mnemonic in {
+ let NumOpsValue = "3" in
+ def K : ShiftRSY<mnemonic##"k", opcode2, null_frag, cls>,
+ Requires<[FeatureDistinctOps]>;
+ let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in
+ def "" : ShiftRS<mnemonic, opcode1, operator, cls>;
+ }
+}
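+
+// For example (a sketch using the ISA encodings of SLL and SLLK):
+//
+//   defm SLL : ShiftRSAndK<"sll", 0x89, 0xEBDF, shl, GR32>;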
class CompareRR<string mnemonic, bits<8> opcode, SDPatternOperator operator,
RegisterOperand cls1, RegisterOperand cls2>
- : InstRR<opcode, (outs), (ins cls1:$src1, cls2:$src2),
- mnemonic#"\t$src1, $src2",
- [(operator cls1:$src1, cls2:$src2)]>;
+ : InstRR<opcode, (outs), (ins cls1:$R1, cls2:$R2),
+ mnemonic#"r\t$R1, $R2",
+ [(operator cls1:$R1, cls2:$R2)]> {
+ let OpKey = mnemonic ## cls1;
+ let OpType = "reg";
+ let isCompare = 1;
+}
class CompareRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
RegisterOperand cls1, RegisterOperand cls2>
- : InstRRE<opcode, (outs), (ins cls1:$src1, cls2:$src2),
- mnemonic#"\t$src1, $src2",
- [(operator cls1:$src1, cls2:$src2)]>;
+ : InstRRE<opcode, (outs), (ins cls1:$R1, cls2:$R2),
+ mnemonic#"r\t$R1, $R2",
+ [(operator cls1:$R1, cls2:$R2)]> {
+ let OpKey = mnemonic ## cls1;
+ let OpType = "reg";
+ let isCompare = 1;
+}
class CompareRI<string mnemonic, bits<12> opcode, SDPatternOperator operator,
RegisterOperand cls, Immediate imm>
- : InstRI<opcode, (outs), (ins cls:$src1, imm:$src2),
- mnemonic#"\t$src1, $src2",
- [(operator cls:$src1, imm:$src2)]>;
+ : InstRI<opcode, (outs), (ins cls:$R1, imm:$I2),
+ mnemonic#"\t$R1, $I2",
+ [(operator cls:$R1, imm:$I2)]> {
+ let isCompare = 1;
+}
class CompareRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator,
RegisterOperand cls, Immediate imm>
- : InstRIL<opcode, (outs), (ins cls:$src1, imm:$src2),
- mnemonic#"\t$src1, $src2",
- [(operator cls:$src1, imm:$src2)]>;
+ : InstRIL<opcode, (outs), (ins cls:$R1, imm:$I2),
+ mnemonic#"\t$R1, $I2",
+ [(operator cls:$R1, imm:$I2)]> {
+ let isCompare = 1;
+}
class CompareRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator,
RegisterOperand cls, SDPatternOperator load>
- : InstRIL<opcode, (outs), (ins cls:$src1, pcrel32:$src2),
- mnemonic#"\t$src1, $src2",
- [(operator cls:$src1, (load pcrel32:$src2))]> {
+ : InstRIL<opcode, (outs), (ins cls:$R1, pcrel32:$I2),
+ mnemonic#"\t$R1, $I2",
+ [(operator cls:$R1, (load pcrel32:$I2))]> {
+ let isCompare = 1;
let mayLoad = 1;
// We want PC-relative addresses to be tried ahead of BD and BDX addresses.
// However, BDXs have two extra operands and are therefore 6 units more
@@ -773,77 +1150,92 @@ class CompareRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator,
}
class CompareRX<string mnemonic, bits<8> opcode, SDPatternOperator operator,
- RegisterOperand cls, SDPatternOperator load,
+ RegisterOperand cls, SDPatternOperator load, bits<5> bytes,
AddressingMode mode = bdxaddr12only>
- : InstRX<opcode, (outs), (ins cls:$src1, mode:$src2),
- mnemonic#"\t$src1, $src2",
- [(operator cls:$src1, (load mode:$src2))]> {
+ : InstRX<opcode, (outs), (ins cls:$R1, mode:$XBD2),
+ mnemonic#"\t$R1, $XBD2",
+ [(operator cls:$R1, (load mode:$XBD2))]> {
+ let OpKey = mnemonic ## cls;
+ let OpType = "mem";
+ let isCompare = 1;
let mayLoad = 1;
+ let AccessBytes = bytes;
}
class CompareRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
- RegisterOperand cls, SDPatternOperator load>
- : InstRXE<opcode, (outs), (ins cls:$src1, bdxaddr12only:$src2),
- mnemonic#"\t$src1, $src2",
- [(operator cls:$src1, (load bdxaddr12only:$src2))]> {
+ RegisterOperand cls, SDPatternOperator load, bits<5> bytes>
+ : InstRXE<opcode, (outs), (ins cls:$R1, bdxaddr12only:$XBD2),
+ mnemonic#"\t$R1, $XBD2",
+ [(operator cls:$R1, (load bdxaddr12only:$XBD2))]> {
+ let OpKey = mnemonic ## cls;
+ let OpType = "mem";
+ let isCompare = 1;
let mayLoad = 1;
+ let AccessBytes = bytes;
}
class CompareRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
- RegisterOperand cls, SDPatternOperator load,
+ RegisterOperand cls, SDPatternOperator load, bits<5> bytes,
AddressingMode mode = bdxaddr20only>
- : InstRXY<opcode, (outs), (ins cls:$src1, mode:$src2),
- mnemonic#"\t$src1, $src2",
- [(operator cls:$src1, (load mode:$src2))]> {
+ : InstRXY<opcode, (outs), (ins cls:$R1, mode:$XBD2),
+ mnemonic#"\t$R1, $XBD2",
+ [(operator cls:$R1, (load mode:$XBD2))]> {
+ let OpKey = mnemonic ## cls;
+ let OpType = "mem";
+ let isCompare = 1;
let mayLoad = 1;
+ let AccessBytes = bytes;
}
multiclass CompareRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode,
SDPatternOperator operator, RegisterOperand cls,
- SDPatternOperator load> {
- let Function = mnemonic ## #cls in {
- let PairType = "12" in
+ SDPatternOperator load, bits<5> bytes> {
+ let DispKey = mnemonic ## #cls in {
+ let DispSize = "12" in
def "" : CompareRX<mnemonic, rxOpcode, operator, cls,
- load, bdxaddr12pair>;
- let PairType = "20" in
+ load, bytes, bdxaddr12pair>;
+ let DispSize = "20" in
def Y : CompareRXY<mnemonic#"y", rxyOpcode, operator, cls,
- load, bdxaddr20pair>;
+ load, bytes, bdxaddr20pair>;
}
}
class CompareSI<string mnemonic, bits<8> opcode, SDPatternOperator operator,
SDPatternOperator load, Immediate imm,
AddressingMode mode = bdaddr12only>
- : InstSI<opcode, (outs), (ins mode:$addr, imm:$src),
- mnemonic#"\t$addr, $src",
- [(operator (load mode:$addr), imm:$src)]> {
+ : InstSI<opcode, (outs), (ins mode:$BD1, imm:$I2),
+ mnemonic#"\t$BD1, $I2",
+ [(operator (load mode:$BD1), imm:$I2)]> {
+ let isCompare = 1;
let mayLoad = 1;
}
class CompareSIL<string mnemonic, bits<16> opcode, SDPatternOperator operator,
SDPatternOperator load, Immediate imm>
- : InstSIL<opcode, (outs), (ins bdaddr12only:$addr, imm:$src),
- mnemonic#"\t$addr, $src",
- [(operator (load bdaddr12only:$addr), imm:$src)]> {
+ : InstSIL<opcode, (outs), (ins bdaddr12only:$BD1, imm:$I2),
+ mnemonic#"\t$BD1, $I2",
+ [(operator (load bdaddr12only:$BD1), imm:$I2)]> {
+ let isCompare = 1;
let mayLoad = 1;
}
class CompareSIY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
SDPatternOperator load, Immediate imm,
AddressingMode mode = bdaddr20only>
- : InstSIY<opcode, (outs), (ins mode:$addr, imm:$src),
- mnemonic#"\t$addr, $src",
- [(operator (load mode:$addr), imm:$src)]> {
+ : InstSIY<opcode, (outs), (ins mode:$BD1, imm:$I2),
+ mnemonic#"\t$BD1, $I2",
+ [(operator (load mode:$BD1), imm:$I2)]> {
+ let isCompare = 1;
let mayLoad = 1;
}
multiclass CompareSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode,
SDPatternOperator operator, SDPatternOperator load,
Immediate imm> {
- let Function = mnemonic in {
- let PairType = "12" in
+ let DispKey = mnemonic in {
+ let DispSize = "12" in
def "" : CompareSI<mnemonic, siOpcode, operator, load, imm, bdaddr12pair>;
- let PairType = "20" in
+ let DispSize = "20" in
def Y : CompareSIY<mnemonic#"y", siyOpcode, operator, load, imm,
bdaddr20pair>;
}
@@ -851,65 +1243,94 @@ multiclass CompareSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode,
class TernaryRRD<string mnemonic, bits<16> opcode,
SDPatternOperator operator, RegisterOperand cls>
- : InstRRD<opcode, (outs cls:$dst), (ins cls:$src1, cls:$src2, cls:$src3),
- mnemonic#"\t$dst, $src2, $src3",
- [(set cls:$dst, (operator cls:$src1, cls:$src2, cls:$src3))]> {
- let Constraints = "$src1 = $dst";
- let DisableEncoding = "$src1";
+ : InstRRD<opcode, (outs cls:$R1), (ins cls:$R1src, cls:$R3, cls:$R2),
+ mnemonic#"r\t$R1, $R3, $R2",
+ [(set cls:$R1, (operator cls:$R1src, cls:$R3, cls:$R2))]> {
+ let OpKey = mnemonic ## cls;
+ let OpType = "reg";
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
}
class TernaryRXF<string mnemonic, bits<16> opcode, SDPatternOperator operator,
- RegisterOperand cls, SDPatternOperator load>
- : InstRXF<opcode, (outs cls:$dst),
- (ins cls:$src1, cls:$src2, bdxaddr12only:$src3),
- mnemonic#"\t$dst, $src2, $src3",
- [(set cls:$dst, (operator cls:$src1, cls:$src2,
- (load bdxaddr12only:$src3)))]> {
- let Constraints = "$src1 = $dst";
- let DisableEncoding = "$src1";
+ RegisterOperand cls, SDPatternOperator load, bits<5> bytes>
+ : InstRXF<opcode, (outs cls:$R1),
+ (ins cls:$R1src, cls:$R3, bdxaddr12only:$XBD2),
+ mnemonic#"\t$R1, $R3, $XBD2",
+ [(set cls:$R1, (operator cls:$R1src, cls:$R3,
+ (load bdxaddr12only:$XBD2)))]> {
+ let OpKey = mnemonic ## cls;
+ let OpType = "mem";
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
let mayLoad = 1;
+ let AccessBytes = bytes;
}
class CmpSwapRS<string mnemonic, bits<8> opcode, SDPatternOperator operator,
RegisterOperand cls, AddressingMode mode = bdaddr12only>
- : InstRS<opcode, (outs cls:$dst), (ins cls:$old, cls:$new, mode:$ptr),
- mnemonic#"\t$dst, $new, $ptr",
- [(set cls:$dst, (operator mode:$ptr, cls:$old, cls:$new))]> {
- let Constraints = "$old = $dst";
- let DisableEncoding = "$old";
+ : InstRS<opcode, (outs cls:$R1), (ins cls:$R1src, cls:$R3, mode:$BD2),
+ mnemonic#"\t$R1, $R3, $BD2",
+ [(set cls:$R1, (operator mode:$BD2, cls:$R1src, cls:$R3))]> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
let mayLoad = 1;
let mayStore = 1;
}
class CmpSwapRSY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
RegisterOperand cls, AddressingMode mode = bdaddr20only>
- : InstRSY<opcode, (outs cls:$dst), (ins cls:$old, cls:$new, mode:$ptr),
- mnemonic#"\t$dst, $new, $ptr",
- [(set cls:$dst, (operator mode:$ptr, cls:$old, cls:$new))]> {
- let Constraints = "$old = $dst";
- let DisableEncoding = "$old";
+ : InstRSY<opcode, (outs cls:$R1), (ins cls:$R1src, cls:$R3, mode:$BD2),
+ mnemonic#"\t$R1, $R3, $BD2",
+ [(set cls:$R1, (operator mode:$BD2, cls:$R1src, cls:$R3))]> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
let mayLoad = 1;
let mayStore = 1;
}
multiclass CmpSwapRSPair<string mnemonic, bits<8> rsOpcode, bits<16> rsyOpcode,
SDPatternOperator operator, RegisterOperand cls> {
- let Function = mnemonic ## #cls in {
- let PairType = "12" in
+ let DispKey = mnemonic ## #cls in {
+ let DispSize = "12" in
def "" : CmpSwapRS<mnemonic, rsOpcode, operator, cls, bdaddr12pair>;
- let PairType = "20" in
+ let DispSize = "20" in
def Y : CmpSwapRSY<mnemonic#"y", rsyOpcode, operator, cls, bdaddr20pair>;
}
}
class RotateSelectRIEf<string mnemonic, bits<16> opcode, RegisterOperand cls1,
RegisterOperand cls2>
- : InstRIEf<opcode, (outs cls1:$dst),
- (ins cls1:$src1, cls2:$src2,
- uimm8zx6:$imm1, uimm8zx6:$imm2, uimm8zx6:$imm3),
- mnemonic#"\t$dst, $src2, $imm1, $imm2, $imm3", []> {
- let Constraints = "$src1 = $dst";
- let DisableEncoding = "$src1";
+ : InstRIEf<opcode, (outs cls1:$R1),
+ (ins cls1:$R1src, cls2:$R2, uimm8:$I3, uimm8:$I4, uimm8zx6:$I5),
+ mnemonic#"\t$R1, $R2, $I3, $I4, $I5", []> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+}
+
+class PrefetchRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator>
+ : InstRXY<opcode, (outs), (ins uimm8zx4:$R1, bdxaddr20only:$XBD2),
+ mnemonic##"\t$R1, $XBD2",
+ [(operator uimm8zx4:$R1, bdxaddr20only:$XBD2)]>;
+
+class PrefetchRILPC<string mnemonic, bits<12> opcode,
+ SDPatternOperator operator>
+ : InstRIL<opcode, (outs), (ins uimm8zx4:$R1, pcrel32:$I2),
+ mnemonic##"\t$R1, $I2",
+ [(operator uimm8zx4:$R1, pcrel32:$I2)]> {
+ // We want PC-relative addresses to be tried ahead of BD and BDX addresses.
+ // However, BDXs have two extra operands and are therefore 6 units more
+ // complex.
+ let AddedComplexity = 7;
+}
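+
+// For example (a sketch; z_prefetch is assumed to be the target's
+// prefetch SDNode):
+//
+//   def PFD   : PrefetchRXY<"pfd", 0xE336, z_prefetch>;
+//   def PFDRL : PrefetchRILPC<"pfdrl", 0xC62, z_prefetch>;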
+
+// A floating-point load-and-test operation. Create both a normal unary
+// operation and one that acts as a comparison against zero.
+multiclass LoadAndTestRRE<string mnemonic, bits<16> opcode,
+ RegisterOperand cls> {
+ def "" : UnaryRRE<mnemonic, opcode, null_frag, cls, cls>;
+ let isCodeGenOnly = 1 in
+ def Compare : CompareRRE<mnemonic, opcode, null_frag, cls, cls>;
}
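+
+// For example (a sketch; 0xB302 is the ISA encoding of LTEBR):
+//
+//   defm LTEBR : LoadAndTestRRE<"lteb", 0xB302, FP32>;
+//
+// defines LTEBR plus a codegen-only LTEBRCompare for comparisons
+// against zero.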
//===----------------------------------------------------------------------===//
@@ -928,17 +1349,130 @@ class Pseudo<dag outs, dag ins, list<dag> pattern>
let isCodeGenOnly = 1;
}
+// Like UnaryRI, but expanded after RA depending on the choice of register.
+class UnaryRIPseudo<SDPatternOperator operator, RegisterOperand cls,
+ Immediate imm>
+ : Pseudo<(outs cls:$R1), (ins imm:$I2),
+ [(set cls:$R1, (operator imm:$I2))]>;
+
+// Like UnaryRXY, but expanded after RA depending on the choice of register.
+class UnaryRXYPseudo<string key, SDPatternOperator operator,
+ RegisterOperand cls, bits<5> bytes,
+ AddressingMode mode = bdxaddr20only>
+ : Pseudo<(outs cls:$R1), (ins mode:$XBD2),
+ [(set cls:$R1, (operator mode:$XBD2))]> {
+ let OpKey = key ## cls;
+ let OpType = "mem";
+ let mayLoad = 1;
+ let Has20BitOffset = 1;
+ let HasIndex = 1;
+ let AccessBytes = bytes;
+}
+
+// Like UnaryRR, but expanded after RA depending on the choice of registers.
+class UnaryRRPseudo<string key, SDPatternOperator operator,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : Pseudo<(outs cls1:$R1), (ins cls2:$R2),
+ [(set cls1:$R1, (operator cls2:$R2))]> {
+ let OpKey = key ## cls1;
+ let OpType = "reg";
+}
+
+// Like BinaryRI, but expanded after RA depending on the choice of register.
+class BinaryRIPseudo<SDPatternOperator operator, RegisterOperand cls,
+ Immediate imm>
+ : Pseudo<(outs cls:$R1), (ins cls:$R1src, imm:$I2),
+ [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> {
+ let Constraints = "$R1 = $R1src";
+}
+
+// Like BinaryRIE, but expanded after RA depending on the choice of register.
+class BinaryRIEPseudo<SDPatternOperator operator, RegisterOperand cls,
+ Immediate imm>
+ : Pseudo<(outs cls:$R1), (ins cls:$R3, imm:$I2),
+ [(set cls:$R1, (operator cls:$R3, imm:$I2))]>;
+
+// Like BinaryRIAndK, but expanded after RA depending on the choice of register.
+multiclass BinaryRIAndKPseudo<string key, SDPatternOperator operator,
+ RegisterOperand cls, Immediate imm> {
+ let NumOpsKey = key in {
+ let NumOpsValue = "3" in
+ def K : BinaryRIEPseudo<null_frag, cls, imm>,
+ Requires<[FeatureHighWord, FeatureDistinctOps]>;
+ let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in
+ def "" : BinaryRIPseudo<operator, cls, imm>,
+ Requires<[FeatureHighWord]>;
+ }
+}
+
+// Like CompareRI, but expanded after RA depending on the choice of register.
+class CompareRIPseudo<SDPatternOperator operator, RegisterOperand cls,
+ Immediate imm>
+ : Pseudo<(outs), (ins cls:$R1, imm:$I2), [(operator cls:$R1, imm:$I2)]>;
+
+// Like CompareRXY, but expanded after RA depending on the choice of register.
+class CompareRXYPseudo<SDPatternOperator operator, RegisterOperand cls,
+ SDPatternOperator load, bits<5> bytes,
+ AddressingMode mode = bdxaddr20only>
+ : Pseudo<(outs), (ins cls:$R1, mode:$XBD2),
+ [(operator cls:$R1, (load mode:$XBD2))]> {
+ let mayLoad = 1;
+ let Has20BitOffset = 1;
+ let HasIndex = 1;
+ let AccessBytes = bytes;
+}
+
+// Like StoreRXY, but expanded after RA depending on the choice of register.
+class StoreRXYPseudo<SDPatternOperator operator, RegisterOperand cls,
+ bits<5> bytes, AddressingMode mode = bdxaddr20only>
+ : Pseudo<(outs), (ins cls:$R1, mode:$XBD2),
+ [(operator cls:$R1, mode:$XBD2)]> {
+ let mayStore = 1;
+ let Has20BitOffset = 1;
+ let HasIndex = 1;
+ let AccessBytes = bytes;
+}
+
+// Like RotateSelectRIEf, but expanded after RA depending on the choice
+// of registers.
+class RotateSelectRIEfPseudo<RegisterOperand cls1, RegisterOperand cls2>
+ : Pseudo<(outs cls1:$R1),
+ (ins cls1:$R1src, cls2:$R2, uimm8:$I3, uimm8:$I4, uimm8zx6:$I5),
+ []> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+}
+
// Implements "$dst = $cc & (8 >> CC) ? $src1 : $src2", where CC is
// the value of the PSW's 2-bit condition code field.
class SelectWrapper<RegisterOperand cls>
- : Pseudo<(outs cls:$dst), (ins cls:$src1, cls:$src2, i8imm:$cc),
- [(set cls:$dst, (z_select_ccmask cls:$src1, cls:$src2, imm:$cc))]> {
+ : Pseudo<(outs cls:$dst),
+ (ins cls:$src1, cls:$src2, uimm8zx4:$valid, uimm8zx4:$cc),
+ [(set cls:$dst, (z_select_ccmask cls:$src1, cls:$src2,
+ uimm8zx4:$valid, uimm8zx4:$cc))]> {
let usesCustomInserter = 1;
// Although the instructions used by these nodes do not in themselves
- // change the PSW, the insertion requires new blocks, and the PSW cannot
- // be live across them.
- let Defs = [PSW];
- let Uses = [PSW];
+ // change CC, the insertion requires new blocks, and CC cannot be live
+ // across them.
+ let Defs = [CC];
+ let Uses = [CC];
+}
+
+// Stores $new to $addr if $cc is true ("" case) or false (Inv case).
+multiclass CondStores<RegisterOperand cls, SDPatternOperator store,
+ SDPatternOperator load, AddressingMode mode> {
+ let Defs = [CC], Uses = [CC], usesCustomInserter = 1 in {
+ def "" : Pseudo<(outs),
+ (ins cls:$new, mode:$addr, uimm8zx4:$valid, uimm8zx4:$cc),
+ [(store (z_select_ccmask cls:$new, (load mode:$addr),
+ uimm8zx4:$valid, uimm8zx4:$cc),
+ mode:$addr)]>;
+ def Inv : Pseudo<(outs),
+ (ins cls:$new, mode:$addr, uimm8zx4:$valid, uimm8zx4:$cc),
+ [(store (z_select_ccmask (load mode:$addr), cls:$new,
+ uimm8zx4:$valid, uimm8zx4:$cc),
+ mode:$addr)]>;
+ }
}
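+
+// For example, a 32-bit conditional store could be declared as follows
+// (a sketch; the nonvolatile_* pattern-fragment names are assumptions):
+//
+//   defm CondStore32 : CondStores<GR32, nonvolatile_truncstorei32,
+//                                 nonvolatile_anyextloadi32, bdxaddr20only>;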
// OPERATOR is ATOMIC_SWAP or an ATOMIC_LOAD_* operation. PAT and OPERAND
@@ -947,7 +1481,7 @@ class AtomicLoadBinary<SDPatternOperator operator, RegisterOperand cls,
dag pat, DAGOperand operand>
: Pseudo<(outs cls:$dst), (ins bdaddr20only:$ptr, operand:$src2),
[(set cls:$dst, (operator bdaddr20only:$ptr, pat))]> {
- let Defs = [PSW];
+ let Defs = [CC];
let Has20BitOffset = 1;
let mayLoad = 1;
let mayStore = 1;
@@ -973,7 +1507,7 @@ class AtomicLoadWBinary<SDPatternOperator operator, dag pat,
ADDR32:$negbitshift, uimm32:$bitsize),
[(set GR32:$dst, (operator bdaddr20only:$ptr, pat, ADDR32:$bitshift,
ADDR32:$negbitshift, uimm32:$bitsize))]> {
- let Defs = [PSW];
+ let Defs = [CC];
let Has20BitOffset = 1;
let mayLoad = 1;
let mayStore = 1;
@@ -985,3 +1519,85 @@ class AtomicLoadWBinaryReg<SDPatternOperator operator>
: AtomicLoadWBinary<operator, (i32 GR32:$src2), GR32>;
class AtomicLoadWBinaryImm<SDPatternOperator operator, Immediate imm>
: AtomicLoadWBinary<operator, (i32 imm:$src2), imm>;
+
+// Define an instruction that operates on two fixed-length blocks of memory,
+// and associated pseudo instructions for operating on blocks of any size.
+// The Sequence form uses a straight-line sequence of instructions and
+// the Loop form uses a loop of instructions that each process 256 bytes,
+// followed by one final instruction to handle the remaining bytes.
+multiclass MemorySS<string mnemonic, bits<8> opcode,
+ SDPatternOperator sequence, SDPatternOperator loop> {
+ def "" : InstSS<opcode, (outs), (ins bdladdr12onlylen8:$BDL1,
+ bdaddr12only:$BD2),
+ mnemonic##"\t$BDL1, $BD2", []>;
+ let usesCustomInserter = 1 in {
+ def Sequence : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
+ imm64:$length),
+ [(sequence bdaddr12only:$dest, bdaddr12only:$src,
+ imm64:$length)]>;
+ def Loop : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
+ imm64:$length, GR64:$count256),
+ [(loop bdaddr12only:$dest, bdaddr12only:$src,
+ imm64:$length, GR64:$count256)]>;
+ }
+}
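+
+// For example (a sketch; z_mvc and z_mvc_loop are assumed to be the
+// SDNodes for the block-copy operation):
+//
+//   defm MVC : MemorySS<"mvc", 0xD2, z_mvc, z_mvc_loop>;
+//
+// defines MVC itself plus MVCSequence and MVCLoop pseudos.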
+
+// Define an instruction that operates on two strings, both terminated
+// by the character in R0. The instruction processes a CPU-determined
+// number of bytes at a time and sets CC to 3 if the instruction needs
+// to be repeated. Also define a pseudo instruction that represents
+// the full loop (the main instruction plus the branch on CC==3).
+multiclass StringRRE<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator> {
+ def "" : InstRRE<opcode, (outs GR64:$R1, GR64:$R2),
+ (ins GR64:$R1src, GR64:$R2src),
+ mnemonic#"\t$R1, $R2", []> {
+ let Constraints = "$R1 = $R1src, $R2 = $R2src";
+ let DisableEncoding = "$R1src, $R2src";
+ }
+ let usesCustomInserter = 1 in
+ def Loop : Pseudo<(outs GR64:$end),
+ (ins GR64:$start1, GR64:$start2, GR32:$char),
+ [(set GR64:$end, (operator GR64:$start1, GR64:$start2,
+ GR32:$char))]>;
+}
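+
+// For example (a sketch; z_strcmp is assumed to be the string-compare
+// SDNode):
+//
+//   defm CLST : StringRRE<"clst", 0xB25D, z_strcmp>;
+//
+// defines CLST plus a CLSTLoop pseudo covering the CC==3 repeat loop.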
+
+// A pseudo instruction that is a direct alias of a real instruction.
+// These aliases are used in cases where a particular register operand is
+// fixed or where the same instruction is used with different register sizes.
+// The size parameter is the size in bytes of the associated real instruction.
+class Alias<int size, dag outs, dag ins, list<dag> pattern>
+ : InstSystemZ<size, outs, ins, "", pattern> {
+ let isPseudo = 1;
+ let isCodeGenOnly = 1;
+}
+
+// An alias of a BinaryRI, but with different register sizes.
+class BinaryAliasRI<SDPatternOperator operator, RegisterOperand cls,
+ Immediate imm>
+ : Alias<4, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
+ [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> {
+ let Constraints = "$R1 = $R1src";
+}
+
+// An alias of a BinaryRIL, but with different register sizes.
+class BinaryAliasRIL<SDPatternOperator operator, RegisterOperand cls,
+ Immediate imm>
+ : Alias<6, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
+ [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> {
+ let Constraints = "$R1 = $R1src";
+}
+
+// An alias of a CompareRI, but with different register sizes.
+class CompareAliasRI<SDPatternOperator operator, RegisterOperand cls,
+ Immediate imm>
+ : Alias<4, (outs), (ins cls:$R1, imm:$I2), [(operator cls:$R1, imm:$I2)]> {
+ let isCompare = 1;
+}
+
+// An alias of a RotateSelectRIEf, but with different register sizes.
+class RotateSelectAliasRIEf<RegisterOperand cls1, RegisterOperand cls2>
+ : Alias<6, (outs cls1:$R1),
+ (ins cls1:$R1src, cls2:$R2, uimm8:$I3, uimm8:$I4, uimm8zx6:$I5), []> {
+ let Constraints = "$R1 = $R1src";
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 0718c83..acfeed8 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -12,17 +12,37 @@
//===----------------------------------------------------------------------===//
#include "SystemZInstrInfo.h"
+#include "SystemZTargetMachine.h"
#include "SystemZInstrBuilder.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
-#define GET_INSTRINFO_CTOR
+#define GET_INSTRINFO_CTOR_DTOR
#define GET_INSTRMAP_INFO
#include "SystemZGenInstrInfo.inc"
using namespace llvm;
+// Return a mask with Count low bits set.
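+// For example, allOnes(16) == 0xffff; the two-stage shift lets
+// allOnes(64) return ~0ULL without the undefined behavior of a
+// 64-bit shift by 64.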
+static uint64_t allOnes(unsigned int Count) {
+ return Count == 0 ? 0 : (uint64_t(1) << (Count - 1) << 1) - 1;
+}
+
+// Reg should be a 32-bit GPR. Return true if it is a high register rather
+// than a low register.
+static bool isHighReg(unsigned int Reg) {
+ if (SystemZ::GRH32BitRegClass.contains(Reg))
+ return true;
+ assert(SystemZ::GR32BitRegClass.contains(Reg) && "Invalid GRX32");
+ return false;
+}
+
+// Pin the vtable to this file.
+void SystemZInstrInfo::anchor() {}
+
SystemZInstrInfo::SystemZInstrInfo(SystemZTargetMachine &tm)
: SystemZGenInstrInfo(SystemZ::ADJCALLSTACKDOWN, SystemZ::ADJCALLSTACKUP),
- RI(tm, *this) {
+ RI(tm), TM(tm) {
}
// MI is a 128-bit load or store. Split it into two 64-bit loads or stores,
@@ -40,8 +60,8 @@ void SystemZInstrInfo::splitMove(MachineBasicBlock::iterator MI,
// Set up the two 64-bit registers.
MachineOperand &HighRegOp = EarlierMI->getOperand(0);
MachineOperand &LowRegOp = MI->getOperand(0);
- HighRegOp.setReg(RI.getSubReg(HighRegOp.getReg(), SystemZ::subreg_high));
- LowRegOp.setReg(RI.getSubReg(LowRegOp.getReg(), SystemZ::subreg_low));
+ HighRegOp.setReg(RI.getSubReg(HighRegOp.getReg(), SystemZ::subreg_h64));
+ LowRegOp.setReg(RI.getSubReg(LowRegOp.getReg(), SystemZ::subreg_l64));
// The address in the first (high) instruction is already correct.
// Adjust the offset in the second (low) instruction.
@@ -74,12 +94,104 @@ void SystemZInstrInfo::splitAdjDynAlloc(MachineBasicBlock::iterator MI) const {
OffsetMO.setImm(Offset);
}
+// MI is an RI-style pseudo instruction. Replace it with LowOpcode
+// if the first operand is a low GR32 and HighOpcode if the first operand
+// is a high GR32. ConvertHigh is true if LowOpcode takes a signed operand
+// and HighOpcode takes an unsigned 32-bit operand. In those cases,
+// MI has the same kind of operand as LowOpcode, so needs to be converted
+// if HighOpcode is used.
+void SystemZInstrInfo::expandRIPseudo(MachineInstr *MI, unsigned LowOpcode,
+ unsigned HighOpcode,
+ bool ConvertHigh) const {
+ unsigned Reg = MI->getOperand(0).getReg();
+ bool IsHigh = isHighReg(Reg);
+ MI->setDesc(get(IsHigh ? HighOpcode : LowOpcode));
+ if (IsHigh && ConvertHigh)
+ MI->getOperand(1).setImm(uint32_t(MI->getOperand(1).getImm()));
+}
+
+// MI is a three-operand RIE-style pseudo instruction. Replace it with
+// LowOpcodeK if the registers are both low GR32s, otherwise use a move
+// followed by HighOpcode or LowOpcode, depending on whether the target
+// is a high or low GR32.
+void SystemZInstrInfo::expandRIEPseudo(MachineInstr *MI, unsigned LowOpcode,
+ unsigned LowOpcodeK,
+ unsigned HighOpcode) const {
+ unsigned DestReg = MI->getOperand(0).getReg();
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ bool DestIsHigh = isHighReg(DestReg);
+ bool SrcIsHigh = isHighReg(SrcReg);
+ if (!DestIsHigh && !SrcIsHigh)
+ MI->setDesc(get(LowOpcodeK));
+ else {
+ emitGRX32Move(*MI->getParent(), MI, MI->getDebugLoc(),
+ DestReg, SrcReg, SystemZ::LR, 32,
+ MI->getOperand(1).isKill());
+ MI->setDesc(get(DestIsHigh ? HighOpcode : LowOpcode));
+ MI->getOperand(1).setReg(DestReg);
+ }
+}
+
+// MI is an RXY-style pseudo instruction. Replace it with LowOpcode
+// if the first operand is a low GR32 and HighOpcode if the first operand
+// is a high GR32.
+void SystemZInstrInfo::expandRXYPseudo(MachineInstr *MI, unsigned LowOpcode,
+ unsigned HighOpcode) const {
+ unsigned Reg = MI->getOperand(0).getReg();
+ unsigned Opcode = getOpcodeForOffset(isHighReg(Reg) ? HighOpcode : LowOpcode,
+ MI->getOperand(2).getImm());
+ MI->setDesc(get(Opcode));
+}
+
+// MI is an RR-style pseudo instruction that zero-extends the low Size bits
+// of one GRX32 into another. Replace it with LowOpcode if both operands
+// are low registers, otherwise use RISB[LH]G.
+void SystemZInstrInfo::expandZExtPseudo(MachineInstr *MI, unsigned LowOpcode,
+ unsigned Size) const {
+ emitGRX32Move(*MI->getParent(), MI, MI->getDebugLoc(),
+ MI->getOperand(0).getReg(), MI->getOperand(1).getReg(),
+ LowOpcode, Size, MI->getOperand(1).isKill());
+ MI->eraseFromParent();
+}
+
+// Emit a zero-extending move from 32-bit GPR SrcReg to 32-bit GPR
+// DestReg before MBBI in MBB. Use LowLowOpcode when both DestReg and SrcReg
+// are low registers, otherwise use RISB[LH]G. Size is the number of bits
+// taken from the low end of SrcReg (8 for LLCR, 16 for LLHR and 32 for LR).
+// KillSrc is true if this move is the last use of SrcReg.
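+//
+// For example, an 8-bit zero-extension from a low source to a high
+// destination emits RISBHL with I3 = 32 - 8 = 24, I4 = 128 + 31 = 159
+// (end position 31 plus the "zero remaining bits" flag) and a rotate
+// amount of 32.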
+void SystemZInstrInfo::emitGRX32Move(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ DebugLoc DL, unsigned DestReg,
+ unsigned SrcReg, unsigned LowLowOpcode,
+ unsigned Size, bool KillSrc) const {
+ unsigned Opcode;
+ bool DestIsHigh = isHighReg(DestReg);
+ bool SrcIsHigh = isHighReg(SrcReg);
+ if (DestIsHigh && SrcIsHigh)
+ Opcode = SystemZ::RISBHH;
+ else if (DestIsHigh && !SrcIsHigh)
+ Opcode = SystemZ::RISBHL;
+ else if (!DestIsHigh && SrcIsHigh)
+ Opcode = SystemZ::RISBLH;
+ else {
+ BuildMI(MBB, MBBI, DL, get(LowLowOpcode), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+ unsigned Rotate = (DestIsHigh != SrcIsHigh ? 32 : 0);
+ BuildMI(MBB, MBBI, DL, get(Opcode), DestReg)
+ .addReg(DestReg, RegState::Undef)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .addImm(32 - Size).addImm(128 + 31).addImm(Rotate);
+}
+
// If MI is a simple load or store for a frame object, return the register
// it loads or stores and set FrameIndex to the index of the frame object.
// Return 0 otherwise.
//
// Flag is SimpleBDXLoad for loads and SimpleBDXStore for stores.
-static int isSimpleMove(const MachineInstr *MI, int &FrameIndex, int Flag) {
+static int isSimpleMove(const MachineInstr *MI, int &FrameIndex,
+ unsigned Flag) {
const MCInstrDesc &MCID = MI->getDesc();
if ((MCID.TSFlags & Flag) &&
MI->getOperand(1).isFI() &&
@@ -101,6 +213,31 @@ unsigned SystemZInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
return isSimpleMove(MI, FrameIndex, SystemZII::SimpleBDXStore);
}
+bool SystemZInstrInfo::isStackSlotCopy(const MachineInstr *MI,
+ int &DestFrameIndex,
+ int &SrcFrameIndex) const {
+ // Check for MVC 0(Length,FI1),0(FI2)
+ const MachineFrameInfo *MFI = MI->getParent()->getParent()->getFrameInfo();
+ if (MI->getOpcode() != SystemZ::MVC ||
+ !MI->getOperand(0).isFI() ||
+ MI->getOperand(1).getImm() != 0 ||
+ !MI->getOperand(3).isFI() ||
+ MI->getOperand(4).getImm() != 0)
+ return false;
+
+ // Check that Length covers the full slots.
+ int64_t Length = MI->getOperand(2).getImm();
+ unsigned FI1 = MI->getOperand(0).getIndex();
+ unsigned FI2 = MI->getOperand(3).getIndex();
+ if (MFI->getObjectSize(FI1) != Length ||
+ MFI->getObjectSize(FI2) != Length)
+ return false;
+
+ DestFrameIndex = FI1;
+ SrcFrameIndex = FI2;
+ return true;
+}
+
bool SystemZInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
@@ -123,19 +260,22 @@ bool SystemZInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
// A terminator that isn't a branch can't easily be handled by this
// analysis.
- unsigned ThisCond;
- const MachineOperand *ThisTarget;
- if (!isBranch(I, ThisCond, ThisTarget))
+ if (!I->isBranch())
return true;
// Can't handle indirect branches.
- if (!ThisTarget->isMBB())
+ SystemZII::Branch Branch(getBranchInfo(I));
+ if (!Branch.Target->isMBB())
+ return true;
+
+ // Punt on compound branches.
+ if (Branch.Type != SystemZII::BranchNormal)
return true;
- if (ThisCond == SystemZ::CCMASK_ANY) {
+ if (Branch.CCMask == SystemZ::CCMASK_ANY) {
// Handle unconditional branches.
if (!AllowModify) {
- TBB = ThisTarget->getMBB();
+ TBB = Branch.Target->getMBB();
continue;
}
@@ -147,7 +287,7 @@ bool SystemZInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
FBB = 0;
// Delete the JMP if it's equivalent to a fall-through.
- if (MBB.isLayoutSuccessor(ThisTarget->getMBB())) {
+ if (MBB.isLayoutSuccessor(Branch.Target->getMBB())) {
TBB = 0;
I->eraseFromParent();
I = MBB.end();
@@ -155,7 +295,7 @@ bool SystemZInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
}
// TBB is used to indicate the unconditional destination.
- TBB = ThisTarget->getMBB();
+ TBB = Branch.Target->getMBB();
continue;
}
@@ -163,26 +303,28 @@ bool SystemZInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
if (Cond.empty()) {
// FIXME: add X86-style branch swap
FBB = TBB;
- TBB = ThisTarget->getMBB();
- Cond.push_back(MachineOperand::CreateImm(ThisCond));
+ TBB = Branch.Target->getMBB();
+ Cond.push_back(MachineOperand::CreateImm(Branch.CCValid));
+ Cond.push_back(MachineOperand::CreateImm(Branch.CCMask));
continue;
}
// Handle subsequent conditional branches.
- assert(Cond.size() == 1);
- assert(TBB);
+ assert(Cond.size() == 2 && TBB && "Should have seen a conditional branch");
// Only handle the case where all conditional branches branch to the same
// destination.
- if (TBB != ThisTarget->getMBB())
+ if (TBB != Branch.Target->getMBB())
return true;
// If the conditions are the same, we can leave them alone.
- unsigned OldCond = Cond[0].getImm();
- if (OldCond == ThisCond)
+ unsigned OldCCValid = Cond[0].getImm();
+ unsigned OldCCMask = Cond[1].getImm();
+ if (OldCCValid == Branch.CCValid && OldCCMask == Branch.CCMask)
continue;
// FIXME: Try combining conditions like X86 does. Should be easy on Z!
+ return false;
}
return false;
@@ -197,11 +339,9 @@ unsigned SystemZInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
--I;
if (I->isDebugValue())
continue;
- unsigned Cond;
- const MachineOperand *Target;
- if (!isBranch(I, Cond, Target))
+ if (!I->isBranch())
break;
- if (!Target->isMBB())
+ if (!getBranchInfo(I).Target->isMBB())
break;
// Remove the branch.
I->eraseFromParent();
@@ -212,6 +352,13 @@ unsigned SystemZInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
return Count;
}
+bool SystemZInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ assert(Cond.size() == 2 && "Invalid condition");
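+ // Flipping the mask bits within CCValid negates the condition. For
+ // example, an integer-compare "equal" (CCValid = 14, CCMask = 8)
+ // becomes "not equal" (CCMask = 14 ^ 8 = 6).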
+ Cond[1].setImm(Cond[1].getImm() ^ Cond[0].getImm());
+ return false;
+}
+
unsigned
SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
@@ -223,30 +370,185 @@ SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
// Shouldn't be a fall through.
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
- assert((Cond.size() == 1 || Cond.size() == 0) &&
+ assert((Cond.size() == 2 || Cond.size() == 0) &&
"SystemZ branch conditions have one component!");
if (Cond.empty()) {
// Unconditional branch?
assert(!FBB && "Unconditional branch with multiple successors!");
- BuildMI(&MBB, DL, get(SystemZ::JG)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(SystemZ::J)).addMBB(TBB);
return 1;
}
// Conditional branch.
unsigned Count = 0;
- unsigned CC = Cond[0].getImm();
- BuildMI(&MBB, DL, get(SystemZ::BRCL)).addImm(CC).addMBB(TBB);
+ unsigned CCValid = Cond[0].getImm();
+ unsigned CCMask = Cond[1].getImm();
+ BuildMI(&MBB, DL, get(SystemZ::BRC))
+ .addImm(CCValid).addImm(CCMask).addMBB(TBB);
++Count;
if (FBB) {
// Two-way Conditional branch. Insert the second branch.
- BuildMI(&MBB, DL, get(SystemZ::JG)).addMBB(FBB);
+ BuildMI(&MBB, DL, get(SystemZ::J)).addMBB(FBB);
++Count;
}
return Count;
}
+bool SystemZInstrInfo::analyzeCompare(const MachineInstr *MI,
+ unsigned &SrcReg, unsigned &SrcReg2,
+ int &Mask, int &Value) const {
+ assert(MI->isCompare() && "Caller should have checked for a comparison");
+
+ if (MI->getNumExplicitOperands() == 2 &&
+ MI->getOperand(0).isReg() &&
+ MI->getOperand(1).isImm()) {
+ SrcReg = MI->getOperand(0).getReg();
+ SrcReg2 = 0;
+ Value = MI->getOperand(1).getImm();
+ Mask = ~0;
+ return true;
+ }
+
+ return false;
+}
+
+// If Reg is a virtual register, return its definition, otherwise return null.
+static MachineInstr *getDef(unsigned Reg,
+ const MachineRegisterInfo *MRI) {
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return 0;
+ return MRI->getUniqueVRegDef(Reg);
+}
+
+// Return true if MI is a shift of type Opcode by Imm bits.
+static bool isShift(MachineInstr *MI, int Opcode, int64_t Imm) {
+ return (MI->getOpcode() == Opcode &&
+ !MI->getOperand(2).getReg() &&
+ MI->getOperand(3).getImm() == Imm);
+}
+
+// If the destination of MI has no uses, delete it as dead.
+static void eraseIfDead(MachineInstr *MI, const MachineRegisterInfo *MRI) {
+ if (MRI->use_nodbg_empty(MI->getOperand(0).getReg()))
+ MI->eraseFromParent();
+}
+
+// Compare compares SrcReg against zero. Check whether SrcReg contains
+// the result of an IPM sequence whose input CC survives until Compare,
+// and whether Compare is therefore redundant. Delete it and return
+// true if so.
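+//
+// The sequence matched below is IPM, SRL by 28 (SystemZ::IPM_CC) and
+// RLL by 31, optionally widened by LGFR; it leaves the two CC bits in
+// the sign bit and bit 0 of the result, so, as long as nothing redefines
+// CC in between, comparing that result against zero adds no information.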
+static bool removeIPMBasedCompare(MachineInstr *Compare, unsigned SrcReg,
+ const MachineRegisterInfo *MRI,
+ const TargetRegisterInfo *TRI) {
+ MachineInstr *LGFR = 0;
+ MachineInstr *RLL = getDef(SrcReg, MRI);
+ if (RLL && RLL->getOpcode() == SystemZ::LGFR) {
+ LGFR = RLL;
+ RLL = getDef(LGFR->getOperand(1).getReg(), MRI);
+ }
+ if (!RLL || !isShift(RLL, SystemZ::RLL, 31))
+ return false;
+
+ MachineInstr *SRL = getDef(RLL->getOperand(1).getReg(), MRI);
+ if (!SRL || !isShift(SRL, SystemZ::SRL, SystemZ::IPM_CC))
+ return false;
+
+ MachineInstr *IPM = getDef(SRL->getOperand(1).getReg(), MRI);
+ if (!IPM || IPM->getOpcode() != SystemZ::IPM)
+ return false;
+
+ // Check that there are no assignments to CC between the IPM and Compare.
+ if (IPM->getParent() != Compare->getParent())
+ return false;
+ MachineBasicBlock::iterator MBBI = IPM, MBBE = Compare;
+ for (++MBBI; MBBI != MBBE; ++MBBI) {
+ MachineInstr *MI = MBBI;
+ if (MI->modifiesRegister(SystemZ::CC, TRI))
+ return false;
+ }
+
+ Compare->eraseFromParent();
+ if (LGFR)
+ eraseIfDead(LGFR, MRI);
+ eraseIfDead(RLL, MRI);
+ eraseIfDead(SRL, MRI);
+ eraseIfDead(IPM, MRI);
+
+ return true;
+}
+
+bool
+SystemZInstrInfo::optimizeCompareInstr(MachineInstr *Compare,
+ unsigned SrcReg, unsigned SrcReg2,
+ int Mask, int Value,
+ const MachineRegisterInfo *MRI) const {
+ assert(!SrcReg2 && "Only optimizing constant comparisons so far");
+ bool IsLogical = (Compare->getDesc().TSFlags & SystemZII::IsLogical) != 0;
+ if (Value == 0 &&
+ !IsLogical &&
+ removeIPMBasedCompare(Compare, SrcReg, MRI, TM.getRegisterInfo()))
+ return true;
+ return false;
+}
+
+// If Opcode is a move that has a conditional variant, return that variant,
+// otherwise return 0.
+static unsigned getConditionalMove(unsigned Opcode) {
+ switch (Opcode) {
+ case SystemZ::LR: return SystemZ::LOCR;
+ case SystemZ::LGR: return SystemZ::LOCGR;
+ default: return 0;
+ }
+}
+
+bool SystemZInstrInfo::isPredicable(MachineInstr *MI) const {
+ unsigned Opcode = MI->getOpcode();
+ if (TM.getSubtargetImpl()->hasLoadStoreOnCond() &&
+ getConditionalMove(Opcode))
+ return true;
+ return false;
+}
+
+bool SystemZInstrInfo::
+isProfitableToIfCvt(MachineBasicBlock &MBB,
+ unsigned NumCycles, unsigned ExtraPredCycles,
+ const BranchProbability &Probability) const {
+ // For now only convert single instructions.
+ return NumCycles == 1;
+}
+
+bool SystemZInstrInfo::
+isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumCyclesT, unsigned ExtraPredCyclesT,
+ MachineBasicBlock &FMBB,
+ unsigned NumCyclesF, unsigned ExtraPredCyclesF,
+ const BranchProbability &Probability) const {
+ // For now avoid converting mutually-exclusive cases.
+ return false;
+}
+
+bool SystemZInstrInfo::
+PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const {
+ assert(Pred.size() == 2 && "Invalid condition");
+ unsigned CCValid = Pred[0].getImm();
+ unsigned CCMask = Pred[1].getImm();
+ assert(CCMask > 0 && CCMask < 15 && "Invalid predicate");
+ unsigned Opcode = MI->getOpcode();
+ if (TM.getSubtargetImpl()->hasLoadStoreOnCond()) {
+ if (unsigned CondOpcode = getConditionalMove(Opcode)) {
+ MI->setDesc(get(CondOpcode));
+ MachineInstrBuilder(*MI->getParent()->getParent(), MI)
+ .addImm(CCValid).addImm(CCMask)
+ .addReg(SystemZ::CC, RegState::Implicit);
+ return true;
+ }
+ }
+ return false;
+}
+
void
SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, DebugLoc DL,
@@ -254,18 +556,21 @@ SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
bool KillSrc) const {
// Split 128-bit GPR moves into two 64-bit moves. This handles ADDR128 too.
if (SystemZ::GR128BitRegClass.contains(DestReg, SrcReg)) {
- copyPhysReg(MBB, MBBI, DL, RI.getSubReg(DestReg, SystemZ::subreg_high),
- RI.getSubReg(SrcReg, SystemZ::subreg_high), KillSrc);
- copyPhysReg(MBB, MBBI, DL, RI.getSubReg(DestReg, SystemZ::subreg_low),
- RI.getSubReg(SrcReg, SystemZ::subreg_low), KillSrc);
+ copyPhysReg(MBB, MBBI, DL, RI.getSubReg(DestReg, SystemZ::subreg_h64),
+ RI.getSubReg(SrcReg, SystemZ::subreg_h64), KillSrc);
+ copyPhysReg(MBB, MBBI, DL, RI.getSubReg(DestReg, SystemZ::subreg_l64),
+ RI.getSubReg(SrcReg, SystemZ::subreg_l64), KillSrc);
+ return;
+ }
+
+ if (SystemZ::GRX32BitRegClass.contains(DestReg, SrcReg)) {
+ emitGRX32Move(MBB, MBBI, DL, DestReg, SrcReg, SystemZ::LR, 32, KillSrc);
return;
}
// Everything else needs only one instruction.
unsigned Opcode;
- if (SystemZ::GR32BitRegClass.contains(DestReg, SrcReg))
- Opcode = SystemZ::LR;
- else if (SystemZ::GR64BitRegClass.contains(DestReg, SrcReg))
+ if (SystemZ::GR64BitRegClass.contains(DestReg, SrcReg))
Opcode = SystemZ::LGR;
else if (SystemZ::FP32BitRegClass.contains(DestReg, SrcReg))
Opcode = SystemZ::LER;
@@ -313,6 +618,256 @@ SystemZInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
FrameIdx);
}
+// Return true if MI is a simple load or store with a 12-bit displacement
+// and no index. Flag is SimpleBDXLoad for loads and SimpleBDXStore for stores.
+static bool isSimpleBD12Move(const MachineInstr *MI, unsigned Flag) {
+ const MCInstrDesc &MCID = MI->getDesc();
+ return ((MCID.TSFlags & Flag) &&
+ isUInt<12>(MI->getOperand(2).getImm()) &&
+ MI->getOperand(3).getReg() == 0);
+}
+
+namespace {
+ struct LogicOp {
+ LogicOp() : RegSize(0), ImmLSB(0), ImmSize(0) {}
+ LogicOp(unsigned regSize, unsigned immLSB, unsigned immSize)
+ : RegSize(regSize), ImmLSB(immLSB), ImmSize(immSize) {}
+
+ operator bool() const { return RegSize; }
+
+ unsigned RegSize, ImmLSB, ImmSize;
+ };
+}
+
+static LogicOp interpretAndImmediate(unsigned Opcode) {
+ switch (Opcode) {
+ case SystemZ::NILMux: return LogicOp(32, 0, 16);
+ case SystemZ::NIHMux: return LogicOp(32, 16, 16);
+ case SystemZ::NILL64: return LogicOp(64, 0, 16);
+ case SystemZ::NILH64: return LogicOp(64, 16, 16);
+ case SystemZ::NIHL64: return LogicOp(64, 32, 16);
+ case SystemZ::NIHH64: return LogicOp(64, 48, 16);
+ case SystemZ::NIFMux: return LogicOp(32, 0, 32);
+ case SystemZ::NILF64: return LogicOp(64, 0, 32);
+ case SystemZ::NIHF64: return LogicOp(64, 32, 32);
+ default: return LogicOp();
+ }
+}
+
+// Used to return from convertToThreeAddress after replacing two-address
+// instruction OldMI with three-address instruction NewMI.
+static MachineInstr *finishConvertToThreeAddress(MachineInstr *OldMI,
+ MachineInstr *NewMI,
+ LiveVariables *LV) {
+ if (LV) {
+ unsigned NumOps = OldMI->getNumOperands();
+ for (unsigned I = 1; I < NumOps; ++I) {
+ MachineOperand &Op = OldMI->getOperand(I);
+ if (Op.isReg() && Op.isKill())
+ LV->replaceKillInstruction(Op.getReg(), OldMI, NewMI);
+ }
+ }
+ return NewMI;
+}
+
+MachineInstr *
+SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const {
+ MachineInstr *MI = MBBI;
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+
+ unsigned Opcode = MI->getOpcode();
+ unsigned NumOps = MI->getNumOperands();
+
+ // Try to convert something like SLL into SLLK, if supported.
+ // We prefer to keep the two-operand form where possible both
+ // because it tends to be shorter and because some instructions
+ // have memory forms that can be used during spilling.
+ if (TM.getSubtargetImpl()->hasDistinctOps()) {
+ MachineOperand &Dest = MI->getOperand(0);
+ MachineOperand &Src = MI->getOperand(1);
+ unsigned DestReg = Dest.getReg();
+ unsigned SrcReg = Src.getReg();
+ // AHIMux is only really a three-operand instruction when both operands
+ // are low registers. Try to constrain both operands to be low if
+ // possible.
+ if (Opcode == SystemZ::AHIMux &&
+ TargetRegisterInfo::isVirtualRegister(DestReg) &&
+ TargetRegisterInfo::isVirtualRegister(SrcReg) &&
+ MRI.getRegClass(DestReg)->contains(SystemZ::R1L) &&
+ MRI.getRegClass(SrcReg)->contains(SystemZ::R1L)) {
+ MRI.constrainRegClass(DestReg, &SystemZ::GR32BitRegClass);
+ MRI.constrainRegClass(SrcReg, &SystemZ::GR32BitRegClass);
+ }
+ int ThreeOperandOpcode = SystemZ::getThreeOperandOpcode(Opcode);
+ if (ThreeOperandOpcode >= 0) {
+ MachineInstrBuilder MIB =
+ BuildMI(*MBB, MBBI, MI->getDebugLoc(), get(ThreeOperandOpcode))
+ .addOperand(Dest);
+ // Keep the kill state, but drop the tied flag.
+ MIB.addReg(Src.getReg(), getKillRegState(Src.isKill()), Src.getSubReg());
+ // Keep the remaining operands as-is.
+ for (unsigned I = 2; I < NumOps; ++I)
+ MIB.addOperand(MI->getOperand(I));
+ return finishConvertToThreeAddress(MI, MIB, LV);
+ }
+ }
+
+ // Try to convert an AND into an RISBG-type instruction.
+ if (LogicOp And = interpretAndImmediate(Opcode)) {
+ uint64_t Imm = MI->getOperand(2).getImm() << And.ImmLSB;
+ // AND IMMEDIATE leaves the other bits of the register unchanged.
+ Imm |= allOnes(And.RegSize) & ~(allOnes(And.ImmSize) << And.ImmLSB);
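+ // For example, NILL64 with an immediate of 0xfffe clears only bit 63,
+ // so Imm becomes 0xfffffffffffffffe: a contiguous mask of bits 0-62
+ // that isRxSBGMask accepts with Start = 0 and End = 62.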
+ unsigned Start, End;
+ if (isRxSBGMask(Imm, And.RegSize, Start, End)) {
+ unsigned NewOpcode;
+ if (And.RegSize == 64)
+ NewOpcode = SystemZ::RISBG;
+ else {
+ NewOpcode = SystemZ::RISBMux;
+ Start &= 31;
+ End &= 31;
+ }
+ MachineOperand &Dest = MI->getOperand(0);
+ MachineOperand &Src = MI->getOperand(1);
+ MachineInstrBuilder MIB =
+ BuildMI(*MBB, MI, MI->getDebugLoc(), get(NewOpcode))
+ .addOperand(Dest).addReg(0)
+ .addReg(Src.getReg(), getKillRegState(Src.isKill()), Src.getSubReg())
+ .addImm(Start).addImm(End + 128).addImm(0);
+ return finishConvertToThreeAddress(MI, MIB, LV);
+ }
+ }
+ return 0;
+}
+
+MachineInstr *
+SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ unsigned Size = MFI->getObjectSize(FrameIndex);
+ unsigned Opcode = MI->getOpcode();
+
+ if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
+ if ((Opcode == SystemZ::LA || Opcode == SystemZ::LAY) &&
+ isInt<8>(MI->getOperand(2).getImm()) &&
+ !MI->getOperand(3).getReg()) {
+ // LA(Y) %reg, CONST(%reg) -> AGSI %mem, CONST
+ return BuildMI(MF, MI->getDebugLoc(), get(SystemZ::AGSI))
+ .addFrameIndex(FrameIndex).addImm(0)
+ .addImm(MI->getOperand(2).getImm());
+ }
+ return 0;
+ }
+
+ // All other cases require a single operand.
+ if (Ops.size() != 1)
+ return 0;
+
+ unsigned OpNum = Ops[0];
+ assert(Size == MF.getRegInfo()
+ .getRegClass(MI->getOperand(OpNum).getReg())->getSize() &&
+ "Invalid size combination");
+
+ if ((Opcode == SystemZ::AHI || Opcode == SystemZ::AGHI) &&
+ OpNum == 0 &&
+ isInt<8>(MI->getOperand(2).getImm())) {
+ // A(G)HI %reg, CONST -> A(G)SI %mem, CONST
+ Opcode = (Opcode == SystemZ::AHI ? SystemZ::ASI : SystemZ::AGSI);
+ return BuildMI(MF, MI->getDebugLoc(), get(Opcode))
+ .addFrameIndex(FrameIndex).addImm(0)
+ .addImm(MI->getOperand(2).getImm());
+ }
+
+ if (Opcode == SystemZ::LGDR || Opcode == SystemZ::LDGR) {
+ bool Op0IsGPR = (Opcode == SystemZ::LGDR);
+ bool Op1IsGPR = (Opcode == SystemZ::LDGR);
+ // If we're spilling the destination of an LDGR or LGDR, store the
+ // source register instead.
+ if (OpNum == 0) {
+ unsigned StoreOpcode = Op1IsGPR ? SystemZ::STG : SystemZ::STD;
+ return BuildMI(MF, MI->getDebugLoc(), get(StoreOpcode))
+ .addOperand(MI->getOperand(1)).addFrameIndex(FrameIndex)
+ .addImm(0).addReg(0);
+ }
+ // If we're spilling the source of an LDGR or LGDR, load the
+ // destination register instead.
+ if (OpNum == 1) {
+ unsigned LoadOpcode = Op0IsGPR ? SystemZ::LG : SystemZ::LD;
+ unsigned Dest = MI->getOperand(0).getReg();
+ return BuildMI(MF, MI->getDebugLoc(), get(LoadOpcode), Dest)
+ .addFrameIndex(FrameIndex).addImm(0).addReg(0);
+ }
+ }
+
+ // Look for cases where the source of a simple store or the destination
+ // of a simple load is being spilled. Try to use MVC instead.
+ //
+ // Although MVC is in practice a fast choice in these cases, it is still
+ // logically a bytewise copy. This means that we cannot use it if the
+ // load or store is volatile. We also wouldn't be able to use MVC if
+ // the two memories partially overlap, but that case cannot occur here,
+ // because we know that one of the memories is a full frame index.
+ //
+ // For performance reasons, we also want to avoid using MVC if the addresses
+ // might be equal. We don't worry about that case here, because spill slot
+ // coloring happens later, and because we have special code to remove
+ // MVCs that turn out to be redundant.
+ if (OpNum == 0 && MI->hasOneMemOperand()) {
+ MachineMemOperand *MMO = *MI->memoperands_begin();
+ if (MMO->getSize() == Size && !MMO->isVolatile()) {
+ // Handle conversion of loads.
+ if (isSimpleBD12Move(MI, SystemZII::SimpleBDXLoad)) {
+ return BuildMI(MF, MI->getDebugLoc(), get(SystemZ::MVC))
+ .addFrameIndex(FrameIndex).addImm(0).addImm(Size)
+ .addOperand(MI->getOperand(1)).addImm(MI->getOperand(2).getImm())
+ .addMemOperand(MMO);
+ }
+ // Handle conversion of stores.
+ if (isSimpleBD12Move(MI, SystemZII::SimpleBDXStore)) {
+ return BuildMI(MF, MI->getDebugLoc(), get(SystemZ::MVC))
+ .addOperand(MI->getOperand(1)).addImm(MI->getOperand(2).getImm())
+ .addImm(Size).addFrameIndex(FrameIndex).addImm(0)
+ .addMemOperand(MMO);
+ }
+ }
+ }
+
+ // If the spilled operand is the final one, try to change <INSN>R
+ // into <INSN>.
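+ // For example, when the register operand of an AGR is spilled, the
+ // opcode map is expected to give AG, so the addition can be done
+ // directly from the spill slot (the AGR/AG pairing here is
+ // illustrative; the real mapping comes from the generated tables).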
+ int MemOpcode = SystemZ::getMemOpcode(Opcode);
+ if (MemOpcode >= 0) {
+ unsigned NumOps = MI->getNumExplicitOperands();
+ if (OpNum == NumOps - 1) {
+ const MCInstrDesc &MemDesc = get(MemOpcode);
+ uint64_t AccessBytes = SystemZII::getAccessSize(MemDesc.TSFlags);
+ assert(AccessBytes != 0 && "Size of access should be known");
+ assert(AccessBytes <= Size && "Access outside the frame index");
+ uint64_t Offset = Size - AccessBytes;
+ MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(MemOpcode));
+ for (unsigned I = 0; I < OpNum; ++I)
+ MIB.addOperand(MI->getOperand(I));
+ MIB.addFrameIndex(FrameIndex).addImm(Offset);
+ if (MemDesc.TSFlags & SystemZII::HasIndex)
+ MIB.addReg(0);
+ return MIB;
+ }
+ }
+
+ return 0;
+}
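
The Offset = Size - AccessBytes computation above relies on SystemZ being big-endian: a narrower memory form must access the least significant AccessBytes of the spilled value, and those bytes sit at the end of the Size-byte slot. A minimal standalone C++ sketch, separate from the patch (foldedOffset is an illustrative name):

    // Illustrates the folded-access placement used above: on a
    // big-endian target the low-order AccessBytes of a Size-byte spill
    // slot live at the highest addresses, so the folded instruction
    // addresses offset Size - AccessBytes.
    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    static uint64_t foldedOffset(uint64_t Size, uint64_t AccessBytes) {
      assert(AccessBytes != 0 && AccessBytes <= Size);
      return Size - AccessBytes;
    }

    int main() {
      // A 4-byte memory form folded against an 8-byte slot
      // addresses bytes 4..7 of the slot.
      std::printf("%llu\n", (unsigned long long)foldedOffset(8, 4)); // 4
      // A full-width access starts at offset 0.
      std::printf("%llu\n", (unsigned long long)foldedOffset(8, 8)); // 0
      return 0;
    }
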
+
+MachineInstr *
+SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr* LoadMI) const {
+ return 0;
+}
+
bool
SystemZInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
switch (MI->getOpcode()) {
@@ -332,6 +887,138 @@ SystemZInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
splitMove(MI, SystemZ::STD);
return true;
+ case SystemZ::LBMux:
+ expandRXYPseudo(MI, SystemZ::LB, SystemZ::LBH);
+ return true;
+
+ case SystemZ::LHMux:
+ expandRXYPseudo(MI, SystemZ::LH, SystemZ::LHH);
+ return true;
+
+ case SystemZ::LLCRMux:
+ expandZExtPseudo(MI, SystemZ::LLCR, 8);
+ return true;
+
+ case SystemZ::LLHRMux:
+ expandZExtPseudo(MI, SystemZ::LLHR, 16);
+ return true;
+
+ case SystemZ::LLCMux:
+ expandRXYPseudo(MI, SystemZ::LLC, SystemZ::LLCH);
+ return true;
+
+ case SystemZ::LLHMux:
+ expandRXYPseudo(MI, SystemZ::LLH, SystemZ::LLHH);
+ return true;
+
+ case SystemZ::LMux:
+ expandRXYPseudo(MI, SystemZ::L, SystemZ::LFH);
+ return true;
+
+ case SystemZ::STCMux:
+ expandRXYPseudo(MI, SystemZ::STC, SystemZ::STCH);
+ return true;
+
+ case SystemZ::STHMux:
+ expandRXYPseudo(MI, SystemZ::STH, SystemZ::STHH);
+ return true;
+
+ case SystemZ::STMux:
+ expandRXYPseudo(MI, SystemZ::ST, SystemZ::STFH);
+ return true;
+
+ case SystemZ::LHIMux:
+ expandRIPseudo(MI, SystemZ::LHI, SystemZ::IIHF, true);
+ return true;
+
+ case SystemZ::IIFMux:
+ expandRIPseudo(MI, SystemZ::IILF, SystemZ::IIHF, false);
+ return true;
+
+ case SystemZ::IILMux:
+ expandRIPseudo(MI, SystemZ::IILL, SystemZ::IIHL, false);
+ return true;
+
+ case SystemZ::IIHMux:
+ expandRIPseudo(MI, SystemZ::IILH, SystemZ::IIHH, false);
+ return true;
+
+ case SystemZ::NIFMux:
+ expandRIPseudo(MI, SystemZ::NILF, SystemZ::NIHF, false);
+ return true;
+
+ case SystemZ::NILMux:
+ expandRIPseudo(MI, SystemZ::NILL, SystemZ::NIHL, false);
+ return true;
+
+ case SystemZ::NIHMux:
+ expandRIPseudo(MI, SystemZ::NILH, SystemZ::NIHH, false);
+ return true;
+
+ case SystemZ::OIFMux:
+ expandRIPseudo(MI, SystemZ::OILF, SystemZ::OIHF, false);
+ return true;
+
+ case SystemZ::OILMux:
+ expandRIPseudo(MI, SystemZ::OILL, SystemZ::OIHL, false);
+ return true;
+
+ case SystemZ::OIHMux:
+ expandRIPseudo(MI, SystemZ::OILH, SystemZ::OIHH, false);
+ return true;
+
+ case SystemZ::XIFMux:
+ expandRIPseudo(MI, SystemZ::XILF, SystemZ::XIHF, false);
+ return true;
+
+ case SystemZ::TMLMux:
+ expandRIPseudo(MI, SystemZ::TMLL, SystemZ::TMHL, false);
+ return true;
+
+ case SystemZ::TMHMux:
+ expandRIPseudo(MI, SystemZ::TMLH, SystemZ::TMHH, false);
+ return true;
+
+ case SystemZ::AHIMux:
+ expandRIPseudo(MI, SystemZ::AHI, SystemZ::AIH, false);
+ return true;
+
+ case SystemZ::AHIMuxK:
+ expandRIEPseudo(MI, SystemZ::AHI, SystemZ::AHIK, SystemZ::AIH);
+ return true;
+
+ case SystemZ::AFIMux:
+ expandRIPseudo(MI, SystemZ::AFI, SystemZ::AIH, false);
+ return true;
+
+ case SystemZ::CFIMux:
+ expandRIPseudo(MI, SystemZ::CFI, SystemZ::CIH, false);
+ return true;
+
+ case SystemZ::CLFIMux:
+ expandRIPseudo(MI, SystemZ::CLFI, SystemZ::CLIH, false);
+ return true;
+
+ case SystemZ::CMux:
+ expandRXYPseudo(MI, SystemZ::C, SystemZ::CHF);
+ return true;
+
+ case SystemZ::CLMux:
+ expandRXYPseudo(MI, SystemZ::CL, SystemZ::CLHF);
+ return true;
+
+ case SystemZ::RISBMux: {
+ bool DestIsHigh = isHighReg(MI->getOperand(0).getReg());
+ bool SrcIsHigh = isHighReg(MI->getOperand(2).getReg());
+ if (SrcIsHigh == DestIsHigh)
+ MI->setDesc(get(DestIsHigh ? SystemZ::RISBHH : SystemZ::RISBLL));
+ else {
+ MI->setDesc(get(DestIsHigh ? SystemZ::RISBHL : SystemZ::RISBLH));
+ MI->getOperand(5).setImm(MI->getOperand(5).getImm() ^ 32);
+ }
+ return true;
+ }
+
case SystemZ::ADJDYNALLOC:
splitAdjDynAlloc(MI);
return true;
@@ -341,32 +1028,60 @@ SystemZInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
}
}
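
The RISBMux case above XORs the rotate operand (operand 5) with 32 when exactly one of the two registers is a high part. A standalone C++ sketch of the arithmetic behind that fix-up (the RISB bit-selection semantics are elided): moving a field between the 32-bit halves of a 64-bit register costs an extra rotation by 32, and adding 32 modulo 64 is exactly an XOR with 32.

    // Demonstrates that (Rot + 32) % 64 == Rot ^ 32 for every rotate
    // amount in [0, 64), which is why the expansion can adjust the
    // rotate operand with a single XOR.
    #include <cassert>
    #include <cstdio>

    int main() {
      for (unsigned Rot = 0; Rot < 64; ++Rot)
        assert(((Rot + 32) & 63) == (Rot ^ 32));
      std::printf("XOR with 32 == add 32 mod 64 for all rotates\n");
      return 0;
    }
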
-bool SystemZInstrInfo::
-ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
- assert(Cond.size() == 1 && "Invalid branch condition!");
- Cond[0].setImm(Cond[0].getImm() ^ SystemZ::CCMASK_ANY);
- return false;
+uint64_t SystemZInstrInfo::getInstSizeInBytes(const MachineInstr *MI) const {
+ if (MI->getOpcode() == TargetOpcode::INLINEASM) {
+ const MachineFunction *MF = MI->getParent()->getParent();
+ const char *AsmStr = MI->getOperand(0).getSymbolName();
+ return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
+ }
+ return MI->getDesc().getSize();
}
-bool SystemZInstrInfo::isBranch(const MachineInstr *MI, unsigned &Cond,
- const MachineOperand *&Target) const {
+SystemZII::Branch
+SystemZInstrInfo::getBranchInfo(const MachineInstr *MI) const {
switch (MI->getOpcode()) {
case SystemZ::BR:
case SystemZ::J:
case SystemZ::JG:
- Cond = SystemZ::CCMASK_ANY;
- Target = &MI->getOperand(0);
- return true;
+ return SystemZII::Branch(SystemZII::BranchNormal, SystemZ::CCMASK_ANY,
+ SystemZ::CCMASK_ANY, &MI->getOperand(0));
case SystemZ::BRC:
case SystemZ::BRCL:
- Cond = MI->getOperand(0).getImm();
- Target = &MI->getOperand(1);
- return true;
+ return SystemZII::Branch(SystemZII::BranchNormal,
+ MI->getOperand(0).getImm(),
+ MI->getOperand(1).getImm(), &MI->getOperand(2));
+
+ case SystemZ::BRCT:
+ return SystemZII::Branch(SystemZII::BranchCT, SystemZ::CCMASK_ICMP,
+ SystemZ::CCMASK_CMP_NE, &MI->getOperand(2));
+
+ case SystemZ::BRCTG:
+ return SystemZII::Branch(SystemZII::BranchCTG, SystemZ::CCMASK_ICMP,
+ SystemZ::CCMASK_CMP_NE, &MI->getOperand(2));
+
+ case SystemZ::CIJ:
+ case SystemZ::CRJ:
+ return SystemZII::Branch(SystemZII::BranchC, SystemZ::CCMASK_ICMP,
+ MI->getOperand(2).getImm(), &MI->getOperand(3));
+
+ case SystemZ::CLIJ:
+ case SystemZ::CLRJ:
+ return SystemZII::Branch(SystemZII::BranchCL, SystemZ::CCMASK_ICMP,
+ MI->getOperand(2).getImm(), &MI->getOperand(3));
+
+ case SystemZ::CGIJ:
+ case SystemZ::CGRJ:
+ return SystemZII::Branch(SystemZII::BranchCG, SystemZ::CCMASK_ICMP,
+ MI->getOperand(2).getImm(), &MI->getOperand(3));
+
+ case SystemZ::CLGIJ:
+ case SystemZ::CLGRJ:
+ return SystemZII::Branch(SystemZII::BranchCLG, SystemZ::CCMASK_ICMP,
+ MI->getOperand(2).getImm(), &MI->getOperand(3));
default:
- assert(!MI->getDesc().isBranch() && "Unknown branch opcode");
- return false;
+ llvm_unreachable("Unrecognized branch opcode");
}
}
@@ -375,7 +1090,13 @@ void SystemZInstrInfo::getLoadStoreOpcodes(const TargetRegisterClass *RC,
unsigned &StoreOpcode) const {
if (RC == &SystemZ::GR32BitRegClass || RC == &SystemZ::ADDR32BitRegClass) {
LoadOpcode = SystemZ::L;
- StoreOpcode = SystemZ::ST32;
+ StoreOpcode = SystemZ::ST;
+ } else if (RC == &SystemZ::GRH32BitRegClass) {
+ LoadOpcode = SystemZ::LFH;
+ StoreOpcode = SystemZ::STFH;
+ } else if (RC == &SystemZ::GRX32BitRegClass) {
+ LoadOpcode = SystemZ::LMux;
+ StoreOpcode = SystemZ::STMux;
} else if (RC == &SystemZ::GR64BitRegClass ||
RC == &SystemZ::ADDR64BitRegClass) {
LoadOpcode = SystemZ::LG;
@@ -424,6 +1145,88 @@ unsigned SystemZInstrInfo::getOpcodeForOffset(unsigned Opcode,
return 0;
}
+unsigned SystemZInstrInfo::getLoadAndTest(unsigned Opcode) const {
+ switch (Opcode) {
+ case SystemZ::L: return SystemZ::LT;
+ case SystemZ::LY: return SystemZ::LT;
+ case SystemZ::LG: return SystemZ::LTG;
+ case SystemZ::LGF: return SystemZ::LTGF;
+ case SystemZ::LR: return SystemZ::LTR;
+ case SystemZ::LGFR: return SystemZ::LTGFR;
+ case SystemZ::LGR: return SystemZ::LTGR;
+ case SystemZ::LER: return SystemZ::LTEBR;
+ case SystemZ::LDR: return SystemZ::LTDBR;
+ case SystemZ::LXR: return SystemZ::LTXBR;
+ default: return 0;
+ }
+}
+
+// Return true if Mask matches the regexp 0*1+0*, given that zero masks
+// have already been filtered out. Store the first set bit in LSB and
+// the number of set bits in Length if so.
+static bool isStringOfOnes(uint64_t Mask, unsigned &LSB, unsigned &Length) {
+ unsigned First = findFirstSet(Mask);
+ uint64_t Top = (Mask >> First) + 1;
+ if ((Top & -Top) == Top) {
+ LSB = First;
+ Length = findFirstSet(Top);
+ return true;
+ }
+ return false;
+}
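
A standalone C++ sketch of the contiguity test above, using the GCC/Clang builtin __builtin_ctzll as a stand-in for LLVM's findFirstSet: shifting the mask down by its lowest set bit and adding 1 yields a power of two exactly when the set bits form a single run, and (Top & -Top) == Top is the usual power-of-two test.

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    static bool stringOfOnes(uint64_t Mask, unsigned &LSB, unsigned &Length) {
      assert(Mask != 0);
      unsigned First = __builtin_ctzll(Mask);   // index of lowest set bit
      uint64_t Top = (Mask >> First) + 1;
      assert(Top != 0 && "all-ones 64-bit mask not handled in this sketch");
      if ((Top & -Top) == Top) {                // power-of-two test
        LSB = First;
        Length = __builtin_ctzll(Top);          // log2(Top) = run length
        return true;
      }
      return false;
    }

    int main() {
      unsigned LSB, Length;
      // 0x1C = 0b11100: one run of three ones starting at bit 2.
      std::printf("%d\n", stringOfOnes(0x1C, LSB, Length)); // 1
      std::printf("LSB=%u Length=%u\n", LSB, Length);       // LSB=2 Length=3
      // 0x24 = 0b100100: two runs, so the test fails.
      std::printf("%d\n", stringOfOnes(0x24, LSB, Length)); // 0
      return 0;
    }
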
+
+bool SystemZInstrInfo::isRxSBGMask(uint64_t Mask, unsigned BitSize,
+ unsigned &Start, unsigned &End) const {
+ // Reject trivial all-zero masks.
+ if (Mask == 0)
+ return false;
+
+ // Handle the 1+0+ or 0+1+0* cases. Start then specifies the index of
+ // the msb and End specifies the index of the lsb.
+ unsigned LSB, Length;
+ if (isStringOfOnes(Mask, LSB, Length)) {
+ Start = 63 - (LSB + Length - 1);
+ End = 63 - LSB;
+ return true;
+ }
+
+ // Handle the wrap-around 1+0+1+ cases. Start then specifies the msb
+ // of the low 1s and End specifies the lsb of the high 1s.
+ if (isStringOfOnes(Mask ^ allOnes(BitSize), LSB, Length)) {
+ assert(LSB > 0 && "Bottom bit must be set");
+ assert(LSB + Length < BitSize && "Top bit must be set");
+ Start = 63 - (LSB - 1);
+ End = 63 - (LSB + Length);
+ return true;
+ }
+
+ return false;
+}
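
A worked example of the index conversion above, as standalone C++: the I3/I4 operands of the RISBG family number bits from the MSB (bit 0) down to the LSB (bit 63), while isStringOfOnes reports LSB-based positions, hence the 63 - ... terms.

    #include <cstdio>

    int main() {
      // Contiguous case: Mask = 0x0000000000000FF0, ones at LSB-based
      // bits 4..11 (LSB = 4, Length = 8).
      unsigned LSB = 4, Length = 8;
      unsigned Start = 63 - (LSB + Length - 1); // 52: MSB-based msb of run
      unsigned End = 63 - LSB;                  // 59: MSB-based lsb of run
      std::printf("contiguous: I3=%u I4=%u\n", Start, End);

      // Wrap-around case: Mask = 0xF00000000000000F.  Its complement
      // 0x0FFFFFFFFFFFFFF0 is a single run with LSB = 4 and Length = 56.
      LSB = 4; Length = 56;
      Start = 63 - (LSB - 1);    // 60: selects LSB-based bit 3, the top
                                 // of the low ones
      End = 63 - (LSB + Length); // 3: selects LSB-based bit 60, the
                                 // bottom of the high ones
      std::printf("wrap-around: I3=%u I4=%u\n", Start, End);
      return 0;
    }
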
+
+unsigned SystemZInstrInfo::getCompareAndBranch(unsigned Opcode,
+ const MachineInstr *MI) const {
+ switch (Opcode) {
+ case SystemZ::CR:
+ return SystemZ::CRJ;
+ case SystemZ::CGR:
+ return SystemZ::CGRJ;
+ case SystemZ::CHI:
+ return MI && isInt<8>(MI->getOperand(1).getImm()) ? SystemZ::CIJ : 0;
+ case SystemZ::CGHI:
+ return MI && isInt<8>(MI->getOperand(1).getImm()) ? SystemZ::CGIJ : 0;
+ case SystemZ::CLR:
+ return SystemZ::CLRJ;
+ case SystemZ::CLGR:
+ return SystemZ::CLGRJ;
+ case SystemZ::CLFI:
+ return MI && isUInt<8>(MI->getOperand(1).getImm()) ? SystemZ::CLIJ : 0;
+ case SystemZ::CLGFI:
+ return MI && isUInt<8>(MI->getOperand(1).getImm()) ? SystemZ::CLGIJ : 0;
+ default:
+ return 0;
+ }
+}
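
The immediate checks above mirror the encodings defined later in SystemZInstrInfo.td: CIJ and CGIJ take a signed 8-bit immediate (imm32sx8/imm64sx8), CLIJ and CLGIJ an unsigned one (imm32zx8/imm64zx8), so only compares against small immediates can be fused. A standalone C++ sketch with local stand-ins for LLVM's isInt<8>/isUInt<8>:

    #include <cstdint>
    #include <cstdio>

    static bool isInt8(int64_t V) { return V >= -128 && V <= 127; }
    static bool isUInt8(int64_t V) { return V >= 0 && V <= 255; }

    int main() {
      std::printf("%d\n", isInt8(-100)); // 1: CHI -> CIJ is possible
      std::printf("%d\n", isInt8(200));  // 0: stays a compare plus branch
      std::printf("%d\n", isUInt8(200)); // 1: CLFI -> CLIJ is possible
      return 0;
    }
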
+
void SystemZInstrInfo::loadImmediate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned Reg, uint64_t Value) const {
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
index 0fc4761..be4c8fe 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -28,12 +28,31 @@ class SystemZTargetMachine;
namespace SystemZII {
enum {
// See comments in SystemZInstrFormats.td.
- SimpleBDXLoad = (1 << 0),
- SimpleBDXStore = (1 << 1),
- Has20BitOffset = (1 << 2),
- HasIndex = (1 << 3),
- Is128Bit = (1 << 4)
+ SimpleBDXLoad = (1 << 0),
+ SimpleBDXStore = (1 << 1),
+ Has20BitOffset = (1 << 2),
+ HasIndex = (1 << 3),
+ Is128Bit = (1 << 4),
+ AccessSizeMask = (31 << 5),
+ AccessSizeShift = 5,
+ CCValuesMask = (15 << 10),
+ CCValuesShift = 10,
+ CompareZeroCCMaskMask = (15 << 14),
+ CompareZeroCCMaskShift = 14,
+ CCMaskFirst = (1 << 18),
+ CCMaskLast = (1 << 19),
+ IsLogical = (1 << 20)
};
+ static inline unsigned getAccessSize(unsigned int Flags) {
+ return (Flags & AccessSizeMask) >> AccessSizeShift;
+ }
+ static inline unsigned getCCValues(unsigned int Flags) {
+ return (Flags & CCValuesMask) >> CCValuesShift;
+ }
+ static inline unsigned getCompareZeroCCMask(unsigned int Flags) {
+ return (Flags & CompareZeroCCMaskMask) >> CompareZeroCCMaskShift;
+ }
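
A standalone C++ sketch of the packing these accessors decode; the field layout copies the enum above, while the packing side is illustrative (the real TSFlags values are emitted by TableGen):

    #include <cstdio>

    enum {
      AccessSizeMask = (31 << 5), AccessSizeShift = 5, // 5 bits, 0..31 bytes
      CCValuesMask = (15 << 10), CCValuesShift = 10    // 4-bit CC-value mask
    };

    static unsigned getAccessSize(unsigned Flags) {
      return (Flags & AccessSizeMask) >> AccessSizeShift;
    }
    static unsigned getCCValues(unsigned Flags) {
      return (Flags & CCValuesMask) >> CCValuesShift;
    }

    int main() {
      // Pack an 8-byte access and CC values 0-2 (mask 0xE) into one word.
      unsigned Flags = (8 << AccessSizeShift) | (0xE << CCValuesShift);
      std::printf("%u %#x\n", getAccessSize(Flags), getCCValues(Flags)); // 8 0xe
      return 0;
    }
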
+
// SystemZ MachineOperand target flags.
enum {
// Masks out the bits for the access model.
@@ -42,14 +61,74 @@ namespace SystemZII {
// @GOT (aka @GOTENT)
MO_GOT = (1 << 0)
};
+ // Classifies a branch.
+ enum BranchType {
+ // An instruction that branches on the current value of CC.
+ BranchNormal,
+
+ // An instruction that performs a 32-bit signed comparison and branches
+ // on the result.
+ BranchC,
+
+ // An instruction that performs a 32-bit unsigned comparison and branches
+ // on the result.
+ BranchCL,
+
+ // An instruction that performs a 64-bit signed comparison and branches
+ // on the result.
+ BranchCG,
+
+ // An instruction that performs a 64-bit unsigned comparison and branches
+ // on the result.
+ BranchCLG,
+
+ // An instruction that decrements a 32-bit register and branches if
+ // the result is nonzero.
+ BranchCT,
+
+ // An instruction that decrements a 64-bit register and branches if
+ // the result is nonzero.
+ BranchCTG
+ };
+ // Information about a branch instruction.
+ struct Branch {
+ // The type of the branch.
+ BranchType Type;
+
+ // CCMASK_<N> is set if CC might be equal to N.
+ unsigned CCValid;
+
+ // CCMASK_<N> is set if the branch should be taken when CC == N.
+ unsigned CCMask;
+
+ // The target of the branch.
+ const MachineOperand *Target;
+
+ Branch(BranchType type, unsigned ccValid, unsigned ccMask,
+ const MachineOperand *target)
+ : Type(type), CCValid(ccValid), CCMask(ccMask), Target(target) {}
+ };
}
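
A standalone C++ sketch of how CCValid and CCMask combine, assuming the SystemZ convention that condition code N maps to mask bit 1 << (3 - N) (so CCMASK_ICMP is 14 and branch-on-not-equal uses mask 6):

    #include <cstdio>

    static unsigned ccBit(unsigned CC) { return 1u << (3 - CC); }

    int main() {
      // Integer compares produce only CC 0 (eq), 1 (lt) or 2 (gt).
      unsigned CCValid = ccBit(0) | ccBit(1) | ccBit(2); // 14
      // Branch on "not equal" fires for CC 1 and CC 2.
      unsigned CCMask = ccBit(1) | ccBit(2);             // 6
      for (unsigned CC = 0; CC < 4; ++CC)
        if (CCValid & ccBit(CC))
          std::printf("CC %u: %staken\n", CC,
                      (CCMask & ccBit(CC)) ? "" : "not ");
      return 0;
    }
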
class SystemZInstrInfo : public SystemZGenInstrInfo {
const SystemZRegisterInfo RI;
+ SystemZTargetMachine &TM;
void splitMove(MachineBasicBlock::iterator MI, unsigned NewOpcode) const;
void splitAdjDynAlloc(MachineBasicBlock::iterator MI) const;
-
+ void expandRIPseudo(MachineInstr *MI, unsigned LowOpcode,
+ unsigned HighOpcode, bool ConvertHigh) const;
+ void expandRIEPseudo(MachineInstr *MI, unsigned LowOpcode,
+ unsigned LowOpcodeK, unsigned HighOpcode) const;
+ void expandRXYPseudo(MachineInstr *MI, unsigned LowOpcode,
+ unsigned HighOpcode) const;
+ void expandZExtPseudo(MachineInstr *MI, unsigned LowOpcode,
+ unsigned Size) const;
+ void emitGRX32Move(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ DebugLoc DL, unsigned DestReg, unsigned SrcReg,
+ unsigned LowLowOpcode, unsigned Size, bool KillSrc) const;
+ virtual void anchor();
+
public:
explicit SystemZInstrInfo(SystemZTargetMachine &TM);
@@ -58,6 +137,8 @@ public:
int &FrameIndex) const LLVM_OVERRIDE;
virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
int &FrameIndex) const LLVM_OVERRIDE;
+ virtual bool isStackSlotCopy(const MachineInstr *MI, int &DestFrameIndex,
+ int &SrcFrameIndex) const LLVM_OVERRIDE;
virtual bool AnalyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
@@ -68,6 +149,29 @@ public:
MachineBasicBlock *FBB,
const SmallVectorImpl<MachineOperand> &Cond,
DebugLoc DL) const LLVM_OVERRIDE;
+ bool analyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
+ unsigned &SrcReg2, int &Mask, int &Value) const
+ LLVM_OVERRIDE;
+ bool optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg,
+ unsigned SrcReg2, int Mask, int Value,
+ const MachineRegisterInfo *MRI) const LLVM_OVERRIDE;
+ virtual bool isPredicable(MachineInstr *MI) const LLVM_OVERRIDE;
+ virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
+ unsigned ExtraPredCycles,
+ const BranchProbability &Probability) const
+ LLVM_OVERRIDE;
+ virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumCyclesT,
+ unsigned ExtraPredCyclesT,
+ MachineBasicBlock &FMBB,
+ unsigned NumCyclesF,
+ unsigned ExtraPredCyclesF,
+ const BranchProbability &Probability) const
+ LLVM_OVERRIDE;
+ virtual bool
+ PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const
+ LLVM_OVERRIDE;
virtual void copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
@@ -84,6 +188,18 @@ public:
unsigned DestReg, int FrameIdx,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const LLVM_OVERRIDE;
+ virtual MachineInstr *
+ convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const;
+ virtual MachineInstr *
+ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const;
+ virtual MachineInstr *
+ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr* LoadMI) const;
virtual bool
expandPostRAPseudo(MachineBasicBlock::iterator MBBI) const LLVM_OVERRIDE;
virtual bool
@@ -93,13 +209,15 @@ public:
// Return the SystemZRegisterInfo, which this class owns.
const SystemZRegisterInfo &getRegisterInfo() const { return RI; }
+ // Return the size in bytes of MI.
+ uint64_t getInstSizeInBytes(const MachineInstr *MI) const;
+
// Return true if MI is a conditional or unconditional branch.
// When returning true, set Cond to the mask of condition-code
// values on which the instruction will branch, and set Target
// to the operand that contains the branch target. This target
// can be a register or a basic block.
- bool isBranch(const MachineInstr *MI, unsigned &Cond,
- const MachineOperand *&Target) const;
+ SystemZII::Branch getBranchInfo(const MachineInstr *MI) const;
// Get the load and store opcodes for a given register class.
void getLoadStoreOpcodes(const TargetRegisterClass *RC,
@@ -112,6 +230,22 @@ public:
// exists.
unsigned getOpcodeForOffset(unsigned Opcode, int64_t Offset) const;
+ // If Opcode is a load instruction that has a LOAD AND TEST form,
+ // return the opcode for the testing form, otherwise return 0.
+ unsigned getLoadAndTest(unsigned Opcode) const;
+
+ // Return true if ROTATE AND ... SELECTED BITS can be used to select
+ // the bits of the R2 operand that are set in Mask, given that only the
+ // low BitSize bits of Mask are significant. Set Start and End to the
+ // I3 and I4 operands if so.
+ bool isRxSBGMask(uint64_t Mask, unsigned BitSize,
+ unsigned &Start, unsigned &End) const;
+
+ // If Opcode is a COMPARE opcode for which an associated COMPARE AND
+ // BRANCH exists, return the opcode for the latter, otherwise return 0.
+ // MI, if nonnull, is the compare instruction.
+ unsigned getCompareAndBranch(unsigned Opcode,
+ const MachineInstr *MI = 0) const;
+
// Emit code before MBBI in MI to move immediate value Value into
// physical register Reg.
void loadImmediate(MachineBasicBlock &MBB,
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index 7ffa382..6524e44 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -32,77 +32,202 @@ let neverHasSideEffects = 1 in {
// Control flow instructions
//===----------------------------------------------------------------------===//
-// A return instruction. R1 is the condition-code mask (all 1s)
-// and R2 is the target address, which is always stored in %r14.
-let isReturn = 1, isTerminator = 1, isBarrier = 1, hasCtrlDep = 1,
- R1 = 15, R2 = 14, isCodeGenOnly = 1 in {
- def RET : InstRR<0x07, (outs), (ins), "br\t%r14", [(z_retflag)]>;
-}
+// A return instruction (br %r14).
+let isReturn = 1, isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in
+ def Return : Alias<2, (outs), (ins), [(z_retflag)]>;
// Unconditional branches. R1 is the condition-code mask (all 1s).
let isBranch = 1, isTerminator = 1, isBarrier = 1, R1 = 15 in {
let isIndirectBranch = 1 in
- def BR : InstRR<0x07, (outs), (ins ADDR64:$dst),
- "br\t$dst", [(brind ADDR64:$dst)]>;
+ def BR : InstRR<0x07, (outs), (ins ADDR64:$R2),
+ "br\t$R2", [(brind ADDR64:$R2)]>;
- // An assembler extended mnemonic for BRC. Use a separate instruction for
- // the asm parser, so that we don't relax Js to external symbols into JGs.
- let isCodeGenOnly = 1 in
- def J : InstRI<0xA74, (outs), (ins brtarget16:$dst), "j\t$dst", []>;
- let isAsmParserOnly = 1 in
- def AsmJ : InstRI<0xA74, (outs), (ins brtarget16:$dst), "j\t$dst", []>;
+ // An assembler extended mnemonic for BRC.
+ def J : InstRI<0xA74, (outs), (ins brtarget16:$I2), "j\t$I2",
+ [(br bb:$I2)]>;
// An assembler extended mnemonic for BRCL. (The extension is "G"
// rather than "L" because "JL" is "Jump if Less".)
- def JG : InstRIL<0xC04, (outs), (ins brtarget32:$dst),
- "jg\t$dst", [(br bb:$dst)]>;
+ def JG : InstRIL<0xC04, (outs), (ins brtarget32:$I2), "jg\t$I2", []>;
}
// Conditional branches. It's easier for LLVM to handle these branches
// in their raw BRC/BRCL form, with the 4-bit condition-code mask being
// the first operand. It seems friendlier to use mnemonic forms like
// JE and JLH when writing out the assembly though.
-multiclass CondBranches<Operand imm, string short, string long> {
- let isBranch = 1, isTerminator = 1, Uses = [PSW] in {
- def "" : InstRI<0xA74, (outs), (ins imm:$cond, brtarget16:$dst), short, []>;
- def L : InstRIL<0xC04, (outs), (ins imm:$cond, brtarget32:$dst), long, []>;
+let isBranch = 1, isTerminator = 1, Uses = [CC] in {
+ let isCodeGenOnly = 1, CCMaskFirst = 1 in {
+ def BRC : InstRI<0xA74, (outs), (ins cond4:$valid, cond4:$R1,
+ brtarget16:$I2), "j$R1\t$I2",
+ [(z_br_ccmask cond4:$valid, cond4:$R1, bb:$I2)]>;
+ def BRCL : InstRIL<0xC04, (outs), (ins cond4:$valid, cond4:$R1,
+ brtarget32:$I2), "jg$R1\t$I2", []>;
+ }
+ def AsmBRC : InstRI<0xA74, (outs), (ins uimm8zx4:$R1, brtarget16:$I2),
+ "brc\t$R1, $I2", []>;
+ def AsmBRCL : InstRIL<0xC04, (outs), (ins uimm8zx4:$R1, brtarget32:$I2),
+ "brcl\t$R1, $I2", []>;
+ def AsmBCR : InstRR<0x07, (outs), (ins uimm8zx4:$R1, GR64:$R2),
+ "bcr\t$R1, $R2", []>;
+}
+
+// Fused compare-and-branch instructions. As for normal branches,
+// we handle these instructions internally in their raw CRJ-like form,
+// but use assembly macros like CRJE when writing them out.
+//
+// These instructions do not use or clobber the condition codes.
+// We nevertheless pretend that they clobber CC, so that we can lower
+// them to separate comparisons and BRCLs if the branch ends up being
+// out of range.
+multiclass CompareBranches<Operand ccmask, string pos1, string pos2> {
+ let isBranch = 1, isTerminator = 1, Defs = [CC] in {
+ def RJ : InstRIEb<0xEC76, (outs), (ins GR32:$R1, GR32:$R2, ccmask:$M3,
+ brtarget16:$RI4),
+ "crj"##pos1##"\t$R1, $R2, "##pos2##"$RI4", []>;
+ def GRJ : InstRIEb<0xEC64, (outs), (ins GR64:$R1, GR64:$R2, ccmask:$M3,
+ brtarget16:$RI4),
+ "cgrj"##pos1##"\t$R1, $R2, "##pos2##"$RI4", []>;
+ def IJ : InstRIEc<0xEC7E, (outs), (ins GR32:$R1, imm32sx8:$I2, ccmask:$M3,
+ brtarget16:$RI4),
+ "cij"##pos1##"\t$R1, $I2, "##pos2##"$RI4", []>;
+ def GIJ : InstRIEc<0xEC7C, (outs), (ins GR64:$R1, imm64sx8:$I2, ccmask:$M3,
+ brtarget16:$RI4),
+ "cgij"##pos1##"\t$R1, $I2, "##pos2##"$RI4", []>;
+ def LRJ : InstRIEb<0xEC77, (outs), (ins GR32:$R1, GR32:$R2, ccmask:$M3,
+ brtarget16:$RI4),
+ "clrj"##pos1##"\t$R1, $R2, "##pos2##"$RI4", []>;
+ def LGRJ : InstRIEb<0xEC65, (outs), (ins GR64:$R1, GR64:$R2, ccmask:$M3,
+ brtarget16:$RI4),
+ "clgrj"##pos1##"\t$R1, $R2, "##pos2##"$RI4", []>;
+ def LIJ : InstRIEc<0xEC7F, (outs), (ins GR32:$R1, imm32zx8:$I2, ccmask:$M3,
+ brtarget16:$RI4),
+ "clij"##pos1##"\t$R1, $I2, "##pos2##"$RI4", []>;
+ def LGIJ : InstRIEc<0xEC7D, (outs), (ins GR64:$R1, imm64zx8:$I2, ccmask:$M3,
+ brtarget16:$RI4),
+ "clgij"##pos1##"\t$R1, $I2, "##pos2##"$RI4", []>;
}
}
let isCodeGenOnly = 1 in
- defm BRC : CondBranches<cond4, "j$cond\t$dst", "jg$cond\t$dst">;
-let isAsmParserOnly = 1 in
- defm AsmBRC : CondBranches<uimm8zx4, "brc\t$cond, $dst", "brcl\t$cond, $dst">;
-
-def : Pat<(z_br_ccmask cond4:$cond, bb:$dst), (BRCL cond4:$cond, bb:$dst)>;
-
-// Define AsmParser mnemonics for each condition code.
-multiclass CondExtendedMnemonic<bits<4> Cond, string name> {
- let R1 = Cond in {
- def "" : InstRI<0xA74, (outs), (ins brtarget16:$dst),
- "j"##name##"\t$dst", []>;
- def L : InstRIL<0xC04, (outs), (ins brtarget32:$dst),
- "jg"##name##"\t$dst", []>;
+ defm C : CompareBranches<cond4, "$M3", "">;
+defm AsmC : CompareBranches<uimm8zx4, "", "$M3, ">;
+
+// Define AsmParser mnemonics for each general condition-code mask
+// (integer or floating-point).
+multiclass CondExtendedMnemonic<bits<4> ccmask, string name> {
+ let R1 = ccmask in {
+ def J : InstRI<0xA74, (outs), (ins brtarget16:$I2),
+ "j"##name##"\t$I2", []>;
+ def JG : InstRIL<0xC04, (outs), (ins brtarget32:$I2),
+ "jg"##name##"\t$I2", []>;
+ def BR : InstRR<0x07, (outs), (ins ADDR64:$R2), "b"##name##"r\t$R2", []>;
+ }
+ def LOCR : FixedCondUnaryRRF<"locr"##name, 0xB9F2, GR32, GR32, ccmask>;
+ def LOCGR : FixedCondUnaryRRF<"locgr"##name, 0xB9E2, GR64, GR64, ccmask>;
+ def LOC : FixedCondUnaryRSY<"loc"##name, 0xEBF2, GR32, ccmask, 4>;
+ def LOCG : FixedCondUnaryRSY<"locg"##name, 0xEBE2, GR64, ccmask, 8>;
+ def STOC : FixedCondStoreRSY<"stoc"##name, 0xEBF3, GR32, ccmask, 4>;
+ def STOCG : FixedCondStoreRSY<"stocg"##name, 0xEBE3, GR64, ccmask, 8>;
+}
+defm AsmO : CondExtendedMnemonic<1, "o">;
+defm AsmH : CondExtendedMnemonic<2, "h">;
+defm AsmNLE : CondExtendedMnemonic<3, "nle">;
+defm AsmL : CondExtendedMnemonic<4, "l">;
+defm AsmNHE : CondExtendedMnemonic<5, "nhe">;
+defm AsmLH : CondExtendedMnemonic<6, "lh">;
+defm AsmNE : CondExtendedMnemonic<7, "ne">;
+defm AsmE : CondExtendedMnemonic<8, "e">;
+defm AsmNLH : CondExtendedMnemonic<9, "nlh">;
+defm AsmHE : CondExtendedMnemonic<10, "he">;
+defm AsmNL : CondExtendedMnemonic<11, "nl">;
+defm AsmLE : CondExtendedMnemonic<12, "le">;
+defm AsmNH : CondExtendedMnemonic<13, "nh">;
+defm AsmNO : CondExtendedMnemonic<14, "no">;
+
+// Define AsmParser mnemonics for each integer condition-code mask.
+// This is like the list above, except that condition 3 is not possible
+// and that the low bit of the mask is therefore always 0. This means
+// that each condition has two names. Conditions "o" and "no" are not used.
+//
+// We don't make one of the two names an alias of the other because
+// we need the custom parsing routines to select the correct register class.
+multiclass IntCondExtendedMnemonicA<bits<4> ccmask, string name> {
+ let M3 = ccmask in {
+ def CR : InstRIEb<0xEC76, (outs), (ins GR32:$R1, GR32:$R2,
+ brtarget16:$RI4),
+ "crj"##name##"\t$R1, $R2, $RI4", []>;
+ def CGR : InstRIEb<0xEC64, (outs), (ins GR64:$R1, GR64:$R2,
+ brtarget16:$RI4),
+ "cgrj"##name##"\t$R1, $R2, $RI4", []>;
+ def CI : InstRIEc<0xEC7E, (outs), (ins GR32:$R1, imm32sx8:$I2,
+ brtarget16:$RI4),
+ "cij"##name##"\t$R1, $I2, $RI4", []>;
+ def CGI : InstRIEc<0xEC7C, (outs), (ins GR64:$R1, imm64sx8:$I2,
+ brtarget16:$RI4),
+ "cgij"##name##"\t$R1, $I2, $RI4", []>;
+ def CLR : InstRIEb<0xEC77, (outs), (ins GR32:$R1, GR32:$R2,
+ brtarget16:$RI4),
+ "clrj"##name##"\t$R1, $R2, $RI4", []>;
+ def CLGR : InstRIEb<0xEC65, (outs), (ins GR64:$R1, GR64:$R2,
+ brtarget16:$RI4),
+ "clgrj"##name##"\t$R1, $R2, $RI4", []>;
+ def CLI : InstRIEc<0xEC7F, (outs), (ins GR32:$R1, imm32zx8:$I2,
+ brtarget16:$RI4),
+ "clij"##name##"\t$R1, $I2, $RI4", []>;
+ def CLGI : InstRIEc<0xEC7D, (outs), (ins GR64:$R1, imm64zx8:$I2,
+ brtarget16:$RI4),
+ "clgij"##name##"\t$R1, $I2, $RI4", []>;
}
}
-let isAsmParserOnly = 1 in {
- defm AsmJO : CondExtendedMnemonic<1, "o">;
- defm AsmJH : CondExtendedMnemonic<2, "h">;
- defm AsmJNLE : CondExtendedMnemonic<3, "nle">;
- defm AsmJL : CondExtendedMnemonic<4, "l">;
- defm AsmJNHE : CondExtendedMnemonic<5, "nhe">;
- defm AsmJLH : CondExtendedMnemonic<6, "lh">;
- defm AsmJNE : CondExtendedMnemonic<7, "ne">;
- defm AsmJE : CondExtendedMnemonic<8, "e">;
- defm AsmJNLH : CondExtendedMnemonic<9, "nlh">;
- defm AsmJHE : CondExtendedMnemonic<10, "he">;
- defm AsmJNL : CondExtendedMnemonic<11, "nl">;
- defm AsmJLE : CondExtendedMnemonic<12, "le">;
- defm AsmJNH : CondExtendedMnemonic<13, "nh">;
- defm AsmJNO : CondExtendedMnemonic<14, "no">;
+multiclass IntCondExtendedMnemonic<bits<4> ccmask, string name1, string name2>
+ : IntCondExtendedMnemonicA<ccmask, name1> {
+ let isAsmParserOnly = 1 in
+ defm Alt : IntCondExtendedMnemonicA<ccmask, name2>;
+}
+defm AsmJH : IntCondExtendedMnemonic<2, "h", "nle">;
+defm AsmJL : IntCondExtendedMnemonic<4, "l", "nhe">;
+defm AsmJLH : IntCondExtendedMnemonic<6, "lh", "ne">;
+defm AsmJE : IntCondExtendedMnemonic<8, "e", "nlh">;
+defm AsmJHE : IntCondExtendedMnemonic<10, "he", "nl">;
+defm AsmJLE : IntCondExtendedMnemonic<12, "le", "nh">;
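
A standalone C++ sketch of why each pair above is interchangeable: integer comparisons never produce CC 3, so two masks that differ only in the CC-3 bit (the low bit) select identical behavior.

    #include <cstdio>

    int main() {
      unsigned PossibleIntCCs = 8 | 4 | 2;   // CC 0, 1 and 2 only
      unsigned H = 2, NLE = 3;               // "h" vs "nle" masks
      std::printf("%s\n", (H & PossibleIntCCs) == (NLE & PossibleIntCCs)
                              ? "equivalent" : "different"); // equivalent
      return 0;
    }
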
+
+// Decrement a register and branch if it is nonzero. These don't clobber CC,
+// but we might need to split long branches into sequences that do.
+let Defs = [CC] in {
+ def BRCT : BranchUnaryRI<"brct", 0xA76, GR32>;
+ def BRCTG : BranchUnaryRI<"brctg", 0xA77, GR64>;
}
-def Select32 : SelectWrapper<GR32>;
-def Select64 : SelectWrapper<GR64>;
+//===----------------------------------------------------------------------===//
+// Select instructions
+//===----------------------------------------------------------------------===//
+
+def Select32Mux : SelectWrapper<GRX32>, Requires<[FeatureHighWord]>;
+def Select32 : SelectWrapper<GR32>;
+def Select64 : SelectWrapper<GR64>;
+
+// We don't define 32-bit Mux stores because the low-only STOC should
+// always be used if possible.
+defm CondStore8Mux : CondStores<GRX32, nonvolatile_truncstorei8,
+ nonvolatile_anyextloadi8, bdxaddr20only>,
+ Requires<[FeatureHighWord]>;
+defm CondStore16Mux : CondStores<GRX32, nonvolatile_truncstorei16,
+ nonvolatile_anyextloadi16, bdxaddr20only>,
+ Requires<[FeatureHighWord]>;
+defm CondStore8 : CondStores<GR32, nonvolatile_truncstorei8,
+ nonvolatile_anyextloadi8, bdxaddr20only>;
+defm CondStore16 : CondStores<GR32, nonvolatile_truncstorei16,
+ nonvolatile_anyextloadi16, bdxaddr20only>;
+defm CondStore32 : CondStores<GR32, nonvolatile_store,
+ nonvolatile_load, bdxaddr20only>;
+
+defm : CondStores64<CondStore8, CondStore8Inv, nonvolatile_truncstorei8,
+ nonvolatile_anyextloadi8, bdxaddr20only>;
+defm : CondStores64<CondStore16, CondStore16Inv, nonvolatile_truncstorei16,
+ nonvolatile_anyextloadi16, bdxaddr20only>;
+defm : CondStores64<CondStore32, CondStore32Inv, nonvolatile_truncstorei32,
+ nonvolatile_anyextloadi32, bdxaddr20only>;
+defm CondStore64 : CondStores<GR64, nonvolatile_store,
+ nonvolatile_load, bdxaddr20only>;
//===----------------------------------------------------------------------===//
// Call instructions
@@ -110,26 +235,30 @@ def Select64 : SelectWrapper<GR64>;
// The definitions here are for the call-clobbered registers.
let isCall = 1, Defs = [R0D, R1D, R2D, R3D, R4D, R5D, R14D,
- F0D, F1D, F2D, F3D, F4D, F5D, F6D, F7D],
- R1 = 14, isCodeGenOnly = 1 in {
- def BRAS : InstRI<0xA75, (outs), (ins pcrel16call:$dst, variable_ops),
- "bras\t%r14, $dst", []>;
- def BRASL : InstRIL<0xC05, (outs), (ins pcrel32call:$dst, variable_ops),
- "brasl\t%r14, $dst", [(z_call pcrel32call:$dst)]>;
- def BASR : InstRR<0x0D, (outs), (ins ADDR64:$dst, variable_ops),
- "basr\t%r14, $dst", [(z_call ADDR64:$dst)]>;
+ F0D, F1D, F2D, F3D, F4D, F5D, F6D, F7D, CC] in {
+ def CallBRASL : Alias<6, (outs), (ins pcrel32:$I2, variable_ops),
+ [(z_call pcrel32:$I2)]>;
+ def CallBASR : Alias<2, (outs), (ins ADDR64:$R2, variable_ops),
+ [(z_call ADDR64:$R2)]>;
+}
+
+// Sibling calls. Indirect sibling calls must be via R1, since R2 upwards
+// are argument registers and since branching to R0 is a no-op.
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
+ def CallJG : Alias<6, (outs), (ins pcrel32:$I2),
+ [(z_sibcall pcrel32:$I2)]>;
+ let Uses = [R1D] in
+ def CallBR : Alias<2, (outs), (ins), [(z_sibcall R1D)]>;
}
// Define the general form of the call instructions for the asm parser.
// These instructions don't hard-code %r14 as the return address register.
-let isAsmParserOnly = 1 in {
- def AsmBRAS : InstRI<0xA75, (outs), (ins GR64:$save, brtarget16:$dst),
- "bras\t$save, $dst", []>;
- def AsmBRASL : InstRIL<0xC05, (outs), (ins GR64:$save, brtarget32:$dst),
- "brasl\t$save, $dst", []>;
- def AsmBASR : InstRR<0x0D, (outs), (ins GR64:$save, ADDR64:$dst),
- "basr\t$save, $dst", []>;
-}
+def BRAS : InstRI<0xA75, (outs), (ins GR64:$R1, brtarget16:$I2),
+ "bras\t$R1, $I2", []>;
+def BRASL : InstRIL<0xC05, (outs), (ins GR64:$R1, brtarget32:$I2),
+ "brasl\t$R1, $I2", []>;
+def BASR : InstRR<0x0D, (outs), (ins GR64:$R1, ADDR64:$R2),
+ "basr\t$R1, $R2", []>;
//===----------------------------------------------------------------------===//
// Move instructions
@@ -137,13 +266,34 @@ let isAsmParserOnly = 1 in {
// Register moves.
let neverHasSideEffects = 1 in {
- def LR : UnaryRR <"lr", 0x18, null_frag, GR32, GR32>;
- def LGR : UnaryRRE<"lgr", 0xB904, null_frag, GR64, GR64>;
+ // Expands to LR, RISBHG or RISBLG, depending on the choice of registers.
+ def LRMux : UnaryRRPseudo<"l", null_frag, GRX32, GRX32>,
+ Requires<[FeatureHighWord]>;
+ def LR : UnaryRR <"l", 0x18, null_frag, GR32, GR32>;
+ def LGR : UnaryRRE<"lg", 0xB904, null_frag, GR64, GR64>;
+}
+let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in {
+ def LTR : UnaryRR <"lt", 0x12, null_frag, GR32, GR32>;
+ def LTGR : UnaryRRE<"ltg", 0xB902, null_frag, GR64, GR64>;
+}
+
+// Move on condition.
+let isCodeGenOnly = 1, Uses = [CC] in {
+ def LOCR : CondUnaryRRF<"loc", 0xB9F2, GR32, GR32>;
+ def LOCGR : CondUnaryRRF<"locg", 0xB9E2, GR64, GR64>;
+}
+let Uses = [CC] in {
+ def AsmLOCR : AsmCondUnaryRRF<"loc", 0xB9F2, GR32, GR32>;
+ def AsmLOCGR : AsmCondUnaryRRF<"locg", 0xB9E2, GR64, GR64>;
}
// Immediate moves.
-let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
- // 16-bit sign-extended immediates.
+let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isMoveImm = 1,
+ isReMaterializable = 1 in {
+ // 16-bit sign-extended immediates. LHIMux expands to LHI or IIHF,
+ // depending on the choice of register.
+ def LHIMux : UnaryRIPseudo<bitconvert, GRX32, imm32sx16>,
+ Requires<[FeatureHighWord]>;
def LHI : UnaryRI<"lhi", 0xA78, bitconvert, GR32, imm32sx16>;
def LGHI : UnaryRI<"lghi", 0xA79, bitconvert, GR64, imm64sx16>;
@@ -161,11 +311,13 @@ let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
// Register loads.
let canFoldAsLoad = 1, SimpleBDXLoad = 1 in {
- defm L : UnaryRXPair<"l", 0x58, 0xE358, load, GR32>;
- def LRL : UnaryRILPC<"lrl", 0xC4D, aligned_load, GR32>;
-
- def LG : UnaryRXY<"lg", 0xE304, load, GR64>;
- def LGRL : UnaryRILPC<"lgrl", 0xC48, aligned_load, GR64>;
+ // Expands to L, LY or LFH, depending on the choice of register.
+ def LMux : UnaryRXYPseudo<"l", load, GRX32, 4>,
+ Requires<[FeatureHighWord]>;
+ defm L : UnaryRXPair<"l", 0x58, 0xE358, load, GR32, 4>;
+ def LFH : UnaryRXY<"lfh", 0xE3CA, load, GRH32, 4>,
+ Requires<[FeatureHighWord]>;
+ def LG : UnaryRXY<"lg", 0xE304, load, GR64, 8>;
// These instructions are split after register allocation, so we don't
// want a custom inserter.
@@ -174,16 +326,35 @@ let canFoldAsLoad = 1, SimpleBDXLoad = 1 in {
[(set GR128:$dst, (load bdxaddr20only128:$src))]>;
}
}
+let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in {
+ def LT : UnaryRXY<"lt", 0xE312, load, GR32, 4>;
+ def LTG : UnaryRXY<"ltg", 0xE302, load, GR64, 8>;
+}
+
+let canFoldAsLoad = 1 in {
+ def LRL : UnaryRILPC<"lrl", 0xC4D, aligned_load, GR32>;
+ def LGRL : UnaryRILPC<"lgrl", 0xC48, aligned_load, GR64>;
+}
+
+// Load on condition.
+let isCodeGenOnly = 1, Uses = [CC] in {
+ def LOC : CondUnaryRSY<"loc", 0xEBF2, nonvolatile_load, GR32, 4>;
+ def LOCG : CondUnaryRSY<"locg", 0xEBE2, nonvolatile_load, GR64, 8>;
+}
+let Uses = [CC] in {
+ def AsmLOC : AsmCondUnaryRSY<"loc", 0xEBF2, GR32, 4>;
+ def AsmLOCG : AsmCondUnaryRSY<"locg", 0xEBE2, GR64, 8>;
+}
// Register stores.
let SimpleBDXStore = 1 in {
- let isCodeGenOnly = 1 in {
- defm ST32 : StoreRXPair<"st", 0x50, 0xE350, store, GR32>;
- def STRL32 : StoreRILPC<"strl", 0xC4F, aligned_store, GR32>;
- }
-
- def STG : StoreRXY<"stg", 0xE324, store, GR64>;
- def STGRL : StoreRILPC<"stgrl", 0xC4B, aligned_store, GR64>;
+ // Expands to ST, STY or STFH, depending on the choice of register.
+ def STMux : StoreRXYPseudo<store, GRX32, 4>,
+ Requires<[FeatureHighWord]>;
+ defm ST : StoreRXPair<"st", 0x50, 0xE350, store, GR32, 4>;
+ def STFH : StoreRXY<"stfh", 0xE3CB, store, GRH32, 4>,
+ Requires<[FeatureHighWord]>;
+ def STG : StoreRXY<"stg", 0xE324, store, GR64, 8>;
// These instructions are split after register allocation, so we don't
// want a custom inserter.
@@ -192,6 +363,18 @@ let SimpleBDXStore = 1 in {
[(store GR128:$src, bdxaddr20only128:$dst)]>;
}
}
+def STRL : StoreRILPC<"strl", 0xC4F, aligned_store, GR32>;
+def STGRL : StoreRILPC<"stgrl", 0xC4B, aligned_store, GR64>;
+
+// Store on condition.
+let isCodeGenOnly = 1, Uses = [CC] in {
+ def STOC : CondStoreRSY<"stoc", 0xEBF3, GR32, 4>;
+ def STOCG : CondStoreRSY<"stocg", 0xEBE3, GR64, 8>;
+}
+let Uses = [CC] in {
+ def AsmSTOC : AsmCondStoreRSY<"stoc", 0xEBF3, GR32, 4>;
+ def AsmSTOCG : AsmCondStoreRSY<"stocg", 0xEBE3, GR64, 8>;
+}
// 8-bit immediate stores to 8-bit fields.
defm MVI : StoreSIPair<"mvi", 0x92, 0xEB52, truncstorei8, imm32zx8trunc>;
@@ -201,50 +384,70 @@ def MVHHI : StoreSIL<"mvhhi", 0xE544, truncstorei16, imm32sx16trunc>;
def MVHI : StoreSIL<"mvhi", 0xE54C, store, imm32sx16>;
def MVGHI : StoreSIL<"mvghi", 0xE548, store, imm64sx16>;
+// Memory-to-memory moves.
+let mayLoad = 1, mayStore = 1 in
+ defm MVC : MemorySS<"mvc", 0xD2, z_mvc, z_mvc_loop>;
+
+// String moves.
+let mayLoad = 1, mayStore = 1, Defs = [CC], Uses = [R0L] in
+ defm MVST : StringRRE<"mvst", 0xB255, z_stpcpy>;
+
//===----------------------------------------------------------------------===//
// Sign extensions
//===----------------------------------------------------------------------===//
+//
+// Note that putting these before zero extensions means that we will prefer
+// them for anyextload*. There's not really much to choose between the two
+// either way, but signed-extending loads have a short LH and a long LHY,
+// while zero-extending loads have only the long LLH.
+//
+//===----------------------------------------------------------------------===//
// 32-bit extensions from registers.
let neverHasSideEffects = 1 in {
- def LBR : UnaryRRE<"lbr", 0xB926, sext8, GR32, GR32>;
- def LHR : UnaryRRE<"lhr", 0xB927, sext16, GR32, GR32>;
+ def LBR : UnaryRRE<"lb", 0xB926, sext8, GR32, GR32>;
+ def LHR : UnaryRRE<"lh", 0xB927, sext16, GR32, GR32>;
}
// 64-bit extensions from registers.
let neverHasSideEffects = 1 in {
- def LGBR : UnaryRRE<"lgbr", 0xB906, sext8, GR64, GR64>;
- def LGHR : UnaryRRE<"lghr", 0xB907, sext16, GR64, GR64>;
- def LGFR : UnaryRRE<"lgfr", 0xB914, sext32, GR64, GR32>;
+ def LGBR : UnaryRRE<"lgb", 0xB906, sext8, GR64, GR64>;
+ def LGHR : UnaryRRE<"lgh", 0xB907, sext16, GR64, GR64>;
+ def LGFR : UnaryRRE<"lgf", 0xB914, sext32, GR64, GR32>;
}
+let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in
+ def LTGFR : UnaryRRE<"ltgf", 0xB912, null_frag, GR64, GR64>;
// Match 32-to-64-bit sign extensions in which the source is already
// in a 64-bit register.
def : Pat<(sext_inreg GR64:$src, i32),
- (LGFR (EXTRACT_SUBREG GR64:$src, subreg_32bit))>;
-
-// 32-bit extensions from memory.
-def LB : UnaryRXY<"lb", 0xE376, sextloadi8, GR32>;
-defm LH : UnaryRXPair<"lh", 0x48, 0xE378, sextloadi16, GR32>;
-def LHRL : UnaryRILPC<"lhrl", 0xC45, aligned_sextloadi16, GR32>;
+ (LGFR (EXTRACT_SUBREG GR64:$src, subreg_l32))>;
+
+// 32-bit extensions from 8-bit memory. LBMux expands to LB or LBH,
+// depending on the choice of register.
+def LBMux : UnaryRXYPseudo<"lb", asextloadi8, GRX32, 1>,
+ Requires<[FeatureHighWord]>;
+def LB : UnaryRXY<"lb", 0xE376, asextloadi8, GR32, 1>;
+def LBH : UnaryRXY<"lbh", 0xE3C0, asextloadi8, GRH32, 1>,
+ Requires<[FeatureHighWord]>;
+
+// 32-bit extensions from 16-bit memory. LHMux expands to LH or LHH,
+// depending on the choice of register.
+def LHMux : UnaryRXYPseudo<"lh", asextloadi16, GRX32, 2>,
+ Requires<[FeatureHighWord]>;
+defm LH : UnaryRXPair<"lh", 0x48, 0xE378, asextloadi16, GR32, 2>;
+def LHH : UnaryRXY<"lhh", 0xE3C4, asextloadi16, GRH32, 2>,
+ Requires<[FeatureHighWord]>;
+def LHRL : UnaryRILPC<"lhrl", 0xC45, aligned_asextloadi16, GR32>;
// 64-bit extensions from memory.
-def LGB : UnaryRXY<"lgb", 0xE377, sextloadi8, GR64>;
-def LGH : UnaryRXY<"lgh", 0xE315, sextloadi16, GR64>;
-def LGF : UnaryRXY<"lgf", 0xE314, sextloadi32, GR64>;
-def LGHRL : UnaryRILPC<"lghrl", 0xC44, aligned_sextloadi16, GR64>;
-def LGFRL : UnaryRILPC<"lgfrl", 0xC4C, aligned_sextloadi32, GR64>;
-
-// If the sign of a load-extend operation doesn't matter, use the signed ones.
-// There's not really much to choose between the sign and zero extensions,
-// but LH is more compact than LLH for small offsets.
-def : Pat<(i32 (extloadi8 bdxaddr20only:$src)), (LB bdxaddr20only:$src)>;
-def : Pat<(i32 (extloadi16 bdxaddr12pair:$src)), (LH bdxaddr12pair:$src)>;
-def : Pat<(i32 (extloadi16 bdxaddr20pair:$src)), (LHY bdxaddr20pair:$src)>;
-
-def : Pat<(i64 (extloadi8 bdxaddr20only:$src)), (LGB bdxaddr20only:$src)>;
-def : Pat<(i64 (extloadi16 bdxaddr20only:$src)), (LGH bdxaddr20only:$src)>;
-def : Pat<(i64 (extloadi32 bdxaddr20only:$src)), (LGF bdxaddr20only:$src)>;
+def LGB : UnaryRXY<"lgb", 0xE377, asextloadi8, GR64, 1>;
+def LGH : UnaryRXY<"lgh", 0xE315, asextloadi16, GR64, 2>;
+def LGF : UnaryRXY<"lgf", 0xE314, asextloadi32, GR64, 4>;
+def LGHRL : UnaryRILPC<"lghrl", 0xC44, aligned_asextloadi16, GR64>;
+def LGFRL : UnaryRILPC<"lgfrl", 0xC4C, aligned_asextloadi32, GR64>;
+let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in
+ def LTGF : UnaryRXY<"ltgf", 0xE332, asextloadi32, GR64, 4>;
//===----------------------------------------------------------------------===//
// Zero extensions
@@ -252,33 +455,51 @@ def : Pat<(i64 (extloadi32 bdxaddr20only:$src)), (LGF bdxaddr20only:$src)>;
// 32-bit extensions from registers.
let neverHasSideEffects = 1 in {
- def LLCR : UnaryRRE<"llcr", 0xB994, zext8, GR32, GR32>;
- def LLHR : UnaryRRE<"llhr", 0xB995, zext16, GR32, GR32>;
+ // Expands to LLCR or RISB[LH]G, depending on the choice of registers.
+ def LLCRMux : UnaryRRPseudo<"llc", zext8, GRX32, GRX32>,
+ Requires<[FeatureHighWord]>;
+ def LLCR : UnaryRRE<"llc", 0xB994, zext8, GR32, GR32>;
+ // Expands to LLHR or RISB[LH]G, depending on the choice of registers.
+ def LLHRMux : UnaryRRPseudo<"llh", zext16, GRX32, GRX32>,
+ Requires<[FeatureHighWord]>;
+ def LLHR : UnaryRRE<"llh", 0xB995, zext16, GR32, GR32>;
}
// 64-bit extensions from registers.
let neverHasSideEffects = 1 in {
- def LLGCR : UnaryRRE<"llgcr", 0xB984, zext8, GR64, GR64>;
- def LLGHR : UnaryRRE<"llghr", 0xB985, zext16, GR64, GR64>;
- def LLGFR : UnaryRRE<"llgfr", 0xB916, zext32, GR64, GR32>;
+ def LLGCR : UnaryRRE<"llgc", 0xB984, zext8, GR64, GR64>;
+ def LLGHR : UnaryRRE<"llgh", 0xB985, zext16, GR64, GR64>;
+ def LLGFR : UnaryRRE<"llgf", 0xB916, zext32, GR64, GR32>;
}
// Match 32-to-64-bit zero extensions in which the source is already
// in a 64-bit register.
def : Pat<(and GR64:$src, 0xffffffff),
- (LLGFR (EXTRACT_SUBREG GR64:$src, subreg_32bit))>;
-
-// 32-bit extensions from memory.
-def LLC : UnaryRXY<"llc", 0xE394, zextloadi8, GR32>;
-def LLH : UnaryRXY<"llh", 0xE395, zextloadi16, GR32>;
-def LLHRL : UnaryRILPC<"llhrl", 0xC42, aligned_zextloadi16, GR32>;
+ (LLGFR (EXTRACT_SUBREG GR64:$src, subreg_l32))>;
+
+// 32-bit extensions from 8-bit memory. LLCMux expands to LLC or LLCH,
+// depending on the choice of register.
+def LLCMux : UnaryRXYPseudo<"llc", azextloadi8, GRX32, 1>,
+ Requires<[FeatureHighWord]>;
+def LLC : UnaryRXY<"llc", 0xE394, azextloadi8, GR32, 1>;
+def LLCH : UnaryRXY<"llch", 0xE3C2, azextloadi8, GRH32, 1>,
+ Requires<[FeatureHighWord]>;
+
+// 32-bit extensions from 16-bit memory. LLHMux expands to LLH or LLHH,
+// depending on the choice of register.
+def LLHMux : UnaryRXYPseudo<"llh", azextloadi16, GRX32, 2>,
+ Requires<[FeatureHighWord]>;
+def LLH : UnaryRXY<"llh", 0xE395, azextloadi16, GR32, 2>;
+def LLHH : UnaryRXY<"llhh", 0xE3C6, azextloadi16, GRH32, 2>,
+ Requires<[FeatureHighWord]>;
+def LLHRL : UnaryRILPC<"llhrl", 0xC42, aligned_azextloadi16, GR32>;
// 64-bit extensions from memory.
-def LLGC : UnaryRXY<"llgc", 0xE390, zextloadi8, GR64>;
-def LLGH : UnaryRXY<"llgh", 0xE391, zextloadi16, GR64>;
-def LLGF : UnaryRXY<"llgf", 0xE316, zextloadi32, GR64>;
-def LLGHRL : UnaryRILPC<"llghrl", 0xC46, aligned_zextloadi16, GR64>;
-def LLGFRL : UnaryRILPC<"llgfrl", 0xC4E, aligned_zextloadi32, GR64>;
+def LLGC : UnaryRXY<"llgc", 0xE390, azextloadi8, GR64, 1>;
+def LLGH : UnaryRXY<"llgh", 0xE391, azextloadi16, GR64, 2>;
+def LLGF : UnaryRXY<"llgf", 0xE316, azextloadi32, GR64, 4>;
+def LLGHRL : UnaryRILPC<"llghrl", 0xC46, aligned_azextloadi16, GR64>;
+def LLGFRL : UnaryRILPC<"llgfrl", 0xC4E, aligned_azextloadi32, GR64>;
//===----------------------------------------------------------------------===//
// Truncations
@@ -286,21 +507,31 @@ def LLGFRL : UnaryRILPC<"llgfrl", 0xC4E, aligned_zextloadi32, GR64>;
// Truncations of 64-bit registers to 32-bit registers.
def : Pat<(i32 (trunc GR64:$src)),
- (EXTRACT_SUBREG GR64:$src, subreg_32bit)>;
-
-// Truncations of 32-bit registers to memory.
-let isCodeGenOnly = 1 in {
- defm STC32 : StoreRXPair<"stc", 0x42, 0xE372, truncstorei8, GR32>;
- defm STH32 : StoreRXPair<"sth", 0x40, 0xE370, truncstorei16, GR32>;
- def STHRL32 : StoreRILPC<"sthrl", 0xC47, aligned_truncstorei16, GR32>;
-}
+ (EXTRACT_SUBREG GR64:$src, subreg_l32)>;
+
+// Truncations of 32-bit registers to 8-bit memory. STCMux expands to
+// STC, STCY or STCH, depending on the choice of register.
+def STCMux : StoreRXYPseudo<truncstorei8, GRX32, 1>,
+ Requires<[FeatureHighWord]>;
+defm STC : StoreRXPair<"stc", 0x42, 0xE372, truncstorei8, GR32, 1>;
+def STCH : StoreRXY<"stch", 0xE3C3, truncstorei8, GRH32, 1>,
+ Requires<[FeatureHighWord]>;
+
+// Truncations of 32-bit registers to 16-bit memory. STHMux expands to
+// STH, STHY or STHH, depending on the choice of register.
+def STHMux : StoreRXYPseudo<truncstorei16, GRX32, 2>,
+ Requires<[FeatureHighWord]>;
+defm STH : StoreRXPair<"sth", 0x40, 0xE370, truncstorei16, GR32, 2>;
+def STHH : StoreRXY<"sthh", 0xE3C7, truncstorei16, GRH32, 2>,
+ Requires<[FeatureHighWord]>;
+def STHRL : StoreRILPC<"sthrl", 0xC47, aligned_truncstorei16, GR32>;
// Truncations of 64-bit registers to memory.
-defm STC : StoreRXPair<"stc", 0x42, 0xE372, truncstorei8, GR64>;
-defm STH : StoreRXPair<"sth", 0x40, 0xE370, truncstorei16, GR64>;
-def STHRL : StoreRILPC<"sthrl", 0xC47, aligned_truncstorei16, GR64>;
-defm ST : StoreRXPair<"st", 0x50, 0xE350, truncstorei32, GR64>;
-def STRL : StoreRILPC<"strl", 0xC4F, aligned_truncstorei32, GR64>;
+defm : StoreGR64Pair<STC, STCY, truncstorei8>;
+defm : StoreGR64Pair<STH, STHY, truncstorei16>;
+def : StoreGR64PC<STHRL, aligned_truncstorei16>;
+defm : StoreGR64Pair<ST, STY, truncstorei32>;
+def : StoreGR64PC<STRL, aligned_truncstorei32>;
//===----------------------------------------------------------------------===//
// Multi-register moves
@@ -318,50 +549,77 @@ def STMG : StoreMultipleRSY<"stmg", 0xEB24, GR64>;
// Byte-swapping register moves.
let neverHasSideEffects = 1 in {
- def LRVR : UnaryRRE<"lrvr", 0xB91F, bswap, GR32, GR32>;
- def LRVGR : UnaryRRE<"lrvgr", 0xB90F, bswap, GR64, GR64>;
+ def LRVR : UnaryRRE<"lrv", 0xB91F, bswap, GR32, GR32>;
+ def LRVGR : UnaryRRE<"lrvg", 0xB90F, bswap, GR64, GR64>;
}
-// Byte-swapping loads.
-def LRV : UnaryRXY<"lrv", 0xE31E, loadu<bswap>, GR32>;
-def LRVG : UnaryRXY<"lrvg", 0xE30F, loadu<bswap>, GR64>;
+// Byte-swapping loads. Unlike normal loads, these instructions are
+// allowed to access storage more than once.
+def LRV : UnaryRXY<"lrv", 0xE31E, loadu<bswap, nonvolatile_load>, GR32, 4>;
+def LRVG : UnaryRXY<"lrvg", 0xE30F, loadu<bswap, nonvolatile_load>, GR64, 8>;
-// Byte-swapping stores.
-def STRV : StoreRXY<"strv", 0xE33E, storeu<bswap>, GR32>;
-def STRVG : StoreRXY<"strvg", 0xE32F, storeu<bswap>, GR64>;
+// Likewise byte-swapping stores.
+def STRV : StoreRXY<"strv", 0xE33E, storeu<bswap, nonvolatile_store>, GR32, 4>;
+def STRVG : StoreRXY<"strvg", 0xE32F, storeu<bswap, nonvolatile_store>,
+ GR64, 8>;
//===----------------------------------------------------------------------===//
// Load address instructions
//===----------------------------------------------------------------------===//
// Load BDX-style addresses.
-let neverHasSideEffects = 1, Function = "la" in {
- let PairType = "12" in
- def LA : InstRX<0x41, (outs GR64:$dst), (ins laaddr12pair:$src),
- "la\t$dst, $src",
- [(set GR64:$dst, laaddr12pair:$src)]>;
- let PairType = "20" in
- def LAY : InstRXY<0xE371, (outs GR64:$dst), (ins laaddr20pair:$src),
- "lay\t$dst, $src",
- [(set GR64:$dst, laaddr20pair:$src)]>;
+let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isReMaterializable = 1,
+ DispKey = "la" in {
+ let DispSize = "12" in
+ def LA : InstRX<0x41, (outs GR64:$R1), (ins laaddr12pair:$XBD2),
+ "la\t$R1, $XBD2",
+ [(set GR64:$R1, laaddr12pair:$XBD2)]>;
+ let DispSize = "20" in
+ def LAY : InstRXY<0xE371, (outs GR64:$R1), (ins laaddr20pair:$XBD2),
+ "lay\t$R1, $XBD2",
+ [(set GR64:$R1, laaddr20pair:$XBD2)]>;
}
// Load a PC-relative address. There's no version of this instruction
// with a 16-bit offset, so there's no relaxation.
-let neverHasSideEffects = 1 in {
- def LARL : InstRIL<0xC00, (outs GR64:$dst), (ins pcrel32:$src),
- "larl\t$dst, $src",
- [(set GR64:$dst, pcrel32:$src)]>;
+let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isMoveImm = 1,
+ isReMaterializable = 1 in {
+ def LARL : InstRIL<0xC00, (outs GR64:$R1), (ins pcrel32:$I2),
+ "larl\t$R1, $I2",
+ [(set GR64:$R1, pcrel32:$I2)]>;
}
//===----------------------------------------------------------------------===//
-// Negation
+// Absolute and Negation
//===----------------------------------------------------------------------===//
-let Defs = [PSW] in {
- def LCR : UnaryRR <"lcr", 0x13, ineg, GR32, GR32>;
- def LCGR : UnaryRRE<"lcgr", 0xB903, ineg, GR64, GR64>;
- def LCGFR : UnaryRRE<"lcgfr", 0xB913, null_frag, GR64, GR32>;
+let Defs = [CC] in {
+ let CCValues = 0xF, CompareZeroCCMask = 0x8 in {
+ def LPR : UnaryRR <"lp", 0x10, z_iabs32, GR32, GR32>;
+ def LPGR : UnaryRRE<"lpg", 0xB900, z_iabs64, GR64, GR64>;
+ }
+ let CCValues = 0xE, CompareZeroCCMask = 0xE in
+ def LPGFR : UnaryRRE<"lpgf", 0xB910, null_frag, GR64, GR32>;
+}
+defm : SXU<z_iabs64, LPGFR>;
+
+let Defs = [CC] in {
+ let CCValues = 0xF, CompareZeroCCMask = 0x8 in {
+ def LNR : UnaryRR <"ln", 0x11, z_inegabs32, GR32, GR32>;
+ def LNGR : UnaryRRE<"lng", 0xB901, z_inegabs64, GR64, GR64>;
+ }
+ let CCValues = 0xE, CompareZeroCCMask = 0xE in
+ def LNGFR : UnaryRRE<"lngf", 0xB911, null_frag, GR64, GR32>;
+}
+defm : SXU<z_inegabs64, LNGFR>;
+
+let Defs = [CC] in {
+ let CCValues = 0xF, CompareZeroCCMask = 0x8 in {
+ def LCR : UnaryRR <"lc", 0x13, ineg, GR32, GR32>;
+ def LCGR : UnaryRRE<"lcg", 0xB903, ineg, GR64, GR64>;
+ }
+ let CCValues = 0xE, CompareZeroCCMask = 0xE in
+ def LCGFR : UnaryRRE<"lcgf", 0xB913, null_frag, GR64, GR32>;
}
defm : SXU<ineg, LCGFR>;
@@ -370,69 +628,83 @@ defm : SXU<ineg, LCGFR>;
//===----------------------------------------------------------------------===//
let isCodeGenOnly = 1 in
- defm IC32 : BinaryRXPair<"ic", 0x43, 0xE373, inserti8, GR32, zextloadi8>;
-defm IC : BinaryRXPair<"ic", 0x43, 0xE373, inserti8, GR64, zextloadi8>;
+ defm IC32 : BinaryRXPair<"ic", 0x43, 0xE373, inserti8, GR32, azextloadi8, 1>;
+defm IC : BinaryRXPair<"ic", 0x43, 0xE373, inserti8, GR64, azextloadi8, 1>;
-defm : InsertMem<"inserti8", IC32, GR32, zextloadi8, bdxaddr12pair>;
-defm : InsertMem<"inserti8", IC32Y, GR32, zextloadi8, bdxaddr20pair>;
+defm : InsertMem<"inserti8", IC32, GR32, azextloadi8, bdxaddr12pair>;
+defm : InsertMem<"inserti8", IC32Y, GR32, azextloadi8, bdxaddr20pair>;
-defm : InsertMem<"inserti8", IC, GR64, zextloadi8, bdxaddr12pair>;
-defm : InsertMem<"inserti8", ICY, GR64, zextloadi8, bdxaddr20pair>;
+defm : InsertMem<"inserti8", IC, GR64, azextloadi8, bdxaddr12pair>;
+defm : InsertMem<"inserti8", ICY, GR64, azextloadi8, bdxaddr20pair>;
// Insertions of a 16-bit immediate, leaving other bits unaffected.
// We don't have or_as_insert equivalents of these operations because
// OI is available instead.
-let isCodeGenOnly = 1 in {
- def IILL32 : BinaryRI<"iill", 0xA53, insertll, GR32, imm32ll16>;
- def IILH32 : BinaryRI<"iilh", 0xA52, insertlh, GR32, imm32lh16>;
-}
-def IILL : BinaryRI<"iill", 0xA53, insertll, GR64, imm64ll16>;
-def IILH : BinaryRI<"iilh", 0xA52, insertlh, GR64, imm64lh16>;
-def IIHL : BinaryRI<"iihl", 0xA51, inserthl, GR64, imm64hl16>;
-def IIHH : BinaryRI<"iihh", 0xA50, inserthh, GR64, imm64hh16>;
+//
+// IIxMux expands to II[LH]x, depending on the choice of register.
+def IILMux : BinaryRIPseudo<insertll, GRX32, imm32ll16>,
+ Requires<[FeatureHighWord]>;
+def IIHMux : BinaryRIPseudo<insertlh, GRX32, imm32lh16>,
+ Requires<[FeatureHighWord]>;
+def IILL : BinaryRI<"iill", 0xA53, insertll, GR32, imm32ll16>;
+def IILH : BinaryRI<"iilh", 0xA52, insertlh, GR32, imm32lh16>;
+def IIHL : BinaryRI<"iihl", 0xA51, insertll, GRH32, imm32ll16>;
+def IIHH : BinaryRI<"iihh", 0xA50, insertlh, GRH32, imm32lh16>;
+def IILL64 : BinaryAliasRI<insertll, GR64, imm64ll16>;
+def IILH64 : BinaryAliasRI<insertlh, GR64, imm64lh16>;
+def IIHL64 : BinaryAliasRI<inserthl, GR64, imm64hl16>;
+def IIHH64 : BinaryAliasRI<inserthh, GR64, imm64hh16>;
// ...likewise for 32-bit immediates. For GR32s this is a general
// full-width move. (We use IILF rather than something like LLILF
// for 32-bit moves because IILF leaves the upper 32 bits of the
// GR64 unchanged.)
-let isCodeGenOnly = 1 in {
- def IILF32 : UnaryRIL<"iilf", 0xC09, bitconvert, GR32, uimm32>;
+let isAsCheapAsAMove = 1, isMoveImm = 1, isReMaterializable = 1 in {
+ def IIFMux : UnaryRIPseudo<bitconvert, GRX32, uimm32>,
+ Requires<[FeatureHighWord]>;
+ def IILF : UnaryRIL<"iilf", 0xC09, bitconvert, GR32, uimm32>;
+ def IIHF : UnaryRIL<"iihf", 0xC08, bitconvert, GRH32, uimm32>;
}
-def IILF : BinaryRIL<"iilf", 0xC09, insertlf, GR64, imm64lf32>;
-def IIHF : BinaryRIL<"iihf", 0xC08, inserthf, GR64, imm64hf32>;
+def IILF64 : BinaryAliasRIL<insertlf, GR64, imm64lf32>;
+def IIHF64 : BinaryAliasRIL<inserthf, GR64, imm64hf32>;
// An alternative model of inserthf, with the first operand being
// a zero-extended value.
def : Pat<(or (zext32 GR32:$src), imm64hf32:$imm),
- (IIHF (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_32bit),
- imm64hf32:$imm)>;
+ (IIHF64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32),
+ imm64hf32:$imm)>;
//===----------------------------------------------------------------------===//
// Addition
//===----------------------------------------------------------------------===//
// Plain addition.
-let Defs = [PSW] in {
+let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in {
// Addition of a register.
let isCommutable = 1 in {
- def AR : BinaryRR <"ar", 0x1A, add, GR32, GR32>;
- def AGR : BinaryRRE<"agr", 0xB908, add, GR64, GR64>;
+ defm AR : BinaryRRAndK<"a", 0x1A, 0xB9F8, add, GR32, GR32>;
+ defm AGR : BinaryRREAndK<"ag", 0xB908, 0xB9E8, add, GR64, GR64>;
}
- def AGFR : BinaryRRE<"agfr", 0xB918, null_frag, GR64, GR32>;
+ def AGFR : BinaryRRE<"agf", 0xB918, null_frag, GR64, GR32>;
// Addition of signed 16-bit immediates.
- def AHI : BinaryRI<"ahi", 0xA7A, add, GR32, imm32sx16>;
- def AGHI : BinaryRI<"aghi", 0xA7B, add, GR64, imm64sx16>;
+ defm AHIMux : BinaryRIAndKPseudo<"ahimux", add, GRX32, imm32sx16>;
+ defm AHI : BinaryRIAndK<"ahi", 0xA7A, 0xECD8, add, GR32, imm32sx16>;
+ defm AGHI : BinaryRIAndK<"aghi", 0xA7B, 0xECD9, add, GR64, imm64sx16>;
// Addition of signed 32-bit immediates.
+ def AFIMux : BinaryRIPseudo<add, GRX32, simm32>,
+ Requires<[FeatureHighWord]>;
def AFI : BinaryRIL<"afi", 0xC29, add, GR32, simm32>;
+ def AIH : BinaryRIL<"aih", 0xCC8, add, GRH32, simm32>,
+ Requires<[FeatureHighWord]>;
def AGFI : BinaryRIL<"agfi", 0xC28, add, GR64, imm64sx32>;
// Addition of memory.
- defm AH : BinaryRXPair<"ah", 0x4A, 0xE37A, add, GR32, sextloadi16>;
- defm A : BinaryRXPair<"a", 0x5A, 0xE35A, add, GR32, load>;
- def AGF : BinaryRXY<"agf", 0xE318, add, GR64, sextloadi32>;
- def AG : BinaryRXY<"ag", 0xE308, add, GR64, load>;
+ defm AH : BinaryRXPair<"ah", 0x4A, 0xE37A, add, GR32, asextloadi16, 2>;
+ defm A : BinaryRXPair<"a", 0x5A, 0xE35A, add, GR32, load, 4>;
+ def AGF : BinaryRXY<"agf", 0xE318, add, GR64, asextloadi32, 4>;
+ def AG : BinaryRXY<"ag", 0xE308, add, GR64, load, 8>;
// Addition to memory.
def ASI : BinarySIY<"asi", 0xEB6A, add, imm32sx8>;
@@ -441,34 +713,40 @@ let Defs = [PSW] in {
defm : SXB<add, GR64, AGFR>;
// Addition producing a carry.
-let Defs = [PSW] in {
+let Defs = [CC] in {
// Addition of a register.
let isCommutable = 1 in {
- def ALR : BinaryRR <"alr", 0x1E, addc, GR32, GR32>;
- def ALGR : BinaryRRE<"algr", 0xB90A, addc, GR64, GR64>;
+ defm ALR : BinaryRRAndK<"al", 0x1E, 0xB9FA, addc, GR32, GR32>;
+ defm ALGR : BinaryRREAndK<"alg", 0xB90A, 0xB9EA, addc, GR64, GR64>;
}
- def ALGFR : BinaryRRE<"algfr", 0xB91A, null_frag, GR64, GR32>;
+ def ALGFR : BinaryRRE<"algf", 0xB91A, null_frag, GR64, GR32>;
+
+ // Addition of signed 16-bit immediates.
+ def ALHSIK : BinaryRIE<"alhsik", 0xECDA, addc, GR32, imm32sx16>,
+ Requires<[FeatureDistinctOps]>;
+ def ALGHSIK : BinaryRIE<"alghsik", 0xECDB, addc, GR64, imm64sx16>,
+ Requires<[FeatureDistinctOps]>;
// Addition of unsigned 32-bit immediates.
def ALFI : BinaryRIL<"alfi", 0xC2B, addc, GR32, uimm32>;
def ALGFI : BinaryRIL<"algfi", 0xC2A, addc, GR64, imm64zx32>;
// Addition of memory.
- defm AL : BinaryRXPair<"al", 0x5E, 0xE35E, addc, GR32, load>;
- def ALGF : BinaryRXY<"algf", 0xE31A, addc, GR64, zextloadi32>;
- def ALG : BinaryRXY<"alg", 0xE30A, addc, GR64, load>;
+ defm AL : BinaryRXPair<"al", 0x5E, 0xE35E, addc, GR32, load, 4>;
+ def ALGF : BinaryRXY<"algf", 0xE31A, addc, GR64, azextloadi32, 4>;
+ def ALG : BinaryRXY<"alg", 0xE30A, addc, GR64, load, 8>;
}
defm : ZXB<addc, GR64, ALGFR>;
// Addition producing and using a carry.
-let Defs = [PSW], Uses = [PSW] in {
+let Defs = [CC], Uses = [CC] in {
// Addition of a register.
- def ALCR : BinaryRRE<"alcr", 0xB998, adde, GR32, GR32>;
- def ALCGR : BinaryRRE<"alcgr", 0xB988, adde, GR64, GR64>;
+ def ALCR : BinaryRRE<"alc", 0xB998, adde, GR32, GR32>;
+ def ALCGR : BinaryRRE<"alcg", 0xB988, adde, GR64, GR64>;
// Addition of memory.
- def ALC : BinaryRXY<"alc", 0xE398, adde, GR32, load>;
- def ALCG : BinaryRXY<"alcg", 0xE388, adde, GR64, load>;
+ def ALC : BinaryRXY<"alc", 0xE398, adde, GR32, load, 4>;
+ def ALCG : BinaryRXY<"alcg", 0xE388, adde, GR64, load, 8>;
}
//===----------------------------------------------------------------------===//
@@ -477,25 +755,26 @@ let Defs = [PSW], Uses = [PSW] in {
// Plain subtraction. Although immediate forms exist, we use the
// add-immediate instruction instead.
-let Defs = [PSW] in {
+let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in {
// Subtraction of a register.
- def SR : BinaryRR <"sr", 0x1B, sub, GR32, GR32>;
- def SGFR : BinaryRRE<"sgfr", 0xB919, null_frag, GR64, GR32>;
- def SGR : BinaryRRE<"sgr", 0xB909, sub, GR64, GR64>;
+ defm SR : BinaryRRAndK<"s", 0x1B, 0xB9F9, sub, GR32, GR32>;
+ def SGFR : BinaryRRE<"sgf", 0xB919, null_frag, GR64, GR32>;
+ defm SGR : BinaryRREAndK<"sg", 0xB909, 0xB9E9, sub, GR64, GR64>;
// Subtraction of memory.
- defm S : BinaryRXPair<"s", 0x5B, 0xE35B, sub, GR32, load>;
- def SGF : BinaryRXY<"sgf", 0xE319, sub, GR64, sextloadi32>;
- def SG : BinaryRXY<"sg", 0xE309, sub, GR64, load>;
+ defm SH : BinaryRXPair<"sh", 0x4B, 0xE37B, sub, GR32, asextloadi16, 2>;
+ defm S : BinaryRXPair<"s", 0x5B, 0xE35B, sub, GR32, load, 4>;
+ def SGF : BinaryRXY<"sgf", 0xE319, sub, GR64, asextloadi32, 4>;
+ def SG : BinaryRXY<"sg", 0xE309, sub, GR64, load, 8>;
}
defm : SXB<sub, GR64, SGFR>;
// Subtraction producing a carry.
-let Defs = [PSW] in {
+let Defs = [CC] in {
// Subtraction of a register.
- def SLR : BinaryRR <"slr", 0x1F, subc, GR32, GR32>;
- def SLGFR : BinaryRRE<"slgfr", 0xB91B, null_frag, GR64, GR32>;
- def SLGR : BinaryRRE<"slgr", 0xB90B, subc, GR64, GR64>;
+ defm SLR : BinaryRRAndK<"sl", 0x1F, 0xB9FB, subc, GR32, GR32>;
+ def SLGFR : BinaryRRE<"slgf", 0xB91B, null_frag, GR64, GR32>;
+ defm SLGR : BinaryRREAndK<"slg", 0xB90B, 0xB9EB, subc, GR64, GR64>;
// Subtraction of unsigned 32-bit immediates. These don't match
// subc because we prefer addc for constants.
@@ -503,56 +782,78 @@ let Defs = [PSW] in {
def SLGFI : BinaryRIL<"slgfi", 0xC24, null_frag, GR64, imm64zx32>;
// Subtraction of memory.
- defm SL : BinaryRXPair<"sl", 0x5F, 0xE35F, subc, GR32, load>;
- def SLGF : BinaryRXY<"slgf", 0xE31B, subc, GR64, zextloadi32>;
- def SLG : BinaryRXY<"slg", 0xE30B, subc, GR64, load>;
+ defm SL : BinaryRXPair<"sl", 0x5F, 0xE35F, subc, GR32, load, 4>;
+ def SLGF : BinaryRXY<"slgf", 0xE31B, subc, GR64, azextloadi32, 4>;
+ def SLG : BinaryRXY<"slg", 0xE30B, subc, GR64, load, 8>;
}
defm : ZXB<subc, GR64, SLGFR>;
// Subtraction producing and using a carry.
-let Defs = [PSW], Uses = [PSW] in {
+let Defs = [CC], Uses = [CC] in {
// Subtraction of a register.
- def SLBR : BinaryRRE<"slbr", 0xB999, sube, GR32, GR32>;
- def SLGBR : BinaryRRE<"slbgr", 0xB989, sube, GR64, GR64>;
+ def SLBR : BinaryRRE<"slb", 0xB999, sube, GR32, GR32>;
+ def SLGBR : BinaryRRE<"slbg", 0xB989, sube, GR64, GR64>;
// Subtraction of memory.
- def SLB : BinaryRXY<"slb", 0xE399, sube, GR32, load>;
- def SLBG : BinaryRXY<"slbg", 0xE389, sube, GR64, load>;
+ def SLB : BinaryRXY<"slb", 0xE399, sube, GR32, load, 4>;
+ def SLBG : BinaryRXY<"slbg", 0xE389, sube, GR64, load, 8>;
}
//===----------------------------------------------------------------------===//
// AND
//===----------------------------------------------------------------------===//
-let Defs = [PSW] in {
+let Defs = [CC] in {
// ANDs of a register.
- let isCommutable = 1 in {
- def NR : BinaryRR <"nr", 0x14, and, GR32, GR32>;
- def NGR : BinaryRRE<"ngr", 0xB980, and, GR64, GR64>;
+ let isCommutable = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in {
+ defm NR : BinaryRRAndK<"n", 0x14, 0xB9F4, and, GR32, GR32>;
+ defm NGR : BinaryRREAndK<"ng", 0xB980, 0xB9E4, and, GR64, GR64>;
}
- // ANDs of a 16-bit immediate, leaving other bits unaffected.
- let isCodeGenOnly = 1 in {
- def NILL32 : BinaryRI<"nill", 0xA57, and, GR32, imm32ll16c>;
- def NILH32 : BinaryRI<"nilh", 0xA56, and, GR32, imm32lh16c>;
+ let isConvertibleToThreeAddress = 1 in {
+ // ANDs of a 16-bit immediate, leaving other bits unaffected.
+ // The CC result only reflects the 16-bit field, not the full register.
+ //
+ // NIxMux expands to NI[LH]x, depending on the choice of register.
+ def NILMux : BinaryRIPseudo<and, GRX32, imm32ll16c>,
+ Requires<[FeatureHighWord]>;
+ def NIHMux : BinaryRIPseudo<and, GRX32, imm32lh16c>,
+ Requires<[FeatureHighWord]>;
+ def NILL : BinaryRI<"nill", 0xA57, and, GR32, imm32ll16c>;
+ def NILH : BinaryRI<"nilh", 0xA56, and, GR32, imm32lh16c>;
+ def NIHL : BinaryRI<"nihl", 0xA55, and, GRH32, imm32ll16c>;
+ def NIHH : BinaryRI<"nihh", 0xA54, and, GRH32, imm32lh16c>;
+ def NILL64 : BinaryAliasRI<and, GR64, imm64ll16c>;
+ def NILH64 : BinaryAliasRI<and, GR64, imm64lh16c>;
+ def NIHL64 : BinaryAliasRI<and, GR64, imm64hl16c>;
+ def NIHH64 : BinaryAliasRI<and, GR64, imm64hh16c>;
+
+ // ANDs of a 32-bit immediate, leaving other bits unaffected.
+ // The CC result only reflects the 32-bit field, which means we can
+ // use it as a zero indicator for i32 operations but not otherwise.
+ let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
+ // Expands to NILF or NIHF, depending on the choice of register.
+ def NIFMux : BinaryRIPseudo<and, GRX32, uimm32>,
+ Requires<[FeatureHighWord]>;
+ def NILF : BinaryRIL<"nilf", 0xC0B, and, GR32, uimm32>;
+ def NIHF : BinaryRIL<"nihf", 0xC0A, and, GRH32, uimm32>;
+ }
+ def NILF64 : BinaryAliasRIL<and, GR64, imm64lf32c>;
+ def NIHF64 : BinaryAliasRIL<and, GR64, imm64hf32c>;
}
- def NILL : BinaryRI<"nill", 0xA57, and, GR64, imm64ll16c>;
- def NILH : BinaryRI<"nilh", 0xA56, and, GR64, imm64lh16c>;
- def NIHL : BinaryRI<"nihl", 0xA55, and, GR64, imm64hl16c>;
- def NIHH : BinaryRI<"nihh", 0xA54, and, GR64, imm64hh16c>;
-
- // ANDs of a 32-bit immediate, leaving other bits unaffected.
- let isCodeGenOnly = 1 in
- def NILF32 : BinaryRIL<"nilf", 0xC0B, and, GR32, uimm32>;
- def NILF : BinaryRIL<"nilf", 0xC0B, and, GR64, imm64lf32c>;
- def NIHF : BinaryRIL<"nihf", 0xC0A, and, GR64, imm64hf32c>;
// ANDs of memory.
- defm N : BinaryRXPair<"n", 0x54, 0xE354, and, GR32, load>;
- def NG : BinaryRXY<"ng", 0xE380, and, GR64, load>;
+ let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
+ defm N : BinaryRXPair<"n", 0x54, 0xE354, and, GR32, load, 4>;
+ def NG : BinaryRXY<"ng", 0xE380, and, GR64, load, 8>;
+ }
// AND to memory
defm NI : BinarySIPair<"ni", 0x94, 0xEB54, null_frag, uimm8>;
+
+ // Block AND.
+ let mayLoad = 1, mayStore = 1 in
+ defm NC : MemorySS<"nc", 0xD4, z_nc, z_nc_loop>;
}
defm : RMWIByte<and, bdaddr12pair, NI>;
defm : RMWIByte<and, bdaddr20pair, NIY>;
@@ -561,35 +862,55 @@ defm : RMWIByte<and, bdaddr20pair, NIY>;
// OR
//===----------------------------------------------------------------------===//
-let Defs = [PSW] in {
+let Defs = [CC] in {
// ORs of a register.
- let isCommutable = 1 in {
- def OR : BinaryRR <"or", 0x16, or, GR32, GR32>;
- def OGR : BinaryRRE<"ogr", 0xB981, or, GR64, GR64>;
+ let isCommutable = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in {
+ defm OR : BinaryRRAndK<"o", 0x16, 0xB9F6, or, GR32, GR32>;
+ defm OGR : BinaryRREAndK<"og", 0xB981, 0xB9E6, or, GR64, GR64>;
}
// ORs of a 16-bit immediate, leaving other bits unaffected.
- let isCodeGenOnly = 1 in {
- def OILL32 : BinaryRI<"oill", 0xA5B, or, GR32, imm32ll16>;
- def OILH32 : BinaryRI<"oilh", 0xA5A, or, GR32, imm32lh16>;
- }
- def OILL : BinaryRI<"oill", 0xA5B, or, GR64, imm64ll16>;
- def OILH : BinaryRI<"oilh", 0xA5A, or, GR64, imm64lh16>;
- def OIHL : BinaryRI<"oihl", 0xA59, or, GR64, imm64hl16>;
- def OIHH : BinaryRI<"oihh", 0xA58, or, GR64, imm64hh16>;
+ // The CC result only reflects the 16-bit field, not the full register.
+ //
+ // OIxMux expands to OI[LH]x, depending on the choice of register.
+ def OILMux : BinaryRIPseudo<or, GRX32, imm32ll16>,
+ Requires<[FeatureHighWord]>;
+ def OIHMux : BinaryRIPseudo<or, GRX32, imm32lh16>,
+ Requires<[FeatureHighWord]>;
+ def OILL : BinaryRI<"oill", 0xA5B, or, GR32, imm32ll16>;
+ def OILH : BinaryRI<"oilh", 0xA5A, or, GR32, imm32lh16>;
+ def OIHL : BinaryRI<"oihl", 0xA59, or, GRH32, imm32ll16>;
+ def OIHH : BinaryRI<"oihh", 0xA58, or, GRH32, imm32lh16>;
+ def OILL64 : BinaryAliasRI<or, GR64, imm64ll16>;
+ def OILH64 : BinaryAliasRI<or, GR64, imm64lh16>;
+ def OIHL64 : BinaryAliasRI<or, GR64, imm64hl16>;
+ def OIHH64 : BinaryAliasRI<or, GR64, imm64hh16>;
// ORs of a 32-bit immediate, leaving other bits unaffected.
- let isCodeGenOnly = 1 in
- def OILF32 : BinaryRIL<"oilf", 0xC0D, or, GR32, uimm32>;
- def OILF : BinaryRIL<"oilf", 0xC0D, or, GR64, imm64lf32>;
- def OIHF : BinaryRIL<"oihf", 0xC0C, or, GR64, imm64hf32>;
+ // The CC result only reflects the 32-bit field, which means we can
+ // use it as a zero indicator for i32 operations but not otherwise.
+ let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
+ // Expands to OILF or OIHF, depending on the choice of register.
+ def OIFMux : BinaryRIPseudo<or, GRX32, uimm32>,
+ Requires<[FeatureHighWord]>;
+ def OILF : BinaryRIL<"oilf", 0xC0D, or, GR32, uimm32>;
+ def OIHF : BinaryRIL<"oihf", 0xC0C, or, GRH32, uimm32>;
+ }
+ def OILF64 : BinaryAliasRIL<or, GR64, imm64lf32>;
+ def OIHF64 : BinaryAliasRIL<or, GR64, imm64hf32>;
// ORs of memory.
- defm O : BinaryRXPair<"o", 0x56, 0xE356, or, GR32, load>;
- def OG : BinaryRXY<"og", 0xE381, or, GR64, load>;
+ let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
+ defm O : BinaryRXPair<"o", 0x56, 0xE356, or, GR32, load, 4>;
+ def OG : BinaryRXY<"og", 0xE381, or, GR64, load, 8>;
+ }
// OR to memory
defm OI : BinarySIPair<"oi", 0x96, 0xEB56, null_frag, uimm8>;
+
+ // Block OR.
+ let mayLoad = 1, mayStore = 1 in
+ defm OC : MemorySS<"oc", 0xD6, z_oc, z_oc_loop>;
}
defm : RMWIByte<or, bdaddr12pair, OI>;
defm : RMWIByte<or, bdaddr20pair, OIY>;
@@ -598,25 +919,38 @@ defm : RMWIByte<or, bdaddr20pair, OIY>;
// XOR
//===----------------------------------------------------------------------===//
-let Defs = [PSW] in {
+let Defs = [CC] in {
// XORs of a register.
- let isCommutable = 1 in {
- def XR : BinaryRR <"xr", 0x17, xor, GR32, GR32>;
- def XGR : BinaryRRE<"xgr", 0xB982, xor, GR64, GR64>;
+ let isCommutable = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in {
+ defm XR : BinaryRRAndK<"x", 0x17, 0xB9F7, xor, GR32, GR32>;
+ defm XGR : BinaryRREAndK<"xg", 0xB982, 0xB9E7, xor, GR64, GR64>;
}
// XORs of a 32-bit immediate, leaving other bits unaffected.
- let isCodeGenOnly = 1 in
- def XILF32 : BinaryRIL<"xilf", 0xC07, xor, GR32, uimm32>;
- def XILF : BinaryRIL<"xilf", 0xC07, xor, GR64, imm64lf32>;
- def XIHF : BinaryRIL<"xihf", 0xC06, xor, GR64, imm64hf32>;
+ // The CC result only reflects the 32-bit field, which means we can
+ // use it as a zero indicator for i32 operations but not otherwise.
+ let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
+ // Expands to XILF or XIHF, depending on the choice of register.
+ def XIFMux : BinaryRIPseudo<xor, GRX32, uimm32>,
+ Requires<[FeatureHighWord]>;
+ def XILF : BinaryRIL<"xilf", 0xC07, xor, GR32, uimm32>;
+ def XIHF : BinaryRIL<"xihf", 0xC06, xor, GRH32, uimm32>;
+ }
+ def XILF64 : BinaryAliasRIL<xor, GR64, imm64lf32>;
+ def XIHF64 : BinaryAliasRIL<xor, GR64, imm64hf32>;
// XORs of memory.
- defm X : BinaryRXPair<"x",0x57, 0xE357, xor, GR32, load>;
- def XG : BinaryRXY<"xg", 0xE382, xor, GR64, load>;
+ let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
+ defm X : BinaryRXPair<"x",0x57, 0xE357, xor, GR32, load, 4>;
+ def XG : BinaryRXY<"xg", 0xE382, xor, GR64, load, 8>;
+ }
// XOR to memory
defm XI : BinarySIPair<"xi", 0x97, 0xEB57, null_frag, uimm8>;
+
+ // Block XOR.
+ let mayLoad = 1, mayStore = 1 in
+ defm XC : MemorySS<"xc", 0xD7, z_xc, z_xc_loop>;
}
defm : RMWIByte<xor, bdaddr12pair, XI>;
defm : RMWIByte<xor, bdaddr20pair, XIY>;
@@ -627,10 +961,10 @@ defm : RMWIByte<xor, bdaddr20pair, XIY>;
// Multiplication of a register.
let isCommutable = 1 in {
- def MSR : BinaryRRE<"msr", 0xB252, mul, GR32, GR32>;
- def MSGR : BinaryRRE<"msgr", 0xB90C, mul, GR64, GR64>;
+ def MSR : BinaryRRE<"ms", 0xB252, mul, GR32, GR32>;
+ def MSGR : BinaryRRE<"msg", 0xB90C, mul, GR64, GR64>;
}
-def MSGFR : BinaryRRE<"msgfr", 0xB91C, null_frag, GR64, GR32>;
+def MSGFR : BinaryRRE<"msgf", 0xB91C, null_frag, GR64, GR32>;
defm : SXB<mul, GR64, MSGFR>;
// Multiplication of a signed 16-bit immediate.
@@ -642,33 +976,32 @@ def MSFI : BinaryRIL<"msfi", 0xC21, mul, GR32, simm32>;
def MSGFI : BinaryRIL<"msgfi", 0xC20, mul, GR64, imm64sx32>;
// Multiplication of memory.
-defm MH : BinaryRXPair<"mh", 0x4C, 0xE37C, mul, GR32, sextloadi16>;
-defm MS : BinaryRXPair<"ms", 0x71, 0xE351, mul, GR32, load>;
-def MSGF : BinaryRXY<"msgf", 0xE31C, mul, GR64, sextloadi32>;
-def MSG : BinaryRXY<"msg", 0xE30C, mul, GR64, load>;
+defm MH : BinaryRXPair<"mh", 0x4C, 0xE37C, mul, GR32, asextloadi16, 2>;
+defm MS : BinaryRXPair<"ms", 0x71, 0xE351, mul, GR32, load, 4>;
+def MSGF : BinaryRXY<"msgf", 0xE31C, mul, GR64, asextloadi32, 4>;
+def MSG : BinaryRXY<"msg", 0xE30C, mul, GR64, load, 8>;
// Multiplication of a register, producing two results.
-def MLGR : BinaryRRE<"mlgr", 0xB986, z_umul_lohi64, GR128, GR64>;
+def MLGR : BinaryRRE<"mlg", 0xB986, z_umul_lohi64, GR128, GR64>;
// Multiplication of memory, producing two results.
-def MLG : BinaryRXY<"mlg", 0xE386, z_umul_lohi64, GR128, load>;
+def MLG : BinaryRXY<"mlg", 0xE386, z_umul_lohi64, GR128, load, 8>;
//===----------------------------------------------------------------------===//
// Division and remainder
//===----------------------------------------------------------------------===//
// Division and remainder, from registers.
-def DSGFR : BinaryRRE<"dsgfr", 0xB91D, null_frag, GR128, GR32>;
-def DSGR : BinaryRRE<"dsgr", 0xB90D, z_sdivrem64, GR128, GR64>;
-def DLR : BinaryRRE<"dlr", 0xB997, z_udivrem32, GR128, GR32>;
-def DLGR : BinaryRRE<"dlgr", 0xB987, z_udivrem64, GR128, GR64>;
-defm : SXB<z_sdivrem64, GR128, DSGFR>;
+def DSGFR : BinaryRRE<"dsgf", 0xB91D, z_sdivrem32, GR128, GR32>;
+def DSGR : BinaryRRE<"dsg", 0xB90D, z_sdivrem64, GR128, GR64>;
+def DLR : BinaryRRE<"dl", 0xB997, z_udivrem32, GR128, GR32>;
+def DLGR : BinaryRRE<"dlg", 0xB987, z_udivrem64, GR128, GR64>;
// Division and remainder, from memory.
-def DSGF : BinaryRXY<"dsgf", 0xE31D, z_sdivrem64, GR128, sextloadi32>;
-def DSG : BinaryRXY<"dsg", 0xE30D, z_sdivrem64, GR128, load>;
-def DL : BinaryRXY<"dl", 0xE397, z_udivrem32, GR128, load>;
-def DLG : BinaryRXY<"dlg", 0xE387, z_udivrem64, GR128, load>;
+def DSGF : BinaryRXY<"dsgf", 0xE31D, z_sdivrem32, GR128, load, 4>;
+def DSG : BinaryRXY<"dsg", 0xE30D, z_sdivrem64, GR128, load, 8>;
+def DL : BinaryRXY<"dl", 0xE397, z_udivrem32, GR128, load, 4>;
+def DLG : BinaryRXY<"dlg", 0xE387, z_udivrem64, GR128, load, 8>;
//===----------------------------------------------------------------------===//
// Shifts
@@ -676,111 +1009,188 @@ def DLG : BinaryRXY<"dlg", 0xE387, z_udivrem64, GR128, load>;
// Shift left.
let neverHasSideEffects = 1 in {
- def SLL : ShiftRS <"sll", 0x89, shl, GR32, shift12only>;
- def SLLG : ShiftRSY<"sllg", 0xEB0D, shl, GR64, shift20only>;
+ defm SLL : ShiftRSAndK<"sll", 0x89, 0xEBDF, shl, GR32>;
+ def SLLG : ShiftRSY<"sllg", 0xEB0D, shl, GR64>;
}
// Logical shift right.
let neverHasSideEffects = 1 in {
- def SRL : ShiftRS <"srl", 0x88, srl, GR32, shift12only>;
- def SRLG : ShiftRSY<"srlg", 0xEB0C, srl, GR64, shift20only>;
+ defm SRL : ShiftRSAndK<"srl", 0x88, 0xEBDE, srl, GR32>;
+ def SRLG : ShiftRSY<"srlg", 0xEB0C, srl, GR64>;
}
// Arithmetic shift right.
-let Defs = [PSW] in {
- def SRA : ShiftRS <"sra", 0x8A, sra, GR32, shift12only>;
- def SRAG : ShiftRSY<"srag", 0xEB0A, sra, GR64, shift20only>;
+let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in {
+ defm SRA : ShiftRSAndK<"sra", 0x8A, 0xEBDC, sra, GR32>;
+ def SRAG : ShiftRSY<"srag", 0xEB0A, sra, GR64>;
}
// Rotate left.
let neverHasSideEffects = 1 in {
- def RLL : ShiftRSY<"rll", 0xEB1D, rotl, GR32, shift20only>;
- def RLLG : ShiftRSY<"rllg", 0xEB1C, rotl, GR64, shift20only>;
+ def RLL : ShiftRSY<"rll", 0xEB1D, rotl, GR32>;
+ def RLLG : ShiftRSY<"rllg", 0xEB1C, rotl, GR64>;
}
// Rotate second operand left and insert selected bits into first operand.
// These can act like 32-bit operands provided that the constant start and
-// end bits (operands 2 and 3) are in the range [32, 64)
-let Defs = [PSW] in {
+// end bits (operands 2 and 3) are in the range [32, 64).
+let Defs = [CC] in {
let isCodeGenOnly = 1 in
- def RISBG32 : RotateSelectRIEf<"risbg", 0xEC55, GR32, GR32>;
- def RISBG : RotateSelectRIEf<"risbg", 0xEC55, GR64, GR64>;
+ def RISBG32 : RotateSelectRIEf<"risbg", 0xEC55, GR32, GR32>;
+ let CCValues = 0xE, CompareZeroCCMask = 0xE in
+ def RISBG : RotateSelectRIEf<"risbg", 0xEC55, GR64, GR64>;
+}
+
+// Forms of RISBG that only affect one word of the destination register.
+// They do not set CC.
+def RISBMux : RotateSelectRIEfPseudo<GRX32, GRX32>, Requires<[FeatureHighWord]>;
+def RISBLL : RotateSelectAliasRIEf<GR32, GR32>, Requires<[FeatureHighWord]>;
+def RISBLH : RotateSelectAliasRIEf<GR32, GRH32>, Requires<[FeatureHighWord]>;
+def RISBHL : RotateSelectAliasRIEf<GRH32, GR32>, Requires<[FeatureHighWord]>;
+def RISBHH : RotateSelectAliasRIEf<GRH32, GRH32>, Requires<[FeatureHighWord]>;
+def RISBLG : RotateSelectRIEf<"risblg", 0xEC51, GR32, GR64>,
+ Requires<[FeatureHighWord]>;
+def RISBHG : RotateSelectRIEf<"risbhg", 0xEC5D, GRH32, GR64>,
+ Requires<[FeatureHighWord]>;
+
+// Rotate second operand left and perform a logical operation with selected
+// bits of the first operand. The CC result only describes the selected bits,
+// so it isn't useful for a full comparison against zero.
+let Defs = [CC] in {
+ def RNSBG : RotateSelectRIEf<"rnsbg", 0xEC54, GR64, GR64>;
+ def ROSBG : RotateSelectRIEf<"rosbg", 0xEC56, GR64, GR64>;
+ def RXSBG : RotateSelectRIEf<"rxsbg", 0xEC57, GR64, GR64>;
}
//===----------------------------------------------------------------------===//
// Comparison
//===----------------------------------------------------------------------===//
-// Signed comparisons.
-let Defs = [PSW] in {
+// Signed comparisons. We put these before the unsigned comparisons because
+// some of the signed forms have COMPARE AND BRANCH equivalents whereas none
+// of the unsigned forms do.
+let Defs = [CC], CCValues = 0xE in {
// Comparison with a register.
- def CR : CompareRR <"cr", 0x19, z_cmp, GR32, GR32>;
- def CGFR : CompareRRE<"cgfr", 0xB930, null_frag, GR64, GR32>;
- def CGR : CompareRRE<"cgr", 0xB920, z_cmp, GR64, GR64>;
+ def CR : CompareRR <"c", 0x19, z_scmp, GR32, GR32>;
+ def CGFR : CompareRRE<"cgf", 0xB930, null_frag, GR64, GR32>;
+ def CGR : CompareRRE<"cg", 0xB920, z_scmp, GR64, GR64>;
// Comparison with a signed 16-bit immediate.
- def CHI : CompareRI<"chi", 0xA7E, z_cmp, GR32, imm32sx16>;
- def CGHI : CompareRI<"cghi", 0xA7F, z_cmp, GR64, imm64sx16>;
-
- // Comparison with a signed 32-bit immediate.
- def CFI : CompareRIL<"cfi", 0xC2D, z_cmp, GR32, simm32>;
- def CGFI : CompareRIL<"cgfi", 0xC2C, z_cmp, GR64, imm64sx32>;
+ def CHI : CompareRI<"chi", 0xA7E, z_scmp, GR32, imm32sx16>;
+ def CGHI : CompareRI<"cghi", 0xA7F, z_scmp, GR64, imm64sx16>;
+
+ // Comparison with a signed 32-bit immediate. CFIMux expands to CFI or CIH,
+ // depending on the choice of register.
+ def CFIMux : CompareRIPseudo<z_scmp, GRX32, simm32>,
+ Requires<[FeatureHighWord]>;
+ def CFI : CompareRIL<"cfi", 0xC2D, z_scmp, GR32, simm32>;
+ def CIH : CompareRIL<"cih", 0xCCD, z_scmp, GRH32, simm32>,
+ Requires<[FeatureHighWord]>;
+ def CGFI : CompareRIL<"cgfi", 0xC2C, z_scmp, GR64, imm64sx32>;
// Comparison with memory.
- defm CH : CompareRXPair<"ch", 0x49, 0xE379, z_cmp, GR32, sextloadi16>;
- defm C : CompareRXPair<"c", 0x59, 0xE359, z_cmp, GR32, load>;
- def CGH : CompareRXY<"cgh", 0xE334, z_cmp, GR64, sextloadi16>;
- def CGF : CompareRXY<"cgf", 0xE330, z_cmp, GR64, sextloadi32>;
- def CG : CompareRXY<"cg", 0xE320, z_cmp, GR64, load>;
- def CHRL : CompareRILPC<"chrl", 0xC65, z_cmp, GR32, aligned_sextloadi16>;
- def CRL : CompareRILPC<"crl", 0xC6D, z_cmp, GR32, aligned_load>;
- def CGHRL : CompareRILPC<"cghrl", 0xC64, z_cmp, GR64, aligned_sextloadi16>;
- def CGFRL : CompareRILPC<"cgfrl", 0xC6C, z_cmp, GR64, aligned_sextloadi32>;
- def CGRL : CompareRILPC<"cgrl", 0xC68, z_cmp, GR64, aligned_load>;
+ defm CH : CompareRXPair<"ch", 0x49, 0xE379, z_scmp, GR32, asextloadi16, 2>;
+ def CMux : CompareRXYPseudo<z_scmp, GRX32, load, 4>,
+ Requires<[FeatureHighWord]>;
+ defm C : CompareRXPair<"c", 0x59, 0xE359, z_scmp, GR32, load, 4>;
+ def CHF : CompareRXY<"chf", 0xE3CD, z_scmp, GRH32, load, 4>,
+ Requires<[FeatureHighWord]>;
+ def CGH : CompareRXY<"cgh", 0xE334, z_scmp, GR64, asextloadi16, 2>;
+ def CGF : CompareRXY<"cgf", 0xE330, z_scmp, GR64, asextloadi32, 4>;
+ def CG : CompareRXY<"cg", 0xE320, z_scmp, GR64, load, 8>;
+ def CHRL : CompareRILPC<"chrl", 0xC65, z_scmp, GR32, aligned_asextloadi16>;
+ def CRL : CompareRILPC<"crl", 0xC6D, z_scmp, GR32, aligned_load>;
+ def CGHRL : CompareRILPC<"cghrl", 0xC64, z_scmp, GR64, aligned_asextloadi16>;
+ def CGFRL : CompareRILPC<"cgfrl", 0xC6C, z_scmp, GR64, aligned_asextloadi32>;
+ def CGRL : CompareRILPC<"cgrl", 0xC68, z_scmp, GR64, aligned_load>;
// Comparison between memory and a signed 16-bit immediate.
- def CHHSI : CompareSIL<"chhsi", 0xE554, z_cmp, sextloadi16, imm32sx16>;
- def CHSI : CompareSIL<"chsi", 0xE55C, z_cmp, load, imm32sx16>;
- def CGHSI : CompareSIL<"cghsi", 0xE558, z_cmp, load, imm64sx16>;
+ def CHHSI : CompareSIL<"chhsi", 0xE554, z_scmp, asextloadi16, imm32sx16>;
+ def CHSI : CompareSIL<"chsi", 0xE55C, z_scmp, load, imm32sx16>;
+ def CGHSI : CompareSIL<"cghsi", 0xE558, z_scmp, load, imm64sx16>;
}
-defm : SXB<z_cmp, GR64, CGFR>;
+defm : SXB<z_scmp, GR64, CGFR>;
// Unsigned comparisons.
-let Defs = [PSW] in {
+let Defs = [CC], CCValues = 0xE, IsLogical = 1 in {
// Comparison with a register.
- def CLR : CompareRR <"clr", 0x15, z_ucmp, GR32, GR32>;
- def CLGFR : CompareRRE<"clgfr", 0xB931, null_frag, GR64, GR32>;
- def CLGR : CompareRRE<"clgr", 0xB921, z_ucmp, GR64, GR64>;
-
- // Comparison with a signed 32-bit immediate.
+ def CLR : CompareRR <"cl", 0x15, z_ucmp, GR32, GR32>;
+ def CLGFR : CompareRRE<"clgf", 0xB931, null_frag, GR64, GR32>;
+ def CLGR : CompareRRE<"clg", 0xB921, z_ucmp, GR64, GR64>;
+
+ // Comparison with an unsigned 32-bit immediate. CLFIMux expands to CLFI
+ // or CLIH, depending on the choice of register.
+ def CLFIMux : CompareRIPseudo<z_ucmp, GRX32, uimm32>,
+ Requires<[FeatureHighWord]>;
def CLFI : CompareRIL<"clfi", 0xC2F, z_ucmp, GR32, uimm32>;
+  def CLIH : CompareRIL<"clih", 0xCCF, z_ucmp, GRH32, uimm32>,
+ Requires<[FeatureHighWord]>;
def CLGFI : CompareRIL<"clgfi", 0xC2E, z_ucmp, GR64, imm64zx32>;
// Comparison with memory.
- defm CL : CompareRXPair<"cl", 0x55, 0xE355, z_ucmp, GR32, load>;
- def CLGF : CompareRXY<"clgf", 0xE331, z_ucmp, GR64, zextloadi32>;
- def CLG : CompareRXY<"clg", 0xE321, z_ucmp, GR64, load>;
+ def CLMux : CompareRXYPseudo<z_ucmp, GRX32, load, 4>,
+ Requires<[FeatureHighWord]>;
+ defm CL : CompareRXPair<"cl", 0x55, 0xE355, z_ucmp, GR32, load, 4>;
+ def CLHF : CompareRXY<"clhf", 0xE3CF, z_ucmp, GRH32, load, 4>,
+ Requires<[FeatureHighWord]>;
+ def CLGF : CompareRXY<"clgf", 0xE331, z_ucmp, GR64, azextloadi32, 4>;
+ def CLG : CompareRXY<"clg", 0xE321, z_ucmp, GR64, load, 8>;
def CLHRL : CompareRILPC<"clhrl", 0xC67, z_ucmp, GR32,
- aligned_zextloadi16>;
+ aligned_azextloadi16>;
def CLRL : CompareRILPC<"clrl", 0xC6F, z_ucmp, GR32,
aligned_load>;
def CLGHRL : CompareRILPC<"clghrl", 0xC66, z_ucmp, GR64,
- aligned_zextloadi16>;
+ aligned_azextloadi16>;
def CLGFRL : CompareRILPC<"clgfrl", 0xC6E, z_ucmp, GR64,
- aligned_zextloadi32>;
+ aligned_azextloadi32>;
def CLGRL : CompareRILPC<"clgrl", 0xC6A, z_ucmp, GR64,
aligned_load>;
// Comparison between memory and an unsigned 8-bit immediate.
- defm CLI : CompareSIPair<"cli", 0x95, 0xEB55, z_ucmp, zextloadi8, imm32zx8>;
+ defm CLI : CompareSIPair<"cli", 0x95, 0xEB55, z_ucmp, azextloadi8, imm32zx8>;
// Comparison between memory and an unsigned 16-bit immediate.
- def CLHHSI : CompareSIL<"clhhsi", 0xE555, z_ucmp, zextloadi16, imm32zx16>;
- def CLFHSI : CompareSIL<"clfhsi", 0xE55D, z_ucmp, load, imm32zx16>;
- def CLGHSI : CompareSIL<"clghsi", 0xE559, z_ucmp, load, imm64zx16>;
+ def CLHHSI : CompareSIL<"clhhsi", 0xE555, z_ucmp, azextloadi16, imm32zx16>;
+ def CLFHSI : CompareSIL<"clfhsi", 0xE55D, z_ucmp, load, imm32zx16>;
+ def CLGHSI : CompareSIL<"clghsi", 0xE559, z_ucmp, load, imm64zx16>;
}
defm : ZXB<z_ucmp, GR64, CLGFR>;
+// Memory-to-memory comparison.
+let mayLoad = 1, Defs = [CC] in
+ defm CLC : MemorySS<"clc", 0xD5, z_clc, z_clc_loop>;
+
+// String comparison.
+let mayLoad = 1, Defs = [CC], Uses = [R0L] in
+ defm CLST : StringRRE<"clst", 0xB25D, z_strcmp>;
+
+// Test under mask.
+let Defs = [CC] in {
+ // TMxMux expands to TM[LH]x, depending on the choice of register.
+ def TMLMux : CompareRIPseudo<z_tm_reg, GRX32, imm32ll16>,
+ Requires<[FeatureHighWord]>;
+ def TMHMux : CompareRIPseudo<z_tm_reg, GRX32, imm32lh16>,
+ Requires<[FeatureHighWord]>;
+ def TMLL : CompareRI<"tmll", 0xA71, z_tm_reg, GR32, imm32ll16>;
+ def TMLH : CompareRI<"tmlh", 0xA70, z_tm_reg, GR32, imm32lh16>;
+ def TMHL : CompareRI<"tmhl", 0xA73, z_tm_reg, GRH32, imm32ll16>;
+ def TMHH : CompareRI<"tmhh", 0xA72, z_tm_reg, GRH32, imm32lh16>;
+
+ def TMLL64 : CompareAliasRI<z_tm_reg, GR64, imm64ll16>;
+ def TMLH64 : CompareAliasRI<z_tm_reg, GR64, imm64lh16>;
+ def TMHL64 : CompareAliasRI<z_tm_reg, GR64, imm64hl16>;
+ def TMHH64 : CompareAliasRI<z_tm_reg, GR64, imm64hh16>;
+
+ defm TM : CompareSIPair<"tm", 0x91, 0xEB51, z_tm_mem, anyextloadi8, imm32zx8>;
+}
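The TEST UNDER MASK comments above are worth making concrete: CC reflects only the bits selected by the immediate mask, never the whole register. A minimal C++ model of TMLL's CC computation, written purely as an illustration (it assumes GCC/Clang's __builtin_clz and is not part of the backend):

    #include <cstdint>

    // CC semantics of TEST UNDER MASK LOW (low 16 bits of a 32-bit register):
    // 0 = selected bits all zero (or empty mask), 3 = all one,
    // 1/2 = mixed, with the leftmost selected bit zero/one respectively.
    int tmllCC(uint32_t reg, uint16_t mask) {
      uint16_t sel = static_cast<uint16_t>(reg) & mask;
      if (mask == 0 || sel == 0)
        return 0;
      if (sel == mask)
        return 3;
      unsigned top = 31 - __builtin_clz(static_cast<uint32_t>(mask));
      return ((sel >> top) & 1) ? 2 : 1;
    }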
+
+//===----------------------------------------------------------------------===//
+// Prefetch
+//===----------------------------------------------------------------------===//
+
+def PFD : PrefetchRXY<"pfd", 0xE336, z_prefetch>;
+def PFDRL : PrefetchRILPC<"pfdrl", 0xC62, z_prefetch>;
+
//===----------------------------------------------------------------------===//
// Atomic operations
//===----------------------------------------------------------------------===//
@@ -805,60 +1215,60 @@ def ATOMIC_LOAD_SGR : AtomicLoadBinaryReg64<atomic_load_sub_64>;
def ATOMIC_LOADW_NR : AtomicLoadWBinaryReg<z_atomic_loadw_and>;
def ATOMIC_LOADW_NILH : AtomicLoadWBinaryImm<z_atomic_loadw_and, imm32lh16c>;
def ATOMIC_LOAD_NR : AtomicLoadBinaryReg32<atomic_load_and_32>;
-def ATOMIC_LOAD_NILL32 : AtomicLoadBinaryImm32<atomic_load_and_32, imm32ll16c>;
-def ATOMIC_LOAD_NILH32 : AtomicLoadBinaryImm32<atomic_load_and_32, imm32lh16c>;
-def ATOMIC_LOAD_NILF32 : AtomicLoadBinaryImm32<atomic_load_and_32, uimm32>;
+def ATOMIC_LOAD_NILL : AtomicLoadBinaryImm32<atomic_load_and_32, imm32ll16c>;
+def ATOMIC_LOAD_NILH : AtomicLoadBinaryImm32<atomic_load_and_32, imm32lh16c>;
+def ATOMIC_LOAD_NILF : AtomicLoadBinaryImm32<atomic_load_and_32, uimm32>;
def ATOMIC_LOAD_NGR : AtomicLoadBinaryReg64<atomic_load_and_64>;
-def ATOMIC_LOAD_NILL : AtomicLoadBinaryImm64<atomic_load_and_64, imm64ll16c>;
-def ATOMIC_LOAD_NILH : AtomicLoadBinaryImm64<atomic_load_and_64, imm64lh16c>;
-def ATOMIC_LOAD_NIHL : AtomicLoadBinaryImm64<atomic_load_and_64, imm64hl16c>;
-def ATOMIC_LOAD_NIHH : AtomicLoadBinaryImm64<atomic_load_and_64, imm64hh16c>;
-def ATOMIC_LOAD_NILF : AtomicLoadBinaryImm64<atomic_load_and_64, imm64lf32c>;
-def ATOMIC_LOAD_NIHF : AtomicLoadBinaryImm64<atomic_load_and_64, imm64hf32c>;
+def ATOMIC_LOAD_NILL64 : AtomicLoadBinaryImm64<atomic_load_and_64, imm64ll16c>;
+def ATOMIC_LOAD_NILH64 : AtomicLoadBinaryImm64<atomic_load_and_64, imm64lh16c>;
+def ATOMIC_LOAD_NIHL64 : AtomicLoadBinaryImm64<atomic_load_and_64, imm64hl16c>;
+def ATOMIC_LOAD_NIHH64 : AtomicLoadBinaryImm64<atomic_load_and_64, imm64hh16c>;
+def ATOMIC_LOAD_NILF64 : AtomicLoadBinaryImm64<atomic_load_and_64, imm64lf32c>;
+def ATOMIC_LOAD_NIHF64 : AtomicLoadBinaryImm64<atomic_load_and_64, imm64hf32c>;
def ATOMIC_LOADW_OR : AtomicLoadWBinaryReg<z_atomic_loadw_or>;
def ATOMIC_LOADW_OILH : AtomicLoadWBinaryImm<z_atomic_loadw_or, imm32lh16>;
def ATOMIC_LOAD_OR : AtomicLoadBinaryReg32<atomic_load_or_32>;
-def ATOMIC_LOAD_OILL32 : AtomicLoadBinaryImm32<atomic_load_or_32, imm32ll16>;
-def ATOMIC_LOAD_OILH32 : AtomicLoadBinaryImm32<atomic_load_or_32, imm32lh16>;
-def ATOMIC_LOAD_OILF32 : AtomicLoadBinaryImm32<atomic_load_or_32, uimm32>;
+def ATOMIC_LOAD_OILL : AtomicLoadBinaryImm32<atomic_load_or_32, imm32ll16>;
+def ATOMIC_LOAD_OILH : AtomicLoadBinaryImm32<atomic_load_or_32, imm32lh16>;
+def ATOMIC_LOAD_OILF : AtomicLoadBinaryImm32<atomic_load_or_32, uimm32>;
def ATOMIC_LOAD_OGR : AtomicLoadBinaryReg64<atomic_load_or_64>;
-def ATOMIC_LOAD_OILL : AtomicLoadBinaryImm64<atomic_load_or_64, imm64ll16>;
-def ATOMIC_LOAD_OILH : AtomicLoadBinaryImm64<atomic_load_or_64, imm64lh16>;
-def ATOMIC_LOAD_OIHL : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hl16>;
-def ATOMIC_LOAD_OIHH : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hh16>;
-def ATOMIC_LOAD_OILF : AtomicLoadBinaryImm64<atomic_load_or_64, imm64lf32>;
-def ATOMIC_LOAD_OIHF : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hf32>;
+def ATOMIC_LOAD_OILL64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64ll16>;
+def ATOMIC_LOAD_OILH64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64lh16>;
+def ATOMIC_LOAD_OIHL64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hl16>;
+def ATOMIC_LOAD_OIHH64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hh16>;
+def ATOMIC_LOAD_OILF64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64lf32>;
+def ATOMIC_LOAD_OIHF64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hf32>;
def ATOMIC_LOADW_XR : AtomicLoadWBinaryReg<z_atomic_loadw_xor>;
def ATOMIC_LOADW_XILF : AtomicLoadWBinaryImm<z_atomic_loadw_xor, uimm32>;
def ATOMIC_LOAD_XR : AtomicLoadBinaryReg32<atomic_load_xor_32>;
-def ATOMIC_LOAD_XILF32 : AtomicLoadBinaryImm32<atomic_load_xor_32, uimm32>;
+def ATOMIC_LOAD_XILF : AtomicLoadBinaryImm32<atomic_load_xor_32, uimm32>;
def ATOMIC_LOAD_XGR : AtomicLoadBinaryReg64<atomic_load_xor_64>;
-def ATOMIC_LOAD_XILF : AtomicLoadBinaryImm64<atomic_load_xor_64, imm64lf32>;
-def ATOMIC_LOAD_XIHF : AtomicLoadBinaryImm64<atomic_load_xor_64, imm64hf32>;
+def ATOMIC_LOAD_XILF64 : AtomicLoadBinaryImm64<atomic_load_xor_64, imm64lf32>;
+def ATOMIC_LOAD_XIHF64 : AtomicLoadBinaryImm64<atomic_load_xor_64, imm64hf32>;
def ATOMIC_LOADW_NRi : AtomicLoadWBinaryReg<z_atomic_loadw_nand>;
def ATOMIC_LOADW_NILHi : AtomicLoadWBinaryImm<z_atomic_loadw_nand,
imm32lh16c>;
def ATOMIC_LOAD_NRi : AtomicLoadBinaryReg32<atomic_load_nand_32>;
-def ATOMIC_LOAD_NILL32i : AtomicLoadBinaryImm32<atomic_load_nand_32,
+def ATOMIC_LOAD_NILLi : AtomicLoadBinaryImm32<atomic_load_nand_32,
imm32ll16c>;
-def ATOMIC_LOAD_NILH32i : AtomicLoadBinaryImm32<atomic_load_nand_32,
+def ATOMIC_LOAD_NILHi : AtomicLoadBinaryImm32<atomic_load_nand_32,
imm32lh16c>;
-def ATOMIC_LOAD_NILF32i : AtomicLoadBinaryImm32<atomic_load_nand_32, uimm32>;
+def ATOMIC_LOAD_NILFi : AtomicLoadBinaryImm32<atomic_load_nand_32, uimm32>;
def ATOMIC_LOAD_NGRi : AtomicLoadBinaryReg64<atomic_load_nand_64>;
-def ATOMIC_LOAD_NILLi : AtomicLoadBinaryImm64<atomic_load_nand_64,
+def ATOMIC_LOAD_NILL64i : AtomicLoadBinaryImm64<atomic_load_nand_64,
imm64ll16c>;
-def ATOMIC_LOAD_NILHi : AtomicLoadBinaryImm64<atomic_load_nand_64,
+def ATOMIC_LOAD_NILH64i : AtomicLoadBinaryImm64<atomic_load_nand_64,
imm64lh16c>;
-def ATOMIC_LOAD_NIHLi : AtomicLoadBinaryImm64<atomic_load_nand_64,
+def ATOMIC_LOAD_NIHL64i : AtomicLoadBinaryImm64<atomic_load_nand_64,
imm64hl16c>;
-def ATOMIC_LOAD_NIHHi : AtomicLoadBinaryImm64<atomic_load_nand_64,
+def ATOMIC_LOAD_NIHH64i : AtomicLoadBinaryImm64<atomic_load_nand_64,
imm64hh16c>;
-def ATOMIC_LOAD_NILFi : AtomicLoadBinaryImm64<atomic_load_nand_64,
+def ATOMIC_LOAD_NILF64i : AtomicLoadBinaryImm64<atomic_load_nand_64,
imm64lf32c>;
-def ATOMIC_LOAD_NIHFi : AtomicLoadBinaryImm64<atomic_load_nand_64,
+def ATOMIC_LOAD_NIHF64i : AtomicLoadBinaryImm64<atomic_load_nand_64,
imm64hf32c>;
def ATOMIC_LOADW_MIN : AtomicLoadWBinaryReg<z_atomic_loadw_min>;
@@ -885,13 +1295,13 @@ def ATOMIC_CMP_SWAPW
(z_atomic_cmp_swapw bdaddr20only:$addr, GR32:$cmp, GR32:$swap,
ADDR32:$bitshift, ADDR32:$negbitshift,
uimm32:$bitsize))]> {
- let Defs = [PSW];
+ let Defs = [CC];
let mayLoad = 1;
let mayStore = 1;
let usesCustomInserter = 1;
}
-let Defs = [PSW] in {
+let Defs = [CC] in {
defm CS : CmpSwapRSPair<"cs", 0xBA, 0xEB14, atomic_cmp_swap_32, GR32>;
def CSG : CmpSwapRSY<"csg", 0xEB30, atomic_cmp_swap_64, GR64>;
}
@@ -900,34 +1310,30 @@ let Defs = [PSW] in {
// Miscellaneous Instructions.
//===----------------------------------------------------------------------===//
+// Extract CC into bits 29 and 28 of a register.
+let Uses = [CC] in
+ def IPM : InherentRRE<"ipm", 0xB222, GR32, (z_ipm)>;
+
// Read a 32-bit access register into a GR32. As with all GR32 operations,
// the upper 32 bits of the enclosing GR64 remain unchanged, which is useful
// when a 64-bit address is stored in a pair of access registers.
-def EAR : InstRRE<0xB24F, (outs GR32:$dst), (ins access_reg:$src),
- "ear\t$dst, $src",
- [(set GR32:$dst, (z_extract_access access_reg:$src))]>;
+def EAR : InstRRE<0xB24F, (outs GR32:$R1), (ins access_reg:$R2),
+ "ear\t$R1, $R2",
+ [(set GR32:$R1, (z_extract_access access_reg:$R2))]>;
// Find leftmost one, AKA count leading zeros. The instruction actually
// returns a pair of GR64s, the first giving the number of leading zeros
// and the second giving a copy of the source with the leftmost one bit
// cleared. We only use the first result here.
-let Defs = [PSW] in {
- def FLOGR : UnaryRRE<"flogr", 0xB983, null_frag, GR128, GR64>;
+let Defs = [CC] in {
+ def FLOGR : UnaryRRE<"flog", 0xB983, null_frag, GR128, GR64>;
}
def : Pat<(ctlz GR64:$src),
- (EXTRACT_SUBREG (FLOGR GR64:$src), subreg_high)>;
+ (EXTRACT_SUBREG (FLOGR GR64:$src), subreg_h64)>;
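As a sketch of the pair of results described in the comment above, assuming the zero-input case produces a count of 64 and a zero remainder (illustrative C++, not backend code):

    #include <cstdint>
    #include <utility>

    // First result: number of leading zeros (position of the leftmost one).
    // Second result: the source with that leftmost one bit cleared.
    std::pair<uint64_t, uint64_t> flogr(uint64_t src) {
      if (src == 0)
        return {64, 0};                      // assumed zero-input behavior
      unsigned clz = __builtin_clzll(src);
      uint64_t leftmost = 0x8000000000000000ULL >> clz;
      return {clz, src & ~leftmost};
    }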
// Use subregs to populate the "don't care" bits in a 32-bit to 64-bit anyext.
def : Pat<(i64 (anyext GR32:$src)),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_32bit)>;
-
-// There are no 32-bit equivalents of LLILL and LLILH, so use a full
-// 64-bit move followed by a subreg. This preserves the invariant that
-// all GR32 operations only modify the low 32 bits.
-def : Pat<(i32 imm32ll16:$src),
- (EXTRACT_SUBREG (LLILL (LL16 imm:$src)), subreg_32bit)>;
-def : Pat<(i32 imm32lh16:$src),
- (EXTRACT_SUBREG (LLILH (LH16 imm:$src)), subreg_32bit)>;
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32)>;
// Extend GR32s and GR64s to GR128s.
let usesCustomInserter = 1 in {
@@ -936,6 +1342,10 @@ let usesCustomInserter = 1 in {
def ZEXT128_64 : Pseudo<(outs GR128:$dst), (ins GR64:$src), []>;
}
+// Search a block of memory for a character.
+let mayLoad = 1, Defs = [CC], Uses = [R0L] in
+  defm SRST : StringRRE<"srst", 0xB25E, z_search_string>;
+
//===----------------------------------------------------------------------===//
// Peepholes.
//===----------------------------------------------------------------------===//
@@ -944,12 +1354,40 @@ let usesCustomInserter = 1 in {
defm : ZXB<add, GR64, ALGFR>;
def : Pat<(add GR64:$src1, imm64zx32:$src2),
(ALGFI GR64:$src1, imm64zx32:$src2)>;
-def : Pat<(add GR64:$src1, (zextloadi32 bdxaddr20only:$addr)),
+def : Pat<(add GR64:$src1, (azextloadi32 bdxaddr20only:$addr)),
(ALGF GR64:$src1, bdxaddr20only:$addr)>;
// Use SL* for GR64 subtractions of unsigned 32-bit values.
defm : ZXB<sub, GR64, SLGFR>;
def : Pat<(add GR64:$src1, imm64zx32n:$src2),
(SLGFI GR64:$src1, imm64zx32n:$src2)>;
-def : Pat<(sub GR64:$src1, (zextloadi32 bdxaddr20only:$addr)),
+def : Pat<(sub GR64:$src1, (azextloadi32 bdxaddr20only:$addr)),
(SLGF GR64:$src1, bdxaddr20only:$addr)>;
+
+// Optimize sign-extended 1/0 selects to -1/0 selects. This is important
+// for vector legalization.
+def : Pat<(sra (shl (i32 (z_select_ccmask 1, 0, uimm8zx4:$valid, uimm8zx4:$cc)),
+ (i32 31)),
+ (i32 31)),
+ (Select32 (LHI -1), (LHI 0), uimm8zx4:$valid, uimm8zx4:$cc)>;
+def : Pat<(sra (shl (i64 (anyext (i32 (z_select_ccmask 1, 0, uimm8zx4:$valid,
+ uimm8zx4:$cc)))),
+ (i32 63)),
+ (i32 63)),
+ (Select64 (LGHI -1), (LGHI 0), uimm8zx4:$valid, uimm8zx4:$cc)>;
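The two patterns above rely on a standard identity: shifting a 1/0 value up to the sign bit and arithmetically shifting it back replicates that bit across the register, so a sign-extended 1/0 select is exactly a -1/0 select. A one-line C++ demonstration (illustrative only; the unsigned cast avoids the signed-shift pitfall):

    #include <cstdint>

    int32_t signSplat(int32_t zeroOrOne) {
      // 1 -> 0xFFFFFFFF (-1), 0 -> 0
      return (int32_t)((uint32_t)zeroOrOne << 31) >> 31;
    }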
+
+// Peepholes for turning scalar operations into block operations.
+defm : BlockLoadStore<anyextloadi8, i32, MVCSequence, NCSequence, OCSequence,
+ XCSequence, 1>;
+defm : BlockLoadStore<anyextloadi16, i32, MVCSequence, NCSequence, OCSequence,
+ XCSequence, 2>;
+defm : BlockLoadStore<load, i32, MVCSequence, NCSequence, OCSequence,
+ XCSequence, 4>;
+defm : BlockLoadStore<anyextloadi8, i64, MVCSequence, NCSequence,
+ OCSequence, XCSequence, 1>;
+defm : BlockLoadStore<anyextloadi16, i64, MVCSequence, NCSequence, OCSequence,
+ XCSequence, 2>;
+defm : BlockLoadStore<anyextloadi32, i64, MVCSequence, NCSequence, OCSequence,
+ XCSequence, 4>;
+defm : BlockLoadStore<load, i64, MVCSequence, NCSequence, OCSequence,
+ XCSequence, 8>;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp
new file mode 100644
index 0000000..ba027d4
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp
@@ -0,0 +1,462 @@
+//===-- SystemZLongBranch.cpp - Branch lengthening for SystemZ ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass makes sure that all branches are in range. There are several ways
+// in which this could be done. One aggressive approach is to assume that all
+// branches are in range and successively replace those that turn out not
+// to be in range with a longer form (branch relaxation). A simple
+// implementation is to continually walk through the function relaxing
+// branches until no more changes are needed and a fixed point is reached.
+// However, in the pathological worst case, this implementation is
+// quadratic in the number of blocks; relaxing branch N can make branch N-1
+// go out of range, which in turn can make branch N-2 go out of range,
+// and so on.
+//
+// An alternative approach is to assume that all branches must be
+// converted to their long forms, then reinstate the short forms of
+// branches that, even under this pessimistic assumption, turn out to be
+// in range (branch shortening). This too can be implemented as a function
+// walk that is repeated until a fixed point is reached. In general,
+// the result of shortening is not as good as that of relaxation, and
+// shortening is also quadratic in the worst case; shortening branch N
+// can bring branch N-1 into range of the short form, which in turn can do
+// the same for branch N-2, and so on. The main advantage of shortening
+// is that each walk through the function produces valid code, so it is
+// possible to stop at any point after the first walk. The quadratic
+// behavior could therefore be handled with a maximum pass count, although
+// the question then becomes: what maximum count should be used?
+//
+// On SystemZ, long branches are only needed for functions bigger than 64k,
+// which are relatively rare to begin with, and the long branch sequences
+// are actually relatively cheap. It therefore doesn't seem worth spending
+// much compilation time on the problem. Instead, the approach we take is:
+//
+// (1) Work out the address that each block would have if no branches
+// need relaxing. Exit the pass early if all branches are in range
+// according to this assumption.
+//
+// (2) Work out the address that each block would have if all branches
+// need relaxing.
+//
+// (3) Walk through the blocks calculating the final address of each instruction
+// and relaxing those that need to be relaxed. For backward branches,
+// this check uses the final address of the target block, as calculated
+// earlier in the walk. For forward branches, this check uses the
+// address of the target block that was calculated in (2). Both checks
+// give a conservatively-correct range.
+//
+//===----------------------------------------------------------------------===//
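For contrast with the approach chosen above, a sketch of the naive fixed-point relaxation described in the first paragraph; every helper name here is hypothetical and exists only to show why that loop is quadratic in the worst case:

    void naiveRelaxation(MachineFunction &MF) {
      bool Changed;
      do {
        Changed = false;
        recomputeAddresses(MF);             // hypothetical helper
        for (auto &Branch : branches(MF))   // hypothetical helper
          if (!inShortRange(Branch)) {      // hypothetical helper
            relaxToLongForm(Branch);        // hypothetical helper
            Changed = true;                 // may push earlier branches out
          }                                 // of range -> another full walk
      } while (Changed);
    }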
+
+#define DEBUG_TYPE "systemz-long-branch"
+
+#include "SystemZTargetMachine.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+STATISTIC(LongBranches, "Number of long branches.");
+
+namespace {
+ // Represents positional information about a basic block.
+ struct MBBInfo {
+ // The address that we currently assume the block has.
+ uint64_t Address;
+
+ // The size of the block in bytes, excluding terminators.
+ // This value never changes.
+ uint64_t Size;
+
+ // The minimum alignment of the block, as a log2 value.
+ // This value never changes.
+ unsigned Alignment;
+
+ // The number of terminators in this block. This value never changes.
+ unsigned NumTerminators;
+
+ MBBInfo()
+ : Address(0), Size(0), Alignment(0), NumTerminators(0) {}
+ };
+
+ // Represents the state of a block terminator.
+ struct TerminatorInfo {
+ // If this terminator is a relaxable branch, this points to the branch
+ // instruction, otherwise it is null.
+ MachineInstr *Branch;
+
+ // The address that we currently assume the terminator has.
+ uint64_t Address;
+
+ // The current size of the terminator in bytes.
+ uint64_t Size;
+
+ // If Branch is nonnull, this is the number of the target block,
+ // otherwise it is unused.
+ unsigned TargetBlock;
+
+ // If Branch is nonnull, this is the length of the longest relaxed form,
+ // otherwise it is zero.
+ unsigned ExtraRelaxSize;
+
+    TerminatorInfo()
+      : Branch(0), Address(0), Size(0), TargetBlock(0), ExtraRelaxSize(0) {}
+ };
+
+ // Used to keep track of the current position while iterating over the blocks.
+ struct BlockPosition {
+ // The address that we assume this position has.
+ uint64_t Address;
+
+ // The number of low bits in Address that are known to be the same
+ // as the runtime address.
+ unsigned KnownBits;
+
+ BlockPosition(unsigned InitialAlignment)
+ : Address(0), KnownBits(InitialAlignment) {}
+ };
+
+ class SystemZLongBranch : public MachineFunctionPass {
+ public:
+ static char ID;
+ SystemZLongBranch(const SystemZTargetMachine &tm)
+ : MachineFunctionPass(ID), TII(0) {}
+
+ virtual const char *getPassName() const {
+ return "SystemZ Long Branch";
+ }
+
+ bool runOnMachineFunction(MachineFunction &F);
+
+ private:
+ void skipNonTerminators(BlockPosition &Position, MBBInfo &Block);
+ void skipTerminator(BlockPosition &Position, TerminatorInfo &Terminator,
+ bool AssumeRelaxed);
+ TerminatorInfo describeTerminator(MachineInstr *MI);
+ uint64_t initMBBInfo();
+ bool mustRelaxBranch(const TerminatorInfo &Terminator, uint64_t Address);
+ bool mustRelaxABranch();
+ void setWorstCaseAddresses();
+ void splitBranchOnCount(MachineInstr *MI, unsigned AddOpcode);
+ void splitCompareBranch(MachineInstr *MI, unsigned CompareOpcode);
+ void relaxBranch(TerminatorInfo &Terminator);
+ void relaxBranches();
+
+ const SystemZInstrInfo *TII;
+ MachineFunction *MF;
+ SmallVector<MBBInfo, 16> MBBs;
+ SmallVector<TerminatorInfo, 16> Terminators;
+ };
+
+ char SystemZLongBranch::ID = 0;
+
+ const uint64_t MaxBackwardRange = 0x10000;
+ const uint64_t MaxForwardRange = 0xfffe;
+} // end of anonymous namespace
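These two constants follow from the encoding of short relative branches, which hold a signed 16-bit displacement counted in 2-byte halfwords; a quick illustrative check:

    #include <cstdint>

    constexpr int64_t HalfwordsMin = -32768, HalfwordsMax = 32767;
    static_assert(-HalfwordsMin * 2 == 0x10000, "backward range in bytes");
    static_assert(HalfwordsMax * 2 == 0xfffe, "forward range in bytes");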
+
+FunctionPass *llvm::createSystemZLongBranchPass(SystemZTargetMachine &TM) {
+ return new SystemZLongBranch(TM);
+}
+
+// Position describes the state immediately before Block. Update Block
+// accordingly and move Position to the end of the block's non-terminator
+// instructions.
+void SystemZLongBranch::skipNonTerminators(BlockPosition &Position,
+ MBBInfo &Block) {
+ if (Block.Alignment > Position.KnownBits) {
+ // When calculating the address of Block, we need to conservatively
+ // assume that Block had the worst possible misalignment.
+ Position.Address += ((uint64_t(1) << Block.Alignment) -
+ (uint64_t(1) << Position.KnownBits));
+ Position.KnownBits = Block.Alignment;
+ }
+
+ // Align the addresses.
+ uint64_t AlignMask = (uint64_t(1) << Block.Alignment) - 1;
+ Position.Address = (Position.Address + AlignMask) & ~AlignMask;
+
+ // Record the block's position.
+ Block.Address = Position.Address;
+
+ // Move past the non-terminators in the block.
+ Position.Address += Block.Size;
+}
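Two alignment steps happen above: worst-case padding of (1 << Alignment) - (1 << KnownBits) bytes, then the usual round-up-with-a-mask trick. A worked instance of the latter (illustrative C++):

    #include <cstdint>

    uint64_t alignUp(uint64_t Addr, unsigned Log2) {
      uint64_t Mask = (uint64_t(1) << Log2) - 1;
      return (Addr + Mask) & ~Mask;   // e.g. alignUp(0x1234, 3) == 0x1238
    }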
+
+// Position describes the state immediately before Terminator.
+// Update Terminator accordingly and move Position past it.
+// Assume that Terminator will be relaxed if AssumeRelaxed.
+void SystemZLongBranch::skipTerminator(BlockPosition &Position,
+ TerminatorInfo &Terminator,
+ bool AssumeRelaxed) {
+ Terminator.Address = Position.Address;
+ Position.Address += Terminator.Size;
+ if (AssumeRelaxed)
+ Position.Address += Terminator.ExtraRelaxSize;
+}
+
+// Return a description of terminator instruction MI.
+TerminatorInfo SystemZLongBranch::describeTerminator(MachineInstr *MI) {
+ TerminatorInfo Terminator;
+ Terminator.Size = TII->getInstSizeInBytes(MI);
+ if (MI->isConditionalBranch() || MI->isUnconditionalBranch()) {
+ switch (MI->getOpcode()) {
+ case SystemZ::J:
+ // Relaxes to JG, which is 2 bytes longer.
+ Terminator.ExtraRelaxSize = 2;
+ break;
+ case SystemZ::BRC:
+ // Relaxes to BRCL, which is 2 bytes longer.
+ Terminator.ExtraRelaxSize = 2;
+ break;
+ case SystemZ::BRCT:
+ case SystemZ::BRCTG:
+ // Relaxes to A(G)HI and BRCL, which is 6 bytes longer.
+ Terminator.ExtraRelaxSize = 6;
+ break;
+ case SystemZ::CRJ:
+ case SystemZ::CLRJ:
+ // Relaxes to a C(L)R/BRCL sequence, which is 2 bytes longer.
+ Terminator.ExtraRelaxSize = 2;
+ break;
+ case SystemZ::CGRJ:
+ case SystemZ::CLGRJ:
+ // Relaxes to a C(L)GR/BRCL sequence, which is 4 bytes longer.
+ Terminator.ExtraRelaxSize = 4;
+ break;
+ case SystemZ::CIJ:
+ case SystemZ::CGIJ:
+ // Relaxes to a C(G)HI/BRCL sequence, which is 4 bytes longer.
+ Terminator.ExtraRelaxSize = 4;
+ break;
+ case SystemZ::CLIJ:
+ case SystemZ::CLGIJ:
+ // Relaxes to a CL(G)FI/BRCL sequence, which is 6 bytes longer.
+ Terminator.ExtraRelaxSize = 6;
+ break;
+ default:
+ llvm_unreachable("Unrecognized branch instruction");
+ }
+ Terminator.Branch = MI;
+ Terminator.TargetBlock =
+ TII->getBranchInfo(MI).Target->getMBB()->getNumber();
+ }
+ return Terminator;
+}
+
+// Fill MBBs and Terminators, setting the addresses on the assumption
+// that no branches need relaxation. Return the size of the function under
+// this assumption.
+uint64_t SystemZLongBranch::initMBBInfo() {
+ MF->RenumberBlocks();
+ unsigned NumBlocks = MF->size();
+
+ MBBs.clear();
+ MBBs.resize(NumBlocks);
+
+ Terminators.clear();
+ Terminators.reserve(NumBlocks);
+
+ BlockPosition Position(MF->getAlignment());
+ for (unsigned I = 0; I < NumBlocks; ++I) {
+ MachineBasicBlock *MBB = MF->getBlockNumbered(I);
+ MBBInfo &Block = MBBs[I];
+
+ // Record the alignment, for quick access.
+ Block.Alignment = MBB->getAlignment();
+
+ // Calculate the size of the fixed part of the block.
+ MachineBasicBlock::iterator MI = MBB->begin();
+ MachineBasicBlock::iterator End = MBB->end();
+ while (MI != End && !MI->isTerminator()) {
+ Block.Size += TII->getInstSizeInBytes(MI);
+ ++MI;
+ }
+ skipNonTerminators(Position, Block);
+
+ // Add the terminators.
+ while (MI != End) {
+ if (!MI->isDebugValue()) {
+ assert(MI->isTerminator() && "Terminator followed by non-terminator");
+ Terminators.push_back(describeTerminator(MI));
+ skipTerminator(Position, Terminators.back(), false);
+ ++Block.NumTerminators;
+ }
+ ++MI;
+ }
+ }
+
+ return Position.Address;
+}
+
+// Return true if, under current assumptions, Terminator would need to be
+// relaxed if it were placed at address Address.
+bool SystemZLongBranch::mustRelaxBranch(const TerminatorInfo &Terminator,
+ uint64_t Address) {
+ if (!Terminator.Branch)
+ return false;
+
+ const MBBInfo &Target = MBBs[Terminator.TargetBlock];
+ if (Address >= Target.Address) {
+ if (Address - Target.Address <= MaxBackwardRange)
+ return false;
+ } else {
+ if (Target.Address - Address <= MaxForwardRange)
+ return false;
+ }
+
+ return true;
+}
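A concrete instance of the asymmetric bounds used above (illustrative only): with MaxForwardRange = 0xfffe, a branch assumed to sit at 0x10000 still reaches a forward target at 0x1fffe, but a target at 0x20000 forces the long form:

    #include <cassert>
    #include <cstdint>

    void rangeExample() {
      const uint64_t Address = 0x10000;
      assert(0x1fffeULL - Address <= 0xfffe);    // short form still works
      assert(!(0x20000ULL - Address <= 0xfffe)); // must relax
    }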
+
+// Return true if, under current assumptions, any terminator needs
+// to be relaxed.
+bool SystemZLongBranch::mustRelaxABranch() {
+ for (SmallVectorImpl<TerminatorInfo>::iterator TI = Terminators.begin(),
+ TE = Terminators.end(); TI != TE; ++TI)
+ if (mustRelaxBranch(*TI, TI->Address))
+ return true;
+ return false;
+}
+
+// Set the address of each block on the assumption that all branches
+// must be long.
+void SystemZLongBranch::setWorstCaseAddresses() {
+ SmallVector<TerminatorInfo, 16>::iterator TI = Terminators.begin();
+ BlockPosition Position(MF->getAlignment());
+ for (SmallVectorImpl<MBBInfo>::iterator BI = MBBs.begin(), BE = MBBs.end();
+ BI != BE; ++BI) {
+ skipNonTerminators(Position, *BI);
+ for (unsigned BTI = 0, BTE = BI->NumTerminators; BTI != BTE; ++BTI) {
+ skipTerminator(Position, *TI, true);
+ ++TI;
+ }
+ }
+}
+
+// Split BRANCH ON COUNT MI into the addition given by AddOpcode followed
+// by a BRCL on the result.
+void SystemZLongBranch::splitBranchOnCount(MachineInstr *MI,
+ unsigned AddOpcode) {
+ MachineBasicBlock *MBB = MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ BuildMI(*MBB, MI, DL, TII->get(AddOpcode))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1))
+ .addImm(-1);
+ MachineInstr *BRCL = BuildMI(*MBB, MI, DL, TII->get(SystemZ::BRCL))
+ .addImm(SystemZ::CCMASK_ICMP)
+ .addImm(SystemZ::CCMASK_CMP_NE)
+ .addOperand(MI->getOperand(2));
+ // The implicit use of CC is a killing use.
+ BRCL->addRegisterKilled(SystemZ::CC, &TII->getRegisterInfo());
+ MI->eraseFromParent();
+}
+
+// Split MI into the comparison given by CompareOpcode, followed by
+// a BRCL on the result.
+void SystemZLongBranch::splitCompareBranch(MachineInstr *MI,
+ unsigned CompareOpcode) {
+ MachineBasicBlock *MBB = MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ BuildMI(*MBB, MI, DL, TII->get(CompareOpcode))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1));
+ MachineInstr *BRCL = BuildMI(*MBB, MI, DL, TII->get(SystemZ::BRCL))
+ .addImm(SystemZ::CCMASK_ICMP)
+ .addOperand(MI->getOperand(2))
+ .addOperand(MI->getOperand(3));
+ // The implicit use of CC is a killing use.
+ BRCL->addRegisterKilled(SystemZ::CC, &TII->getRegisterInfo());
+ MI->eraseFromParent();
+}
+
+// Relax the branch described by Terminator.
+void SystemZLongBranch::relaxBranch(TerminatorInfo &Terminator) {
+ MachineInstr *Branch = Terminator.Branch;
+ switch (Branch->getOpcode()) {
+ case SystemZ::J:
+ Branch->setDesc(TII->get(SystemZ::JG));
+ break;
+ case SystemZ::BRC:
+ Branch->setDesc(TII->get(SystemZ::BRCL));
+ break;
+ case SystemZ::BRCT:
+ splitBranchOnCount(Branch, SystemZ::AHI);
+ break;
+ case SystemZ::BRCTG:
+ splitBranchOnCount(Branch, SystemZ::AGHI);
+ break;
+ case SystemZ::CRJ:
+ splitCompareBranch(Branch, SystemZ::CR);
+ break;
+ case SystemZ::CGRJ:
+ splitCompareBranch(Branch, SystemZ::CGR);
+ break;
+ case SystemZ::CIJ:
+ splitCompareBranch(Branch, SystemZ::CHI);
+ break;
+ case SystemZ::CGIJ:
+ splitCompareBranch(Branch, SystemZ::CGHI);
+ break;
+ case SystemZ::CLRJ:
+ splitCompareBranch(Branch, SystemZ::CLR);
+ break;
+ case SystemZ::CLGRJ:
+ splitCompareBranch(Branch, SystemZ::CLGR);
+ break;
+ case SystemZ::CLIJ:
+ splitCompareBranch(Branch, SystemZ::CLFI);
+ break;
+ case SystemZ::CLGIJ:
+ splitCompareBranch(Branch, SystemZ::CLGFI);
+ break;
+ default:
+ llvm_unreachable("Unrecognized branch");
+ }
+
+ Terminator.Size += Terminator.ExtraRelaxSize;
+ Terminator.ExtraRelaxSize = 0;
+ Terminator.Branch = 0;
+
+ ++LongBranches;
+}
+
+// Run a shortening pass and relax any branches that need to be relaxed.
+void SystemZLongBranch::relaxBranches() {
+ SmallVector<TerminatorInfo, 16>::iterator TI = Terminators.begin();
+ BlockPosition Position(MF->getAlignment());
+ for (SmallVectorImpl<MBBInfo>::iterator BI = MBBs.begin(), BE = MBBs.end();
+ BI != BE; ++BI) {
+ skipNonTerminators(Position, *BI);
+ for (unsigned BTI = 0, BTE = BI->NumTerminators; BTI != BTE; ++BTI) {
+ assert(Position.Address <= TI->Address &&
+ "Addresses shouldn't go forwards");
+ if (mustRelaxBranch(*TI, Position.Address))
+ relaxBranch(*TI);
+ skipTerminator(Position, *TI, false);
+ ++TI;
+ }
+ }
+}
+
+bool SystemZLongBranch::runOnMachineFunction(MachineFunction &F) {
+ TII = static_cast<const SystemZInstrInfo *>(F.getTarget().getInstrInfo());
+ MF = &F;
+ uint64_t Size = initMBBInfo();
+ if (Size <= MaxForwardRange || !mustRelaxABranch())
+ return false;
+
+ setWorstCaseAddresses();
+ relaxBranches();
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.cpp
index 5d83321..ff9a6c0 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.cpp
@@ -15,20 +15,6 @@
using namespace llvm;
-// Where relaxable pairs of reloc-generating instructions exist,
-// we tend to use the longest form by default, since that produces
-// correct assembly in cases where no relaxation is performed.
-// If Opcode is one such instruction, return the opcode for the
-// shortest possible form instead, otherwise return Opcode itself.
-static unsigned getShortenedInstr(unsigned Opcode) {
- switch (Opcode) {
- case SystemZ::BRCL: return SystemZ::BRC;
- case SystemZ::JG: return SystemZ::J;
- case SystemZ::BRASL: return SystemZ::BRAS;
- }
- return Opcode;
-}
-
// Return the VK_* enumeration for MachineOperand target flags Flags.
static MCSymbolRefExpr::VariantKind getVariantKind(unsigned Flags) {
switch (Flags & SystemZII::MO_SYMBOL_MODIFIER) {
@@ -40,77 +26,75 @@ static MCSymbolRefExpr::VariantKind getVariantKind(unsigned Flags) {
llvm_unreachable("Unrecognised MO_ACCESS_MODEL");
}
-SystemZMCInstLower::SystemZMCInstLower(Mangler *mang, MCContext &ctx,
+SystemZMCInstLower::SystemZMCInstLower(MCContext &ctx,
SystemZAsmPrinter &asmprinter)
- : Mang(mang), Ctx(ctx), AsmPrinter(asmprinter) {}
+ : Ctx(ctx), AsmPrinter(asmprinter) {}
-MCOperand SystemZMCInstLower::lowerSymbolOperand(const MachineOperand &MO,
- const MCSymbol *Symbol,
- int64_t Offset) const {
- MCSymbolRefExpr::VariantKind Kind = getVariantKind(MO.getTargetFlags());
- const MCExpr *Expr = MCSymbolRefExpr::Create(Symbol, Kind, Ctx);
- if (Offset) {
- const MCExpr *OffsetExpr = MCConstantExpr::Create(Offset, Ctx);
- Expr = MCBinaryExpr::CreateAdd(Expr, OffsetExpr, Ctx);
+const MCExpr *
+SystemZMCInstLower::getExpr(const MachineOperand &MO,
+ MCSymbolRefExpr::VariantKind Kind) const {
+ const MCSymbol *Symbol;
+ bool HasOffset = true;
+ switch (MO.getType()) {
+ case MachineOperand::MO_MachineBasicBlock:
+ Symbol = MO.getMBB()->getSymbol();
+ HasOffset = false;
+ break;
+
+ case MachineOperand::MO_GlobalAddress:
+ Symbol = AsmPrinter.getSymbol(MO.getGlobal());
+ break;
+
+ case MachineOperand::MO_ExternalSymbol:
+ Symbol = AsmPrinter.GetExternalSymbolSymbol(MO.getSymbolName());
+ break;
+
+ case MachineOperand::MO_JumpTableIndex:
+ Symbol = AsmPrinter.GetJTISymbol(MO.getIndex());
+ HasOffset = false;
+ break;
+
+ case MachineOperand::MO_ConstantPoolIndex:
+ Symbol = AsmPrinter.GetCPISymbol(MO.getIndex());
+ break;
+
+ case MachineOperand::MO_BlockAddress:
+ Symbol = AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress());
+ break;
+
+ default:
+ llvm_unreachable("unknown operand type");
}
- return MCOperand::CreateExpr(Expr);
+ const MCExpr *Expr = MCSymbolRefExpr::Create(Symbol, Kind, Ctx);
+ if (HasOffset)
+ if (int64_t Offset = MO.getOffset()) {
+ const MCExpr *OffsetExpr = MCConstantExpr::Create(Offset, Ctx);
+ Expr = MCBinaryExpr::CreateAdd(Expr, OffsetExpr, Ctx);
+ }
+ return Expr;
}
MCOperand SystemZMCInstLower::lowerOperand(const MachineOperand &MO) const {
switch (MO.getType()) {
- default:
- llvm_unreachable("unknown operand type");
-
case MachineOperand::MO_Register:
- // Ignore all implicit register operands.
- if (MO.isImplicit())
- return MCOperand();
return MCOperand::CreateReg(MO.getReg());
case MachineOperand::MO_Immediate:
return MCOperand::CreateImm(MO.getImm());
- case MachineOperand::MO_MachineBasicBlock:
- return lowerSymbolOperand(MO, MO.getMBB()->getSymbol(),
- /* MO has no offset field */0);
-
- case MachineOperand::MO_GlobalAddress:
- return lowerSymbolOperand(MO, Mang->getSymbol(MO.getGlobal()),
- MO.getOffset());
-
- case MachineOperand::MO_ExternalSymbol: {
- StringRef Name = MO.getSymbolName();
- return lowerSymbolOperand(MO, AsmPrinter.GetExternalSymbolSymbol(Name),
- MO.getOffset());
- }
-
- case MachineOperand::MO_JumpTableIndex:
- return lowerSymbolOperand(MO, AsmPrinter.GetJTISymbol(MO.getIndex()),
- /* MO has no offset field */0);
-
- case MachineOperand::MO_ConstantPoolIndex:
- return lowerSymbolOperand(MO, AsmPrinter.GetCPISymbol(MO.getIndex()),
- MO.getOffset());
-
- case MachineOperand::MO_BlockAddress: {
- const BlockAddress *BA = MO.getBlockAddress();
- return lowerSymbolOperand(MO, AsmPrinter.GetBlockAddressSymbol(BA),
- MO.getOffset());
+ default: {
+ MCSymbolRefExpr::VariantKind Kind = getVariantKind(MO.getTargetFlags());
+ return MCOperand::CreateExpr(getExpr(MO, Kind));
}
}
}
void SystemZMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
- unsigned Opcode = MI->getOpcode();
- // When emitting binary code, start with the shortest form of an instruction
- // and then relax it where necessary.
- if (!AsmPrinter.OutStreamer.hasRawTextSupport())
- Opcode = getShortenedInstr(Opcode);
- OutMI.setOpcode(Opcode);
+ OutMI.setOpcode(MI->getOpcode());
for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
const MachineOperand &MO = MI->getOperand(I);
- MCOperand MCOp = lowerOperand(MO);
- if (MCOp.isValid())
- OutMI.addOperand(MCOp);
+ // Ignore all implicit register operands.
+ if (!MO.isReg() || !MO.isImplicit())
+ OutMI.addOperand(lowerOperand(MO));
}
}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.h b/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.h
index afa72f3..f6d5ac8 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.h
@@ -10,37 +10,34 @@
#ifndef LLVM_SYSTEMZMCINSTLOWER_H
#define LLVM_SYSTEMZMCINSTLOWER_H
+#include "llvm/MC/MCExpr.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/Compiler.h"
namespace llvm {
-class MCContext;
class MCInst;
class MCOperand;
-class MCSymbol;
class MachineInstr;
class MachineOperand;
class Mangler;
class SystemZAsmPrinter;
class LLVM_LIBRARY_VISIBILITY SystemZMCInstLower {
- Mangler *Mang;
MCContext &Ctx;
SystemZAsmPrinter &AsmPrinter;
public:
- SystemZMCInstLower(Mangler *mang, MCContext &ctx,
- SystemZAsmPrinter &asmPrinter);
+ SystemZMCInstLower(MCContext &ctx, SystemZAsmPrinter &asmPrinter);
// Lower MachineInstr MI to MCInst OutMI.
void lower(const MachineInstr *MI, MCInst &OutMI) const;
- // Return an MCOperand for MO. Return an empty operand if MO is implicit.
+ // Return an MCOperand for MO.
MCOperand lowerOperand(const MachineOperand& MO) const;
- // Return an MCOperand for MO, given that it equals Symbol + Offset.
- MCOperand lowerSymbolOperand(const MachineOperand &MO,
- const MCSymbol *Symbol, int64_t Offset) const;
+ // Return an MCExpr for symbolic operand MO with variant kind Kind.
+ const MCExpr *getExpr(const MachineOperand &MO,
+ MCSymbolRefExpr::VariantKind Kind) const;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp
new file mode 100644
index 0000000..00572d0
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp
@@ -0,0 +1,17 @@
+//=== SystemZMachineFunctionInfo.cpp - SystemZ machine function info ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZMachineFunctionInfo.h"
+
+using namespace llvm;
+
+// Pin the vtable to this file.
+void SystemZMachineFunctionInfo::anchor() {}
+
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
index 1dc05a7e..845291f 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
@@ -15,7 +15,7 @@
namespace llvm {
class SystemZMachineFunctionInfo : public MachineFunctionInfo {
- unsigned SavedGPRFrameSize;
+ virtual void anchor();
unsigned LowSavedGPR;
unsigned HighSavedGPR;
unsigned VarArgsFirstGPR;
@@ -26,14 +26,8 @@ class SystemZMachineFunctionInfo : public MachineFunctionInfo {
public:
explicit SystemZMachineFunctionInfo(MachineFunction &MF)
- : SavedGPRFrameSize(0), LowSavedGPR(0), HighSavedGPR(0), VarArgsFirstGPR(0),
- VarArgsFirstFPR(0), VarArgsFrameIndex(0), RegSaveFrameIndex(0),
- ManipulatesSP(false) {}
-
- // Get and set the number of bytes allocated by generic code to store
- // call-saved GPRs.
- unsigned getSavedGPRFrameSize() const { return SavedGPRFrameSize; }
- void setSavedGPRFrameSize(unsigned bytes) { SavedGPRFrameSize = bytes; }
+ : LowSavedGPR(0), HighSavedGPR(0), VarArgsFirstGPR(0), VarArgsFirstFPR(0),
+ VarArgsFrameIndex(0), RegSaveFrameIndex(0), ManipulatesSP(false) {}
// Get and set the first call-saved GPR that should be saved and restored
// by this function. This is 0 if no GPRs need to be saved or restored.
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td b/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td
index 0abc3f7..3ad146c 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td
@@ -24,57 +24,93 @@ class ImmediateAsmOperand<string name>
class Immediate<ValueType vt, code pred, SDNodeXForm xform, string asmop>
: PatLeaf<(vt imm), pred, xform>, Operand<vt> {
let PrintMethod = "print"##asmop##"Operand";
+ let DecoderMethod = "decode"##asmop##"Operand";
let ParserMatchClass = !cast<AsmOperandClass>(asmop);
}
+// Constructs an asm operand for a PC-relative address. SIZE says how
+// many bits there are.
+class PCRelAsmOperand<string size> : ImmediateAsmOperand<"PCRel"##size> {
+ let PredicateMethod = "isImm";
+ let ParserMethod = "parsePCRel"##size;
+}
+
+// Constructs an operand for a PC-relative address with address type VT.
+// ASMOP is the associated asm operand.
+class PCRelOperand<ValueType vt, AsmOperandClass asmop> : Operand<vt> {
+ let PrintMethod = "printPCRelOperand";
+ let ParserMatchClass = asmop;
+}
+
// Constructs both a DAG pattern and instruction operand for a PC-relative
-// address with address size VT. SELF is the name of the operand.
-class PCRelAddress<ValueType vt, string self>
- : ComplexPattern<vt, 1, "selectPCRelAddress", [z_pcrel_wrapper]>,
- Operand<vt> {
+// address with address size VT. SELF is the name of the operand and
+// ASMOP is the associated asm operand.
+class PCRelAddress<ValueType vt, string self, AsmOperandClass asmop>
+ : ComplexPattern<vt, 1, "selectPCRelAddress",
+ [z_pcrel_wrapper, z_pcrel_offset]>,
+ PCRelOperand<vt, asmop> {
let MIOperandInfo = (ops !cast<Operand>(self));
}
// Constructs an AsmOperandClass for addressing mode FORMAT, treating the
// registers as having BITSIZE bits and displacements as having DISPSIZE bits.
-class AddressAsmOperand<string format, string bitsize, string dispsize>
+// LENGTH is "LenN" for addresses with an N-bit length field, otherwise it
+// is "".
+class AddressAsmOperand<string format, string bitsize, string dispsize,
+ string length = "">
: AsmOperandClass {
- let Name = format##bitsize##"Disp"##dispsize;
+ let Name = format##bitsize##"Disp"##dispsize##length;
let ParserMethod = "parse"##format##bitsize;
let RenderMethod = "add"##format##"Operands";
}
// Constructs both a DAG pattern and instruction operand for an addressing mode.
-// The mode is selected by custom code in selectTYPE...SUFFIX(). The address
-// registers have BITSIZE bits and displacements have DISPSIZE bits. NUMOPS is
-// the number of operands that make up an address and OPERANDS lists the types
-// of those operands using (ops ...). FORMAT is the type of addressing mode,
-// which needs to match the names used in AddressAsmOperand.
-class AddressingMode<string type, string bitsize, string dispsize,
- string suffix, int numops, string format, dag operands>
+// FORMAT, BITSIZE, DISPSIZE and LENGTH are the parameters to an associated
+// AddressAsmOperand. OPERANDS is a list of NUMOPS individual operands
+// (base register, displacement, etc.). SELTYPE is the type of the memory
+// operand for selection purposes; sometimes we want different selection
+// choices for the same underlying addressing mode. SUFFIX is similarly
+// a suffix appended to the displacement for selection purposes;
+// e.g. we want to reject small 20-bit displacements if a 12-bit form
+// also exists, but we want to accept them otherwise.
+class AddressingMode<string seltype, string bitsize, string dispsize,
+ string suffix, string length, int numops, string format,
+ dag operands>
: ComplexPattern<!cast<ValueType>("i"##bitsize), numops,
- "select"##type##dispsize##suffix,
+ "select"##seltype##dispsize##suffix##length,
[add, sub, or, frameindex, z_adjdynalloc]>,
Operand<!cast<ValueType>("i"##bitsize)> {
let PrintMethod = "print"##format##"Operand";
+ let EncoderMethod = "get"##format##dispsize##length##"Encoding";
+ let DecoderMethod =
+ "decode"##format##bitsize##"Disp"##dispsize##length##"Operand";
let MIOperandInfo = operands;
let ParserMatchClass =
- !cast<AddressAsmOperand>(format##bitsize##"Disp"##dispsize);
+ !cast<AddressAsmOperand>(format##bitsize##"Disp"##dispsize##length);
}
// An addressing mode with a base and displacement but no index.
class BDMode<string type, string bitsize, string dispsize, string suffix>
- : AddressingMode<type, bitsize, dispsize, suffix, 2, "BDAddr",
+ : AddressingMode<type, bitsize, dispsize, suffix, "", 2, "BDAddr",
(ops !cast<RegisterOperand>("ADDR"##bitsize),
!cast<Immediate>("disp"##dispsize##"imm"##bitsize))>;
// An addressing mode with a base, displacement and index.
class BDXMode<string type, string bitsize, string dispsize, string suffix>
- : AddressingMode<type, bitsize, dispsize, suffix, 3, "BDXAddr",
+ : AddressingMode<type, bitsize, dispsize, suffix, "", 3, "BDXAddr",
(ops !cast<RegisterOperand>("ADDR"##bitsize),
!cast<Immediate>("disp"##dispsize##"imm"##bitsize),
!cast<RegisterOperand>("ADDR"##bitsize))>;
+// A BDMode paired with an immediate length operand of LENSIZE bits.
+class BDLMode<string type, string bitsize, string dispsize, string suffix,
+ string lensize>
+ : AddressingMode<type, bitsize, dispsize, suffix, "Len"##lensize, 3,
+ "BDLAddr",
+ (ops !cast<RegisterOperand>("ADDR"##bitsize),
+ !cast<Immediate>("disp"##dispsize##"imm"##bitsize),
+ !cast<Immediate>("imm"##bitsize))>;
+
//===----------------------------------------------------------------------===//
// Extracting immediate operands from nodes
// These all create MVT::i64 nodes to ensure the value is not sign-extended
@@ -298,6 +334,10 @@ def imm64sx8 : Immediate<i64, [{
return isInt<8>(N->getSExtValue());
}], SIMM8, "S8Imm">;
+def imm64zx8 : Immediate<i64, [{
+ return isUInt<8>(N->getSExtValue());
+}], UIMM8, "U8Imm">;
+
def imm64sx16 : Immediate<i64, [{
return isInt<16>(N->getSExtValue());
}], SIMM16, "S16Imm">;
@@ -318,7 +358,7 @@ def imm64zx32n : Immediate<i64, [{
return isUInt<32>(-N->getSExtValue());
}], NEGIMM32, "U32Imm">;
-def imm64 : ImmLeaf<i64, [{}]>;
+def imm64 : ImmLeaf<i64, [{}]>, Operand<i64>;
//===----------------------------------------------------------------------===//
// Floating-point immediates
@@ -334,30 +374,26 @@ def fpimmneg0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(-0.0); }]>;
// Symbolic address operands
//===----------------------------------------------------------------------===//
+// PC-relative asm operands.
+def PCRel16 : PCRelAsmOperand<"16">;
+def PCRel32 : PCRelAsmOperand<"32">;
+
// PC-relative offsets of a basic block. The offset is sign-extended
// and multiplied by 2.
-def brtarget16 : Operand<OtherVT> {
+def brtarget16 : PCRelOperand<OtherVT, PCRel16> {
let EncoderMethod = "getPC16DBLEncoding";
+ let DecoderMethod = "decodePC16DBLOperand";
}
-def brtarget32 : Operand<OtherVT> {
+def brtarget32 : PCRelOperand<OtherVT, PCRel32> {
let EncoderMethod = "getPC32DBLEncoding";
+ let DecoderMethod = "decodePC32DBLOperand";
}
// A PC-relative offset of a global value. The offset is sign-extended
// and multiplied by 2.
-def pcrel32 : PCRelAddress<i64, "pcrel32"> {
+def pcrel32 : PCRelAddress<i64, "pcrel32", PCRel32> {
let EncoderMethod = "getPC32DBLEncoding";
-}
-
-// A PC-relative offset of a global value when the value is used as a
-// call target. The offset is sign-extended and multiplied by 2.
-def pcrel16call : PCRelAddress<i64, "pcrel16call"> {
- let PrintMethod = "printCallOperand";
- let EncoderMethod = "getPLT16DBLEncoding";
-}
-def pcrel32call : PCRelAddress<i64, "pcrel32call"> {
- let PrintMethod = "printCallOperand";
- let EncoderMethod = "getPLT32DBLEncoding";
+ let DecoderMethod = "decodePC32DBLOperand";
}
//===----------------------------------------------------------------------===//
@@ -372,22 +408,25 @@ def disp12imm64 : Operand<i64>;
def disp20imm32 : Operand<i32>;
def disp20imm64 : Operand<i64>;
-def BDAddr32Disp12 : AddressAsmOperand<"BDAddr", "32", "12">;
-def BDAddr32Disp20 : AddressAsmOperand<"BDAddr", "32", "20">;
-def BDAddr64Disp12 : AddressAsmOperand<"BDAddr", "64", "12">;
-def BDAddr64Disp20 : AddressAsmOperand<"BDAddr", "64", "20">;
-def BDXAddr64Disp12 : AddressAsmOperand<"BDXAddr", "64", "12">;
-def BDXAddr64Disp20 : AddressAsmOperand<"BDXAddr", "64", "20">;
+def BDAddr32Disp12 : AddressAsmOperand<"BDAddr", "32", "12">;
+def BDAddr32Disp20 : AddressAsmOperand<"BDAddr", "32", "20">;
+def BDAddr64Disp12 : AddressAsmOperand<"BDAddr", "64", "12">;
+def BDAddr64Disp20 : AddressAsmOperand<"BDAddr", "64", "20">;
+def BDXAddr64Disp12 : AddressAsmOperand<"BDXAddr", "64", "12">;
+def BDXAddr64Disp20 : AddressAsmOperand<"BDXAddr", "64", "20">;
+def BDLAddr64Disp12Len8 : AddressAsmOperand<"BDLAddr", "64", "12", "Len8">;
// DAG patterns and operands for addressing modes. Each mode has
-// the form <type><range><group> where:
+// the form <type><range><group>[<len>] where:
//
// <type> is one of:
// shift : base + displacement (32-bit)
// bdaddr : base + displacement
+// mviaddr : like bdaddr, but reject cases with a natural index
// bdxaddr : base + displacement + index
// laaddr : like bdxaddr, but used for Load Address operations
// dynalloc : base + displacement + index + ADJDYNALLOC
+// bdladdr : base + displacement with a length field
//
// <range> is one of:
// 12 : the displacement is an unsigned 12-bit value
@@ -398,20 +437,28 @@ def BDXAddr64Disp20 : AddressAsmOperand<"BDXAddr", "64", "20">;
// range value (12 or 20)
// only : used when there is no equivalent instruction with the opposite
// range value
-def shift12only : BDMode <"BDAddr", "32", "12", "Only">;
-def shift20only : BDMode <"BDAddr", "32", "20", "Only">;
-def bdaddr12only : BDMode <"BDAddr", "64", "12", "Only">;
-def bdaddr12pair : BDMode <"BDAddr", "64", "12", "Pair">;
-def bdaddr20only : BDMode <"BDAddr", "64", "20", "Only">;
-def bdaddr20pair : BDMode <"BDAddr", "64", "20", "Pair">;
-def bdxaddr12only : BDXMode<"BDXAddr", "64", "12", "Only">;
-def bdxaddr12pair : BDXMode<"BDXAddr", "64", "12", "Pair">;
-def bdxaddr20only : BDXMode<"BDXAddr", "64", "20", "Only">;
-def bdxaddr20only128 : BDXMode<"BDXAddr", "64", "20", "Only128">;
-def bdxaddr20pair : BDXMode<"BDXAddr", "64", "20", "Pair">;
-def dynalloc12only : BDXMode<"DynAlloc", "64", "12", "Only">;
-def laaddr12pair : BDXMode<"LAAddr", "64", "12", "Pair">;
-def laaddr20pair : BDXMode<"LAAddr", "64", "20", "Pair">;
+//
+// <len> is one of:
+//
+// <empty> : there is no length field
+// len8 : the length field is 8 bits, with a range of [1, 0x100].
+def shift12only : BDMode <"BDAddr", "32", "12", "Only">;
+def shift20only : BDMode <"BDAddr", "32", "20", "Only">;
+def bdaddr12only : BDMode <"BDAddr", "64", "12", "Only">;
+def bdaddr12pair : BDMode <"BDAddr", "64", "12", "Pair">;
+def bdaddr20only : BDMode <"BDAddr", "64", "20", "Only">;
+def bdaddr20pair : BDMode <"BDAddr", "64", "20", "Pair">;
+def mviaddr12pair : BDMode <"MVIAddr", "64", "12", "Pair">;
+def mviaddr20pair : BDMode <"MVIAddr", "64", "20", "Pair">;
+def bdxaddr12only : BDXMode<"BDXAddr", "64", "12", "Only">;
+def bdxaddr12pair : BDXMode<"BDXAddr", "64", "12", "Pair">;
+def bdxaddr20only : BDXMode<"BDXAddr", "64", "20", "Only">;
+def bdxaddr20only128 : BDXMode<"BDXAddr", "64", "20", "Only128">;
+def bdxaddr20pair : BDXMode<"BDXAddr", "64", "20", "Pair">;
+def dynalloc12only : BDXMode<"DynAlloc", "64", "12", "Only">;
+def laaddr12pair : BDXMode<"LAAddr", "64", "12", "Pair">;
+def laaddr20pair : BDXMode<"LAAddr", "64", "20", "Pair">;
+def bdladdr12onlylen8 : BDLMode<"BDLAddr", "64", "12", "Only", "8">;
//===----------------------------------------------------------------------===//
// Miscellaneous
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td b/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td
index 8c4df56..31cabaa 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td
@@ -15,16 +15,25 @@ def SDT_CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i64>,
SDTCisVT<1, i64>]>;
def SDT_ZCall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
def SDT_ZCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
-def SDT_ZBRCCMask : SDTypeProfile<0, 2,
+def SDT_ZICmp : SDTypeProfile<0, 3,
+ [SDTCisSameAs<0, 1>,
+ SDTCisVT<2, i32>]>;
+def SDT_ZBRCCMask : SDTypeProfile<0, 3,
[SDTCisVT<0, i8>,
- SDTCisVT<1, OtherVT>]>;
-def SDT_ZSelectCCMask : SDTypeProfile<1, 3,
+ SDTCisVT<1, i8>,
+ SDTCisVT<2, OtherVT>]>;
+def SDT_ZSelectCCMask : SDTypeProfile<1, 4,
[SDTCisSameAs<0, 1>,
SDTCisSameAs<1, 2>,
- SDTCisVT<3, i8>]>;
+ SDTCisVT<3, i8>,
+ SDTCisVT<4, i8>]>;
def SDT_ZWrapPtr : SDTypeProfile<1, 1,
[SDTCisSameAs<0, 1>,
SDTCisPtrTy<0>]>;
+def SDT_ZWrapOffset : SDTypeProfile<1, 2,
+ [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisPtrTy<0>]>;
def SDT_ZAdjDynAlloc : SDTypeProfile<1, 0, [SDTCisVT<0, i64>]>;
def SDT_ZExtractAccess : SDTypeProfile<1, 1,
[SDTCisVT<0, i32>,
@@ -52,6 +61,24 @@ def SDT_ZAtomicCmpSwapW : SDTypeProfile<1, 6,
SDTCisVT<4, i32>,
SDTCisVT<5, i32>,
SDTCisVT<6, i32>]>;
+def SDT_ZMemMemLength : SDTypeProfile<0, 3,
+ [SDTCisPtrTy<0>,
+ SDTCisPtrTy<1>,
+ SDTCisVT<2, i64>]>;
+def SDT_ZMemMemLoop : SDTypeProfile<0, 4,
+ [SDTCisPtrTy<0>,
+ SDTCisPtrTy<1>,
+ SDTCisVT<2, i64>,
+ SDTCisVT<3, i64>]>;
+def SDT_ZString : SDTypeProfile<1, 3,
+ [SDTCisPtrTy<0>,
+ SDTCisPtrTy<1>,
+ SDTCisPtrTy<2>,
+ SDTCisVT<3, i32>]>;
+def SDT_ZI32Intrinsic : SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>;
+def SDT_ZPrefetch : SDTypeProfile<0, 2,
+ [SDTCisVT<0, i8>,
+ SDTCisPtrTy<1>]>;
//===----------------------------------------------------------------------===//
// Node definitions
@@ -70,9 +97,15 @@ def z_retflag : SDNode<"SystemZISD::RET_FLAG", SDTNone,
def z_call : SDNode<"SystemZISD::CALL", SDT_ZCall,
[SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
SDNPVariadic]>;
+def z_sibcall : SDNode<"SystemZISD::SIBCALL", SDT_ZCall,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
+ SDNPVariadic]>;
def z_pcrel_wrapper : SDNode<"SystemZISD::PCREL_WRAPPER", SDT_ZWrapPtr, []>;
-def z_cmp : SDNode<"SystemZISD::CMP", SDT_ZCmp, [SDNPOutGlue]>;
-def z_ucmp : SDNode<"SystemZISD::UCMP", SDT_ZCmp, [SDNPOutGlue]>;
+def z_pcrel_offset : SDNode<"SystemZISD::PCREL_OFFSET",
+ SDT_ZWrapOffset, []>;
+def z_icmp : SDNode<"SystemZISD::ICMP", SDT_ZICmp, [SDNPOutGlue]>;
+def z_fcmp : SDNode<"SystemZISD::FCMP", SDT_ZCmp, [SDNPOutGlue]>;
+def z_tm : SDNode<"SystemZISD::TM", SDT_ZICmp, [SDNPOutGlue]>;
def z_br_ccmask : SDNode<"SystemZISD::BR_CCMASK", SDT_ZBRCCMask,
[SDNPHasChain, SDNPInGlue]>;
def z_select_ccmask : SDNode<"SystemZISD::SELECT_CCMASK", SDT_ZSelectCCMask,
@@ -81,6 +114,7 @@ def z_adjdynalloc : SDNode<"SystemZISD::ADJDYNALLOC", SDT_ZAdjDynAlloc>;
def z_extract_access : SDNode<"SystemZISD::EXTRACT_ACCESS",
SDT_ZExtractAccess>;
def z_umul_lohi64 : SDNode<"SystemZISD::UMUL_LOHI64", SDT_ZGR128Binary64>;
+def z_sdivrem32 : SDNode<"SystemZISD::SDIVREM32", SDT_ZGR128Binary32>;
def z_sdivrem64 : SDNode<"SystemZISD::SDIVREM64", SDT_ZGR128Binary64>;
def z_udivrem32 : SDNode<"SystemZISD::UDIVREM32", SDT_ZGR128Binary32>;
def z_udivrem64 : SDNode<"SystemZISD::UDIVREM64", SDT_ZGR128Binary64>;
@@ -102,10 +136,56 @@ def z_atomic_loadw_umin : AtomicWOp<"ATOMIC_LOADW_UMIN">;
def z_atomic_loadw_umax : AtomicWOp<"ATOMIC_LOADW_UMAX">;
def z_atomic_cmp_swapw : AtomicWOp<"ATOMIC_CMP_SWAPW", SDT_ZAtomicCmpSwapW>;
+def z_mvc : SDNode<"SystemZISD::MVC", SDT_ZMemMemLength,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
+def z_mvc_loop : SDNode<"SystemZISD::MVC_LOOP", SDT_ZMemMemLoop,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
+def z_nc : SDNode<"SystemZISD::NC", SDT_ZMemMemLength,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
+def z_nc_loop : SDNode<"SystemZISD::NC_LOOP", SDT_ZMemMemLoop,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
+def z_oc : SDNode<"SystemZISD::OC", SDT_ZMemMemLength,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
+def z_oc_loop : SDNode<"SystemZISD::OC_LOOP", SDT_ZMemMemLoop,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
+def z_xc : SDNode<"SystemZISD::XC", SDT_ZMemMemLength,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
+def z_xc_loop : SDNode<"SystemZISD::XC_LOOP", SDT_ZMemMemLoop,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
+def z_clc : SDNode<"SystemZISD::CLC", SDT_ZMemMemLength,
+ [SDNPHasChain, SDNPOutGlue, SDNPMayLoad]>;
+def z_clc_loop : SDNode<"SystemZISD::CLC_LOOP", SDT_ZMemMemLoop,
+ [SDNPHasChain, SDNPOutGlue, SDNPMayLoad]>;
+def z_strcmp : SDNode<"SystemZISD::STRCMP", SDT_ZString,
+ [SDNPHasChain, SDNPOutGlue, SDNPMayLoad]>;
+def z_stpcpy : SDNode<"SystemZISD::STPCPY", SDT_ZString,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
+def z_search_string : SDNode<"SystemZISD::SEARCH_STRING", SDT_ZString,
+ [SDNPHasChain, SDNPOutGlue, SDNPMayLoad]>;
+def z_ipm : SDNode<"SystemZISD::IPM", SDT_ZI32Intrinsic,
+ [SDNPInGlue]>;
+def z_prefetch : SDNode<"SystemZISD::PREFETCH", SDT_ZPrefetch,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore,
+ SDNPMemOperand]>;
+
//===----------------------------------------------------------------------===//
// Pattern fragments
//===----------------------------------------------------------------------===//
+// Signed and unsigned comparisons.
+def z_scmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, imm), [{
+ unsigned Type = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+ return Type != SystemZICMP::UnsignedOnly;
+}]>;
+def z_ucmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, imm), [{
+ unsigned Type = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+ return Type != SystemZICMP::SignedOnly;
+}]>;
+
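
The extra ICMP operand is what the two fragments above test: it records
whether the comparison is known to need signed semantics, unsigned
semantics, or neither, and z_scmp/z_ucmp accept a node unless it demands
the opposite signedness.  A sketch of the enumeration these predicates
assume (the real definition lives in SystemZISelLowering.h; the exact
spelling here is an assumption):

    // Assumed shape of the comparison-type values tested by z_scmp/z_ucmp.
    namespace SystemZICMP {
      enum {
        Any,           // either signed or unsigned instructions are fine
        UnsignedOnly,  // only unsigned comparisons (CL...) are correct
        SignedOnly     // only signed comparisons (C...) are correct
      };
    }
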
+// Register- and memory-based TEST UNDER MASK.
+def z_tm_reg : PatFrag<(ops node:$a, node:$b), (z_tm node:$a, node:$b, imm)>;
+def z_tm_mem : PatFrag<(ops node:$a, node:$b), (z_tm node:$a, node:$b, 0)>;
+
// Register sign-extend operations. Sub-32-bit values are represented as i32s.
def sext8 : PatFrag<(ops node:$src), (sext_inreg node:$src, i8)>;
def sext16 : PatFrag<(ops node:$src), (sext_inreg node:$src, i16)>;
@@ -120,17 +200,61 @@ def zext32 : PatFrag<(ops node:$src), (zext (i32 node:$src))>;
def loadf32 : PatFrag<(ops node:$src), (f32 (load node:$src))>;
def loadf64 : PatFrag<(ops node:$src), (f64 (load node:$src))>;
+// Extending loads in which the extension type can be signed.
+def asextload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{
+ unsigned Type = cast<LoadSDNode>(N)->getExtensionType();
+ return Type == ISD::EXTLOAD || Type == ISD::SEXTLOAD;
+}]>;
+def asextloadi8 : PatFrag<(ops node:$ptr), (asextload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+def asextloadi16 : PatFrag<(ops node:$ptr), (asextload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+def asextloadi32 : PatFrag<(ops node:$ptr), (asextload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+
+// Extending loads in which the extension type can be unsigned.
+def azextload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{
+ unsigned Type = cast<LoadSDNode>(N)->getExtensionType();
+ return Type == ISD::EXTLOAD || Type == ISD::ZEXTLOAD;
+}]>;
+def azextloadi8 : PatFrag<(ops node:$ptr), (azextload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+def azextloadi16 : PatFrag<(ops node:$ptr), (azextload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+def azextloadi32 : PatFrag<(ops node:$ptr), (azextload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+
+// Extending loads in which the extension type doesn't matter.
+def anyextload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getExtensionType() != ISD::NON_EXTLOAD;
+}]>;
+def anyextloadi8 : PatFrag<(ops node:$ptr), (anyextload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+def anyextloadi16 : PatFrag<(ops node:$ptr), (anyextload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+def anyextloadi32 : PatFrag<(ops node:$ptr), (anyextload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+
// Aligned loads.
class AlignedLoad<SDPatternOperator load>
: PatFrag<(ops node:$addr), (load node:$addr), [{
LoadSDNode *Load = cast<LoadSDNode>(N);
return Load->getAlignment() >= Load->getMemoryVT().getStoreSize();
}]>;
-def aligned_load : AlignedLoad<load>;
-def aligned_sextloadi16 : AlignedLoad<sextloadi16>;
-def aligned_sextloadi32 : AlignedLoad<sextloadi32>;
-def aligned_zextloadi16 : AlignedLoad<zextloadi16>;
-def aligned_zextloadi32 : AlignedLoad<zextloadi32>;
+def aligned_load : AlignedLoad<load>;
+def aligned_asextloadi16 : AlignedLoad<asextloadi16>;
+def aligned_asextloadi32 : AlignedLoad<asextloadi32>;
+def aligned_azextloadi16 : AlignedLoad<azextloadi16>;
+def aligned_azextloadi32 : AlignedLoad<azextloadi32>;
// Aligned stores.
class AlignedStore<SDPatternOperator store>
@@ -142,6 +266,54 @@ def aligned_store : AlignedStore<store>;
def aligned_truncstorei16 : AlignedStore<truncstorei16>;
def aligned_truncstorei32 : AlignedStore<truncstorei32>;
+// Non-volatile loads. Used for instructions that might access the storage
+// location multiple times.
+class NonvolatileLoad<SDPatternOperator load>
+ : PatFrag<(ops node:$addr), (load node:$addr), [{
+ LoadSDNode *Load = cast<LoadSDNode>(N);
+ return !Load->isVolatile();
+}]>;
+def nonvolatile_load : NonvolatileLoad<load>;
+def nonvolatile_anyextloadi8 : NonvolatileLoad<anyextloadi8>;
+def nonvolatile_anyextloadi16 : NonvolatileLoad<anyextloadi16>;
+def nonvolatile_anyextloadi32 : NonvolatileLoad<anyextloadi32>;
+
+// Non-volatile stores.
+class NonvolatileStore<SDPatternOperator store>
+ : PatFrag<(ops node:$src, node:$addr), (store node:$src, node:$addr), [{
+ StoreSDNode *Store = cast<StoreSDNode>(N);
+ return !Store->isVolatile();
+}]>;
+def nonvolatile_store : NonvolatileStore<store>;
+def nonvolatile_truncstorei8 : NonvolatileStore<truncstorei8>;
+def nonvolatile_truncstorei16 : NonvolatileStore<truncstorei16>;
+def nonvolatile_truncstorei32 : NonvolatileStore<truncstorei32>;
+
+// A store of a load that can be implemented using MVC.
+def mvc_store : PatFrag<(ops node:$value, node:$addr),
+ (unindexedstore node:$value, node:$addr),
+ [{ return storeLoadCanUseMVC(N); }]>;
+
+// Binary read-modify-write operations on memory in which the other
+// operand is also memory and for which block operations like NC can
+// be used. There are two patterns for each operator, depending on
+// which operand contains the "other" load.
+multiclass block_op<SDPatternOperator operator> {
+ def "1" : PatFrag<(ops node:$value, node:$addr),
+ (unindexedstore (operator node:$value,
+ (unindexedload node:$addr)),
+ node:$addr),
+ [{ return storeLoadCanUseBlockBinary(N, 0); }]>;
+ def "2" : PatFrag<(ops node:$value, node:$addr),
+ (unindexedstore (operator (unindexedload node:$addr),
+ node:$value),
+ node:$addr),
+ [{ return storeLoadCanUseBlockBinary(N, 1); }]>;
+}
+defm block_and : block_op<and>;
+defm block_or : block_op<or>;
+defm block_xor : block_op<xor>;
+
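
In scalar terms, the two patterns in each pair are just the two operand
orders of the same commutative read-modify-write between two memory
locations, which NC, OC and XC can implement directly.  A minimal C++
illustration of the 1-byte shapes being matched (pointer names invented
for the example):

    // Illustrative only: the 1-byte RMW forms that map onto NC.
    void blockAnd1(unsigned char *Dst, const unsigned char *Src) {
      *Dst = *Src & *Dst;   // block_and1: the destination load is operand 2
    }
    void blockAnd2(unsigned char *Dst, const unsigned char *Src) {
      *Dst = *Dst & *Src;   // block_and2: the destination load is operand 1
    }
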
// Insertions.
def inserti8 : PatFrag<(ops node:$src1, node:$src2),
(or (and node:$src1, -256), node:$src2)>;
@@ -174,6 +346,16 @@ def or_as_revinserti8 : PatFrag<(ops node:$src1, node:$src2),
APInt::getLowBitsSet(BitWidth, 8));
}]>;
+// Integer absolute, matching the canonical form generated by DAGCombiner.
+def z_iabs32 : PatFrag<(ops node:$src),
+ (xor (add node:$src, (sra node:$src, (i32 31))),
+ (sra node:$src, (i32 31)))>;
+def z_iabs64 : PatFrag<(ops node:$src),
+ (xor (add node:$src, (sra node:$src, (i32 63))),
+ (sra node:$src, (i32 63)))>;
+def z_inegabs32 : PatFrag<(ops node:$src), (ineg (z_iabs32 node:$src))>;
+def z_inegabs64 : PatFrag<(ops node:$src), (ineg (z_iabs64 node:$src))>;
+
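
Spelled out in scalar code, the fragment above is the usual branch-free
absolute value: with s = x >> 31 (arithmetic shift, so s is 0 or -1),
(x + s) ^ s gives x for non-negative x and -x otherwise.  A standalone
restatement, assuming an arithmetic right shift on signed values:

    #include <cstdint>

    // Branch-free abs, the exact shape matched by z_iabs32.
    int32_t iabs32(int32_t x) {
      int32_t s = x >> 31;    // 0 if x >= 0, -1 if x < 0
      return (x + s) ^ s;     // x unchanged, or ~(x - 1) == -x
    }
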
// Fused multiply-add and multiply-subtract, but with the order of the
// operands matching SystemZ's MA and MS instructions.
def z_fma : PatFrag<(ops node:$src1, node:$src2, node:$src3),
@@ -186,11 +368,11 @@ def fnabs : PatFrag<(ops node:$ptr), (fneg (fabs node:$ptr))>;
// Create a unary operator that loads from memory and then performs
// the given operation on it.
-class loadu<SDPatternOperator operator>
+class loadu<SDPatternOperator operator, SDPatternOperator load = load>
: PatFrag<(ops node:$addr), (operator (load node:$addr))>;
// Create a store operator that performs the given unary operation
// on the value before storing it.
-class storeu<SDPatternOperator operator>
+class storeu<SDPatternOperator operator, SDPatternOperator store = store>
: PatFrag<(ops node:$value, node:$addr),
(store (operator node:$value), node:$addr)>;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZPatterns.td b/contrib/llvm/lib/Target/SystemZ/SystemZPatterns.td
index 3689f74..7706351 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZPatterns.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZPatterns.td
@@ -13,7 +13,7 @@ multiclass SXU<SDPatternOperator operator, Instruction insn> {
def : Pat<(operator (sext (i32 GR32:$src))),
(insn GR32:$src)>;
def : Pat<(operator (sext_inreg GR64:$src, i32)),
- (insn (EXTRACT_SUBREG GR64:$src, subreg_32bit))>;
+ (insn (EXTRACT_SUBREG GR64:$src, subreg_l32))>;
}
// Record that INSN performs a 64-bit version of binary operator OPERATOR
@@ -24,7 +24,7 @@ multiclass SXB<SDPatternOperator operator, RegisterOperand cls,
def : Pat<(operator cls:$src1, (sext GR32:$src2)),
(insn cls:$src1, GR32:$src2)>;
def : Pat<(operator cls:$src1, (sext_inreg GR64:$src2, i32)),
- (insn cls:$src1, (EXTRACT_SUBREG GR64:$src2, subreg_32bit))>;
+ (insn cls:$src1, (EXTRACT_SUBREG GR64:$src2, subreg_l32))>;
}
// Like SXB, but for zero extension.
@@ -33,7 +33,7 @@ multiclass ZXB<SDPatternOperator operator, RegisterOperand cls,
def : Pat<(operator cls:$src1, (zext GR32:$src2)),
(insn cls:$src1, GR32:$src2)>;
def : Pat<(operator cls:$src1, (and GR64:$src2, 0xffffffff)),
- (insn cls:$src1, (EXTRACT_SUBREG GR64:$src2, subreg_32bit))>;
+ (insn cls:$src1, (EXTRACT_SUBREG GR64:$src2, subreg_l32))>;
}
// Record that INSN performs a binary read-modify-write operation,
@@ -50,12 +50,8 @@ class RMWI<SDPatternOperator load, SDPatternOperator operator,
// memory location. IMM is the type of the second operand.
multiclass RMWIByte<SDPatternOperator operator, AddressingMode mode,
Instruction insn> {
- def : RMWI<zextloadi8, operator, truncstorei8, mode, imm32, insn>;
- def : RMWI<zextloadi8, operator, truncstorei8, mode, imm64, insn>;
- def : RMWI<sextloadi8, operator, truncstorei8, mode, imm32, insn>;
- def : RMWI<sextloadi8, operator, truncstorei8, mode, imm64, insn>;
- def : RMWI<extloadi8, operator, truncstorei8, mode, imm32, insn>;
- def : RMWI<extloadi8, operator, truncstorei8, mode, imm64, insn>;
+ def : RMWI<anyextloadi8, operator, truncstorei8, mode, imm32, insn>;
+ def : RMWI<anyextloadi8, operator, truncstorei8, mode, imm64, insn>;
}
// Record that INSN performs insertion TYPE into a register of class CLS.
@@ -69,3 +65,88 @@ multiclass InsertMem<string type, Instruction insn, RegisterOperand cls,
(load mode:$src2), cls:$src1),
(insn cls:$src1, mode:$src2)>;
}
+
+// INSN stores the low 32 bits of a GPR to a memory with addressing mode MODE.
+// Record that it is equivalent to using OPERATOR to store a GR64.
+class StoreGR64<Instruction insn, SDPatternOperator operator,
+ AddressingMode mode>
+ : Pat<(operator GR64:$R1, mode:$XBD2),
+ (insn (EXTRACT_SUBREG GR64:$R1, subreg_l32), mode:$XBD2)>;
+
+// INSN and INSNY are an RX/RXY pair of instructions that store the low
+// 32 bits of a GPR to memory. Record that they are equivalent to using
+// OPERATOR to store a GR64.
+multiclass StoreGR64Pair<Instruction insn, Instruction insny,
+ SDPatternOperator operator> {
+ def : StoreGR64<insn, operator, bdxaddr12pair>;
+ def : StoreGR64<insny, operator, bdxaddr20pair>;
+}
+
+// INSN stores the low 32 bits of a GPR using PC-relative addressing.
+// Record that it is equivalent to using OPERATOR to store a GR64.
+class StoreGR64PC<Instruction insn, SDPatternOperator operator>
+ : Pat<(operator GR64:$R1, pcrel32:$XBD2),
+ (insn (EXTRACT_SUBREG GR64:$R1, subreg_l32), pcrel32:$XBD2)> {
+ // We want PC-relative addresses to be tried ahead of BD and BDX addresses.
+ // However, BDXs have two extra operands and are therefore 6 units more
+ // complex.
+ let AddedComplexity = 7;
+}
+
+// INSN and INSNINV conditionally store the low 32 bits of a GPR to memory,
+// with INSN storing when the condition is true and INSNINV storing when the
+// condition is false. Record that they are equivalent to a LOAD/select/STORE
+// sequence for GR64s.
+multiclass CondStores64<Instruction insn, Instruction insninv,
+ SDPatternOperator store, SDPatternOperator load,
+ AddressingMode mode> {
+ def : Pat<(store (z_select_ccmask GR64:$new, (load mode:$addr),
+ uimm8zx4:$valid, uimm8zx4:$cc),
+ mode:$addr),
+ (insn (EXTRACT_SUBREG GR64:$new, subreg_l32), mode:$addr,
+ uimm8zx4:$valid, uimm8zx4:$cc)>;
+ def : Pat<(store (z_select_ccmask (load mode:$addr), GR64:$new,
+ uimm8zx4:$valid, uimm8zx4:$cc),
+ mode:$addr),
+ (insninv (EXTRACT_SUBREG GR64:$new, subreg_l32), mode:$addr,
+ uimm8zx4:$valid, uimm8zx4:$cc)>;
+}
+
+// Try to use MVC instruction INSN for a load of type LOAD followed by a store
+// of the same size. VT is the type of the intermediate (legalized) value and
+// LENGTH is the number of bytes loaded by LOAD.
+multiclass MVCLoadStore<SDPatternOperator load, ValueType vt, Instruction insn,
+ bits<5> length> {
+ def : Pat<(mvc_store (vt (load bdaddr12only:$src)), bdaddr12only:$dest),
+ (insn bdaddr12only:$dest, bdaddr12only:$src, length)>;
+}
+
+// Use NC-like instruction INSN for block_op operation OPERATOR.
+// The other operand is a load of type LOAD, which accesses LENGTH bytes.
+// VT is the intermediate legalized type in which the binary operation
+// is actually done.
+multiclass BinaryLoadStore<SDPatternOperator operator, SDPatternOperator load,
+ ValueType vt, Instruction insn, bits<5> length> {
+ def : Pat<(operator (vt (load bdaddr12only:$src)), bdaddr12only:$dest),
+ (insn bdaddr12only:$dest, bdaddr12only:$src, length)>;
+}
+
+// A convenient way of generating all block peepholes for a particular
+// LOAD/VT/LENGTH combination.
+multiclass BlockLoadStore<SDPatternOperator load, ValueType vt,
+ Instruction mvc, Instruction nc, Instruction oc,
+ Instruction xc, bits<5> length> {
+ defm : MVCLoadStore<load, vt, mvc, length>;
+ defm : BinaryLoadStore<block_and1, load, vt, nc, length>;
+ defm : BinaryLoadStore<block_and2, load, vt, nc, length>;
+ defm : BinaryLoadStore<block_or1, load, vt, oc, length>;
+ defm : BinaryLoadStore<block_or2, load, vt, oc, length>;
+ defm : BinaryLoadStore<block_xor1, load, vt, xc, length>;
+ defm : BinaryLoadStore<block_xor2, load, vt, xc, length>;
+}
+
+// Record that INSN is a LOAD AND TEST that can be used to compare
+// registers in CLS against zero. The instruction has separate R1 and R2
+// operands, but they must be the same when the instruction is used like this.
+class CompareZeroFP<Instruction insn, RegisterOperand cls>
+ : Pat<(z_fcmp cls:$reg, (fpimm0)), (insn cls:$reg, cls:$reg)>;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZProcessors.td b/contrib/llvm/lib/Target/SystemZ/SystemZProcessors.td
new file mode 100644
index 0000000..f241fb0
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZProcessors.td
@@ -0,0 +1,46 @@
+//===-- SystemZProcessors.td - Processors and features ------*- tblgen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Processor and feature definitions.
+//
+//===----------------------------------------------------------------------===//
+
+class SystemZFeature<string extname, string intname, string desc>
+ : Predicate<"Subtarget.has"##intname##"()">,
+ AssemblerPredicate<"Feature"##intname, extname>,
+ SubtargetFeature<extname, "Has"##intname, "true", desc>;
+
+def FeatureDistinctOps : SystemZFeature<
+ "distinct-ops", "DistinctOps",
+ "Assume that the distinct-operands facility is installed"
+>;
+
+def FeatureLoadStoreOnCond : SystemZFeature<
+ "load-store-on-cond", "LoadStoreOnCond",
+ "Assume that the load/store-on-condition facility is installed"
+>;
+
+def FeatureHighWord : SystemZFeature<
+ "high-word", "HighWord",
+ "Assume that the high-word facility is installed"
+>;
+
+def FeatureFPExtension : SystemZFeature<
+ "fp-extension", "FPExtension",
+ "Assume that the floating-point extension facility is installed"
+>;
+
+def : Processor<"generic", NoItineraries, []>;
+def : Processor<"z10", NoItineraries, []>;
+def : Processor<"z196", NoItineraries,
+ [FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord,
+ FeatureFPExtension]>;
+def : Processor<"zEC12", NoItineraries,
+ [FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord,
+ FeatureFPExtension]>;
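
Each SystemZFeature definition expands to three things at once: a
SubtargetFeature (so "-mattr=+distinct-ops" works on the command line),
an AssemblerPredicate, and a Predicate that tests a subtarget accessor.
The accessors implied by these definitions would look roughly like this
(a hand-written sketch, not the generated code):

    // Assumed shape of the subtarget hooks behind Subtarget.has...().
    class SystemZSubtargetSketch {
      bool HasDistinctOps;
      bool HasLoadStoreOnCond;
    public:
      SystemZSubtargetSketch()
        : HasDistinctOps(false), HasLoadStoreOnCond(false) {}
      bool hasDistinctOps() const { return HasDistinctOps; }
      bool hasLoadStoreOnCond() const { return HasLoadStoreOnCond; }
    };
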
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index a0ae7ed..b61ae88 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -17,9 +17,8 @@
using namespace llvm;
-SystemZRegisterInfo::SystemZRegisterInfo(SystemZTargetMachine &tm,
- const SystemZInstrInfo &tii)
- : SystemZGenRegisterInfo(SystemZ::R14D), TM(tm), TII(tii) {}
+SystemZRegisterInfo::SystemZRegisterInfo(SystemZTargetMachine &tm)
+ : SystemZGenRegisterInfo(SystemZ::R14D), TM(tm) {}
const uint16_t*
SystemZRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
@@ -43,41 +42,19 @@ SystemZRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
if (TFI->hasFP(MF)) {
// R11D is the frame pointer. Reserve all aliases.
Reserved.set(SystemZ::R11D);
- Reserved.set(SystemZ::R11W);
+ Reserved.set(SystemZ::R11L);
+ Reserved.set(SystemZ::R11H);
Reserved.set(SystemZ::R10Q);
}
// R15D is the stack pointer. Reserve all aliases.
Reserved.set(SystemZ::R15D);
- Reserved.set(SystemZ::R15W);
+ Reserved.set(SystemZ::R15L);
+ Reserved.set(SystemZ::R15H);
Reserved.set(SystemZ::R14Q);
return Reserved;
}
-bool
-SystemZRegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator SaveMBBI,
- MachineBasicBlock::iterator &UseMBBI,
- const TargetRegisterClass *RC,
- unsigned Reg) const {
- MachineFunction &MF = *MBB.getParent();
- const SystemZFrameLowering *TFI =
- static_cast<const SystemZFrameLowering *>(TM.getFrameLowering());
- unsigned Base = getFrameRegister(MF);
- uint64_t Offset = TFI->getEmergencySpillSlotOffset(MF);
- DebugLoc DL;
-
- unsigned LoadOpcode, StoreOpcode;
- TII.getLoadStoreOpcodes(RC, LoadOpcode, StoreOpcode);
-
- // The offset must always be in range of a 12-bit unsigned displacement.
- BuildMI(MBB, SaveMBBI, DL, TII.get(StoreOpcode))
- .addReg(Reg, RegState::Kill).addReg(Base).addImm(Offset).addReg(0);
- BuildMI(MBB, UseMBBI, DL, TII.get(LoadOpcode), Reg)
- .addReg(Base).addImm(Offset).addReg(0);
- return true;
-}
-
void
SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
int SPAdj, unsigned FIOperandNum,
@@ -86,6 +63,8 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
MachineBasicBlock &MBB = *MI->getParent();
MachineFunction &MF = *MBB.getParent();
+ const SystemZInstrInfo &TII =
+ *static_cast<const SystemZInstrInfo*>(TM.getInstrInfo());
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
DebugLoc DL = MI->getDebugLoc();
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
index 91a70de..13f45fa 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -22,10 +22,10 @@ namespace SystemZ {
// Return the subreg to use for referring to the even and odd registers
// in a GR128 pair.  Is32bit says whether we want a GR32 or GR64.
inline unsigned even128(bool Is32bit) {
- return Is32bit ? subreg_32bit : subreg_high;
+ return Is32bit ? subreg_hl32 : subreg_h64;
}
inline unsigned odd128(bool Is32bit) {
- return Is32bit ? subreg_low32 : subreg_low;
+ return Is32bit ? subreg_l32 : subreg_l64;
}
}
@@ -35,10 +35,9 @@ class SystemZInstrInfo;
struct SystemZRegisterInfo : public SystemZGenRegisterInfo {
private:
SystemZTargetMachine &TM;
- const SystemZInstrInfo &TII;
public:
- SystemZRegisterInfo(SystemZTargetMachine &tm, const SystemZInstrInfo &tii);
+ SystemZRegisterInfo(SystemZTargetMachine &tm);
// Override TargetRegisterInfo.h.
virtual bool requiresRegisterScavenging(const MachineFunction &MF) const
@@ -49,15 +48,14 @@ public:
LLVM_OVERRIDE {
return true;
}
+ virtual bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const
+ LLVM_OVERRIDE {
+ return true;
+ }
virtual const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0)
const LLVM_OVERRIDE;
virtual BitVector getReservedRegs(const MachineFunction &MF)
const LLVM_OVERRIDE;
- virtual bool saveScavengerRegister(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator SaveMBBI,
- MachineBasicBlock::iterator &UseMBBI,
- const TargetRegisterClass *RC,
- unsigned Reg) const LLVM_OVERRIDE;
virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI,
int SPAdj, unsigned FIOperandNum,
RegScavenger *RS) const LLVM_OVERRIDE;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
index bd1b563..93d7c83 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
@@ -21,10 +21,12 @@ class SystemZRegWithSubregs<string n, list<Register> subregs>
}
let Namespace = "SystemZ" in {
-def subreg_32bit : SubRegIndex; // could also be known as "subreg_high32"
-def subreg_high : SubRegIndex;
-def subreg_low : SubRegIndex;
-def subreg_low32 : SubRegIndex<[subreg_low, subreg_32bit]>;
+def subreg_l32 : SubRegIndex<32, 0>; // Also acts as subreg_ll32.
+def subreg_h32 : SubRegIndex<32, 32>; // Also acts as subreg_lh32.
+def subreg_l64 : SubRegIndex<64, 0>;
+def subreg_h64 : SubRegIndex<64, 64>;
+def subreg_hh32 : ComposedSubRegIndex<subreg_h64, subreg_h32>;
+def subreg_hl32 : ComposedSubRegIndex<subreg_h64, subreg_l32>;
}
// Define a register class that contains values of type TYPE and an
@@ -54,36 +56,49 @@ class GPR32<bits<16> num, string n> : SystemZReg<n> {
}
// One of the 16 64-bit general-purpose registers.
-class GPR64<bits<16> num, string n, GPR32 low>
- : SystemZRegWithSubregs<n, [low]> {
+class GPR64<bits<16> num, string n, GPR32 low, GPR32 high>
+ : SystemZRegWithSubregs<n, [low, high]> {
let HWEncoding = num;
- let SubRegIndices = [subreg_32bit];
+ let SubRegIndices = [subreg_l32, subreg_h32];
}
// 8 even-odd pairs of GPR64s.
-class GPR128<bits<16> num, string n, GPR64 high, GPR64 low>
- : SystemZRegWithSubregs<n, [high, low]> {
+class GPR128<bits<16> num, string n, GPR64 low, GPR64 high>
+ : SystemZRegWithSubregs<n, [low, high]> {
let HWEncoding = num;
- let SubRegIndices = [subreg_high, subreg_low];
+ let SubRegIndices = [subreg_l64, subreg_h64];
}
// General-purpose registers
foreach I = 0-15 in {
- def R#I#W : GPR32<I, "r"#I>;
- def R#I#D : GPR64<I, "r"#I, !cast<GPR32>("R"#I#"W")>, DwarfRegNum<[I]>;
+ def R#I#L : GPR32<I, "r"#I>;
+ def R#I#H : GPR32<I, "r"#I>;
+ def R#I#D : GPR64<I, "r"#I, !cast<GPR32>("R"#I#"L"), !cast<GPR32>("R"#I#"H")>,
+ DwarfRegNum<[I]>;
}
foreach I = [0, 2, 4, 6, 8, 10, 12, 14] in {
- def R#I#Q : GPR128<I, "r"#I, !cast<GPR64>("R"#I#"D"),
- !cast<GPR64>("R"#!add(I, 1)#"D")>;
+ def R#I#Q : GPR128<I, "r"#I, !cast<GPR64>("R"#!add(I, 1)#"D"),
+ !cast<GPR64>("R"#I#"D")>;
}
/// Allocate the callee-saved R6-R13 backwards. That way they can be saved
/// together with R14 and R15 in one prolog instruction.
-defm GR32 : SystemZRegClass<"GR32", i32, 32, (add (sequence "R%uW", 0, 5),
- (sequence "R%uW", 15, 6))>;
-defm GR64 : SystemZRegClass<"GR64", i64, 64, (add (sequence "R%uD", 0, 5),
- (sequence "R%uD", 15, 6))>;
+defm GR32 : SystemZRegClass<"GR32", i32, 32, (add (sequence "R%uL", 0, 5),
+ (sequence "R%uL", 15, 6))>;
+defm GRH32 : SystemZRegClass<"GRH32", i32, 32, (add (sequence "R%uH", 0, 5),
+ (sequence "R%uH", 15, 6))>;
+defm GR64 : SystemZRegClass<"GR64", i64, 64, (add (sequence "R%uD", 0, 5),
+ (sequence "R%uD", 15, 6))>;
+
+// Combine the low and high GR32s into a single class. This can only be
+// used for virtual registers if the high-word facility is available.
+defm GRX32 : SystemZRegClass<"GRX32", i32, 32,
+ (add (sequence "R%uL", 0, 5),
+ (sequence "R%uH", 0, 5),
+ R15L, R15H, R14L, R14H, R13L, R13H,
+ R12L, R12H, R11L, R11H, R10L, R10H,
+ R9L, R9H, R8L, R8H, R7L, R7H, R6L, R6H)>;
// The architecture doesn't really have any i128 support, so model the
// register pairs as untyped instead.
@@ -93,7 +108,7 @@ defm GR128 : SystemZRegClass<"GR128", untyped, 128, (add R0Q, R2Q, R4Q,
// Base and index registers. Everything except R0, which in an address
// context evaluates as 0.
-defm ADDR32 : SystemZRegClass<"ADDR32", i32, 32, (sub GR32Bit, R0W)>;
+defm ADDR32 : SystemZRegClass<"ADDR32", i32, 32, (sub GR32Bit, R0L)>;
defm ADDR64 : SystemZRegClass<"ADDR64", i64, 64, (sub GR64Bit, R0D)>;
// Not used directly, but needs to exist for ADDR32 and ADDR64 subregs
@@ -113,14 +128,14 @@ class FPR32<bits<16> num, string n> : SystemZReg<n> {
class FPR64<bits<16> num, string n, FPR32 low>
: SystemZRegWithSubregs<n, [low]> {
let HWEncoding = num;
- let SubRegIndices = [subreg_32bit];
+ let SubRegIndices = [subreg_h32];
}
// 8 pairs of FPR64s, with a one-register gap in between.
-class FPR128<bits<16> num, string n, FPR64 high, FPR64 low>
- : SystemZRegWithSubregs<n, [high, low]> {
+class FPR128<bits<16> num, string n, FPR64 low, FPR64 high>
+ : SystemZRegWithSubregs<n, [low, high]> {
let HWEncoding = num;
- let SubRegIndices = [subreg_high, subreg_low];
+ let SubRegIndices = [subreg_l64, subreg_h64];
}
// Floating-point registers
@@ -131,8 +146,8 @@ foreach I = 0-15 in {
}
foreach I = [0, 1, 4, 5, 8, 9, 12, 13] in {
- def F#I#Q : FPR128<I, "f"#I, !cast<FPR64>("F"#I#"D"),
- !cast<FPR64>("F"#!add(I, 2)#"D")>;
+ def F#I#Q : FPR128<I, "f"#I, !cast<FPR64>("F"#!add(I, 2)#"D"),
+ !cast<FPR64>("F"#I#"D")>;
}
// There's no store-multiple instruction for FPRs, so we're not fussy
@@ -146,5 +161,7 @@ defm FP128 : SystemZRegClass<"FP128", f128, 128, (add F0Q, F1Q, F4Q, F5Q,
// Other registers
//===----------------------------------------------------------------------===//
-// Status register
-def PSW : SystemZReg<"psw">;
+// The 2-bit condition code field of the PSW. Every register named in an
+// inline asm needs a class associated with it.
+def CC : SystemZReg<"cc">;
+def CCRegs : RegisterClass<"SystemZ", [i32], 32, (add CC)>;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
new file mode 100644
index 0000000..c7ebb5d
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
@@ -0,0 +1,293 @@
+//===-- SystemZSelectionDAGInfo.cpp - SystemZ SelectionDAG Info -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SystemZSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "systemz-selectiondag-info"
+#include "SystemZTargetMachine.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+
+using namespace llvm;
+
+SystemZSelectionDAGInfo::
+SystemZSelectionDAGInfo(const SystemZTargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
+}
+
+SystemZSelectionDAGInfo::~SystemZSelectionDAGInfo() {
+}
+
+// Decide whether it is best to use a loop or straight-line code for
+// a block operation of Size bytes with source address Src and destination
+// address Dest. Sequence is the opcode to use for straight-line code
+// (such as MVC) and Loop is the opcode to use for loops (such as MVC_LOOP).
+// Return the chain for the completed operation.
+static SDValue emitMemMem(SelectionDAG &DAG, SDLoc DL, unsigned Sequence,
+ unsigned Loop, SDValue Chain, SDValue Dst,
+ SDValue Src, uint64_t Size) {
+ EVT PtrVT = Src.getValueType();
+ // The heuristic we use is to prefer loops for anything that would
+ // require 7 or more MVCs. With these kinds of sizes there isn't
+ // much to choose between straight-line code and looping code,
+ // since the time will be dominated by the MVCs themselves.
+ // However, the loop has 4 or 5 instructions (depending on whether
+  // the base addresses can be proved equal), so there seems little
+  // point in using a loop for 5 * 256 bytes or fewer.  Anything in
+  // the range (5 * 256, 6 * 256) would need another instruction after
+  // the loop, so a loop doesn't seem worthwhile there either.
+ // The next value up, 6 * 256, can be implemented in the same
+ // number of straight-line MVCs as 6 * 256 - 1.
+ if (Size > 6 * 256)
+ return DAG.getNode(Loop, DL, MVT::Other, Chain, Dst, Src,
+ DAG.getConstant(Size, PtrVT),
+ DAG.getConstant(Size / 256, PtrVT));
+ return DAG.getNode(Sequence, DL, MVT::Other, Chain, Dst, Src,
+ DAG.getConstant(Size, PtrVT));
+}
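
Concretely, one MVC copies at most 256 bytes, so straight-line code needs
ceil(Size / 256) MVCs and the test above caps that count at six.  The same
decision restated as a standalone helper (illustrative name):

    #include <cstdint>

    // True when emitMemMem would pick the loop form (Size > 6 * 256).
    bool wouldUseMemMemLoop(uint64_t Size) {
      uint64_t NumMVCs = (Size + 255) / 256;  // straight-line MVC count
      return NumMVCs > 6;
    }
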
+
+SDValue SystemZSelectionDAGInfo::
+EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+ SDValue Dst, SDValue Src, SDValue Size, unsigned Align,
+ bool IsVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const {
+ if (IsVolatile)
+ return SDValue();
+
+ if (ConstantSDNode *CSize = dyn_cast<ConstantSDNode>(Size))
+ return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP,
+ Chain, Dst, Src, CSize->getZExtValue());
+ return SDValue();
+}
+
+// Handle a memset of 1, 2, 4 or 8 bytes with the operands given by
+// Chain, Dst, ByteVal and Size. These cases are expected to use
+// MVI, MVHHI, MVHI and MVGHI respectively.
+static SDValue memsetStore(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+ SDValue Dst, uint64_t ByteVal, uint64_t Size,
+ unsigned Align,
+ MachinePointerInfo DstPtrInfo) {
+ uint64_t StoreVal = ByteVal;
+ for (unsigned I = 1; I < Size; ++I)
+ StoreVal |= ByteVal << (I * 8);
+ return DAG.getStore(Chain, DL,
+ DAG.getConstant(StoreVal, MVT::getIntegerVT(Size * 8)),
+ Dst, DstPtrInfo, false, false, Align);
+}
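
For example, ByteVal 0xAB with Size 4 produces the store value 0xABABABAB,
which MVHI then writes in a single instruction.  The replication step on
its own (illustrative helper name):

    #include <cstdint>

    // Replicate ByteVal across Size bytes, as memsetStore does:
    // replicateByte(0xAB, 4) == 0xABABABAB.
    uint64_t replicateByte(uint64_t ByteVal, unsigned Size) {
      uint64_t StoreVal = ByteVal;
      for (unsigned I = 1; I < Size; ++I)
        StoreVal |= ByteVal << (I * 8);
      return StoreVal;
    }
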
+
+SDValue SystemZSelectionDAGInfo::
+EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+ SDValue Dst, SDValue Byte, SDValue Size,
+ unsigned Align, bool IsVolatile,
+ MachinePointerInfo DstPtrInfo) const {
+ EVT PtrVT = Dst.getValueType();
+
+ if (IsVolatile)
+ return SDValue();
+
+ if (ConstantSDNode *CSize = dyn_cast<ConstantSDNode>(Size)) {
+ uint64_t Bytes = CSize->getZExtValue();
+ if (Bytes == 0)
+ return SDValue();
+ if (ConstantSDNode *CByte = dyn_cast<ConstantSDNode>(Byte)) {
+ // Handle cases that can be done using at most two of
+ // MVI, MVHI, MVHHI and MVGHI. The latter two can only be
+      // used if ByteVal is all zeros or all ones; in other cases,
+ // we can move at most 2 halfwords.
+ uint64_t ByteVal = CByte->getZExtValue();
+ if (ByteVal == 0 || ByteVal == 255 ?
+ Bytes <= 16 && CountPopulation_64(Bytes) <= 2 :
+ Bytes <= 4) {
+ unsigned Size1 = Bytes == 16 ? 8 : 1 << findLastSet(Bytes);
+ unsigned Size2 = Bytes - Size1;
+ SDValue Chain1 = memsetStore(DAG, DL, Chain, Dst, ByteVal, Size1,
+ Align, DstPtrInfo);
+ if (Size2 == 0)
+ return Chain1;
+ Dst = DAG.getNode(ISD::ADD, DL, PtrVT, Dst,
+ DAG.getConstant(Size1, PtrVT));
+ DstPtrInfo = DstPtrInfo.getWithOffset(Size1);
+ SDValue Chain2 = memsetStore(DAG, DL, Chain, Dst, ByteVal, Size2,
+ std::min(Align, Size1), DstPtrInfo);
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain1, Chain2);
+ }
+ } else {
+ // Handle one and two bytes using STC.
+ if (Bytes <= 2) {
+ SDValue Chain1 = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo,
+ false, false, Align);
+ if (Bytes == 1)
+ return Chain1;
+ SDValue Dst2 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst,
+ DAG.getConstant(1, PtrVT));
+ SDValue Chain2 = DAG.getStore(Chain, DL, Byte, Dst2,
+ DstPtrInfo.getWithOffset(1),
+ false, false, 1);
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain1, Chain2);
+ }
+ }
+ assert(Bytes >= 2 && "Should have dealt with 0- and 1-byte cases already");
+
+ // Handle the special case of a memset of 0, which can use XC.
+ ConstantSDNode *CByte = dyn_cast<ConstantSDNode>(Byte);
+ if (CByte && CByte->getZExtValue() == 0)
+ return emitMemMem(DAG, DL, SystemZISD::XC, SystemZISD::XC_LOOP,
+ Chain, Dst, Dst, Bytes);
+
+ // Copy the byte to the first location and then use MVC to copy
+ // it to the rest.
+ Chain = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo,
+ false, false, Align);
+ SDValue DstPlus1 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst,
+ DAG.getConstant(1, PtrVT));
+ return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP,
+ Chain, DstPlus1, Dst, Bytes - 1);
+ }
+ return SDValue();
+}
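
A worked example of the two-store path: an all-zeros or all-ones memset of
Bytes = 12 passes the popcount test (12 = 0b1100), so Size1 becomes 8 and
Size2 becomes 4, i.e. one MVGHI followed by one MVHI at offset 8.  The
split on its own, as a sketch (findLastSet is LLVM's index-of-highest-set-bit
helper; the GCC/Clang builtin __builtin_clzll stands in for it here):

    #include <cstdint>
    #include <utility>

    // Precondition: 1 <= Bytes <= 16 and popcount(Bytes) <= 2.
    // splitMemset(12) == {8, 4}; splitMemset(16) == {8, 8}.
    std::pair<unsigned, unsigned> splitMemset(uint64_t Bytes) {
      unsigned Size1 =
          Bytes == 16 ? 8 : 1u << (63 - __builtin_clzll(Bytes));
      return std::make_pair(Size1, unsigned(Bytes - Size1));
    }
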
+
+// Use CLC to compare [Src1, Src1 + Size) with [Src2, Src2 + Size),
+// deciding whether to use a loop or straight-line code.
+static SDValue emitCLC(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+ SDValue Src1, SDValue Src2, uint64_t Size) {
+ SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ EVT PtrVT = Src1.getValueType();
+ // A two-CLC sequence is a clear win over a loop, not least because it
+ // needs only one branch. A three-CLC sequence needs the same number
+  // of branches as a loop (i.e. 2), but is shorter, so straight-line
+  // code is used for up to 3 * 256 = 768 bytes and a loop beyond that.
+  // It seems relatively likely that a difference will be found within
+  // the first 768 bytes, so we just optimize for the smallest number of
+  // branch instructions, in order to avoid polluting the branch
+  // prediction buffer too much.  A loop only ever needs 2 branches,
+ if (Size > 3 * 256)
+ return DAG.getNode(SystemZISD::CLC_LOOP, DL, VTs, Chain, Src1, Src2,
+ DAG.getConstant(Size, PtrVT),
+ DAG.getConstant(Size / 256, PtrVT));
+ return DAG.getNode(SystemZISD::CLC, DL, VTs, Chain, Src1, Src2,
+ DAG.getConstant(Size, PtrVT));
+}
+
+// Convert the current CC value into an integer that is 0 if CC == 0,
+// less than zero if CC == 1 and greater than zero if CC >= 2.
+// The sequence starts with IPM, which puts CC into bits 29 and 28
+// of an integer and clears bits 30 and 31.
+static SDValue addIPMSequence(SDLoc DL, SDValue Glue, SelectionDAG &DAG) {
+ SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue);
+ SDValue SRL = DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
+ DAG.getConstant(SystemZ::IPM_CC, MVT::i32));
+ SDValue ROTL = DAG.getNode(ISD::ROTL, DL, MVT::i32, SRL,
+ DAG.getConstant(31, MVT::i32));
+ return ROTL;
+}
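
Tracing the bits: IPM leaves CC in bits 29-28, the SRL by SystemZ::IPM_CC
(28) moves it down to bits 1-0, and rotating left by 31 is a right-rotate
by 1, which sends bit 0 into the sign bit.  So CC 0 maps to 0, CC 1 to
0x80000000 (negative) and CC 2 to 1 (positive), which is exactly a
memcmp-style result.  The scalar equivalent, as an illustration:

    #include <cstdint>

    // ccToResult(0) == 0, ccToResult(1) < 0, ccToResult(2) > 0.
    int32_t ccToResult(uint32_t IPMValue) {
      uint32_t CC = (IPMValue >> 28) & 3;     // the SRL step
      uint32_t Rot = (CC >> 1) | (CC << 31);  // ROTL by 31 == ROTR by 1
      return (int32_t)Rot;
    }
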
+
+std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::
+EmitTargetCodeForMemcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+ SDValue Src1, SDValue Src2, SDValue Size,
+ MachinePointerInfo Op1PtrInfo,
+ MachinePointerInfo Op2PtrInfo) const {
+ if (ConstantSDNode *CSize = dyn_cast<ConstantSDNode>(Size)) {
+ uint64_t Bytes = CSize->getZExtValue();
+ assert(Bytes > 0 && "Caller should have handled 0-size case");
+ Chain = emitCLC(DAG, DL, Chain, Src1, Src2, Bytes);
+ SDValue Glue = Chain.getValue(1);
+ return std::make_pair(addIPMSequence(DL, Glue, DAG), Chain);
+ }
+ return std::make_pair(SDValue(), SDValue());
+}
+
+std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::
+EmitTargetCodeForMemchr(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+ SDValue Src, SDValue Char, SDValue Length,
+ MachinePointerInfo SrcPtrInfo) const {
+ // Use SRST to find the character. End is its address on success.
+ EVT PtrVT = Src.getValueType();
+ SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other, MVT::Glue);
+ Length = DAG.getZExtOrTrunc(Length, DL, PtrVT);
+ Char = DAG.getZExtOrTrunc(Char, DL, MVT::i32);
+ Char = DAG.getNode(ISD::AND, DL, MVT::i32, Char,
+ DAG.getConstant(255, MVT::i32));
+ SDValue Limit = DAG.getNode(ISD::ADD, DL, PtrVT, Src, Length);
+ SDValue End = DAG.getNode(SystemZISD::SEARCH_STRING, DL, VTs, Chain,
+ Limit, Src, Char);
+ Chain = End.getValue(1);
+ SDValue Glue = End.getValue(2);
+
+ // Now select between End and null, depending on whether the character
+ // was found.
+ SmallVector<SDValue, 5> Ops;
+ Ops.push_back(End);
+ Ops.push_back(DAG.getConstant(0, PtrVT));
+ Ops.push_back(DAG.getConstant(SystemZ::CCMASK_SRST, MVT::i32));
+ Ops.push_back(DAG.getConstant(SystemZ::CCMASK_SRST_FOUND, MVT::i32));
+ Ops.push_back(Glue);
+ VTs = DAG.getVTList(PtrVT, MVT::Glue);
+ End = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, &Ops[0], Ops.size());
+ return std::make_pair(End, Chain);
+}
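
A C++ model of the generated sequence (editorial; the scan loop stands in for
SRST and the final select for SELECT_CCMASK on CCMASK_SRST_FOUND):

  #include <cstddef>

  const unsigned char *memchrModel(const unsigned char *Src, int Char,
                                   size_t Length) {
    const unsigned char *Limit = Src + Length;
    unsigned char Needle = (unsigned char)(Char & 255); // AND with 255
    const unsigned char *End = Src;
    while (End != Limit && *End != Needle)    // SRST scans [Src, Limit)
      ++End;
    return End != Limit ? End : nullptr;      // found ? End : null
  }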
+
+std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::
+EmitTargetCodeForStrcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+ SDValue Dest, SDValue Src,
+ MachinePointerInfo DestPtrInfo,
+ MachinePointerInfo SrcPtrInfo, bool isStpcpy) const {
+ SDVTList VTs = DAG.getVTList(Dest.getValueType(), MVT::Other);
+ SDValue EndDest = DAG.getNode(SystemZISD::STPCPY, DL, VTs, Chain, Dest, Src,
+ DAG.getConstant(0, MVT::i32));
+ return std::make_pair(isStpcpy ? EndDest : Dest, EndDest.getValue(1));
+}
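
For reference, the isStpcpy flag only changes which value is returned; the
underlying STPCPY node always produces the end address, which strcpy simply
discards. The standard library behavior being modeled (stpcpy is POSIX):

  #include <cstring>

  char Buf[8];
  char *P = stpcpy(Buf, "abc");   // P == Buf + 3, address of the '\0'
  char *Q = strcpy(Buf, "abc");   // Q == Buf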
+
+std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::
+EmitTargetCodeForStrcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+ SDValue Src1, SDValue Src2,
+ MachinePointerInfo Op1PtrInfo,
+ MachinePointerInfo Op2PtrInfo) const {
+ SDVTList VTs = DAG.getVTList(Src1.getValueType(), MVT::Other, MVT::Glue);
+ SDValue Unused = DAG.getNode(SystemZISD::STRCMP, DL, VTs, Chain, Src1, Src2,
+ DAG.getConstant(0, MVT::i32));
+ Chain = Unused.getValue(1);
+ SDValue Glue = Chain.getValue(2);
+ return std::make_pair(addIPMSequence(DL, Glue, DAG), Chain);
+}
+
+// Search from Src for a null character, stopping once Src reaches Limit.
+// Return a pair of values, the first being the number of nonnull characters
+// and the second being the out chain.
+//
+// This can be used for strlen by setting Limit to 0.
+static std::pair<SDValue, SDValue> getBoundedStrlen(SelectionDAG &DAG, SDLoc DL,
+ SDValue Chain, SDValue Src,
+ SDValue Limit) {
+ EVT PtrVT = Src.getValueType();
+ SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other, MVT::Glue);
+ SDValue End = DAG.getNode(SystemZISD::SEARCH_STRING, DL, VTs, Chain,
+ Limit, Src, DAG.getConstant(0, MVT::i32));
+ Chain = End.getValue(1);
+ SDValue Len = DAG.getNode(ISD::SUB, DL, PtrVT, End, Src);
+ return std::make_pair(Len, Chain);
+}
+
+std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::
+EmitTargetCodeForStrlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+ SDValue Src, MachinePointerInfo SrcPtrInfo) const {
+ EVT PtrVT = Src.getValueType();
+ return getBoundedStrlen(DAG, DL, Chain, Src, DAG.getConstant(0, PtrVT));
+}
+
+std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::
+EmitTargetCodeForStrnlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+ SDValue Src, SDValue MaxLength,
+ MachinePointerInfo SrcPtrInfo) const {
+ EVT PtrVT = Src.getValueType();
+ MaxLength = DAG.getZExtOrTrunc(MaxLength, DL, PtrVT);
+ SDValue Limit = DAG.getNode(ISD::ADD, DL, PtrVT, Src, MaxLength);
+ return getBoundedStrlen(DAG, DL, Chain, Src, Limit);
+}
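
A C++ model of getBoundedStrlen and its two callers (editorial sketch; on the
actual hardware it is SRST's wrap-around addressing that lets Limit == 0 act
as "no limit" for strlen, which this model only approximates):

  #include <cstddef>

  size_t boundedStrlen(const char *Src, const char *Limit) {
    const char *End = Src;
    while (End != Limit && *End != '\0')  // SRST searching for byte 0
      ++End;
    return size_t(End - Src);             // Len = End - Src
  }
  // strlen(S)     ~ boundedStrlen(S, /*Limit=*/0);
  // strnlen(S, N) ~ boundedStrlen(S, S + N);  // yields N if no '\0'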
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
new file mode 100644
index 0000000..281d1e2
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
@@ -0,0 +1,80 @@
+//===-- SystemZSelectionDAGInfo.h - SystemZ SelectionDAG Info ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the SystemZ subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SYSTEMZSELECTIONDAGINFO_H
+#define SYSTEMZSELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class SystemZTargetMachine;
+
+class SystemZSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ explicit SystemZSelectionDAGInfo(const SystemZTargetMachine &TM);
+ ~SystemZSelectionDAGInfo();
+
+ virtual
+ SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool IsVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const
+ LLVM_OVERRIDE;
+
+ virtual SDValue
+ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL,
+ SDValue Chain, SDValue Dst, SDValue Byte,
+ SDValue Size, unsigned Align, bool IsVolatile,
+ MachinePointerInfo DstPtrInfo) const LLVM_OVERRIDE;
+
+ virtual std::pair<SDValue, SDValue>
+ EmitTargetCodeForMemcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+ SDValue Src1, SDValue Src2, SDValue Size,
+ MachinePointerInfo Op1PtrInfo,
+ MachinePointerInfo Op2PtrInfo) const LLVM_OVERRIDE;
+
+ virtual std::pair<SDValue, SDValue>
+ EmitTargetCodeForMemchr(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+ SDValue Src, SDValue Char, SDValue Length,
+ MachinePointerInfo SrcPtrInfo) const LLVM_OVERRIDE;
+
+ virtual std::pair<SDValue, SDValue>
+ EmitTargetCodeForStrcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+ SDValue Dest, SDValue Src,
+ MachinePointerInfo DestPtrInfo,
+ MachinePointerInfo SrcPtrInfo,
+ bool isStpcpy) const LLVM_OVERRIDE;
+
+ virtual std::pair<SDValue, SDValue>
+ EmitTargetCodeForStrcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+ SDValue Src1, SDValue Src2,
+ MachinePointerInfo Op1PtrInfo,
+ MachinePointerInfo Op2PtrInfo) const LLVM_OVERRIDE;
+
+ virtual std::pair<SDValue, SDValue>
+ EmitTargetCodeForStrlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+ SDValue Src, MachinePointerInfo SrcPtrInfo) const
+ LLVM_OVERRIDE;
+
+ virtual std::pair<SDValue, SDValue>
+ EmitTargetCodeForStrnlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+ SDValue Src, SDValue MaxLength,
+ MachinePointerInfo SrcPtrInfo) const LLVM_OVERRIDE;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp
new file mode 100644
index 0000000..537a545
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp
@@ -0,0 +1,163 @@
+//===-- SystemZShortenInst.cpp - Instruction-shortening pass --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass tries to replace instructions with shorter forms. For example,
+// IILF can be replaced with LLILL or LLILH if the constant fits and if the
+// other 32 bits of the GR64 destination are not live.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "systemz-shorten-inst"
+
+#include "SystemZTargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+using namespace llvm;
+
+namespace {
+ class SystemZShortenInst : public MachineFunctionPass {
+ public:
+ static char ID;
+ SystemZShortenInst(const SystemZTargetMachine &tm);
+
+ virtual const char *getPassName() const {
+ return "SystemZ Instruction Shortening";
+ }
+
+ bool processBlock(MachineBasicBlock *MBB);
+ bool runOnMachineFunction(MachineFunction &F);
+
+ private:
+ bool shortenIIF(MachineInstr &MI, unsigned *GPRMap, unsigned LiveOther,
+ unsigned LLIxL, unsigned LLIxH);
+
+ const SystemZInstrInfo *TII;
+
+ // LowGPRs[I] has bit N set if LLVM register I includes the low
+ // word of GPR N. HighGPRs is the same for the high word.
+ unsigned LowGPRs[SystemZ::NUM_TARGET_REGS];
+ unsigned HighGPRs[SystemZ::NUM_TARGET_REGS];
+ };
+
+ char SystemZShortenInst::ID = 0;
+} // end of anonymous namespace
+
+FunctionPass *llvm::createSystemZShortenInstPass(SystemZTargetMachine &TM) {
+ return new SystemZShortenInst(TM);
+}
+
+SystemZShortenInst::SystemZShortenInst(const SystemZTargetMachine &tm)
+ : MachineFunctionPass(ID), TII(0), LowGPRs(), HighGPRs() {
+ // Set up LowGPRs and HighGPRs.
+ for (unsigned I = 0; I < 16; ++I) {
+ LowGPRs[SystemZMC::GR32Regs[I]] |= 1 << I;
+ LowGPRs[SystemZMC::GR64Regs[I]] |= 1 << I;
+ HighGPRs[SystemZMC::GRH32Regs[I]] |= 1 << I;
+ HighGPRs[SystemZMC::GR64Regs[I]] |= 1 << I;
+ if (unsigned GR128 = SystemZMC::GR128Regs[I]) {
+ LowGPRs[GR128] |= 3 << I;
+ HighGPRs[GR128] |= 3 << I;
+ }
+ }
+}
+
+// MI loads one word of a GPR using an IIxF instruction; LLIxL and LLIxH
+// are the halfword immediate loads for the same word.  Try to use one of
+// them instead of IIxF.  If MI loads the high word, GPRMap[X] is the set
+// of high words referenced by LLVM register X and LiveOther is the mask
+// of low words that are currently live; if MI loads the low word, the
+// roles are reversed.
+bool SystemZShortenInst::shortenIIF(MachineInstr &MI, unsigned *GPRMap,
+ unsigned LiveOther, unsigned LLIxL,
+ unsigned LLIxH) {
+ unsigned Reg = MI.getOperand(0).getReg();
+ assert(Reg < SystemZ::NUM_TARGET_REGS && "Invalid register number");
+ unsigned GPRs = GPRMap[Reg];
+ assert(GPRs != 0 && "Register must be a GPR");
+ if (GPRs & LiveOther)
+ return false;
+
+ uint64_t Imm = MI.getOperand(1).getImm();
+ if (SystemZ::isImmLL(Imm)) {
+ MI.setDesc(TII->get(LLIxL));
+ MI.getOperand(0).setReg(SystemZMC::getRegAsGR64(Reg));
+ return true;
+ }
+ if (SystemZ::isImmLH(Imm)) {
+ MI.setDesc(TII->get(LLIxH));
+ MI.getOperand(0).setReg(SystemZMC::getRegAsGR64(Reg));
+ MI.getOperand(1).setImm(Imm >> 16);
+ return true;
+ }
+ return false;
+}
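
A hypothetical before/after for the IILF case (LLILL and LLILH zero the other
three halfwords of the GR64, which is why the pass must first prove that the
high word is dead):

  iilf  %r2, 0x1234        =>  llill %r2, 0x1234   (isImmLL: Imm kept)
  iilf  %r2, 0x56780000    =>  llilh %r2, 0x5678   (isImmLH: Imm >> 16)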
+
+// Process all instructions in MBB. Return true if something changed.
+bool SystemZShortenInst::processBlock(MachineBasicBlock *MBB) {
+ bool Changed = false;
+
+ // Work out which words are live on exit from the block.
+ unsigned LiveLow = 0;
+ unsigned LiveHigh = 0;
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ for (MachineBasicBlock::livein_iterator LI = (*SI)->livein_begin(),
+ LE = (*SI)->livein_end(); LI != LE; ++LI) {
+ unsigned Reg = *LI;
+ assert(Reg < SystemZ::NUM_TARGET_REGS && "Invalid register number");
+ LiveLow |= LowGPRs[Reg];
+ LiveHigh |= HighGPRs[Reg];
+ }
+ }
+
+ // Iterate backwards through the block looking for instructions to change.
+ for (MachineBasicBlock::reverse_iterator MBBI = MBB->rbegin(),
+ MBBE = MBB->rend(); MBBI != MBBE; ++MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned Opcode = MI.getOpcode();
+ if (Opcode == SystemZ::IILF)
+ Changed |= shortenIIF(MI, LowGPRs, LiveHigh, SystemZ::LLILL,
+ SystemZ::LLILH);
+ else if (Opcode == SystemZ::IIHF)
+ Changed |= shortenIIF(MI, HighGPRs, LiveLow, SystemZ::LLIHL,
+ SystemZ::LLIHH);
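+    // Defs kill liveness and uses revive it.  Collect the uses separately
+    // and OR them back in after clearing the defs, so that a register
+    // that MI both uses and defines stays live above MI.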
+ unsigned UsedLow = 0;
+ unsigned UsedHigh = 0;
+ for (MachineInstr::mop_iterator MOI = MI.operands_begin(),
+ MOE = MI.operands_end(); MOI != MOE; ++MOI) {
+ MachineOperand &MO = *MOI;
+ if (MO.isReg()) {
+ if (unsigned Reg = MO.getReg()) {
+ assert(Reg < SystemZ::NUM_TARGET_REGS && "Invalid register number");
+ if (MO.isDef()) {
+ LiveLow &= ~LowGPRs[Reg];
+ LiveHigh &= ~HighGPRs[Reg];
+ } else if (!MO.isUndef()) {
+ UsedLow |= LowGPRs[Reg];
+ UsedHigh |= HighGPRs[Reg];
+ }
+ }
+ }
+ }
+ LiveLow |= UsedLow;
+ LiveHigh |= UsedHigh;
+ }
+
+ return Changed;
+}
+
+bool SystemZShortenInst::runOnMachineFunction(MachineFunction &F) {
+ TII = static_cast<const SystemZInstrInfo *>(F.getTarget().getInstrInfo());
+
+ bool Changed = false;
+ for (MachineFunction::iterator MFI = F.begin(), MFE = F.end();
+ MFI != MFE; ++MFI)
+ Changed |= processBlock(MFI);
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
index cfd3324..3971d5e 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
@@ -9,6 +9,8 @@
#include "SystemZSubtarget.h"
#include "llvm/IR/GlobalValue.h"
+#include "llvm/Support/Host.h"
+#include "MCTargetDesc/SystemZMCTargetDesc.h"
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
@@ -16,13 +18,22 @@
using namespace llvm;
+// Pin the vtable to this file.
+void SystemZSubtarget::anchor() {}
+
SystemZSubtarget::SystemZSubtarget(const std::string &TT,
const std::string &CPU,
const std::string &FS)
- : SystemZGenSubtargetInfo(TT, CPU, FS), TargetTriple(TT) {
+ : SystemZGenSubtargetInfo(TT, CPU, FS), HasDistinctOps(false),
+ HasLoadStoreOnCond(false), HasHighWord(false), HasFPExtension(false),
+ TargetTriple(TT) {
std::string CPUName = CPU;
if (CPUName.empty())
- CPUName = "z10";
+ CPUName = "generic";
+#if defined(__linux__) && defined(__s390x__)
+ if (CPUName == "generic")
+ CPUName = sys::getHostCPUName();
+#endif
// Parse features string.
ParseSubtargetFeatures(CPUName, FS);
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h
index 8d4d450..5817491 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h
@@ -26,6 +26,13 @@ class GlobalValue;
class StringRef;
class SystemZSubtarget : public SystemZGenSubtargetInfo {
+ virtual void anchor();
+protected:
+ bool HasDistinctOps;
+ bool HasLoadStoreOnCond;
+ bool HasHighWord;
+ bool HasFPExtension;
+
private:
Triple TargetTriple;
@@ -33,9 +40,24 @@ public:
SystemZSubtarget(const std::string &TT, const std::string &CPU,
const std::string &FS);
+ // This is important for reducing register pressure in vector code.
+ virtual bool useAA() const LLVM_OVERRIDE { return true; }
+
// Automatically generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+ // Return true if the target has the distinct-operands facility.
+ bool hasDistinctOps() const { return HasDistinctOps; }
+
+ // Return true if the target has the load/store-on-condition facility.
+ bool hasLoadStoreOnCond() const { return HasLoadStoreOnCond; }
+
+ // Return true if the target has the high-word facility.
+ bool hasHighWord() const { return HasHighWord; }
+
+ // Return true if the target has the floating-point extension facility.
+ bool hasFPExtension() const { return HasFPExtension; }
+
// Return true if GV can be accessed using LARL for reloc model RM
// and code model CM.
bool isPC32DBLSymbol(const GlobalValue *GV, Reloc::Model RM,
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
index 8c4c456..dee92e9 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -10,6 +10,7 @@
#include "SystemZTargetMachine.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Transforms/Scalar.h"
using namespace llvm;
@@ -33,6 +34,7 @@ SystemZTargetMachine::SystemZTargetMachine(const Target &T, StringRef TT,
"-f32:32-f64:64-f128:64-a0:8:16-n32:64"),
InstrInfo(*this), TLInfo(*this), TSInfo(*this),
FrameLowering(*this, Subtarget) {
+ initAsmInfo();
}
namespace {
@@ -46,15 +48,61 @@ public:
return getTM<SystemZTargetMachine>();
}
- virtual bool addInstSelector();
+ virtual void addIRPasses() LLVM_OVERRIDE;
+ virtual bool addInstSelector() LLVM_OVERRIDE;
+ virtual bool addPreSched2() LLVM_OVERRIDE;
+ virtual bool addPreEmitPass() LLVM_OVERRIDE;
};
} // end anonymous namespace
+void SystemZPassConfig::addIRPasses() {
+ TargetPassConfig::addIRPasses();
+ addPass(createPartiallyInlineLibCallsPass());
+}
+
bool SystemZPassConfig::addInstSelector() {
addPass(createSystemZISelDag(getSystemZTargetMachine(), getOptLevel()));
return false;
}
+bool SystemZPassConfig::addPreSched2() {
+ if (getSystemZTargetMachine().getSubtargetImpl()->hasLoadStoreOnCond())
+ addPass(&IfConverterID);
+ return true;
+}
+
+bool SystemZPassConfig::addPreEmitPass() {
+ // We eliminate comparisons here rather than earlier because some
+ // transformations can change the set of available CC values and we
+ // generally want those transformations to have priority. This is
+ // especially true in the commonest case where the result of the comparison
+ // is used by a single in-range branch instruction, since we will then
+ // be able to fuse the compare and the branch instead.
+ //
+ // For example, two-address NILF can sometimes be converted into
+ // three-address RISBLG. NILF produces a CC value that indicates whether
+ // the low word is zero, but RISBLG does not modify CC at all. On the
+ // other hand, 64-bit ANDs like NILL can sometimes be converted to RISBG.
+ // The CC value produced by NILL isn't useful for our purposes, but the
+ // value produced by RISBG can be used for any comparison with zero
+ // (not just equality). So there are some transformations that lose
+ // CC values (while still being worthwhile) and others that happen to make
+ // the CC result more useful than it was originally.
+ //
+ // Another reason is that we only want to use BRANCH ON COUNT in cases
+ // where we know that the count register is not going to be spilled.
+ //
+ // Doing it so late makes it more likely that a register will be reused
+ // between the comparison and the branch, but it isn't clear whether
+ // preventing that would be a win or not.
+  if (getOptLevel() != CodeGenOpt::None) {
+    addPass(createSystemZElimComparePass(getSystemZTargetMachine()));
+    addPass(createSystemZShortenInstPass(getSystemZTargetMachine()));
+  }
+ addPass(createSystemZLongBranchPass(getSystemZTargetMachine()));
+ return true;
+}
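
A schematic of the first example above (editorial; operand encodings omitted):

  nilf   %r2, 0xffff       // two-address: %r2 &= 0xffff; sets CC to
                           // indicate whether the low word is now zero
  risblg %r2, %r3, ...     // three-address rotate-and-insert form of the
                           // same masking; leaves CC completely alone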
+
TargetPassConfig *SystemZTargetMachine::createPassConfig(PassManagerBase &PM) {
return new SystemZPassConfig(this, PM);
}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h
index 98614e7..a99a98e 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h
@@ -20,10 +20,10 @@
#include "SystemZInstrInfo.h"
#include "SystemZRegisterInfo.h"
#include "SystemZSubtarget.h"
+#include "SystemZSelectionDAGInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetSelectionDAGInfo.h"
namespace llvm {
@@ -32,7 +32,7 @@ class SystemZTargetMachine : public LLVMTargetMachine {
const DataLayout DL;
SystemZInstrInfo InstrInfo;
SystemZTargetLowering TLInfo;
- TargetSelectionDAGInfo TSInfo;
+ SystemZSelectionDAGInfo TSInfo;
SystemZFrameLowering FrameLowering;
public: