Diffstat (limited to 'contrib/llvm/lib/Target/SystemZ')
58 files changed, 23859 insertions, 0 deletions
diff --git a/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp new file mode 100644 index 0000000..9c995bf --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp @@ -0,0 +1,928 @@ +//===-- SystemZAsmParser.cpp - Parse SystemZ assembly instructions --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/SystemZMCTargetDesc.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCTargetAsmParser.h" +#include "llvm/Support/TargetRegistry.h" + +using namespace llvm; + +// Return true if Expr is in the range [MinValue, MaxValue]. +static bool inRange(const MCExpr *Expr, int64_t MinValue, int64_t MaxValue) { + if (auto *CE = dyn_cast<MCConstantExpr>(Expr)) { + int64_t Value = CE->getValue(); + return Value >= MinValue && Value <= MaxValue; + } + return false; +} + +namespace { +enum RegisterKind { + GR32Reg, + GRH32Reg, + GR64Reg, + GR128Reg, + ADDR32Reg, + ADDR64Reg, + FP32Reg, + FP64Reg, + FP128Reg, + VR32Reg, + VR64Reg, + VR128Reg +}; + +enum MemoryKind { + BDMem, + BDXMem, + BDLMem, + BDVMem +}; + +class SystemZOperand : public MCParsedAsmOperand { +public: +private: + enum OperandKind { + KindInvalid, + KindToken, + KindReg, + KindAccessReg, + KindImm, + KindImmTLS, + KindMem + }; + + OperandKind Kind; + SMLoc StartLoc, EndLoc; + + // A string of length Length, starting at Data. + struct TokenOp { + const char *Data; + unsigned Length; + }; + + // LLVM register Num, which has kind Kind. In some ways it might be + // easier for this class to have a register bank (general, floating-point + // or access) and a raw register number (0-15). This would postpone the + // interpretation of the operand to the add*() methods and avoid the need + // for context-dependent parsing. However, we do things the current way + // because of the virtual getReg() method, which needs to distinguish + // between (say) %r0 used as a single register and %r0 used as a pair. + // Context-dependent parsing can also give us slightly better error + // messages when invalid pairs like %r1 are used. + struct RegOp { + RegisterKind Kind; + unsigned Num; + }; + + // Base + Disp + Index, where Base and Index are LLVM registers or 0. + // MemKind says what type of memory this is and RegKind says what type + // the base register has (ADDR32Reg or ADDR64Reg). Length is the operand + // length for D(L,B)-style operands, otherwise it is null. + struct MemOp { + unsigned Base : 12; + unsigned Index : 12; + unsigned MemKind : 4; + unsigned RegKind : 4; + const MCExpr *Disp; + const MCExpr *Length; + }; + + // Imm is an immediate operand, and Sym is an optional TLS symbol + // for use with a __tls_get_offset marker relocation. + struct ImmTLSOp { + const MCExpr *Imm; + const MCExpr *Sym; + }; + + union { + TokenOp Token; + RegOp Reg; + unsigned AccessReg; + const MCExpr *Imm; + ImmTLSOp ImmTLS; + MemOp Mem; + }; + + void addExpr(MCInst &Inst, const MCExpr *Expr) const { + // Add as immediates when possible. Null MCExpr = 0. 
+ if (!Expr) + Inst.addOperand(MCOperand::createImm(0)); + else if (auto *CE = dyn_cast<MCConstantExpr>(Expr)) + Inst.addOperand(MCOperand::createImm(CE->getValue())); + else + Inst.addOperand(MCOperand::createExpr(Expr)); + } + +public: + SystemZOperand(OperandKind kind, SMLoc startLoc, SMLoc endLoc) + : Kind(kind), StartLoc(startLoc), EndLoc(endLoc) {} + + // Create particular kinds of operand. + static std::unique_ptr<SystemZOperand> createInvalid(SMLoc StartLoc, + SMLoc EndLoc) { + return make_unique<SystemZOperand>(KindInvalid, StartLoc, EndLoc); + } + static std::unique_ptr<SystemZOperand> createToken(StringRef Str, SMLoc Loc) { + auto Op = make_unique<SystemZOperand>(KindToken, Loc, Loc); + Op->Token.Data = Str.data(); + Op->Token.Length = Str.size(); + return Op; + } + static std::unique_ptr<SystemZOperand> + createReg(RegisterKind Kind, unsigned Num, SMLoc StartLoc, SMLoc EndLoc) { + auto Op = make_unique<SystemZOperand>(KindReg, StartLoc, EndLoc); + Op->Reg.Kind = Kind; + Op->Reg.Num = Num; + return Op; + } + static std::unique_ptr<SystemZOperand> + createAccessReg(unsigned Num, SMLoc StartLoc, SMLoc EndLoc) { + auto Op = make_unique<SystemZOperand>(KindAccessReg, StartLoc, EndLoc); + Op->AccessReg = Num; + return Op; + } + static std::unique_ptr<SystemZOperand> + createImm(const MCExpr *Expr, SMLoc StartLoc, SMLoc EndLoc) { + auto Op = make_unique<SystemZOperand>(KindImm, StartLoc, EndLoc); + Op->Imm = Expr; + return Op; + } + static std::unique_ptr<SystemZOperand> + createMem(MemoryKind MemKind, RegisterKind RegKind, unsigned Base, + const MCExpr *Disp, unsigned Index, const MCExpr *Length, + SMLoc StartLoc, SMLoc EndLoc) { + auto Op = make_unique<SystemZOperand>(KindMem, StartLoc, EndLoc); + Op->Mem.MemKind = MemKind; + Op->Mem.RegKind = RegKind; + Op->Mem.Base = Base; + Op->Mem.Index = Index; + Op->Mem.Disp = Disp; + Op->Mem.Length = Length; + return Op; + } + static std::unique_ptr<SystemZOperand> + createImmTLS(const MCExpr *Imm, const MCExpr *Sym, + SMLoc StartLoc, SMLoc EndLoc) { + auto Op = make_unique<SystemZOperand>(KindImmTLS, StartLoc, EndLoc); + Op->ImmTLS.Imm = Imm; + Op->ImmTLS.Sym = Sym; + return Op; + } + + // Token operands + bool isToken() const override { + return Kind == KindToken; + } + StringRef getToken() const { + assert(Kind == KindToken && "Not a token"); + return StringRef(Token.Data, Token.Length); + } + + // Register operands. + bool isReg() const override { + return Kind == KindReg; + } + bool isReg(RegisterKind RegKind) const { + return Kind == KindReg && Reg.Kind == RegKind; + } + unsigned getReg() const override { + assert(Kind == KindReg && "Not a register"); + return Reg.Num; + } + + // Access register operands. Access registers aren't exposed to LLVM + // as registers. + bool isAccessReg() const { + return Kind == KindAccessReg; + } + + // Immediate operands. + bool isImm() const override { + return Kind == KindImm; + } + bool isImm(int64_t MinValue, int64_t MaxValue) const { + return Kind == KindImm && inRange(Imm, MinValue, MaxValue); + } + const MCExpr *getImm() const { + assert(Kind == KindImm && "Not an immediate"); + return Imm; + } + + // Immediate operands with optional TLS symbol. + bool isImmTLS() const { + return Kind == KindImmTLS; + } + + // Memory operands. + bool isMem() const override { + return Kind == KindMem; + } + bool isMem(MemoryKind MemKind) const { + return (Kind == KindMem && + (Mem.MemKind == MemKind || + // A BDMem can be treated as a BDXMem in which the index + // register field is 0. 
+ (Mem.MemKind == BDMem && MemKind == BDXMem))); + } + bool isMem(MemoryKind MemKind, RegisterKind RegKind) const { + return isMem(MemKind) && Mem.RegKind == RegKind; + } + bool isMemDisp12(MemoryKind MemKind, RegisterKind RegKind) const { + return isMem(MemKind, RegKind) && inRange(Mem.Disp, 0, 0xfff); + } + bool isMemDisp20(MemoryKind MemKind, RegisterKind RegKind) const { + return isMem(MemKind, RegKind) && inRange(Mem.Disp, -524288, 524287); + } + bool isMemDisp12Len8(RegisterKind RegKind) const { + return isMemDisp12(BDLMem, RegKind) && inRange(Mem.Length, 1, 0x100); + } + void addBDVAddrOperands(MCInst &Inst, unsigned N) const { + assert(N == 3 && "Invalid number of operands"); + assert(isMem(BDVMem) && "Invalid operand type"); + Inst.addOperand(MCOperand::createReg(Mem.Base)); + addExpr(Inst, Mem.Disp); + Inst.addOperand(MCOperand::createReg(Mem.Index)); + } + + // Override MCParsedAsmOperand. + SMLoc getStartLoc() const override { return StartLoc; } + SMLoc getEndLoc() const override { return EndLoc; } + void print(raw_ostream &OS) const override; + + // Used by the TableGen code to add particular types of operand + // to an instruction. + void addRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands"); + Inst.addOperand(MCOperand::createReg(getReg())); + } + void addAccessRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands"); + assert(Kind == KindAccessReg && "Invalid operand type"); + Inst.addOperand(MCOperand::createImm(AccessReg)); + } + void addImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands"); + addExpr(Inst, getImm()); + } + void addBDAddrOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands"); + assert(isMem(BDMem) && "Invalid operand type"); + Inst.addOperand(MCOperand::createReg(Mem.Base)); + addExpr(Inst, Mem.Disp); + } + void addBDXAddrOperands(MCInst &Inst, unsigned N) const { + assert(N == 3 && "Invalid number of operands"); + assert(isMem(BDXMem) && "Invalid operand type"); + Inst.addOperand(MCOperand::createReg(Mem.Base)); + addExpr(Inst, Mem.Disp); + Inst.addOperand(MCOperand::createReg(Mem.Index)); + } + void addBDLAddrOperands(MCInst &Inst, unsigned N) const { + assert(N == 3 && "Invalid number of operands"); + assert(isMem(BDLMem) && "Invalid operand type"); + Inst.addOperand(MCOperand::createReg(Mem.Base)); + addExpr(Inst, Mem.Disp); + addExpr(Inst, Mem.Length); + } + void addImmTLSOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands"); + assert(Kind == KindImmTLS && "Invalid operand type"); + addExpr(Inst, ImmTLS.Imm); + if (ImmTLS.Sym) + addExpr(Inst, ImmTLS.Sym); + } + + // Used by the TableGen code to check for particular operand types. 
+ bool isGR32() const { return isReg(GR32Reg); } + bool isGRH32() const { return isReg(GRH32Reg); } + bool isGRX32() const { return false; } + bool isGR64() const { return isReg(GR64Reg); } + bool isGR128() const { return isReg(GR128Reg); } + bool isADDR32() const { return isReg(ADDR32Reg); } + bool isADDR64() const { return isReg(ADDR64Reg); } + bool isADDR128() const { return false; } + bool isFP32() const { return isReg(FP32Reg); } + bool isFP64() const { return isReg(FP64Reg); } + bool isFP128() const { return isReg(FP128Reg); } + bool isVR32() const { return isReg(VR32Reg); } + bool isVR64() const { return isReg(VR64Reg); } + bool isVF128() const { return false; } + bool isVR128() const { return isReg(VR128Reg); } + bool isBDAddr32Disp12() const { return isMemDisp12(BDMem, ADDR32Reg); } + bool isBDAddr32Disp20() const { return isMemDisp20(BDMem, ADDR32Reg); } + bool isBDAddr64Disp12() const { return isMemDisp12(BDMem, ADDR64Reg); } + bool isBDAddr64Disp20() const { return isMemDisp20(BDMem, ADDR64Reg); } + bool isBDXAddr64Disp12() const { return isMemDisp12(BDXMem, ADDR64Reg); } + bool isBDXAddr64Disp20() const { return isMemDisp20(BDXMem, ADDR64Reg); } + bool isBDLAddr64Disp12Len8() const { return isMemDisp12Len8(ADDR64Reg); } + bool isBDVAddr64Disp12() const { return isMemDisp12(BDVMem, ADDR64Reg); } + bool isU1Imm() const { return isImm(0, 1); } + bool isU2Imm() const { return isImm(0, 3); } + bool isU3Imm() const { return isImm(0, 7); } + bool isU4Imm() const { return isImm(0, 15); } + bool isU6Imm() const { return isImm(0, 63); } + bool isU8Imm() const { return isImm(0, 255); } + bool isS8Imm() const { return isImm(-128, 127); } + bool isU12Imm() const { return isImm(0, 4095); } + bool isU16Imm() const { return isImm(0, 65535); } + bool isS16Imm() const { return isImm(-32768, 32767); } + bool isU32Imm() const { return isImm(0, (1LL << 32) - 1); } + bool isS32Imm() const { return isImm(-(1LL << 31), (1LL << 31) - 1); } +}; + +class SystemZAsmParser : public MCTargetAsmParser { +#define GET_ASSEMBLER_HEADER +#include "SystemZGenAsmMatcher.inc" + +private: + MCAsmParser &Parser; + enum RegisterGroup { + RegGR, + RegFP, + RegV, + RegAccess + }; + struct Register { + RegisterGroup Group; + unsigned Num; + SMLoc StartLoc, EndLoc; + }; + + bool parseRegister(Register &Reg); + + bool parseRegister(Register &Reg, RegisterGroup Group, const unsigned *Regs, + bool IsAddress = false); + + OperandMatchResultTy parseRegister(OperandVector &Operands, + RegisterGroup Group, const unsigned *Regs, + RegisterKind Kind); + + bool parseAddress(unsigned &Base, const MCExpr *&Disp, + unsigned &Index, bool &IsVector, const MCExpr *&Length, + const unsigned *Regs, RegisterKind RegKind); + + OperandMatchResultTy parseAddress(OperandVector &Operands, + MemoryKind MemKind, const unsigned *Regs, + RegisterKind RegKind); + + OperandMatchResultTy parsePCRel(OperandVector &Operands, int64_t MinVal, + int64_t MaxVal, bool AllowTLS); + + bool parseOperand(OperandVector &Operands, StringRef Mnemonic); + +public: + SystemZAsmParser(const MCSubtargetInfo &sti, MCAsmParser &parser, + const MCInstrInfo &MII, + const MCTargetOptions &Options) + : MCTargetAsmParser(Options, sti), Parser(parser) { + MCAsmParserExtension::Initialize(Parser); + + // Initialize the set of available features. + setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits())); + } + + // Override MCTargetAsmParser. 
+ bool ParseDirective(AsmToken DirectiveID) override; + bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; + bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + SMLoc NameLoc, OperandVector &Operands) override; + bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + OperandVector &Operands, MCStreamer &Out, + uint64_t &ErrorInfo, + bool MatchingInlineAsm) override; + + // Used by the TableGen code to parse particular operand types. + OperandMatchResultTy parseGR32(OperandVector &Operands) { + return parseRegister(Operands, RegGR, SystemZMC::GR32Regs, GR32Reg); + } + OperandMatchResultTy parseGRH32(OperandVector &Operands) { + return parseRegister(Operands, RegGR, SystemZMC::GRH32Regs, GRH32Reg); + } + OperandMatchResultTy parseGRX32(OperandVector &Operands) { + llvm_unreachable("GRX32 should only be used for pseudo instructions"); + } + OperandMatchResultTy parseGR64(OperandVector &Operands) { + return parseRegister(Operands, RegGR, SystemZMC::GR64Regs, GR64Reg); + } + OperandMatchResultTy parseGR128(OperandVector &Operands) { + return parseRegister(Operands, RegGR, SystemZMC::GR128Regs, GR128Reg); + } + OperandMatchResultTy parseADDR32(OperandVector &Operands) { + return parseRegister(Operands, RegGR, SystemZMC::GR32Regs, ADDR32Reg); + } + OperandMatchResultTy parseADDR64(OperandVector &Operands) { + return parseRegister(Operands, RegGR, SystemZMC::GR64Regs, ADDR64Reg); + } + OperandMatchResultTy parseADDR128(OperandVector &Operands) { + llvm_unreachable("Shouldn't be used as an operand"); + } + OperandMatchResultTy parseFP32(OperandVector &Operands) { + return parseRegister(Operands, RegFP, SystemZMC::FP32Regs, FP32Reg); + } + OperandMatchResultTy parseFP64(OperandVector &Operands) { + return parseRegister(Operands, RegFP, SystemZMC::FP64Regs, FP64Reg); + } + OperandMatchResultTy parseFP128(OperandVector &Operands) { + return parseRegister(Operands, RegFP, SystemZMC::FP128Regs, FP128Reg); + } + OperandMatchResultTy parseVR32(OperandVector &Operands) { + return parseRegister(Operands, RegV, SystemZMC::VR32Regs, VR32Reg); + } + OperandMatchResultTy parseVR64(OperandVector &Operands) { + return parseRegister(Operands, RegV, SystemZMC::VR64Regs, VR64Reg); + } + OperandMatchResultTy parseVF128(OperandVector &Operands) { + llvm_unreachable("Shouldn't be used as an operand"); + } + OperandMatchResultTy parseVR128(OperandVector &Operands) { + return parseRegister(Operands, RegV, SystemZMC::VR128Regs, VR128Reg); + } + OperandMatchResultTy parseBDAddr32(OperandVector &Operands) { + return parseAddress(Operands, BDMem, SystemZMC::GR32Regs, ADDR32Reg); + } + OperandMatchResultTy parseBDAddr64(OperandVector &Operands) { + return parseAddress(Operands, BDMem, SystemZMC::GR64Regs, ADDR64Reg); + } + OperandMatchResultTy parseBDXAddr64(OperandVector &Operands) { + return parseAddress(Operands, BDXMem, SystemZMC::GR64Regs, ADDR64Reg); + } + OperandMatchResultTy parseBDLAddr64(OperandVector &Operands) { + return parseAddress(Operands, BDLMem, SystemZMC::GR64Regs, ADDR64Reg); + } + OperandMatchResultTy parseBDVAddr64(OperandVector &Operands) { + return parseAddress(Operands, BDVMem, SystemZMC::GR64Regs, ADDR64Reg); + } + OperandMatchResultTy parseAccessReg(OperandVector &Operands); + OperandMatchResultTy parsePCRel16(OperandVector &Operands) { + return parsePCRel(Operands, -(1LL << 16), (1LL << 16) - 1, false); + } + OperandMatchResultTy parsePCRel32(OperandVector &Operands) { + return parsePCRel(Operands, -(1LL << 32), (1LL << 32) - 1, false); + } + 
OperandMatchResultTy parsePCRelTLS16(OperandVector &Operands) { + return parsePCRel(Operands, -(1LL << 16), (1LL << 16) - 1, true); + } + OperandMatchResultTy parsePCRelTLS32(OperandVector &Operands) { + return parsePCRel(Operands, -(1LL << 32), (1LL << 32) - 1, true); + } +}; +} // end anonymous namespace + +#define GET_REGISTER_MATCHER +#define GET_SUBTARGET_FEATURE_NAME +#define GET_MATCHER_IMPLEMENTATION +#include "SystemZGenAsmMatcher.inc" + +void SystemZOperand::print(raw_ostream &OS) const { + llvm_unreachable("Not implemented"); +} + +// Parse one register of the form %<prefix><number>. +bool SystemZAsmParser::parseRegister(Register &Reg) { + Reg.StartLoc = Parser.getTok().getLoc(); + + // Eat the % prefix. + if (Parser.getTok().isNot(AsmToken::Percent)) + return Error(Parser.getTok().getLoc(), "register expected"); + Parser.Lex(); + + // Expect a register name. + if (Parser.getTok().isNot(AsmToken::Identifier)) + return Error(Reg.StartLoc, "invalid register"); + + // Check that there's a prefix. + StringRef Name = Parser.getTok().getString(); + if (Name.size() < 2) + return Error(Reg.StartLoc, "invalid register"); + char Prefix = Name[0]; + + // Treat the rest of the register name as a register number. + if (Name.substr(1).getAsInteger(10, Reg.Num)) + return Error(Reg.StartLoc, "invalid register"); + + // Look for valid combinations of prefix and number. + if (Prefix == 'r' && Reg.Num < 16) + Reg.Group = RegGR; + else if (Prefix == 'f' && Reg.Num < 16) + Reg.Group = RegFP; + else if (Prefix == 'v' && Reg.Num < 32) + Reg.Group = RegV; + else if (Prefix == 'a' && Reg.Num < 16) + Reg.Group = RegAccess; + else + return Error(Reg.StartLoc, "invalid register"); + + Reg.EndLoc = Parser.getTok().getLoc(); + Parser.Lex(); + return false; +} + +// Parse a register of group Group. If Regs is nonnull, use it to map +// the raw register number to LLVM numbering, with zero entries +// indicating an invalid register. IsAddress says whether the +// register appears in an address context. Allow FP Group if expecting +// RegV Group, since the f-prefix yields the FP group even while used +// with vector instructions. +bool SystemZAsmParser::parseRegister(Register &Reg, RegisterGroup Group, + const unsigned *Regs, bool IsAddress) { + if (parseRegister(Reg)) + return true; + if (Reg.Group != Group && !(Reg.Group == RegFP && Group == RegV)) + return Error(Reg.StartLoc, "invalid operand for instruction"); + if (Regs && Regs[Reg.Num] == 0) + return Error(Reg.StartLoc, "invalid register pair"); + if (Reg.Num == 0 && IsAddress) + return Error(Reg.StartLoc, "%r0 used in an address"); + if (Regs) + Reg.Num = Regs[Reg.Num]; + return false; +} + +// Parse a register and add it to Operands. The other arguments are as above. +SystemZAsmParser::OperandMatchResultTy +SystemZAsmParser::parseRegister(OperandVector &Operands, RegisterGroup Group, + const unsigned *Regs, RegisterKind Kind) { + if (Parser.getTok().isNot(AsmToken::Percent)) + return MatchOperand_NoMatch; + + Register Reg; + bool IsAddress = (Kind == ADDR32Reg || Kind == ADDR64Reg); + if (parseRegister(Reg, Group, Regs, IsAddress)) + return MatchOperand_ParseFail; + + Operands.push_back(SystemZOperand::createReg(Kind, Reg.Num, + Reg.StartLoc, Reg.EndLoc)); + return MatchOperand_Success; +} + +// Parse a memory operand into Base, Disp, Index and Length. +// Regs maps asm register numbers to LLVM register numbers and RegKind +// says what kind of address register we're using (ADDR32Reg or ADDR64Reg). 
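+// The textual forms handled here are D, D(B), D(X,B), D(L,B) and D(VX,B),
+// where D is the displacement expression, B is the base register, X is an
+// index register, L is a length expression and VX is a vector index register.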
+bool SystemZAsmParser::parseAddress(unsigned &Base, const MCExpr *&Disp, + unsigned &Index, bool &IsVector, + const MCExpr *&Length, const unsigned *Regs, + RegisterKind RegKind) { + // Parse the displacement, which must always be present. + if (getParser().parseExpression(Disp)) + return true; + + // Parse the optional base and index. + Index = 0; + Base = 0; + IsVector = false; + Length = nullptr; + if (getLexer().is(AsmToken::LParen)) { + Parser.Lex(); + + if (getLexer().is(AsmToken::Percent)) { + // Parse the first register and decide whether it's a base or an index. + Register Reg; + if (parseRegister(Reg)) + return true; + if (Reg.Group == RegV) { + // A vector index register. The base register is optional. + IsVector = true; + Index = SystemZMC::VR128Regs[Reg.Num]; + } else if (Reg.Group == RegGR) { + if (Reg.Num == 0) + return Error(Reg.StartLoc, "%r0 used in an address"); + // If the are two registers, the first one is the index and the + // second is the base. + if (getLexer().is(AsmToken::Comma)) + Index = Regs[Reg.Num]; + else + Base = Regs[Reg.Num]; + } else + return Error(Reg.StartLoc, "invalid address register"); + } else { + // Parse the length. + if (getParser().parseExpression(Length)) + return true; + } + + // Check whether there's a second register. It's the base if so. + if (getLexer().is(AsmToken::Comma)) { + Parser.Lex(); + Register Reg; + if (parseRegister(Reg, RegGR, Regs, RegKind)) + return true; + Base = Reg.Num; + } + + // Consume the closing bracket. + if (getLexer().isNot(AsmToken::RParen)) + return Error(Parser.getTok().getLoc(), "unexpected token in address"); + Parser.Lex(); + } + return false; +} + +// Parse a memory operand and add it to Operands. The other arguments +// are as above. +SystemZAsmParser::OperandMatchResultTy +SystemZAsmParser::parseAddress(OperandVector &Operands, MemoryKind MemKind, + const unsigned *Regs, RegisterKind RegKind) { + SMLoc StartLoc = Parser.getTok().getLoc(); + unsigned Base, Index; + bool IsVector; + const MCExpr *Disp; + const MCExpr *Length; + if (parseAddress(Base, Disp, Index, IsVector, Length, Regs, RegKind)) + return MatchOperand_ParseFail; + + if (IsVector && MemKind != BDVMem) { + Error(StartLoc, "invalid use of vector addressing"); + return MatchOperand_ParseFail; + } + + if (!IsVector && MemKind == BDVMem) { + Error(StartLoc, "vector index required in address"); + return MatchOperand_ParseFail; + } + + if (Index && MemKind != BDXMem && MemKind != BDVMem) { + Error(StartLoc, "invalid use of indexed addressing"); + return MatchOperand_ParseFail; + } + + if (Length && MemKind != BDLMem) { + Error(StartLoc, "invalid use of length addressing"); + return MatchOperand_ParseFail; + } + + if (!Length && MemKind == BDLMem) { + Error(StartLoc, "missing length in address"); + return MatchOperand_ParseFail; + } + + SMLoc EndLoc = + SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + Operands.push_back(SystemZOperand::createMem(MemKind, RegKind, Base, Disp, + Index, Length, StartLoc, + EndLoc)); + return MatchOperand_Success; +} + +bool SystemZAsmParser::ParseDirective(AsmToken DirectiveID) { + return true; +} + +bool SystemZAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, + SMLoc &EndLoc) { + Register Reg; + if (parseRegister(Reg)) + return true; + if (Reg.Group == RegGR) + RegNo = SystemZMC::GR64Regs[Reg.Num]; + else if (Reg.Group == RegFP) + RegNo = SystemZMC::FP64Regs[Reg.Num]; + else if (Reg.Group == RegV) + RegNo = SystemZMC::VR128Regs[Reg.Num]; + else + // FIXME: Access registers aren't 
modelled as LLVM registers yet. + return Error(Reg.StartLoc, "invalid operand for instruction"); + StartLoc = Reg.StartLoc; + EndLoc = Reg.EndLoc; + return false; +} + +bool SystemZAsmParser::ParseInstruction(ParseInstructionInfo &Info, + StringRef Name, SMLoc NameLoc, + OperandVector &Operands) { + Operands.push_back(SystemZOperand::createToken(Name, NameLoc)); + + // Read the remaining operands. + if (getLexer().isNot(AsmToken::EndOfStatement)) { + // Read the first operand. + if (parseOperand(Operands, Name)) { + Parser.eatToEndOfStatement(); + return true; + } + + // Read any subsequent operands. + while (getLexer().is(AsmToken::Comma)) { + Parser.Lex(); + if (parseOperand(Operands, Name)) { + Parser.eatToEndOfStatement(); + return true; + } + } + if (getLexer().isNot(AsmToken::EndOfStatement)) { + SMLoc Loc = getLexer().getLoc(); + Parser.eatToEndOfStatement(); + return Error(Loc, "unexpected token in argument list"); + } + } + + // Consume the EndOfStatement. + Parser.Lex(); + return false; +} + +bool SystemZAsmParser::parseOperand(OperandVector &Operands, + StringRef Mnemonic) { + // Check if the current operand has a custom associated parser, if so, try to + // custom parse the operand, or fallback to the general approach. + OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); + if (ResTy == MatchOperand_Success) + return false; + + // If there wasn't a custom match, try the generic matcher below. Otherwise, + // there was a match, but an error occurred, in which case, just return that + // the operand parsing failed. + if (ResTy == MatchOperand_ParseFail) + return true; + + // Check for a register. All real register operands should have used + // a context-dependent parse routine, which gives the required register + // class. The code is here to mop up other cases, like those where + // the instruction isn't recognized. + if (Parser.getTok().is(AsmToken::Percent)) { + Register Reg; + if (parseRegister(Reg)) + return true; + Operands.push_back(SystemZOperand::createInvalid(Reg.StartLoc, Reg.EndLoc)); + return false; + } + + // The only other type of operand is an immediate or address. As above, + // real address operands should have used a context-dependent parse routine, + // so we treat any plain expression as an immediate. 
+ SMLoc StartLoc = Parser.getTok().getLoc(); + unsigned Base, Index; + bool IsVector; + const MCExpr *Expr, *Length; + if (parseAddress(Base, Expr, Index, IsVector, Length, SystemZMC::GR64Regs, + ADDR64Reg)) + return true; + + SMLoc EndLoc = + SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + if (Base || Index || Length) + Operands.push_back(SystemZOperand::createInvalid(StartLoc, EndLoc)); + else + Operands.push_back(SystemZOperand::createImm(Expr, StartLoc, EndLoc)); + return false; +} + +bool SystemZAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + OperandVector &Operands, + MCStreamer &Out, + uint64_t &ErrorInfo, + bool MatchingInlineAsm) { + MCInst Inst; + unsigned MatchResult; + + MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo, + MatchingInlineAsm); + switch (MatchResult) { + case Match_Success: + Inst.setLoc(IDLoc); + Out.EmitInstruction(Inst, getSTI()); + return false; + + case Match_MissingFeature: { + assert(ErrorInfo && "Unknown missing feature!"); + // Special case the error message for the very common case where only + // a single subtarget feature is missing + std::string Msg = "instruction requires:"; + uint64_t Mask = 1; + for (unsigned I = 0; I < sizeof(ErrorInfo) * 8 - 1; ++I) { + if (ErrorInfo & Mask) { + Msg += " "; + Msg += getSubtargetFeatureName(ErrorInfo & Mask); + } + Mask <<= 1; + } + return Error(IDLoc, Msg); + } + + case Match_InvalidOperand: { + SMLoc ErrorLoc = IDLoc; + if (ErrorInfo != ~0ULL) { + if (ErrorInfo >= Operands.size()) + return Error(IDLoc, "too few operands for instruction"); + + ErrorLoc = ((SystemZOperand &)*Operands[ErrorInfo]).getStartLoc(); + if (ErrorLoc == SMLoc()) + ErrorLoc = IDLoc; + } + return Error(ErrorLoc, "invalid operand for instruction"); + } + + case Match_MnemonicFail: + return Error(IDLoc, "invalid instruction"); + } + + llvm_unreachable("Unexpected match type"); +} + +SystemZAsmParser::OperandMatchResultTy +SystemZAsmParser::parseAccessReg(OperandVector &Operands) { + if (Parser.getTok().isNot(AsmToken::Percent)) + return MatchOperand_NoMatch; + + Register Reg; + if (parseRegister(Reg, RegAccess, nullptr)) + return MatchOperand_ParseFail; + + Operands.push_back(SystemZOperand::createAccessReg(Reg.Num, + Reg.StartLoc, + Reg.EndLoc)); + return MatchOperand_Success; +} + +SystemZAsmParser::OperandMatchResultTy +SystemZAsmParser::parsePCRel(OperandVector &Operands, int64_t MinVal, + int64_t MaxVal, bool AllowTLS) { + MCContext &Ctx = getContext(); + MCStreamer &Out = getStreamer(); + const MCExpr *Expr; + SMLoc StartLoc = Parser.getTok().getLoc(); + if (getParser().parseExpression(Expr)) + return MatchOperand_NoMatch; + + // For consistency with the GNU assembler, treat immediates as offsets + // from ".". + if (auto *CE = dyn_cast<MCConstantExpr>(Expr)) { + int64_t Value = CE->getValue(); + if ((Value & 1) || Value < MinVal || Value > MaxVal) { + Error(StartLoc, "offset out of range"); + return MatchOperand_ParseFail; + } + MCSymbol *Sym = Ctx.createTempSymbol(); + Out.EmitLabel(Sym); + const MCExpr *Base = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, + Ctx); + Expr = Value == 0 ? Base : MCBinaryExpr::createAdd(Base, Expr, Ctx); + } + + // Optionally match :tls_gdcall: or :tls_ldcall: followed by a TLS symbol. 
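+ // For example: brasl %r14,__tls_get_offset@plt:tls_gdcall:sym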
+ const MCExpr *Sym = nullptr; + if (AllowTLS && getLexer().is(AsmToken::Colon)) { + Parser.Lex(); + + if (Parser.getTok().isNot(AsmToken::Identifier)) { + Error(Parser.getTok().getLoc(), "unexpected token"); + return MatchOperand_ParseFail; + } + + MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None; + StringRef Name = Parser.getTok().getString(); + if (Name == "tls_gdcall") + Kind = MCSymbolRefExpr::VK_TLSGD; + else if (Name == "tls_ldcall") + Kind = MCSymbolRefExpr::VK_TLSLDM; + else { + Error(Parser.getTok().getLoc(), "unknown TLS tag"); + return MatchOperand_ParseFail; + } + Parser.Lex(); + + if (Parser.getTok().isNot(AsmToken::Colon)) { + Error(Parser.getTok().getLoc(), "unexpected token"); + return MatchOperand_ParseFail; + } + Parser.Lex(); + + if (Parser.getTok().isNot(AsmToken::Identifier)) { + Error(Parser.getTok().getLoc(), "unexpected token"); + return MatchOperand_ParseFail; + } + + StringRef Identifier = Parser.getTok().getString(); + Sym = MCSymbolRefExpr::create(Ctx.getOrCreateSymbol(Identifier), + Kind, Ctx); + Parser.Lex(); + } + + SMLoc EndLoc = + SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + + if (AllowTLS) + Operands.push_back(SystemZOperand::createImmTLS(Expr, Sym, + StartLoc, EndLoc)); + else + Operands.push_back(SystemZOperand::createImm(Expr, StartLoc, EndLoc)); + + return MatchOperand_Success; +} + +// Force static initialization. +extern "C" void LLVMInitializeSystemZAsmParser() { + RegisterMCAsmParser<SystemZAsmParser> X(TheSystemZTarget); +} diff --git a/contrib/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/contrib/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp new file mode 100644 index 0000000..bf67b75 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp @@ -0,0 +1,379 @@ +//===-- SystemZDisassembler.cpp - Disassembler for SystemZ ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "SystemZ.h" +#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCFixedLenDisassembler.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/TargetRegistry.h" + +using namespace llvm; + +#define DEBUG_TYPE "systemz-disassembler" + +typedef MCDisassembler::DecodeStatus DecodeStatus; + +namespace { +class SystemZDisassembler : public MCDisassembler { +public: + SystemZDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) + : MCDisassembler(STI, Ctx) {} + ~SystemZDisassembler() override {} + + DecodeStatus getInstruction(MCInst &instr, uint64_t &Size, + ArrayRef<uint8_t> Bytes, uint64_t Address, + raw_ostream &VStream, + raw_ostream &CStream) const override; +}; +} // end anonymous namespace + +static MCDisassembler *createSystemZDisassembler(const Target &T, + const MCSubtargetInfo &STI, + MCContext &Ctx) { + return new SystemZDisassembler(STI, Ctx); +} + +extern "C" void LLVMInitializeSystemZDisassembler() { + // Register the disassembler. 
+ TargetRegistry::RegisterMCDisassembler(TheSystemZTarget, + createSystemZDisassembler); +} + +static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo, + const unsigned *Regs, unsigned Size) { + assert(RegNo < Size && "Invalid register"); + RegNo = Regs[RegNo]; + if (RegNo == 0) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::createReg(RegNo)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeGR32BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::GR32Regs, 16); +} + +static DecodeStatus DecodeGRH32BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::GRH32Regs, 16); +} + +static DecodeStatus DecodeGR64BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::GR64Regs, 16); +} + +static DecodeStatus DecodeGR128BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::GR128Regs, 16); +} + +static DecodeStatus DecodeADDR64BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::GR64Regs, 16); +} + +static DecodeStatus DecodeFP32BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::FP32Regs, 16); +} + +static DecodeStatus DecodeFP64BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::FP64Regs, 16); +} + +static DecodeStatus DecodeFP128BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::FP128Regs, 16); +} + +static DecodeStatus DecodeVR32BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::VR32Regs, 32); +} + +static DecodeStatus DecodeVR64BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::VR64Regs, 32); +} + +static DecodeStatus DecodeVR128BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::VR128Regs, 32); +} + +template<unsigned N> +static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm) { + if (!isUInt<N>(Imm)) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::createImm(Imm)); + return MCDisassembler::Success; +} + +template<unsigned N> +static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm) { + if (!isUInt<N>(Imm)) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::createImm(SignExtend64<N>(Imm))); + return MCDisassembler::Success; +} + +static DecodeStatus decodeAccessRegOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, + const void *Decoder) { + return decodeUImmOperand<4>(Inst, Imm); +} + +static DecodeStatus decodeU1ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, const void *Decoder) { + return decodeUImmOperand<1>(Inst, Imm); +} + +static DecodeStatus decodeU2ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, const void *Decoder) { + return decodeUImmOperand<2>(Inst, Imm); +} + +static DecodeStatus 
decodeU3ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, const void *Decoder) { + return decodeUImmOperand<3>(Inst, Imm); +} + +static DecodeStatus decodeU4ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, const void *Decoder) { + return decodeUImmOperand<4>(Inst, Imm); +} + +static DecodeStatus decodeU6ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, const void *Decoder) { + return decodeUImmOperand<6>(Inst, Imm); +} + +static DecodeStatus decodeU8ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, const void *Decoder) { + return decodeUImmOperand<8>(Inst, Imm); +} + +static DecodeStatus decodeU12ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, const void *Decoder) { + return decodeUImmOperand<12>(Inst, Imm); +} + +static DecodeStatus decodeU16ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, const void *Decoder) { + return decodeUImmOperand<16>(Inst, Imm); +} + +static DecodeStatus decodeU32ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, const void *Decoder) { + return decodeUImmOperand<32>(Inst, Imm); +} + +static DecodeStatus decodeS8ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, const void *Decoder) { + return decodeSImmOperand<8>(Inst, Imm); +} + +static DecodeStatus decodeS16ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, const void *Decoder) { + return decodeSImmOperand<16>(Inst, Imm); +} + +static DecodeStatus decodeS32ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, const void *Decoder) { + return decodeSImmOperand<32>(Inst, Imm); +} + +template<unsigned N> +static DecodeStatus decodePCDBLOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address) { + assert(isUInt<N>(Imm) && "Invalid PC-relative offset"); + Inst.addOperand(MCOperand::createImm(SignExtend64<N>(Imm) * 2 + Address)); + return MCDisassembler::Success; +} + +static DecodeStatus decodePC16DBLOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, + const void *Decoder) { + return decodePCDBLOperand<16>(Inst, Imm, Address); +} + +static DecodeStatus decodePC32DBLOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, + const void *Decoder) { + return decodePCDBLOperand<32>(Inst, Imm, Address); +} + +static DecodeStatus decodeBDAddr12Operand(MCInst &Inst, uint64_t Field, + const unsigned *Regs) { + uint64_t Base = Field >> 12; + uint64_t Disp = Field & 0xfff; + assert(Base < 16 && "Invalid BDAddr12"); + Inst.addOperand(MCOperand::createReg(Base == 0 ? 0 : Regs[Base])); + Inst.addOperand(MCOperand::createImm(Disp)); + return MCDisassembler::Success; +} + +static DecodeStatus decodeBDAddr20Operand(MCInst &Inst, uint64_t Field, + const unsigned *Regs) { + uint64_t Base = Field >> 20; + uint64_t Disp = ((Field << 12) & 0xff000) | ((Field >> 8) & 0xfff); + assert(Base < 16 && "Invalid BDAddr20"); + Inst.addOperand(MCOperand::createReg(Base == 0 ? 0 : Regs[Base])); + Inst.addOperand(MCOperand::createImm(SignExtend64<20>(Disp))); + return MCDisassembler::Success; +} + +static DecodeStatus decodeBDXAddr12Operand(MCInst &Inst, uint64_t Field, + const unsigned *Regs) { + uint64_t Index = Field >> 16; + uint64_t Base = (Field >> 12) & 0xf; + uint64_t Disp = Field & 0xfff; + assert(Index < 16 && "Invalid BDXAddr12"); + Inst.addOperand(MCOperand::createReg(Base == 0 ? 0 : Regs[Base])); + Inst.addOperand(MCOperand::createImm(Disp)); + Inst.addOperand(MCOperand::createReg(Index == 0 ? 
0 : Regs[Index])); + return MCDisassembler::Success; +} + +static DecodeStatus decodeBDXAddr20Operand(MCInst &Inst, uint64_t Field, + const unsigned *Regs) { + uint64_t Index = Field >> 24; + uint64_t Base = (Field >> 20) & 0xf; + uint64_t Disp = ((Field & 0xfff00) >> 8) | ((Field & 0xff) << 12); + assert(Index < 16 && "Invalid BDXAddr20"); + Inst.addOperand(MCOperand::createReg(Base == 0 ? 0 : Regs[Base])); + Inst.addOperand(MCOperand::createImm(SignExtend64<20>(Disp))); + Inst.addOperand(MCOperand::createReg(Index == 0 ? 0 : Regs[Index])); + return MCDisassembler::Success; +} + +static DecodeStatus decodeBDLAddr12Len8Operand(MCInst &Inst, uint64_t Field, + const unsigned *Regs) { + uint64_t Length = Field >> 16; + uint64_t Base = (Field >> 12) & 0xf; + uint64_t Disp = Field & 0xfff; + assert(Length < 256 && "Invalid BDLAddr12Len8"); + Inst.addOperand(MCOperand::createReg(Base == 0 ? 0 : Regs[Base])); + Inst.addOperand(MCOperand::createImm(Disp)); + Inst.addOperand(MCOperand::createImm(Length + 1)); + return MCDisassembler::Success; +} + +static DecodeStatus decodeBDVAddr12Operand(MCInst &Inst, uint64_t Field, + const unsigned *Regs) { + uint64_t Index = Field >> 16; + uint64_t Base = (Field >> 12) & 0xf; + uint64_t Disp = Field & 0xfff; + assert(Index < 32 && "Invalid BDVAddr12"); + Inst.addOperand(MCOperand::createReg(Base == 0 ? 0 : Regs[Base])); + Inst.addOperand(MCOperand::createImm(Disp)); + Inst.addOperand(MCOperand::createReg(SystemZMC::VR128Regs[Index])); + return MCDisassembler::Success; +} + +static DecodeStatus decodeBDAddr32Disp12Operand(MCInst &Inst, uint64_t Field, + uint64_t Address, + const void *Decoder) { + return decodeBDAddr12Operand(Inst, Field, SystemZMC::GR32Regs); +} + +static DecodeStatus decodeBDAddr32Disp20Operand(MCInst &Inst, uint64_t Field, + uint64_t Address, + const void *Decoder) { + return decodeBDAddr20Operand(Inst, Field, SystemZMC::GR32Regs); +} + +static DecodeStatus decodeBDAddr64Disp12Operand(MCInst &Inst, uint64_t Field, + uint64_t Address, + const void *Decoder) { + return decodeBDAddr12Operand(Inst, Field, SystemZMC::GR64Regs); +} + +static DecodeStatus decodeBDAddr64Disp20Operand(MCInst &Inst, uint64_t Field, + uint64_t Address, + const void *Decoder) { + return decodeBDAddr20Operand(Inst, Field, SystemZMC::GR64Regs); +} + +static DecodeStatus decodeBDXAddr64Disp12Operand(MCInst &Inst, uint64_t Field, + uint64_t Address, + const void *Decoder) { + return decodeBDXAddr12Operand(Inst, Field, SystemZMC::GR64Regs); +} + +static DecodeStatus decodeBDXAddr64Disp20Operand(MCInst &Inst, uint64_t Field, + uint64_t Address, + const void *Decoder) { + return decodeBDXAddr20Operand(Inst, Field, SystemZMC::GR64Regs); +} + +static DecodeStatus decodeBDLAddr64Disp12Len8Operand(MCInst &Inst, + uint64_t Field, + uint64_t Address, + const void *Decoder) { + return decodeBDLAddr12Len8Operand(Inst, Field, SystemZMC::GR64Regs); +} + +static DecodeStatus decodeBDVAddr64Disp12Operand(MCInst &Inst, uint64_t Field, + uint64_t Address, + const void *Decoder) { + return decodeBDVAddr12Operand(Inst, Field, SystemZMC::GR64Regs); +} + +#include "SystemZGenDisassemblerTables.inc" + +DecodeStatus SystemZDisassembler::getInstruction(MCInst &MI, uint64_t &Size, + ArrayRef<uint8_t> Bytes, + uint64_t Address, + raw_ostream &OS, + raw_ostream &CS) const { + // Get the first two bytes of the instruction. + Size = 0; + if (Bytes.size() < 2) + return MCDisassembler::Fail; + + // The top 2 bits of the first byte specify the size. 
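+ // Bytes[0] in the range 0x00-0x3f means a 2-byte instruction, 0x40-0xbf a
+ // 4-byte instruction and 0xc0-0xff a 6-byte instruction.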
+ const uint8_t *Table; + if (Bytes[0] < 0x40) { + Size = 2; + Table = DecoderTable16; + } else if (Bytes[0] < 0xc0) { + Size = 4; + Table = DecoderTable32; + } else { + Size = 6; + Table = DecoderTable48; + } + + // Read any remaining bytes. + if (Bytes.size() < Size) + return MCDisassembler::Fail; + + // Construct the instruction. + uint64_t Inst = 0; + for (uint64_t I = 0; I < Size; ++I) + Inst = (Inst << 8) | Bytes[I]; + + return decodeInstruction(Table, MI, Inst, Address, this, STI); +} diff --git a/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp b/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp new file mode 100644 index 0000000..6444cf8 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp @@ -0,0 +1,222 @@ +//===-- SystemZInstPrinter.cpp - Convert SystemZ MCInst to assembly syntax ===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "SystemZInstPrinter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define DEBUG_TYPE "asm-printer" + +#include "SystemZGenAsmWriter.inc" + +void SystemZInstPrinter::printAddress(unsigned Base, int64_t Disp, + unsigned Index, raw_ostream &O) { + O << Disp; + if (Base || Index) { + O << '('; + if (Index) { + O << '%' << getRegisterName(Index); + if (Base) + O << ','; + } + if (Base) + O << '%' << getRegisterName(Base); + O << ')'; + } +} + +void SystemZInstPrinter::printOperand(const MCOperand &MO, const MCAsmInfo *MAI, + raw_ostream &O) { + if (MO.isReg()) + O << '%' << getRegisterName(MO.getReg()); + else if (MO.isImm()) + O << MO.getImm(); + else if (MO.isExpr()) + MO.getExpr()->print(O, MAI); + else + llvm_unreachable("Invalid operand"); +} + +void SystemZInstPrinter::printInst(const MCInst *MI, raw_ostream &O, + StringRef Annot, + const MCSubtargetInfo &STI) { + printInstruction(MI, O); + printAnnotation(O, Annot); +} + +void SystemZInstPrinter::printRegName(raw_ostream &O, unsigned RegNo) const { + O << '%' << getRegisterName(RegNo); +} + +template <unsigned N> +static void printUImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) { + int64_t Value = MI->getOperand(OpNum).getImm(); + assert(isUInt<N>(Value) && "Invalid uimm argument"); + O << Value; +} + +template <unsigned N> +static void printSImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) { + int64_t Value = MI->getOperand(OpNum).getImm(); + assert(isInt<N>(Value) && "Invalid simm argument"); + O << Value; +} + +void SystemZInstPrinter::printU1ImmOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + printUImmOperand<1>(MI, OpNum, O); +} + +void SystemZInstPrinter::printU2ImmOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + printUImmOperand<2>(MI, OpNum, O); +} + +void SystemZInstPrinter::printU3ImmOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + printUImmOperand<3>(MI, OpNum, O); +} + +void SystemZInstPrinter::printU4ImmOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + printUImmOperand<4>(MI, OpNum, O); +} + +void SystemZInstPrinter::printU6ImmOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + printUImmOperand<6>(MI, OpNum, O); +} + +void SystemZInstPrinter::printS8ImmOperand(const 
MCInst *MI, int OpNum, + raw_ostream &O) { + printSImmOperand<8>(MI, OpNum, O); +} + +void SystemZInstPrinter::printU8ImmOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + printUImmOperand<8>(MI, OpNum, O); +} + +void SystemZInstPrinter::printU12ImmOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + printUImmOperand<12>(MI, OpNum, O); +} + +void SystemZInstPrinter::printS16ImmOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + printSImmOperand<16>(MI, OpNum, O); +} + +void SystemZInstPrinter::printU16ImmOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + printUImmOperand<16>(MI, OpNum, O); +} + +void SystemZInstPrinter::printS32ImmOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + printSImmOperand<32>(MI, OpNum, O); +} + +void SystemZInstPrinter::printU32ImmOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + printUImmOperand<32>(MI, OpNum, O); +} + +void SystemZInstPrinter::printAccessRegOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + uint64_t Value = MI->getOperand(OpNum).getImm(); + assert(Value < 16 && "Invalid access register number"); + O << "%a" << (unsigned int)Value; +} + +void SystemZInstPrinter::printPCRelOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + const MCOperand &MO = MI->getOperand(OpNum); + if (MO.isImm()) { + O << "0x"; + O.write_hex(MO.getImm()); + } else + MO.getExpr()->print(O, &MAI); +} + +void SystemZInstPrinter::printPCRelTLSOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + // Output the PC-relative operand. + printPCRelOperand(MI, OpNum, O); + + // Output the TLS marker if present. + if ((unsigned)OpNum + 1 < MI->getNumOperands()) { + const MCOperand &MO = MI->getOperand(OpNum + 1); + const MCSymbolRefExpr &refExp = cast<MCSymbolRefExpr>(*MO.getExpr()); + switch (refExp.getKind()) { + case MCSymbolRefExpr::VK_TLSGD: + O << ":tls_gdcall:"; + break; + case MCSymbolRefExpr::VK_TLSLDM: + O << ":tls_ldcall:"; + break; + default: + llvm_unreachable("Unexpected symbol kind"); + } + O << refExp.getSymbol().getName(); + } +} + +void SystemZInstPrinter::printOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + printOperand(MI->getOperand(OpNum), &MAI, O); +} + +void SystemZInstPrinter::printBDAddrOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + printAddress(MI->getOperand(OpNum).getReg(), + MI->getOperand(OpNum + 1).getImm(), 0, O); +} + +void SystemZInstPrinter::printBDXAddrOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + printAddress(MI->getOperand(OpNum).getReg(), + MI->getOperand(OpNum + 1).getImm(), + MI->getOperand(OpNum + 2).getReg(), O); +} + +void SystemZInstPrinter::printBDLAddrOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + unsigned Base = MI->getOperand(OpNum).getReg(); + uint64_t Disp = MI->getOperand(OpNum + 1).getImm(); + uint64_t Length = MI->getOperand(OpNum + 2).getImm(); + O << Disp << '(' << Length; + if (Base) + O << ",%" << getRegisterName(Base); + O << ')'; +} + +void SystemZInstPrinter::printBDVAddrOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + printAddress(MI->getOperand(OpNum).getReg(), + MI->getOperand(OpNum + 1).getImm(), + MI->getOperand(OpNum + 2).getReg(), O); +} + +void SystemZInstPrinter::printCond4Operand(const MCInst *MI, int OpNum, + raw_ostream &O) { + static const char *const CondNames[] = { + "o", "h", "nle", "l", "nhe", "lh", "ne", + "e", "nlh", "he", "nl", "le", "nh", "no" + }; + uint64_t Imm = MI->getOperand(OpNum).getImm(); + assert(Imm > 0 && Imm < 15 && "Invalid condition"); + O << CondNames[Imm - 1]; +} 
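To make the D(X,B) formatting above concrete, here is a minimal standalone sketch that mirrors printAddress; it is not part of the commit, formatAddress is a hypothetical helper, and plain strings stand in for the TableGen-generated getRegisterName():

// Mirrors SystemZInstPrinter::printAddress: print the displacement, then an
// optional parenthesised index and/or base, e.g. "8(%r2,%r3)".
#include <cstdint>
#include <iostream>
#include <string>

static std::string formatAddress(int64_t Disp, const std::string &Index,
                                 const std::string &Base) {
  std::string S = std::to_string(Disp);
  if (!Base.empty() || !Index.empty()) {
    S += '(';
    if (!Index.empty()) {
      S += '%' + Index;
      if (!Base.empty())
        S += ',';
    }
    if (!Base.empty())
      S += '%' + Base;
    S += ')';
  }
  return S;
}

int main() {
  std::cout << formatAddress(4095, "", "r15") << '\n'; // 4095(%r15)
  std::cout << formatAddress(8, "r2", "r3") << '\n';   // 8(%r2,%r3)
  std::cout << formatAddress(0, "", "") << '\n';       // 0
}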
diff --git a/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h b/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h new file mode 100644 index 0000000..7ca386f --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h @@ -0,0 +1,74 @@ +//==- SystemZInstPrinter.h - Convert SystemZ MCInst to assembly --*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class prints a SystemZ MCInst to a .s file. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SYSTEMZ_INSTPRINTER_SYSTEMZINSTPRINTER_H +#define LLVM_LIB_TARGET_SYSTEMZ_INSTPRINTER_SYSTEMZINSTPRINTER_H + +#include "llvm/MC/MCInstPrinter.h" + +namespace llvm { +class MCOperand; + +class SystemZInstPrinter : public MCInstPrinter { +public: + SystemZInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI) + : MCInstPrinter(MAI, MII, MRI) {} + + // Automatically generated by tblgen. + void printInstruction(const MCInst *MI, raw_ostream &O); + static const char *getRegisterName(unsigned RegNo); + + // Print an address with the given base, displacement and index. + static void printAddress(unsigned Base, int64_t Disp, unsigned Index, + raw_ostream &O); + + // Print the given operand. + static void printOperand(const MCOperand &MO, const MCAsmInfo *MAI, + raw_ostream &O); + + // Override MCInstPrinter. + void printRegName(raw_ostream &O, unsigned RegNo) const override; + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, + const MCSubtargetInfo &STI) override; + +private: + // Print various types of operand. + void printOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printBDAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printBDXAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printBDLAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printBDVAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printU1ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printU2ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printU3ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printU4ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printU6ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printS8ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printU8ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printU12ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printS16ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printU16ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printS32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printU32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printPCRelOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printPCRelTLSOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printAccessRegOperand(const MCInst *MI, int OpNum, raw_ostream &O); + + // Print the mnemonic for a condition-code mask ("ne", "lh", etc.) + // This forms part of the instruction name rather than the operand list. 
+ void printCond4Operand(const MCInst *MI, int OpNum, raw_ostream &O); +}; +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp new file mode 100644 index 0000000..57eebe1 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp @@ -0,0 +1,117 @@ +//===-- SystemZMCAsmBackend.cpp - SystemZ assembler backend ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/SystemZMCTargetDesc.h" +#include "MCTargetDesc/SystemZMCFixups.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCFixupKindInfo.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCObjectWriter.h" + +using namespace llvm; + +// Value is a fully-resolved relocation value: Symbol + Addend [- Pivot]. +// Return the bits that should be installed in a relocation field for +// fixup kind Kind. +static uint64_t extractBitsForFixup(MCFixupKind Kind, uint64_t Value) { + if (Kind < FirstTargetFixupKind) + return Value; + + switch (unsigned(Kind)) { + case SystemZ::FK_390_PC16DBL: + case SystemZ::FK_390_PC32DBL: + return (int64_t)Value / 2; + + case SystemZ::FK_390_TLS_CALL: + return 0; + } + + llvm_unreachable("Unknown fixup kind!"); +} + +namespace { +class SystemZMCAsmBackend : public MCAsmBackend { + uint8_t OSABI; +public: + SystemZMCAsmBackend(uint8_t osABI) + : OSABI(osABI) {} + + // Override MCAsmBackend + unsigned getNumFixupKinds() const override { + return SystemZ::NumTargetFixupKinds; + } + const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override; + void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, + uint64_t Value, bool IsPCRel) const override; + bool mayNeedRelaxation(const MCInst &Inst) const override { + return false; + } + bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, + const MCRelaxableFragment *Fragment, + const MCAsmLayout &Layout) const override { + return false; + } + void relaxInstruction(const MCInst &Inst, MCInst &Res) const override { + llvm_unreachable("SystemZ does do not have assembler relaxation"); + } + bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override; + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { + return createSystemZObjectWriter(OS, OSABI); + } +}; +} // end anonymous namespace + +const MCFixupKindInfo & +SystemZMCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { + const static MCFixupKindInfo Infos[SystemZ::NumTargetFixupKinds] = { + { "FK_390_PC16DBL", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, + { "FK_390_PC32DBL", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, + { "FK_390_TLS_CALL", 0, 0, 0 } + }; + + if (Kind < FirstTargetFixupKind) + return MCAsmBackend::getFixupKindInfo(Kind); + + assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && + "Invalid kind!"); + return Infos[Kind - FirstTargetFixupKind]; +} + +void SystemZMCAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, + unsigned DataSize, uint64_t Value, + bool IsPCRel) const { + MCFixupKind Kind = Fixup.getKind(); + unsigned Offset = Fixup.getOffset(); + unsigned Size = (getFixupKindInfo(Kind).TargetSize + 7) / 8; + + assert(Offset + Size <= DataSize && "Invalid fixup offset!"); + + // 
Big-endian insertion of Size bytes. + Value = extractBitsForFixup(Kind, Value); + unsigned ShiftValue = (Size * 8) - 8; + for (unsigned I = 0; I != Size; ++I) { + Data[Offset + I] |= uint8_t(Value >> ShiftValue); + ShiftValue -= 8; + } +} + +bool SystemZMCAsmBackend::writeNopData(uint64_t Count, + MCObjectWriter *OW) const { + for (uint64_t I = 0; I != Count; ++I) + OW->write8(7); + return true; +} + +MCAsmBackend *llvm::createSystemZMCAsmBackend(const Target &T, + const MCRegisterInfo &MRI, + const Triple &TT, StringRef CPU) { + uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS()); + return new SystemZMCAsmBackend(OSABI); +} diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp new file mode 100644 index 0000000..b17977d --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp @@ -0,0 +1,29 @@ +//===-- SystemZMCAsmInfo.cpp - SystemZ asm properties ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "SystemZMCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSectionELF.h" + +using namespace llvm; + +SystemZMCAsmInfo::SystemZMCAsmInfo(const Triple &TT) { + PointerSize = 8; + CalleeSaveStackSlotSize = 8; + IsLittleEndian = false; + + CommentString = "#"; + ZeroDirective = "\t.space\t"; + Data64bitsDirective = "\t.quad\t"; + UsesELFSectionDirectiveForBSS = true; + SupportsDebugInformation = true; + ExceptionsType = ExceptionHandling::DwarfCFI; + + UseIntegratedAssembler = true; +} diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h new file mode 100644 index 0000000..800f892 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h @@ -0,0 +1,26 @@ +//====-- SystemZMCAsmInfo.h - SystemZ asm properties -----------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SYSTEMZ_MCTARGETDESC_SYSTEMZMCASMINFO_H +#define LLVM_LIB_TARGET_SYSTEMZ_MCTARGETDESC_SYSTEMZMCASMINFO_H + +#include "llvm/MC/MCAsmInfoELF.h" +#include "llvm/Support/Compiler.h" + +namespace llvm { +class Triple; + +class SystemZMCAsmInfo : public MCAsmInfoELF { +public: + explicit SystemZMCAsmInfo(const Triple &TT); +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp new file mode 100644 index 0000000..fd52a2e --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp @@ -0,0 +1,243 @@ +//===-- SystemZMCCodeEmitter.cpp - Convert SystemZ code to machine code ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SystemZMCCodeEmitter class. 
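+// It turns each MCInst into a big-endian byte sequence and records any
+// symbolic operands as SystemZ-specific fixups for later relocation.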
+// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/SystemZMCTargetDesc.h" +#include "MCTargetDesc/SystemZMCFixups.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "mccodeemitter" + +namespace { +class SystemZMCCodeEmitter : public MCCodeEmitter { + const MCInstrInfo &MCII; + MCContext &Ctx; + +public: + SystemZMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx) + : MCII(mcii), Ctx(ctx) { + } + + ~SystemZMCCodeEmitter() override {} + + // OVerride MCCodeEmitter. + void encodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const override; + +private: + // Automatically generated by TableGen. + uint64_t getBinaryCodeForInstr(const MCInst &MI, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + + // Called by the TableGen code to get the binary encoding of operand + // MO in MI. Fixups is the list of fixups against MI. + uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + + // Called by the TableGen code to get the binary encoding of an address. + // The index or length, if any, is encoded first, followed by the base, + // followed by the displacement. In a 20-bit displacement, + // the low 12 bits are encoded before the high 8 bits. + uint64_t getBDAddr12Encoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + uint64_t getBDAddr20Encoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + uint64_t getBDXAddr12Encoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + uint64_t getBDXAddr20Encoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + uint64_t getBDLAddr12Len8Encoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + uint64_t getBDVAddr12Encoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + + // Operand OpNum of MI needs a PC-relative fixup of kind Kind at + // Offset bytes from the start of MI. Add the fixup to Fixups + // and return the in-place addend, which since we're a RELA target + // is always 0. If AllowTLS is true and optional operand OpNum + 1 + // is present, also emit a TLS call fixup for it. 
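+  // For example, in "brasl %r14, foo" the 32-bit PC32DBL field starts
+  // 2 bytes into the 6-byte instruction, so the wrappers below pass
+  // Offset = 2: the branch target is measured from the start of the
+  // instruction, while the fixup is applied to the field itself.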
+ uint64_t getPCRelEncoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups, + unsigned Kind, int64_t Offset, + bool AllowTLS) const; + + uint64_t getPC16DBLEncoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + return getPCRelEncoding(MI, OpNum, Fixups, + SystemZ::FK_390_PC16DBL, 2, false); + } + uint64_t getPC32DBLEncoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + return getPCRelEncoding(MI, OpNum, Fixups, + SystemZ::FK_390_PC32DBL, 2, false); + } + uint64_t getPC16DBLTLSEncoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + return getPCRelEncoding(MI, OpNum, Fixups, + SystemZ::FK_390_PC16DBL, 2, true); + } + uint64_t getPC32DBLTLSEncoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + return getPCRelEncoding(MI, OpNum, Fixups, + SystemZ::FK_390_PC32DBL, 2, true); + } +}; +} // end anonymous namespace + +MCCodeEmitter *llvm::createSystemZMCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + MCContext &Ctx) { + return new SystemZMCCodeEmitter(MCII, Ctx); +} + +void SystemZMCCodeEmitter:: +encodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + uint64_t Bits = getBinaryCodeForInstr(MI, Fixups, STI); + unsigned Size = MCII.get(MI.getOpcode()).getSize(); + // Big-endian insertion of Size bytes. + unsigned ShiftValue = (Size * 8) - 8; + for (unsigned I = 0; I != Size; ++I) { + OS << uint8_t(Bits >> ShiftValue); + ShiftValue -= 8; + } +} + +uint64_t SystemZMCCodeEmitter:: +getMachineOpValue(const MCInst &MI, const MCOperand &MO, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + if (MO.isReg()) + return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg()); + if (MO.isImm()) + return static_cast<uint64_t>(MO.getImm()); + llvm_unreachable("Unexpected operand type!"); +} + +uint64_t SystemZMCCodeEmitter:: +getBDAddr12Encoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups, STI); + uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups, STI); + assert(isUInt<4>(Base) && isUInt<12>(Disp)); + return (Base << 12) | Disp; +} + +uint64_t SystemZMCCodeEmitter:: +getBDAddr20Encoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups, STI); + uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups, STI); + assert(isUInt<4>(Base) && isInt<20>(Disp)); + return (Base << 20) | ((Disp & 0xfff) << 8) | ((Disp & 0xff000) >> 12); +} + +uint64_t SystemZMCCodeEmitter:: +getBDXAddr12Encoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups, STI); + uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups, STI); + uint64_t Index = getMachineOpValue(MI, MI.getOperand(OpNum + 2), Fixups, STI); + assert(isUInt<4>(Base) && isUInt<12>(Disp) && isUInt<4>(Index)); + return (Index << 16) | (Base << 12) | Disp; +} + +uint64_t SystemZMCCodeEmitter:: +getBDXAddr20Encoding(const MCInst &MI, unsigned OpNum, + 
SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups, STI); + uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups, STI); + uint64_t Index = getMachineOpValue(MI, MI.getOperand(OpNum + 2), Fixups, STI); + assert(isUInt<4>(Base) && isInt<20>(Disp) && isUInt<4>(Index)); + return (Index << 24) | (Base << 20) | ((Disp & 0xfff) << 8) + | ((Disp & 0xff000) >> 12); +} + +uint64_t SystemZMCCodeEmitter:: +getBDLAddr12Len8Encoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups, STI); + uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups, STI); + uint64_t Len = getMachineOpValue(MI, MI.getOperand(OpNum + 2), Fixups, STI) - 1; + assert(isUInt<4>(Base) && isUInt<12>(Disp) && isUInt<8>(Len)); + return (Len << 16) | (Base << 12) | Disp; +} + +uint64_t SystemZMCCodeEmitter:: +getBDVAddr12Encoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups, STI); + uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups, STI); + uint64_t Index = getMachineOpValue(MI, MI.getOperand(OpNum + 2), Fixups, STI); + assert(isUInt<4>(Base) && isUInt<12>(Disp) && isUInt<5>(Index)); + return (Index << 16) | (Base << 12) | Disp; +} + +uint64_t +SystemZMCCodeEmitter::getPCRelEncoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups, + unsigned Kind, int64_t Offset, + bool AllowTLS) const { + const MCOperand &MO = MI.getOperand(OpNum); + const MCExpr *Expr; + if (MO.isImm()) + Expr = MCConstantExpr::create(MO.getImm() + Offset, Ctx); + else { + Expr = MO.getExpr(); + if (Offset) { + // The operand value is relative to the start of MI, but the fixup + // is relative to the operand field itself, which is Offset bytes + // into MI. Add Offset to the relocation value to cancel out + // this difference. + const MCExpr *OffsetExpr = MCConstantExpr::create(Offset, Ctx); + Expr = MCBinaryExpr::createAdd(Expr, OffsetExpr, Ctx); + } + } + Fixups.push_back(MCFixup::create(Offset, Expr, (MCFixupKind)Kind)); + + // Output the fixup for the TLS marker if present. + if (AllowTLS && OpNum + 1 < MI.getNumOperands()) { + const MCOperand &MOTLS = MI.getOperand(OpNum + 1); + Fixups.push_back(MCFixup::create(0, MOTLS.getExpr(), + (MCFixupKind)SystemZ::FK_390_TLS_CALL)); + } + return 0; +} + +#include "SystemZGenMCCodeEmitter.inc" diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h new file mode 100644 index 0000000..229ab5d --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h @@ -0,0 +1,30 @@ +//===-- SystemZMCFixups.h - SystemZ-specific fixup entries ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SYSTEMZ_MCTARGETDESC_SYSTEMZMCFIXUPS_H +#define LLVM_LIB_TARGET_SYSTEMZ_MCTARGETDESC_SYSTEMZMCFIXUPS_H + +#include "llvm/MC/MCFixup.h" + +namespace llvm { +namespace SystemZ { +enum FixupKind { + // These correspond directly to R_390_* relocations. 
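+  // For example, FK_390_PC16DBL and FK_390_PC32DBL carry halfword-scaled
+  // PC-relative values and become R_390_PC16DBL / R_390_PC32DBL, while
+  // FK_390_TLS_CALL is a zero-size marker that is emitted as
+  // R_390_TLS_GDCALL or R_390_TLS_LDCALL depending on the TLS model.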
+ FK_390_PC16DBL = FirstTargetFixupKind, + FK_390_PC32DBL, + FK_390_TLS_CALL, + + // Marker + LastTargetFixupKind, + NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind +}; +} // end namespace SystemZ +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp new file mode 100644 index 0000000..ee1af02 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp @@ -0,0 +1,159 @@ +//===-- SystemZMCObjectWriter.cpp - SystemZ ELF writer --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/SystemZMCTargetDesc.h" +#include "MCTargetDesc/SystemZMCFixups.h" +#include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCValue.h" + +using namespace llvm; + +namespace { +class SystemZObjectWriter : public MCELFObjectTargetWriter { +public: + SystemZObjectWriter(uint8_t OSABI); + + ~SystemZObjectWriter() override; + +protected: + // Override MCELFObjectTargetWriter. + unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, + bool IsPCRel) const override; +}; +} // end anonymous namespace + +SystemZObjectWriter::SystemZObjectWriter(uint8_t OSABI) + : MCELFObjectTargetWriter(/*Is64Bit=*/true, OSABI, ELF::EM_S390, + /*HasRelocationAddend=*/ true) {} + +SystemZObjectWriter::~SystemZObjectWriter() { +} + +// Return the relocation type for an absolute value of MCFixupKind Kind. +static unsigned getAbsoluteReloc(unsigned Kind) { + switch (Kind) { + case FK_Data_1: return ELF::R_390_8; + case FK_Data_2: return ELF::R_390_16; + case FK_Data_4: return ELF::R_390_32; + case FK_Data_8: return ELF::R_390_64; + } + llvm_unreachable("Unsupported absolute address"); +} + +// Return the relocation type for a PC-relative value of MCFixupKind Kind. +static unsigned getPCRelReloc(unsigned Kind) { + switch (Kind) { + case FK_Data_2: return ELF::R_390_PC16; + case FK_Data_4: return ELF::R_390_PC32; + case FK_Data_8: return ELF::R_390_PC64; + case SystemZ::FK_390_PC16DBL: return ELF::R_390_PC16DBL; + case SystemZ::FK_390_PC32DBL: return ELF::R_390_PC32DBL; + } + llvm_unreachable("Unsupported PC-relative address"); +} + +// Return the R_390_TLS_LE* relocation type for MCFixupKind Kind. +static unsigned getTLSLEReloc(unsigned Kind) { + switch (Kind) { + case FK_Data_4: return ELF::R_390_TLS_LE32; + case FK_Data_8: return ELF::R_390_TLS_LE64; + } + llvm_unreachable("Unsupported absolute address"); +} + +// Return the R_390_TLS_LDO* relocation type for MCFixupKind Kind. +static unsigned getTLSLDOReloc(unsigned Kind) { + switch (Kind) { + case FK_Data_4: return ELF::R_390_TLS_LDO32; + case FK_Data_8: return ELF::R_390_TLS_LDO64; + } + llvm_unreachable("Unsupported absolute address"); +} + +// Return the R_390_TLS_LDM* relocation type for MCFixupKind Kind. +static unsigned getTLSLDMReloc(unsigned Kind) { + switch (Kind) { + case FK_Data_4: return ELF::R_390_TLS_LDM32; + case FK_Data_8: return ELF::R_390_TLS_LDM64; + case SystemZ::FK_390_TLS_CALL: return ELF::R_390_TLS_LDCALL; + } + llvm_unreachable("Unsupported absolute address"); +} + +// Return the R_390_TLS_GD* relocation type for MCFixupKind Kind. 
+static unsigned getTLSGDReloc(unsigned Kind) { + switch (Kind) { + case FK_Data_4: return ELF::R_390_TLS_GD32; + case FK_Data_8: return ELF::R_390_TLS_GD64; + case SystemZ::FK_390_TLS_CALL: return ELF::R_390_TLS_GDCALL; + } + llvm_unreachable("Unsupported absolute address"); +} + +// Return the PLT relocation counterpart of MCFixupKind Kind. +static unsigned getPLTReloc(unsigned Kind) { + switch (Kind) { + case SystemZ::FK_390_PC16DBL: return ELF::R_390_PLT16DBL; + case SystemZ::FK_390_PC32DBL: return ELF::R_390_PLT32DBL; + } + llvm_unreachable("Unsupported absolute address"); +} + +unsigned SystemZObjectWriter::GetRelocType(const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel) const { + MCSymbolRefExpr::VariantKind Modifier = Target.getAccessVariant(); + unsigned Kind = Fixup.getKind(); + switch (Modifier) { + case MCSymbolRefExpr::VK_None: + if (IsPCRel) + return getPCRelReloc(Kind); + return getAbsoluteReloc(Kind); + + case MCSymbolRefExpr::VK_NTPOFF: + assert(!IsPCRel && "NTPOFF shouldn't be PC-relative"); + return getTLSLEReloc(Kind); + + case MCSymbolRefExpr::VK_INDNTPOFF: + if (IsPCRel && Kind == SystemZ::FK_390_PC32DBL) + return ELF::R_390_TLS_IEENT; + llvm_unreachable("Only PC-relative INDNTPOFF accesses are supported for now"); + + case MCSymbolRefExpr::VK_DTPOFF: + assert(!IsPCRel && "DTPOFF shouldn't be PC-relative"); + return getTLSLDOReloc(Kind); + + case MCSymbolRefExpr::VK_TLSLDM: + assert(!IsPCRel && "TLSLDM shouldn't be PC-relative"); + return getTLSLDMReloc(Kind); + + case MCSymbolRefExpr::VK_TLSGD: + assert(!IsPCRel && "TLSGD shouldn't be PC-relative"); + return getTLSGDReloc(Kind); + + case MCSymbolRefExpr::VK_GOT: + if (IsPCRel && Kind == SystemZ::FK_390_PC32DBL) + return ELF::R_390_GOTENT; + llvm_unreachable("Only PC-relative GOT accesses are supported for now"); + + case MCSymbolRefExpr::VK_PLT: + assert(IsPCRel && "@PLT shouldt be PC-relative"); + return getPLTReloc(Kind); + + default: + llvm_unreachable("Modifier not supported"); + } +} + +MCObjectWriter *llvm::createSystemZObjectWriter(raw_pwrite_stream &OS, + uint8_t OSABI) { + MCELFObjectTargetWriter *MOTW = new SystemZObjectWriter(OSABI); + return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/false); +} diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp new file mode 100644 index 0000000..2115d44 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp @@ -0,0 +1,250 @@ +//===-- SystemZMCTargetDesc.cpp - SystemZ target descriptions -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "SystemZMCTargetDesc.h" +#include "InstPrinter/SystemZInstPrinter.h" +#include "SystemZMCAsmInfo.h" +#include "llvm/MC/MCCodeGenInfo.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/TargetRegistry.h" + +using namespace llvm; + +#define GET_INSTRINFO_MC_DESC +#include "SystemZGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_MC_DESC +#include "SystemZGenSubtargetInfo.inc" + +#define GET_REGINFO_MC_DESC +#include "SystemZGenRegisterInfo.inc" + +const unsigned SystemZMC::GR32Regs[16] = { + SystemZ::R0L, SystemZ::R1L, SystemZ::R2L, SystemZ::R3L, + SystemZ::R4L, SystemZ::R5L, SystemZ::R6L, SystemZ::R7L, + SystemZ::R8L, SystemZ::R9L, SystemZ::R10L, SystemZ::R11L, + SystemZ::R12L, SystemZ::R13L, SystemZ::R14L, SystemZ::R15L +}; + +const unsigned SystemZMC::GRH32Regs[16] = { + SystemZ::R0H, SystemZ::R1H, SystemZ::R2H, SystemZ::R3H, + SystemZ::R4H, SystemZ::R5H, SystemZ::R6H, SystemZ::R7H, + SystemZ::R8H, SystemZ::R9H, SystemZ::R10H, SystemZ::R11H, + SystemZ::R12H, SystemZ::R13H, SystemZ::R14H, SystemZ::R15H +}; + +const unsigned SystemZMC::GR64Regs[16] = { + SystemZ::R0D, SystemZ::R1D, SystemZ::R2D, SystemZ::R3D, + SystemZ::R4D, SystemZ::R5D, SystemZ::R6D, SystemZ::R7D, + SystemZ::R8D, SystemZ::R9D, SystemZ::R10D, SystemZ::R11D, + SystemZ::R12D, SystemZ::R13D, SystemZ::R14D, SystemZ::R15D +}; + +const unsigned SystemZMC::GR128Regs[16] = { + SystemZ::R0Q, 0, SystemZ::R2Q, 0, + SystemZ::R4Q, 0, SystemZ::R6Q, 0, + SystemZ::R8Q, 0, SystemZ::R10Q, 0, + SystemZ::R12Q, 0, SystemZ::R14Q, 0 +}; + +const unsigned SystemZMC::FP32Regs[16] = { + SystemZ::F0S, SystemZ::F1S, SystemZ::F2S, SystemZ::F3S, + SystemZ::F4S, SystemZ::F5S, SystemZ::F6S, SystemZ::F7S, + SystemZ::F8S, SystemZ::F9S, SystemZ::F10S, SystemZ::F11S, + SystemZ::F12S, SystemZ::F13S, SystemZ::F14S, SystemZ::F15S +}; + +const unsigned SystemZMC::FP64Regs[16] = { + SystemZ::F0D, SystemZ::F1D, SystemZ::F2D, SystemZ::F3D, + SystemZ::F4D, SystemZ::F5D, SystemZ::F6D, SystemZ::F7D, + SystemZ::F8D, SystemZ::F9D, SystemZ::F10D, SystemZ::F11D, + SystemZ::F12D, SystemZ::F13D, SystemZ::F14D, SystemZ::F15D +}; + +const unsigned SystemZMC::FP128Regs[16] = { + SystemZ::F0Q, SystemZ::F1Q, 0, 0, + SystemZ::F4Q, SystemZ::F5Q, 0, 0, + SystemZ::F8Q, SystemZ::F9Q, 0, 0, + SystemZ::F12Q, SystemZ::F13Q, 0, 0 +}; + +const unsigned SystemZMC::VR32Regs[32] = { + SystemZ::F0S, SystemZ::F1S, SystemZ::F2S, SystemZ::F3S, + SystemZ::F4S, SystemZ::F5S, SystemZ::F6S, SystemZ::F7S, + SystemZ::F8S, SystemZ::F9S, SystemZ::F10S, SystemZ::F11S, + SystemZ::F12S, SystemZ::F13S, SystemZ::F14S, SystemZ::F15S, + SystemZ::F16S, SystemZ::F17S, SystemZ::F18S, SystemZ::F19S, + SystemZ::F20S, SystemZ::F21S, SystemZ::F22S, SystemZ::F23S, + SystemZ::F24S, SystemZ::F25S, SystemZ::F26S, SystemZ::F27S, + SystemZ::F28S, SystemZ::F29S, SystemZ::F30S, SystemZ::F31S +}; + +const unsigned SystemZMC::VR64Regs[32] = { + SystemZ::F0D, SystemZ::F1D, SystemZ::F2D, SystemZ::F3D, + SystemZ::F4D, SystemZ::F5D, SystemZ::F6D, SystemZ::F7D, + SystemZ::F8D, SystemZ::F9D, SystemZ::F10D, SystemZ::F11D, + SystemZ::F12D, SystemZ::F13D, SystemZ::F14D, SystemZ::F15D, + SystemZ::F16D, SystemZ::F17D, SystemZ::F18D, SystemZ::F19D, + SystemZ::F20D, SystemZ::F21D, SystemZ::F22D, SystemZ::F23D, + SystemZ::F24D, SystemZ::F25D, SystemZ::F26D, SystemZ::F27D, + SystemZ::F28D, SystemZ::F29D, SystemZ::F30D, SystemZ::F31D 
+}; + +const unsigned SystemZMC::VR128Regs[32] = { + SystemZ::V0, SystemZ::V1, SystemZ::V2, SystemZ::V3, + SystemZ::V4, SystemZ::V5, SystemZ::V6, SystemZ::V7, + SystemZ::V8, SystemZ::V9, SystemZ::V10, SystemZ::V11, + SystemZ::V12, SystemZ::V13, SystemZ::V14, SystemZ::V15, + SystemZ::V16, SystemZ::V17, SystemZ::V18, SystemZ::V19, + SystemZ::V20, SystemZ::V21, SystemZ::V22, SystemZ::V23, + SystemZ::V24, SystemZ::V25, SystemZ::V26, SystemZ::V27, + SystemZ::V28, SystemZ::V29, SystemZ::V30, SystemZ::V31 +}; + +unsigned SystemZMC::getFirstReg(unsigned Reg) { + static unsigned Map[SystemZ::NUM_TARGET_REGS]; + static bool Initialized = false; + if (!Initialized) { + for (unsigned I = 0; I < 16; ++I) { + Map[GR32Regs[I]] = I; + Map[GRH32Regs[I]] = I; + Map[GR64Regs[I]] = I; + Map[GR128Regs[I]] = I; + Map[FP128Regs[I]] = I; + } + for (unsigned I = 0; I < 32; ++I) { + Map[VR32Regs[I]] = I; + Map[VR64Regs[I]] = I; + Map[VR128Regs[I]] = I; + } + } + assert(Reg < SystemZ::NUM_TARGET_REGS); + return Map[Reg]; +} + +static MCAsmInfo *createSystemZMCAsmInfo(const MCRegisterInfo &MRI, + const Triple &TT) { + MCAsmInfo *MAI = new SystemZMCAsmInfo(TT); + MCCFIInstruction Inst = + MCCFIInstruction::createDefCfa(nullptr, + MRI.getDwarfRegNum(SystemZ::R15D, true), + SystemZMC::CFAOffsetFromInitialSP); + MAI->addInitialFrameState(Inst); + return MAI; +} + +static MCInstrInfo *createSystemZMCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitSystemZMCInstrInfo(X); + return X; +} + +static MCRegisterInfo *createSystemZMCRegisterInfo(const Triple &TT) { + MCRegisterInfo *X = new MCRegisterInfo(); + InitSystemZMCRegisterInfo(X, SystemZ::R14D); + return X; +} + +static MCSubtargetInfo * +createSystemZMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { + return createSystemZMCSubtargetInfoImpl(TT, CPU, FS); +} + +static MCCodeGenInfo *createSystemZMCCodeGenInfo(const Triple &TT, + Reloc::Model RM, + CodeModel::Model CM, + CodeGenOpt::Level OL) { + MCCodeGenInfo *X = new MCCodeGenInfo(); + + // Static code is suitable for use in a dynamic executable; there is no + // separate DynamicNoPIC model. + if (RM == Reloc::Default || RM == Reloc::DynamicNoPIC) + RM = Reloc::Static; + + // For SystemZ we define the models as follows: + // + // Small: BRASL can call any function and will use a stub if necessary. + // Locally-binding symbols will always be in range of LARL. + // + // Medium: BRASL can call any function and will use a stub if necessary. + // GOT slots and locally-defined text will always be in range + // of LARL, but other symbols might not be. + // + // Large: Equivalent to Medium for now. + // + // Kernel: Equivalent to Medium for now. + // + // This means that any PIC module smaller than 4GB meets the + // requirements of Small, so Small seems like the best default there. + // + // All symbols bind locally in a non-PIC module, so the choice is less + // obvious. There are two cases: + // + // - When creating an executable, PLTs and copy relocations allow + // us to treat external symbols as part of the executable. + // Any executable smaller than 4GB meets the requirements of Small, + // so that seems like the best default. + // + // - When creating JIT code, stubs will be in range of BRASL if the + // image is less than 4GB in size. GOT entries will likewise be + // in range of LARL. However, the JIT environment has no equivalent + // of copy relocs, so locally-binding data symbols might not be in + // the range of LARL. We need the Medium model in that case. 
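+  // In short: normal compilation defaults to the Small model, while JIT
+  // compilation defaults to Small for PIC and Medium otherwise, which is
+  // what the code below selects.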
+ if (CM == CodeModel::Default) + CM = CodeModel::Small; + else if (CM == CodeModel::JITDefault) + CM = RM == Reloc::PIC_ ? CodeModel::Small : CodeModel::Medium; + X->initMCCodeGenInfo(RM, CM, OL); + return X; +} + +static MCInstPrinter *createSystemZMCInstPrinter(const Triple &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI) { + return new SystemZInstPrinter(MAI, MII, MRI); +} + +extern "C" void LLVMInitializeSystemZTargetMC() { + // Register the MCAsmInfo. + TargetRegistry::RegisterMCAsmInfo(TheSystemZTarget, + createSystemZMCAsmInfo); + + // Register the MCCodeGenInfo. + TargetRegistry::RegisterMCCodeGenInfo(TheSystemZTarget, + createSystemZMCCodeGenInfo); + + // Register the MCCodeEmitter. + TargetRegistry::RegisterMCCodeEmitter(TheSystemZTarget, + createSystemZMCCodeEmitter); + + // Register the MCInstrInfo. + TargetRegistry::RegisterMCInstrInfo(TheSystemZTarget, + createSystemZMCInstrInfo); + + // Register the MCRegisterInfo. + TargetRegistry::RegisterMCRegInfo(TheSystemZTarget, + createSystemZMCRegisterInfo); + + // Register the MCSubtargetInfo. + TargetRegistry::RegisterMCSubtargetInfo(TheSystemZTarget, + createSystemZMCSubtargetInfo); + + // Register the MCAsmBackend. + TargetRegistry::RegisterMCAsmBackend(TheSystemZTarget, + createSystemZMCAsmBackend); + + // Register the MCInstPrinter. + TargetRegistry::RegisterMCInstPrinter(TheSystemZTarget, + createSystemZMCInstPrinter); +} diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h new file mode 100644 index 0000000..0db48fe --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h @@ -0,0 +1,105 @@ +//===-- SystemZMCTargetDesc.h - SystemZ target descriptions -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SYSTEMZ_MCTARGETDESC_SYSTEMZMCTARGETDESC_H +#define LLVM_LIB_TARGET_SYSTEMZ_MCTARGETDESC_SYSTEMZMCTARGETDESC_H + +#include "llvm/Support/DataTypes.h" + +namespace llvm { + +class MCAsmBackend; +class MCCodeEmitter; +class MCContext; +class MCInstrInfo; +class MCObjectWriter; +class MCRegisterInfo; +class MCSubtargetInfo; +class StringRef; +class Target; +class Triple; +class raw_pwrite_stream; +class raw_ostream; + +extern Target TheSystemZTarget; + +namespace SystemZMC { +// How many bytes are in the ABI-defined, caller-allocated part of +// a stack frame. +const int64_t CallFrameSize = 160; + +// The offset of the DWARF CFA from the incoming stack pointer. +const int64_t CFAOffsetFromInitialSP = CallFrameSize; + +// Maps of asm register numbers to LLVM register numbers, with 0 indicating +// an invalid register. In principle we could use 32-bit and 64-bit register +// classes directly, provided that we relegated the GPR allocation order +// in SystemZRegisterInfo.td to an AltOrder and left the default order +// as %r0-%r15. It seems better to provide the same interface for +// all classes though. 
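+// For example, GR64Regs[15] is SystemZ::R15D, so assembler operand %r15
+// used as a 64-bit register maps to R15D; getFirstReg() below gives the
+// reverse, LLVM-register-to-number mapping.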
+extern const unsigned GR32Regs[16]; +extern const unsigned GRH32Regs[16]; +extern const unsigned GR64Regs[16]; +extern const unsigned GR128Regs[16]; +extern const unsigned FP32Regs[16]; +extern const unsigned FP64Regs[16]; +extern const unsigned FP128Regs[16]; +extern const unsigned VR32Regs[32]; +extern const unsigned VR64Regs[32]; +extern const unsigned VR128Regs[32]; + +// Return the 0-based number of the first architectural register that +// contains the given LLVM register. E.g. R1D -> 1. +unsigned getFirstReg(unsigned Reg); + +// Return the given register as a GR64. +inline unsigned getRegAsGR64(unsigned Reg) { + return GR64Regs[getFirstReg(Reg)]; +} + +// Return the given register as a low GR32. +inline unsigned getRegAsGR32(unsigned Reg) { + return GR32Regs[getFirstReg(Reg)]; +} + +// Return the given register as a high GR32. +inline unsigned getRegAsGRH32(unsigned Reg) { + return GRH32Regs[getFirstReg(Reg)]; +} + +// Return the given register as a VR128. +inline unsigned getRegAsVR128(unsigned Reg) { + return VR128Regs[getFirstReg(Reg)]; +} +} // end namespace SystemZMC + +MCCodeEmitter *createSystemZMCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + MCContext &Ctx); + +MCAsmBackend *createSystemZMCAsmBackend(const Target &T, + const MCRegisterInfo &MRI, + const Triple &TT, StringRef CPU); + +MCObjectWriter *createSystemZObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI); +} // end namespace llvm + +// Defines symbolic names for SystemZ registers. +// This defines a mapping from register name to register number. +#define GET_REGINFO_ENUM +#include "SystemZGenRegisterInfo.inc" + +// Defines symbolic names for the SystemZ instructions. +#define GET_INSTRINFO_ENUM +#include "SystemZGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_ENUM +#include "SystemZGenSubtargetInfo.inc" + +#endif diff --git a/contrib/llvm/lib/Target/SystemZ/README.txt b/contrib/llvm/lib/Target/SystemZ/README.txt new file mode 100644 index 0000000..cd367d6 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/README.txt @@ -0,0 +1,168 @@ +//===---------------------------------------------------------------------===// +// Random notes about and ideas for the SystemZ backend. +//===---------------------------------------------------------------------===// + +The initial backend is deliberately restricted to z10. We should add support +for later architectures at some point. + +-- + +SystemZDAGToDAGISel::SelectInlineAsmMemoryOperand() is passed "m" for all +inline asm memory constraints; it doesn't get to see the original constraint. +This means that it must conservatively treat all inline asm constraints +as the most restricted type, "R". + +-- + +If an inline asm ties an i32 "r" result to an i64 input, the input +will be treated as an i32, leaving the upper bits uninitialised. +For example: + +define void @f4(i32 *%dst) { + %val = call i32 asm "blah $0", "=r,0" (i64 103) + store i32 %val, i32 *%dst + ret void +} + +from CodeGen/SystemZ/asm-09.ll will use LHI rather than LGHI. +to load 103. This seems to be a general target-independent problem. + +-- + +The tuning of the choice between LOAD ADDRESS (LA) and addition in +SystemZISelDAGToDAG.cpp is suspect. It should be tweaked based on +performance measurements. + +-- + +There is no scheduling support. + +-- + +We don't use the BRANCH ON INDEX instructions. + +-- + +We might want to use BRANCH ON CONDITION for conditional indirect calls +and conditional returns. + +-- + +We don't use the TEST DATA CLASS instructions. 
+ +-- + +We only use MVC, XC and CLC for constant-length block operations. +We could extend them to variable-length operations too, +using EXECUTE RELATIVE LONG. + +MVCIN, MVCLE and CLCLE may be worthwhile too. + +-- + +We don't use CUSE or the TRANSLATE family of instructions for string +operations. The TRANSLATE ones are probably more difficult to exploit. + +-- + +We don't take full advantage of builtins like fabsl because the calling +conventions require f128s to be returned by invisible reference. + +-- + +ADD LOGICAL WITH SIGNED IMMEDIATE could be useful when we need to +produce a carry. SUBTRACT LOGICAL IMMEDIATE could be useful when we +need to produce a borrow. (Note that there are no memory forms of +ADD LOGICAL WITH CARRY and SUBTRACT LOGICAL WITH BORROW, so the high +part of 128-bit memory operations would probably need to be done +via a register.) + +-- + +We don't use the halfword forms of LOAD REVERSED and STORE REVERSED +(LRVH and STRVH). + +-- + +We don't use ICM or STCM. + +-- + +DAGCombiner doesn't yet fold truncations of extended loads. Functions like: + + unsigned long f (unsigned long x, unsigned short *y) + { + return (x << 32) | *y; + } + +therefore end up as: + + sllg %r2, %r2, 32 + llgh %r0, 0(%r3) + lr %r2, %r0 + br %r14 + +but truncating the load would give: + + sllg %r2, %r2, 32 + lh %r2, 0(%r3) + br %r14 + +-- + +Functions like: + +define i64 @f1(i64 %a) { + %and = and i64 %a, 1 + ret i64 %and +} + +ought to be implemented as: + + lhi %r0, 1 + ngr %r2, %r0 + br %r14 + +but two-address optimisations reverse the order of the AND and force: + + lhi %r0, 1 + ngr %r0, %r2 + lgr %r2, %r0 + br %r14 + +CodeGen/SystemZ/and-04.ll has several examples of this. + +-- + +Out-of-range displacements are usually handled by loading the full +address into a register. In many cases it would be better to create +an anchor point instead. E.g. for: + +define void @f4a(i128 *%aptr, i64 %base) { + %addr = add i64 %base, 524288 + %bptr = inttoptr i64 %addr to i128 * + %a = load volatile i128 *%aptr + %b = load i128 *%bptr + %add = add i128 %a, %b + store i128 %add, i128 *%aptr + ret void +} + +(from CodeGen/SystemZ/int-add-08.ll) we load %base+524288 and %base+524296 +into separate registers, rather than using %base+524288 as a base for both. + +-- + +Dynamic stack allocations round the size to 8 bytes and then allocate +that rounded amount. It would be simpler to subtract the unrounded +size from the copy of the stack pointer and then align the result. +See CodeGen/SystemZ/alloca-01.ll for an example. + +-- + +If needed, we can support 16-byte atomics using LPQ, STPQ and CSDG. + +-- + +We might want to model all access registers and use them to spill +32-bit values. diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZ.h b/contrib/llvm/lib/Target/SystemZ/SystemZ.h new file mode 100644 index 0000000..cafe2c5 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZ.h @@ -0,0 +1,143 @@ +//==- SystemZ.h - Top-Level Interface for SystemZ representation -*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the entry points for global functions defined in +// the LLVM SystemZ backend. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZ_H +#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZ_H + +#include "MCTargetDesc/SystemZMCTargetDesc.h" +#include "llvm/Support/CodeGen.h" + +namespace llvm { +class SystemZTargetMachine; +class FunctionPass; + +namespace SystemZ { +// Condition-code mask values. +const unsigned CCMASK_0 = 1 << 3; +const unsigned CCMASK_1 = 1 << 2; +const unsigned CCMASK_2 = 1 << 1; +const unsigned CCMASK_3 = 1 << 0; +const unsigned CCMASK_ANY = CCMASK_0 | CCMASK_1 | CCMASK_2 | CCMASK_3; + +// Condition-code mask assignments for integer and floating-point +// comparisons. +const unsigned CCMASK_CMP_EQ = CCMASK_0; +const unsigned CCMASK_CMP_LT = CCMASK_1; +const unsigned CCMASK_CMP_GT = CCMASK_2; +const unsigned CCMASK_CMP_NE = CCMASK_CMP_LT | CCMASK_CMP_GT; +const unsigned CCMASK_CMP_LE = CCMASK_CMP_EQ | CCMASK_CMP_LT; +const unsigned CCMASK_CMP_GE = CCMASK_CMP_EQ | CCMASK_CMP_GT; + +// Condition-code mask assignments for floating-point comparisons only. +const unsigned CCMASK_CMP_UO = CCMASK_3; +const unsigned CCMASK_CMP_O = CCMASK_ANY ^ CCMASK_CMP_UO; + +// All condition-code values produced by comparisons. +const unsigned CCMASK_ICMP = CCMASK_0 | CCMASK_1 | CCMASK_2; +const unsigned CCMASK_FCMP = CCMASK_0 | CCMASK_1 | CCMASK_2 | CCMASK_3; + +// Condition-code mask assignments for CS. +const unsigned CCMASK_CS_EQ = CCMASK_0; +const unsigned CCMASK_CS_NE = CCMASK_1; +const unsigned CCMASK_CS = CCMASK_0 | CCMASK_1; + +// Condition-code mask assignments for a completed SRST loop. +const unsigned CCMASK_SRST_FOUND = CCMASK_1; +const unsigned CCMASK_SRST_NOTFOUND = CCMASK_2; +const unsigned CCMASK_SRST = CCMASK_1 | CCMASK_2; + +// Condition-code mask assignments for TEST UNDER MASK. +const unsigned CCMASK_TM_ALL_0 = CCMASK_0; +const unsigned CCMASK_TM_MIXED_MSB_0 = CCMASK_1; +const unsigned CCMASK_TM_MIXED_MSB_1 = CCMASK_2; +const unsigned CCMASK_TM_ALL_1 = CCMASK_3; +const unsigned CCMASK_TM_SOME_0 = CCMASK_TM_ALL_1 ^ CCMASK_ANY; +const unsigned CCMASK_TM_SOME_1 = CCMASK_TM_ALL_0 ^ CCMASK_ANY; +const unsigned CCMASK_TM_MSB_0 = CCMASK_0 | CCMASK_1; +const unsigned CCMASK_TM_MSB_1 = CCMASK_2 | CCMASK_3; +const unsigned CCMASK_TM = CCMASK_ANY; + +// Condition-code mask assignments for TRANSACTION_BEGIN. +const unsigned CCMASK_TBEGIN_STARTED = CCMASK_0; +const unsigned CCMASK_TBEGIN_INDETERMINATE = CCMASK_1; +const unsigned CCMASK_TBEGIN_TRANSIENT = CCMASK_2; +const unsigned CCMASK_TBEGIN_PERSISTENT = CCMASK_3; +const unsigned CCMASK_TBEGIN = CCMASK_ANY; + +// Condition-code mask assignments for TRANSACTION_END. +const unsigned CCMASK_TEND_TX = CCMASK_0; +const unsigned CCMASK_TEND_NOTX = CCMASK_2; +const unsigned CCMASK_TEND = CCMASK_TEND_TX | CCMASK_TEND_NOTX; + +// Condition-code mask assignments for vector comparisons (and similar +// operations). +const unsigned CCMASK_VCMP_ALL = CCMASK_0; +const unsigned CCMASK_VCMP_MIXED = CCMASK_1; +const unsigned CCMASK_VCMP_NONE = CCMASK_3; +const unsigned CCMASK_VCMP = CCMASK_0 | CCMASK_1 | CCMASK_3; + +// The position of the low CC bit in an IPM result. +const unsigned IPM_CC = 28; + +// Mask assignments for PFD. +const unsigned PFD_READ = 1; +const unsigned PFD_WRITE = 2; + +// Number of bits in a vector register. +const unsigned VectorBits = 128; + +// Number of bytes in a vector register (and consequently the number of +// bytes in a general permute vector). 
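+// (With VectorBits = 128 this works out to 16 bytes, i.e. a general
+// permute vector has 16 byte elements.)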
+const unsigned VectorBytes = VectorBits / 8; + +// Return true if Val fits an LLILL operand. +static inline bool isImmLL(uint64_t Val) { + return (Val & ~0x000000000000ffffULL) == 0; +} + +// Return true if Val fits an LLILH operand. +static inline bool isImmLH(uint64_t Val) { + return (Val & ~0x00000000ffff0000ULL) == 0; +} + +// Return true if Val fits an LLIHL operand. +static inline bool isImmHL(uint64_t Val) { + return (Val & ~0x00000ffff00000000ULL) == 0; +} + +// Return true if Val fits an LLIHH operand. +static inline bool isImmHH(uint64_t Val) { + return (Val & ~0xffff000000000000ULL) == 0; +} + +// Return true if Val fits an LLILF operand. +static inline bool isImmLF(uint64_t Val) { + return (Val & ~0x00000000ffffffffULL) == 0; +} + +// Return true if Val fits an LLIHF operand. +static inline bool isImmHF(uint64_t Val) { + return (Val & ~0xffffffff00000000ULL) == 0; +} +} // end namespace SystemZ + +FunctionPass *createSystemZISelDag(SystemZTargetMachine &TM, + CodeGenOpt::Level OptLevel); +FunctionPass *createSystemZElimComparePass(SystemZTargetMachine &TM); +FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM); +FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM); +FunctionPass *createSystemZLDCleanupPass(SystemZTargetMachine &TM); +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZ.td b/contrib/llvm/lib/Target/SystemZ/SystemZ.td new file mode 100644 index 0000000..d4d636d --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZ.td @@ -0,0 +1,63 @@ +//===-- SystemZ.td - Describe the SystemZ target machine -----*- tblgen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Target-independent interfaces which we are implementing +//===----------------------------------------------------------------------===// + +include "llvm/Target/Target.td" + +//===----------------------------------------------------------------------===// +// SystemZ supported processors and features +//===----------------------------------------------------------------------===// + +include "SystemZProcessors.td" + +//===----------------------------------------------------------------------===// +// Register file description +//===----------------------------------------------------------------------===// + +include "SystemZRegisterInfo.td" + +//===----------------------------------------------------------------------===// +// Calling convention description +//===----------------------------------------------------------------------===// + +include "SystemZCallingConv.td" + +//===----------------------------------------------------------------------===// +// Instruction descriptions +//===----------------------------------------------------------------------===// + +include "SystemZOperators.td" +include "SystemZOperands.td" +include "SystemZPatterns.td" +include "SystemZInstrFormats.td" +include "SystemZInstrInfo.td" +include "SystemZInstrVector.td" +include "SystemZInstrFP.td" + +def SystemZInstrInfo : InstrInfo {} + +//===----------------------------------------------------------------------===// +// Assembly parser +//===----------------------------------------------------------------------===// + +def SystemZAsmParser : AsmParser { + let ShouldEmitMatchRegisterName = 0; +} + +//===----------------------------------------------------------------------===// +// Top-level target declaration +//===----------------------------------------------------------------------===// + +def SystemZ : Target { + let InstructionSet = SystemZInstrInfo; + let AssemblyParsers = [SystemZAsmParser]; +} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp new file mode 100644 index 0000000..7527311 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp @@ -0,0 +1,327 @@ +//===-- SystemZAsmPrinter.cpp - SystemZ LLVM assembly printer -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Streams SystemZ assembly language and associated data, in the form of +// MCInsts and MCExprs respectively. +// +//===----------------------------------------------------------------------===// + +#include "SystemZAsmPrinter.h" +#include "InstPrinter/SystemZInstPrinter.h" +#include "SystemZConstantPoolValue.h" +#include "SystemZMCInstLower.h" +#include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/IR/Mangler.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInstBuilder.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Support/TargetRegistry.h" + +using namespace llvm; + +// Return an RI instruction like MI with opcode Opcode, but with the +// GR64 register operands turned into GR32s. 
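+// For example, the 64-bit pseudo NILL64 on R2D is lowered by this helper
+// to a plain NILL on R2L, i.e. the same immediate operation applied to
+// the low 32 bits of the register.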
+static MCInst lowerRILow(const MachineInstr *MI, unsigned Opcode) { + if (MI->isCompare()) + return MCInstBuilder(Opcode) + .addReg(SystemZMC::getRegAsGR32(MI->getOperand(0).getReg())) + .addImm(MI->getOperand(1).getImm()); + else + return MCInstBuilder(Opcode) + .addReg(SystemZMC::getRegAsGR32(MI->getOperand(0).getReg())) + .addReg(SystemZMC::getRegAsGR32(MI->getOperand(1).getReg())) + .addImm(MI->getOperand(2).getImm()); +} + +// Return an RI instruction like MI with opcode Opcode, but with the +// GR64 register operands turned into GRH32s. +static MCInst lowerRIHigh(const MachineInstr *MI, unsigned Opcode) { + if (MI->isCompare()) + return MCInstBuilder(Opcode) + .addReg(SystemZMC::getRegAsGRH32(MI->getOperand(0).getReg())) + .addImm(MI->getOperand(1).getImm()); + else + return MCInstBuilder(Opcode) + .addReg(SystemZMC::getRegAsGRH32(MI->getOperand(0).getReg())) + .addReg(SystemZMC::getRegAsGRH32(MI->getOperand(1).getReg())) + .addImm(MI->getOperand(2).getImm()); +} + +// Return an RI instruction like MI with opcode Opcode, but with the +// R2 register turned into a GR64. +static MCInst lowerRIEfLow(const MachineInstr *MI, unsigned Opcode) { + return MCInstBuilder(Opcode) + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()) + .addReg(SystemZMC::getRegAsGR64(MI->getOperand(2).getReg())) + .addImm(MI->getOperand(3).getImm()) + .addImm(MI->getOperand(4).getImm()) + .addImm(MI->getOperand(5).getImm()); +} + +static const MCSymbolRefExpr *getTLSGetOffset(MCContext &Context) { + StringRef Name = "__tls_get_offset"; + return MCSymbolRefExpr::create(Context.getOrCreateSymbol(Name), + MCSymbolRefExpr::VK_PLT, + Context); +} + +static const MCSymbolRefExpr *getGlobalOffsetTable(MCContext &Context) { + StringRef Name = "_GLOBAL_OFFSET_TABLE_"; + return MCSymbolRefExpr::create(Context.getOrCreateSymbol(Name), + MCSymbolRefExpr::VK_None, + Context); +} + +// MI loads the high part of a vector from memory. Return an instruction +// that uses replicating vector load Opcode to do the same thing. +static MCInst lowerSubvectorLoad(const MachineInstr *MI, unsigned Opcode) { + return MCInstBuilder(Opcode) + .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg())) + .addReg(MI->getOperand(1).getReg()) + .addImm(MI->getOperand(2).getImm()) + .addReg(MI->getOperand(3).getReg()); +} + +// MI stores the high part of a vector to memory. Return an instruction +// that uses elemental vector store Opcode to do the same thing. 
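+// For example, VST64 is lowered to a VSTEG that stores element 0 of the
+// containing 128-bit register; the trailing .addImm(0) below supplies
+// that element index.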
+static MCInst lowerSubvectorStore(const MachineInstr *MI, unsigned Opcode) { + return MCInstBuilder(Opcode) + .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg())) + .addReg(MI->getOperand(1).getReg()) + .addImm(MI->getOperand(2).getImm()) + .addReg(MI->getOperand(3).getReg()) + .addImm(0); +} + +void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) { + SystemZMCInstLower Lower(MF->getContext(), *this); + MCInst LoweredMI; + switch (MI->getOpcode()) { + case SystemZ::Return: + LoweredMI = MCInstBuilder(SystemZ::BR).addReg(SystemZ::R14D); + break; + + case SystemZ::CallBRASL: + LoweredMI = MCInstBuilder(SystemZ::BRASL) + .addReg(SystemZ::R14D) + .addExpr(Lower.getExpr(MI->getOperand(0), MCSymbolRefExpr::VK_PLT)); + break; + + case SystemZ::CallBASR: + LoweredMI = MCInstBuilder(SystemZ::BASR) + .addReg(SystemZ::R14D) + .addReg(MI->getOperand(0).getReg()); + break; + + case SystemZ::CallJG: + LoweredMI = MCInstBuilder(SystemZ::JG) + .addExpr(Lower.getExpr(MI->getOperand(0), MCSymbolRefExpr::VK_PLT)); + break; + + case SystemZ::CallBR: + LoweredMI = MCInstBuilder(SystemZ::BR).addReg(SystemZ::R1D); + break; + + case SystemZ::TLS_GDCALL: + LoweredMI = MCInstBuilder(SystemZ::BRASL) + .addReg(SystemZ::R14D) + .addExpr(getTLSGetOffset(MF->getContext())) + .addExpr(Lower.getExpr(MI->getOperand(0), MCSymbolRefExpr::VK_TLSGD)); + break; + + case SystemZ::TLS_LDCALL: + LoweredMI = MCInstBuilder(SystemZ::BRASL) + .addReg(SystemZ::R14D) + .addExpr(getTLSGetOffset(MF->getContext())) + .addExpr(Lower.getExpr(MI->getOperand(0), MCSymbolRefExpr::VK_TLSLDM)); + break; + + case SystemZ::GOT: + LoweredMI = MCInstBuilder(SystemZ::LARL) + .addReg(MI->getOperand(0).getReg()) + .addExpr(getGlobalOffsetTable(MF->getContext())); + break; + + case SystemZ::IILF64: + LoweredMI = MCInstBuilder(SystemZ::IILF) + .addReg(SystemZMC::getRegAsGR32(MI->getOperand(0).getReg())) + .addImm(MI->getOperand(2).getImm()); + break; + + case SystemZ::IIHF64: + LoweredMI = MCInstBuilder(SystemZ::IIHF) + .addReg(SystemZMC::getRegAsGRH32(MI->getOperand(0).getReg())) + .addImm(MI->getOperand(2).getImm()); + break; + + case SystemZ::RISBHH: + case SystemZ::RISBHL: + LoweredMI = lowerRIEfLow(MI, SystemZ::RISBHG); + break; + + case SystemZ::RISBLH: + case SystemZ::RISBLL: + LoweredMI = lowerRIEfLow(MI, SystemZ::RISBLG); + break; + + case SystemZ::VLVGP32: + LoweredMI = MCInstBuilder(SystemZ::VLVGP) + .addReg(MI->getOperand(0).getReg()) + .addReg(SystemZMC::getRegAsGR64(MI->getOperand(1).getReg())) + .addReg(SystemZMC::getRegAsGR64(MI->getOperand(2).getReg())); + break; + + case SystemZ::VLR32: + case SystemZ::VLR64: + LoweredMI = MCInstBuilder(SystemZ::VLR) + .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg())) + .addReg(SystemZMC::getRegAsVR128(MI->getOperand(1).getReg())); + break; + + case SystemZ::VL32: + LoweredMI = lowerSubvectorLoad(MI, SystemZ::VLREPF); + break; + + case SystemZ::VL64: + LoweredMI = lowerSubvectorLoad(MI, SystemZ::VLREPG); + break; + + case SystemZ::VST32: + LoweredMI = lowerSubvectorStore(MI, SystemZ::VSTEF); + break; + + case SystemZ::VST64: + LoweredMI = lowerSubvectorStore(MI, SystemZ::VSTEG); + break; + + case SystemZ::LFER: + LoweredMI = MCInstBuilder(SystemZ::VLGVF) + .addReg(SystemZMC::getRegAsGR64(MI->getOperand(0).getReg())) + .addReg(SystemZMC::getRegAsVR128(MI->getOperand(1).getReg())) + .addReg(0).addImm(0); + break; + + case SystemZ::LEFR: + LoweredMI = MCInstBuilder(SystemZ::VLVGF) + .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg())) + 
.addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg())) + .addReg(MI->getOperand(1).getReg()) + .addReg(0).addImm(0); + break; + +#define LOWER_LOW(NAME) \ + case SystemZ::NAME##64: LoweredMI = lowerRILow(MI, SystemZ::NAME); break + + LOWER_LOW(IILL); + LOWER_LOW(IILH); + LOWER_LOW(TMLL); + LOWER_LOW(TMLH); + LOWER_LOW(NILL); + LOWER_LOW(NILH); + LOWER_LOW(NILF); + LOWER_LOW(OILL); + LOWER_LOW(OILH); + LOWER_LOW(OILF); + LOWER_LOW(XILF); + +#undef LOWER_LOW + +#define LOWER_HIGH(NAME) \ + case SystemZ::NAME##64: LoweredMI = lowerRIHigh(MI, SystemZ::NAME); break + + LOWER_HIGH(IIHL); + LOWER_HIGH(IIHH); + LOWER_HIGH(TMHL); + LOWER_HIGH(TMHH); + LOWER_HIGH(NIHL); + LOWER_HIGH(NIHH); + LOWER_HIGH(NIHF); + LOWER_HIGH(OIHL); + LOWER_HIGH(OIHH); + LOWER_HIGH(OIHF); + LOWER_HIGH(XIHF); + +#undef LOWER_HIGH + + case SystemZ::Serialize: + if (MF->getSubtarget<SystemZSubtarget>().hasFastSerialization()) + LoweredMI = MCInstBuilder(SystemZ::AsmBCR) + .addImm(14).addReg(SystemZ::R0D); + else + LoweredMI = MCInstBuilder(SystemZ::AsmBCR) + .addImm(15).addReg(SystemZ::R0D); + break; + + default: + Lower.lower(MI, LoweredMI); + break; + } + EmitToStreamer(*OutStreamer, LoweredMI); +} + +// Convert a SystemZ-specific constant pool modifier into the associated +// MCSymbolRefExpr variant kind. +static MCSymbolRefExpr::VariantKind +getModifierVariantKind(SystemZCP::SystemZCPModifier Modifier) { + switch (Modifier) { + case SystemZCP::TLSGD: return MCSymbolRefExpr::VK_TLSGD; + case SystemZCP::TLSLDM: return MCSymbolRefExpr::VK_TLSLDM; + case SystemZCP::DTPOFF: return MCSymbolRefExpr::VK_DTPOFF; + case SystemZCP::NTPOFF: return MCSymbolRefExpr::VK_NTPOFF; + } + llvm_unreachable("Invalid SystemCPModifier!"); +} + +void SystemZAsmPrinter:: +EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { + auto *ZCPV = static_cast<SystemZConstantPoolValue*>(MCPV); + + const MCExpr *Expr = + MCSymbolRefExpr::create(getSymbol(ZCPV->getGlobalValue()), + getModifierVariantKind(ZCPV->getModifier()), + OutContext); + uint64_t Size = getDataLayout().getTypeAllocSize(ZCPV->getType()); + + OutStreamer->EmitValue(Expr, Size); +} + +bool SystemZAsmPrinter::PrintAsmOperand(const MachineInstr *MI, + unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode, + raw_ostream &OS) { + if (ExtraCode && *ExtraCode == 'n') { + if (!MI->getOperand(OpNo).isImm()) + return true; + OS << -int64_t(MI->getOperand(OpNo).getImm()); + } else { + SystemZMCInstLower Lower(MF->getContext(), *this); + MCOperand MO(Lower.lowerOperand(MI->getOperand(OpNo))); + SystemZInstPrinter::printOperand(MO, MAI, OS); + } + return false; +} + +bool SystemZAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, + unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode, + raw_ostream &OS) { + SystemZInstPrinter::printAddress(MI->getOperand(OpNo).getReg(), + MI->getOperand(OpNo + 1).getImm(), + MI->getOperand(OpNo + 2).getReg(), OS); + return false; +} + +// Force static initialization. 
+extern "C" void LLVMInitializeSystemZAsmPrinter() { + RegisterAsmPrinter<SystemZAsmPrinter> X(TheSystemZTarget); +} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h new file mode 100644 index 0000000..7f6e823 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h @@ -0,0 +1,44 @@ +//===-- SystemZAsmPrinter.h - SystemZ LLVM assembly printer ----*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZASMPRINTER_H +#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZASMPRINTER_H + +#include "SystemZTargetMachine.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/Support/Compiler.h" + +namespace llvm { +class MCStreamer; +class MachineBasicBlock; +class MachineInstr; +class Module; +class raw_ostream; + +class LLVM_LIBRARY_VISIBILITY SystemZAsmPrinter : public AsmPrinter { +public: + SystemZAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer) + : AsmPrinter(TM, std::move(Streamer)) {} + + // Override AsmPrinter. + const char *getPassName() const override { + return "SystemZ Assembly Printer"; + } + void EmitInstruction(const MachineInstr *MI) override; + void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) override; + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &OS) override; + bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &OS) override; +}; +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp new file mode 100644 index 0000000..cc9c84b --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp @@ -0,0 +1,21 @@ +//===-- SystemZCallingConv.cpp - Calling conventions for SystemZ ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "SystemZCallingConv.h" +#include "SystemZRegisterInfo.h" + +using namespace llvm; + +const unsigned SystemZ::ArgGPRs[SystemZ::NumArgGPRs] = { + SystemZ::R2D, SystemZ::R3D, SystemZ::R4D, SystemZ::R5D, SystemZ::R6D +}; + +const unsigned SystemZ::ArgFPRs[SystemZ::NumArgFPRs] = { + SystemZ::F0D, SystemZ::F2D, SystemZ::F4D, SystemZ::F6D +}; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.h b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.h new file mode 100644 index 0000000..bff0706 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.h @@ -0,0 +1,84 @@ +//===-- SystemZCallingConv.h - Calling conventions for SystemZ --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZCALLINGCONV_H +#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZCALLINGCONV_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/CallingConvLower.h" + +namespace llvm { +namespace SystemZ { + const unsigned NumArgGPRs = 5; + extern const unsigned ArgGPRs[NumArgGPRs]; + + const unsigned NumArgFPRs = 4; + extern const unsigned ArgFPRs[NumArgFPRs]; +} // end namespace SystemZ + +class SystemZCCState : public CCState { +private: + /// Records whether the value was a fixed argument. + /// See ISD::OutputArg::IsFixed. + SmallVector<bool, 4> ArgIsFixed; + + /// Records whether the value was widened from a short vector type. + SmallVector<bool, 4> ArgIsShortVector; + + // Check whether ArgVT is a short vector type. + bool IsShortVectorType(EVT ArgVT) { + return ArgVT.isVector() && ArgVT.getStoreSize() <= 8; + } + +public: + SystemZCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF, + SmallVectorImpl<CCValAssign> &locs, LLVMContext &C) + : CCState(CC, isVarArg, MF, locs, C) {} + + void AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins, + CCAssignFn Fn) { + // Formal arguments are always fixed. + ArgIsFixed.clear(); + for (unsigned i = 0; i < Ins.size(); ++i) + ArgIsFixed.push_back(true); + // Record whether the call operand was a short vector. + ArgIsShortVector.clear(); + for (unsigned i = 0; i < Ins.size(); ++i) + ArgIsShortVector.push_back(IsShortVectorType(Ins[i].ArgVT)); + + CCState::AnalyzeFormalArguments(Ins, Fn); + } + + void AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs, + CCAssignFn Fn) { + // Record whether the call operand was a fixed argument. + ArgIsFixed.clear(); + for (unsigned i = 0; i < Outs.size(); ++i) + ArgIsFixed.push_back(Outs[i].IsFixed); + // Record whether the call operand was a short vector. + ArgIsShortVector.clear(); + for (unsigned i = 0; i < Outs.size(); ++i) + ArgIsShortVector.push_back(IsShortVectorType(Outs[i].ArgVT)); + + CCState::AnalyzeCallOperands(Outs, Fn); + } + + // This version of AnalyzeCallOperands in the base class is not usable + // since we must provide a means of accessing ISD::OutputArg::IsFixed. + void AnalyzeCallOperands(const SmallVectorImpl<MVT> &Outs, + SmallVectorImpl<ISD::ArgFlagsTy> &Flags, + CCAssignFn Fn) = delete; + + bool IsFixed(unsigned ValNo) { return ArgIsFixed[ValNo]; } + bool IsShortVector(unsigned ValNo) { return ArgIsShortVector[ValNo]; } +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.td b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.td new file mode 100644 index 0000000..bdd1b15 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.td @@ -0,0 +1,107 @@ +//=- SystemZCallingConv.td - Calling conventions for SystemZ -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This describes the calling conventions for the SystemZ ABI. 
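// In outline: the first five integer arguments travel in %r2-%r6, the first
// four float/double arguments in %f0/%f2/%f4/%f6, vector arguments in
// %v24-%v31, and remaining arguments on the stack (long double being passed
// indirectly); integer and floating-point results come back in %r2 and %f0.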
+//===----------------------------------------------------------------------===//
+
+class CCIfExtend<CCAction A>
+  : CCIf<"ArgFlags.isSExt() || ArgFlags.isZExt()", A>;
+
+class CCIfSubtarget<string F, CCAction A>
+  : CCIf<!strconcat("static_cast<const SystemZSubtarget&>"
+                    "(State.getMachineFunction().getSubtarget()).", F),
+         A>;
+
+// Match if this specific argument is a fixed (i.e. named) argument.
+class CCIfFixed<CCAction A>
+    : CCIf<"static_cast<SystemZCCState *>(&State)->IsFixed(ValNo)", A>;
+
+// Match if this specific argument was widened from a short vector type.
+class CCIfShortVector<CCAction A>
+    : CCIf<"static_cast<SystemZCCState *>(&State)->IsShortVector(ValNo)", A>;
+
+
+//===----------------------------------------------------------------------===//
+// z/Linux return value calling convention
+//===----------------------------------------------------------------------===//
+def RetCC_SystemZ : CallingConv<[
+  // Promote i32 to i64 if it has an explicit extension type.
+  CCIfType<[i32], CCIfExtend<CCPromoteToType<i64>>>,
+
+  // ABI-compliant code returns 64-bit integers in R2. Make the other
+  // call-clobbered argument registers available for code that doesn't
+  // care about the ABI. (R6 is an argument register too, but is
+  // call-saved and therefore not suitable for return values.)
+  CCIfType<[i32], CCAssignToReg<[R2L, R3L, R4L, R5L]>>,
+  CCIfType<[i64], CCAssignToReg<[R2D, R3D, R4D, R5D]>>,
+
+  // ABI-compliant code returns float and double in F0. Make the
+  // other floating-point argument registers available for code that
+  // doesn't care about the ABI. All floating-point argument registers
+  // are call-clobbered, so we can use all of them here.
+  CCIfType<[f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>,
+  CCIfType<[f64], CCAssignToReg<[F0D, F2D, F4D, F6D]>>,
+
+  // Similarly for vectors, with V24 being the ABI-compliant choice.
+  // Sub-128 vectors are returned in the same way, but they're widened
+  // to one of these types during type legalization.
+  CCIfSubtarget<"hasVector()",
+    CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+             CCAssignToReg<[V24, V26, V28, V30, V25, V27, V29, V31]>>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// z/Linux argument calling conventions
+//===----------------------------------------------------------------------===//
+def CC_SystemZ : CallingConv<[
+  // Promote i32 to i64 if it has an explicit extension type.
+  // The convention is that true integer arguments that are smaller
+  // than 64 bits should be marked as extended, but structures that
+  // are smaller than 64 bits shouldn't.
+  CCIfType<[i32], CCIfExtend<CCPromoteToType<i64>>>,
+
+  // Force long double values to the stack and pass i64 pointers to them.
+  CCIfType<[f128], CCPassIndirect<i64>>,
+
+  // The first 5 integer arguments are passed in R2-R6. Note that R6
+  // is call-saved.
+  CCIfType<[i32], CCAssignToReg<[R2L, R3L, R4L, R5L, R6L]>>,
+  CCIfType<[i64], CCAssignToReg<[R2D, R3D, R4D, R5D, R6D]>>,
+
+  // The first 4 float and double arguments are passed in even registers F0-F6.
+  CCIfType<[f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>,
+  CCIfType<[f64], CCAssignToReg<[F0D, F2D, F4D, F6D]>>,
+
+  // The first 8 named vector arguments are passed in V24-V31. Sub-128 vectors
+  // are passed in the same way, but they're widened to one of these types
+  // during type legalization.
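// For illustration (assuming a subtarget with the vector facility), a call
// with arguments (i64, double, <4 x i32>, i64, float) is assigned
// %r2, %f0, %v24, %r3 and %f2 respectively by CC_SystemZ, since GPR, FPR and
// vector arguments are allocated independently of one another.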
+ CCIfSubtarget<"hasVector()", + CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + CCIfFixed<CCAssignToReg<[V24, V26, V28, V30, + V25, V27, V29, V31]>>>>, + + // However, sub-128 vectors which need to go on the stack occupy just a + // single 8-byte-aligned 8-byte stack slot. Pass as i64. + CCIfSubtarget<"hasVector()", + CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + CCIfShortVector<CCBitConvertToType<i64>>>>, + + // Other vector arguments are passed in 8-byte-aligned 16-byte stack slots. + CCIfSubtarget<"hasVector()", + CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + CCAssignToStack<16, 8>>>, + + // Other arguments are passed in 8-byte-aligned 8-byte stack slots. + CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>> +]>; + +//===----------------------------------------------------------------------===// +// z/Linux callee-saved registers +//===----------------------------------------------------------------------===// +def CSR_SystemZ : CalleeSavedRegs<(add (sequence "R%dD", 6, 15), + (sequence "F%dD", 8, 15))>; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp new file mode 100644 index 0000000..4a6beb6 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp @@ -0,0 +1,52 @@ +//===-- SystemZConstantPoolValue.cpp - SystemZ constant-pool value --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "SystemZConstantPoolValue.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +SystemZConstantPoolValue:: +SystemZConstantPoolValue(const GlobalValue *gv, + SystemZCP::SystemZCPModifier modifier) + : MachineConstantPoolValue(gv->getType()), GV(gv), Modifier(modifier) {} + +SystemZConstantPoolValue * +SystemZConstantPoolValue::Create(const GlobalValue *GV, + SystemZCP::SystemZCPModifier Modifier) { + return new SystemZConstantPoolValue(GV, Modifier); +} + +int SystemZConstantPoolValue:: +getExistingMachineCPValue(MachineConstantPool *CP, unsigned Alignment) { + unsigned AlignMask = Alignment - 1; + const std::vector<MachineConstantPoolEntry> &Constants = CP->getConstants(); + for (unsigned I = 0, E = Constants.size(); I != E; ++I) { + if (Constants[I].isMachineConstantPoolEntry() && + (Constants[I].getAlignment() & AlignMask) == 0) { + auto *ZCPV = + static_cast<SystemZConstantPoolValue *>(Constants[I].Val.MachineCPVal); + if (ZCPV->GV == GV && ZCPV->Modifier == Modifier) + return I; + } + } + return -1; +} + +void SystemZConstantPoolValue::addSelectionDAGCSEId(FoldingSetNodeID &ID) { + ID.AddPointer(GV); + ID.AddInteger(Modifier); +} + +void SystemZConstantPoolValue::print(raw_ostream &O) const { + O << GV << "@" << int(Modifier); +} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h b/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h new file mode 100644 index 0000000..a71b595 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h @@ -0,0 +1,58 @@ +//===- SystemZConstantPoolValue.h - SystemZ constant-pool value -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZCONSTANTPOOLVALUE_H +#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZCONSTANTPOOLVALUE_H + +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/Support/ErrorHandling.h" + +namespace llvm { + +class GlobalValue; + +namespace SystemZCP { +enum SystemZCPModifier { + TLSGD, + TLSLDM, + DTPOFF, + NTPOFF +}; +} // end namespace SystemZCP + +/// A SystemZ-specific constant pool value. At present, the only +/// defined constant pool values are module IDs or offsets of +/// thread-local variables (written x@TLSGD, x@TLSLDM, x@DTPOFF, +/// or x@NTPOFF). +class SystemZConstantPoolValue : public MachineConstantPoolValue { + const GlobalValue *GV; + SystemZCP::SystemZCPModifier Modifier; + +protected: + SystemZConstantPoolValue(const GlobalValue *GV, + SystemZCP::SystemZCPModifier Modifier); + +public: + static SystemZConstantPoolValue * + Create(const GlobalValue *GV, SystemZCP::SystemZCPModifier Modifier); + + // Override MachineConstantPoolValue. + int getExistingMachineCPValue(MachineConstantPool *CP, + unsigned Alignment) override; + void addSelectionDAGCSEId(FoldingSetNodeID &ID) override; + void print(raw_ostream &O) const override; + + // Access SystemZ-specific fields. + const GlobalValue *getGlobalValue() const { return GV; } + SystemZCP::SystemZCPModifier getModifier() const { return Modifier; } +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp new file mode 100644 index 0000000..4818ed0 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp @@ -0,0 +1,479 @@ +//===-- SystemZElimCompare.cpp - Eliminate comparison instructions --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass: +// (1) tries to remove compares if CC already contains the required information +// (2) fuses compares and branches into COMPARE AND BRANCH instructions +// +//===----------------------------------------------------------------------===// + +#include "SystemZTargetMachine.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "systemz-elim-compare" + +STATISTIC(BranchOnCounts, "Number of branch-on-count instructions"); +STATISTIC(EliminatedComparisons, "Number of eliminated comparisons"); +STATISTIC(FusedComparisons, "Number of fused compare-and-branch instructions"); + +namespace { +// Represents the references to a particular register in one or more +// instructions. +struct Reference { + Reference() + : Def(false), Use(false) {} + + Reference &operator|=(const Reference &Other) { + Def |= Other.Def; + Use |= Other.Use; + return *this; + } + + explicit operator bool() const { return Def || Use; } + + // True if the register is defined or used in some form, either directly or + // via a sub- or super-register. 
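// For example, "AGHI %r2, -1" both reads and writes %r2, so
// getRegReferences() on %r2 reports Def and Use set; a query on an unrelated
// register such as %r3 reports neither.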
+ bool Def; + bool Use; +}; + +class SystemZElimCompare : public MachineFunctionPass { +public: + static char ID; + SystemZElimCompare(const SystemZTargetMachine &tm) + : MachineFunctionPass(ID), TII(nullptr), TRI(nullptr) {} + + const char *getPassName() const override { + return "SystemZ Comparison Elimination"; + } + + bool processBlock(MachineBasicBlock &MBB); + bool runOnMachineFunction(MachineFunction &F) override; + +private: + Reference getRegReferences(MachineInstr *MI, unsigned Reg); + bool convertToBRCT(MachineInstr *MI, MachineInstr *Compare, + SmallVectorImpl<MachineInstr *> &CCUsers); + bool convertToLoadAndTest(MachineInstr *MI); + bool adjustCCMasksForInstr(MachineInstr *MI, MachineInstr *Compare, + SmallVectorImpl<MachineInstr *> &CCUsers); + bool optimizeCompareZero(MachineInstr *Compare, + SmallVectorImpl<MachineInstr *> &CCUsers); + bool fuseCompareAndBranch(MachineInstr *Compare, + SmallVectorImpl<MachineInstr *> &CCUsers); + + const SystemZInstrInfo *TII; + const TargetRegisterInfo *TRI; +}; + +char SystemZElimCompare::ID = 0; +} // end anonymous namespace + +FunctionPass *llvm::createSystemZElimComparePass(SystemZTargetMachine &TM) { + return new SystemZElimCompare(TM); +} + +// Return true if CC is live out of MBB. +static bool isCCLiveOut(MachineBasicBlock &MBB) { + for (auto SI = MBB.succ_begin(), SE = MBB.succ_end(); SI != SE; ++SI) + if ((*SI)->isLiveIn(SystemZ::CC)) + return true; + return false; +} + +// Return true if any CC result of MI would reflect the value of Reg. +static bool resultTests(MachineInstr *MI, unsigned Reg) { + if (MI->getNumOperands() > 0 && + MI->getOperand(0).isReg() && + MI->getOperand(0).isDef() && + MI->getOperand(0).getReg() == Reg) + return true; + + switch (MI->getOpcode()) { + case SystemZ::LR: + case SystemZ::LGR: + case SystemZ::LGFR: + case SystemZ::LTR: + case SystemZ::LTGR: + case SystemZ::LTGFR: + case SystemZ::LER: + case SystemZ::LDR: + case SystemZ::LXR: + case SystemZ::LTEBR: + case SystemZ::LTDBR: + case SystemZ::LTXBR: + if (MI->getOperand(1).getReg() == Reg) + return true; + } + + return false; +} + +// Describe the references to Reg or any of its aliases in MI. +Reference SystemZElimCompare::getRegReferences(MachineInstr *MI, unsigned Reg) { + Reference Ref; + for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) { + const MachineOperand &MO = MI->getOperand(I); + if (MO.isReg()) { + if (unsigned MOReg = MO.getReg()) { + if (TRI->regsOverlap(MOReg, Reg)) { + if (MO.isUse()) + Ref.Use = true; + else if (MO.isDef()) + Ref.Def = true; + } + } + } + } + return Ref; +} + +// Return true if this is a load and test which can be optimized the +// same way as compare instruction. +static bool isLoadAndTestAsCmp(MachineInstr *MI) { + // If we during isel used a load-and-test as a compare with 0, the + // def operand is dead. + return ((MI->getOpcode() == SystemZ::LTEBR || + MI->getOpcode() == SystemZ::LTDBR || + MI->getOpcode() == SystemZ::LTXBR) && + MI->getOperand(0).isDead()); +} + +// Return the source register of Compare, which is the unknown value +// being tested. +static unsigned getCompareSourceReg(MachineInstr *Compare) { + unsigned reg = 0; + if (Compare->isCompare()) + reg = Compare->getOperand(0).getReg(); + else if (isLoadAndTestAsCmp(Compare)) + reg = Compare->getOperand(1).getReg(); + assert (reg); + + return reg; +} + +// Compare compares the result of MI against zero. 
If MI is an addition +// of -1 and if CCUsers is a single branch on nonzero, eliminate the addition +// and convert the branch to a BRCT(G). Return true on success. +bool +SystemZElimCompare::convertToBRCT(MachineInstr *MI, MachineInstr *Compare, + SmallVectorImpl<MachineInstr *> &CCUsers) { + // Check whether we have an addition of -1. + unsigned Opcode = MI->getOpcode(); + unsigned BRCT; + if (Opcode == SystemZ::AHI) + BRCT = SystemZ::BRCT; + else if (Opcode == SystemZ::AGHI) + BRCT = SystemZ::BRCTG; + else + return false; + if (MI->getOperand(2).getImm() != -1) + return false; + + // Check whether we have a single JLH. + if (CCUsers.size() != 1) + return false; + MachineInstr *Branch = CCUsers[0]; + if (Branch->getOpcode() != SystemZ::BRC || + Branch->getOperand(0).getImm() != SystemZ::CCMASK_ICMP || + Branch->getOperand(1).getImm() != SystemZ::CCMASK_CMP_NE) + return false; + + // We already know that there are no references to the register between + // MI and Compare. Make sure that there are also no references between + // Compare and Branch. + unsigned SrcReg = getCompareSourceReg(Compare); + MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch; + for (++MBBI; MBBI != MBBE; ++MBBI) + if (getRegReferences(MBBI, SrcReg)) + return false; + + // The transformation is OK. Rebuild Branch as a BRCT(G). + MachineOperand Target(Branch->getOperand(2)); + while (Branch->getNumOperands()) + Branch->RemoveOperand(0); + Branch->setDesc(TII->get(BRCT)); + MachineInstrBuilder(*Branch->getParent()->getParent(), Branch) + .addOperand(MI->getOperand(0)) + .addOperand(MI->getOperand(1)) + .addOperand(Target) + .addReg(SystemZ::CC, RegState::ImplicitDefine); + MI->eraseFromParent(); + return true; +} + +// If MI is a load instruction, try to convert it into a LOAD AND TEST. +// Return true on success. +bool SystemZElimCompare::convertToLoadAndTest(MachineInstr *MI) { + unsigned Opcode = TII->getLoadAndTest(MI->getOpcode()); + if (!Opcode) + return false; + + MI->setDesc(TII->get(Opcode)); + MachineInstrBuilder(*MI->getParent()->getParent(), MI) + .addReg(SystemZ::CC, RegState::ImplicitDefine); + return true; +} + +// The CC users in CCUsers are testing the result of a comparison of some +// value X against zero and we know that any CC value produced by MI +// would also reflect the value of X. Try to adjust CCUsers so that +// they test the result of MI directly, returning true on success. +// Leave everything unchanged on failure. +bool SystemZElimCompare:: +adjustCCMasksForInstr(MachineInstr *MI, MachineInstr *Compare, + SmallVectorImpl<MachineInstr *> &CCUsers) { + int Opcode = MI->getOpcode(); + const MCInstrDesc &Desc = TII->get(Opcode); + unsigned MIFlags = Desc.TSFlags; + + // See which compare-style condition codes are available. + unsigned ReusableCCMask = SystemZII::getCompareZeroCCMask(MIFlags); + + // For unsigned comparisons with zero, only equality makes sense. + unsigned CompareFlags = Compare->getDesc().TSFlags; + if (CompareFlags & SystemZII::IsLogical) + ReusableCCMask &= SystemZ::CCMASK_CMP_EQ; + + if (ReusableCCMask == 0) + return false; + + unsigned CCValues = SystemZII::getCCValues(MIFlags); + assert((ReusableCCMask & ~CCValues) == 0 && "Invalid CCValues"); + + // Now check whether these flags are enough for all users. + SmallVector<MachineOperand *, 4> AlterMasks; + for (unsigned int I = 0, E = CCUsers.size(); I != E; ++I) { + MachineInstr *MI = CCUsers[I]; + + // Fail if this isn't a use of CC that we understand. 
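// (Conditional branches keep the CC valid/mask pair in their first two
// explicit operands; conditional loads and stores keep it in their last two.
// Any other kind of CC user makes the whole transformation bail out.)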
+ unsigned Flags = MI->getDesc().TSFlags; + unsigned FirstOpNum; + if (Flags & SystemZII::CCMaskFirst) + FirstOpNum = 0; + else if (Flags & SystemZII::CCMaskLast) + FirstOpNum = MI->getNumExplicitOperands() - 2; + else + return false; + + // Check whether the instruction predicate treats all CC values + // outside of ReusableCCMask in the same way. In that case it + // doesn't matter what those CC values mean. + unsigned CCValid = MI->getOperand(FirstOpNum).getImm(); + unsigned CCMask = MI->getOperand(FirstOpNum + 1).getImm(); + unsigned OutValid = ~ReusableCCMask & CCValid; + unsigned OutMask = ~ReusableCCMask & CCMask; + if (OutMask != 0 && OutMask != OutValid) + return false; + + AlterMasks.push_back(&MI->getOperand(FirstOpNum)); + AlterMasks.push_back(&MI->getOperand(FirstOpNum + 1)); + } + + // All users are OK. Adjust the masks for MI. + for (unsigned I = 0, E = AlterMasks.size(); I != E; I += 2) { + AlterMasks[I]->setImm(CCValues); + unsigned CCMask = AlterMasks[I + 1]->getImm(); + if (CCMask & ~ReusableCCMask) + AlterMasks[I + 1]->setImm((CCMask & ReusableCCMask) | + (CCValues & ~ReusableCCMask)); + } + + // CC is now live after MI. + int CCDef = MI->findRegisterDefOperandIdx(SystemZ::CC, false, true, TRI); + assert(CCDef >= 0 && "Couldn't find CC set"); + MI->getOperand(CCDef).setIsDead(false); + + // Clear any intervening kills of CC. + MachineBasicBlock::iterator MBBI = MI, MBBE = Compare; + for (++MBBI; MBBI != MBBE; ++MBBI) + MBBI->clearRegisterKills(SystemZ::CC, TRI); + + return true; +} + +// Return true if Compare is a comparison against zero. +static bool isCompareZero(MachineInstr *Compare) { + switch (Compare->getOpcode()) { + case SystemZ::LTEBRCompare: + case SystemZ::LTDBRCompare: + case SystemZ::LTXBRCompare: + return true; + + default: + + if (isLoadAndTestAsCmp(Compare)) + return true; + + return (Compare->getNumExplicitOperands() == 2 && + Compare->getOperand(1).isImm() && + Compare->getOperand(1).getImm() == 0); + } +} + +// Try to optimize cases where comparison instruction Compare is testing +// a value against zero. Return true on success and if Compare should be +// deleted as dead. CCUsers is the list of instructions that use the CC +// value produced by Compare. +bool SystemZElimCompare:: +optimizeCompareZero(MachineInstr *Compare, + SmallVectorImpl<MachineInstr *> &CCUsers) { + if (!isCompareZero(Compare)) + return false; + + // Search back for CC results that are based on the first operand. + unsigned SrcReg = getCompareSourceReg(Compare); + MachineBasicBlock &MBB = *Compare->getParent(); + MachineBasicBlock::iterator MBBI = Compare, MBBE = MBB.begin(); + Reference CCRefs; + Reference SrcRefs; + while (MBBI != MBBE) { + --MBBI; + MachineInstr *MI = MBBI; + if (resultTests(MI, SrcReg)) { + // Try to remove both MI and Compare by converting a branch to BRCT(G). + // We don't care in this case whether CC is modified between MI and + // Compare. + if (!CCRefs.Use && !SrcRefs && convertToBRCT(MI, Compare, CCUsers)) { + BranchOnCounts += 1; + return true; + } + // Try to eliminate Compare by reusing a CC result from MI. + if ((!CCRefs && convertToLoadAndTest(MI)) || + (!CCRefs.Def && adjustCCMasksForInstr(MI, Compare, CCUsers))) { + EliminatedComparisons += 1; + return true; + } + } + SrcRefs |= getRegReferences(MI, SrcReg); + if (SrcRefs.Def) + return false; + CCRefs |= getRegReferences(MI, SystemZ::CC); + if (CCRefs.Use && CCRefs.Def) + return false; + } + return false; +} + +// Try to fuse comparison instruction Compare into a later branch. 
+// Return true on success and if Compare is therefore redundant. +bool SystemZElimCompare:: +fuseCompareAndBranch(MachineInstr *Compare, + SmallVectorImpl<MachineInstr *> &CCUsers) { + // See whether we have a comparison that can be fused. + unsigned FusedOpcode = TII->getCompareAndBranch(Compare->getOpcode(), + Compare); + if (!FusedOpcode) + return false; + + // See whether we have a single branch with which to fuse. + if (CCUsers.size() != 1) + return false; + MachineInstr *Branch = CCUsers[0]; + if (Branch->getOpcode() != SystemZ::BRC) + return false; + + // Make sure that the operands are available at the branch. + unsigned SrcReg = Compare->getOperand(0).getReg(); + unsigned SrcReg2 = (Compare->getOperand(1).isReg() ? + Compare->getOperand(1).getReg() : 0); + MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch; + for (++MBBI; MBBI != MBBE; ++MBBI) + if (MBBI->modifiesRegister(SrcReg, TRI) || + (SrcReg2 && MBBI->modifiesRegister(SrcReg2, TRI))) + return false; + + // Read the branch mask and target. + MachineOperand CCMask(MBBI->getOperand(1)); + MachineOperand Target(MBBI->getOperand(2)); + assert((CCMask.getImm() & ~SystemZ::CCMASK_ICMP) == 0 && + "Invalid condition-code mask for integer comparison"); + + // Clear out all current operands. + int CCUse = MBBI->findRegisterUseOperandIdx(SystemZ::CC, false, TRI); + assert(CCUse >= 0 && "BRC must use CC"); + Branch->RemoveOperand(CCUse); + Branch->RemoveOperand(2); + Branch->RemoveOperand(1); + Branch->RemoveOperand(0); + + // Rebuild Branch as a fused compare and branch. + Branch->setDesc(TII->get(FusedOpcode)); + MachineInstrBuilder(*Branch->getParent()->getParent(), Branch) + .addOperand(Compare->getOperand(0)) + .addOperand(Compare->getOperand(1)) + .addOperand(CCMask) + .addOperand(Target) + .addReg(SystemZ::CC, RegState::ImplicitDefine); + + // Clear any intervening kills of SrcReg and SrcReg2. + MBBI = Compare; + for (++MBBI; MBBI != MBBE; ++MBBI) { + MBBI->clearRegisterKills(SrcReg, TRI); + if (SrcReg2) + MBBI->clearRegisterKills(SrcReg2, TRI); + } + FusedComparisons += 1; + return true; +} + +// Process all comparison instructions in MBB. Return true if something +// changed. +bool SystemZElimCompare::processBlock(MachineBasicBlock &MBB) { + bool Changed = false; + + // Walk backwards through the block looking for comparisons, recording + // all CC users as we go. The subroutines can delete Compare and + // instructions before it. 
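// Two typical rewrites, shown as assembly for illustration only:
//
//   aghi %r2, -1 ; cghi %r2, 0 ; jlh .Ltop    becomes   brctg %r2, .Ltop
//   cgr %r2, %r3 ; jh .Lskip                  becomes   cgrjh %r2, %r3, .Lskip
//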
+ bool CompleteCCUsers = !isCCLiveOut(MBB); + SmallVector<MachineInstr *, 4> CCUsers; + MachineBasicBlock::iterator MBBI = MBB.end(); + while (MBBI != MBB.begin()) { + MachineInstr *MI = --MBBI; + if (CompleteCCUsers && + (MI->isCompare() || isLoadAndTestAsCmp(MI)) && + (optimizeCompareZero(MI, CCUsers) || + fuseCompareAndBranch(MI, CCUsers))) { + ++MBBI; + MI->eraseFromParent(); + Changed = true; + CCUsers.clear(); + continue; + } + + if (MI->definesRegister(SystemZ::CC)) { + CCUsers.clear(); + CompleteCCUsers = true; + } + if (MI->readsRegister(SystemZ::CC) && CompleteCCUsers) + CCUsers.push_back(MI); + } + return Changed; +} + +bool SystemZElimCompare::runOnMachineFunction(MachineFunction &F) { + TII = static_cast<const SystemZInstrInfo *>(F.getSubtarget().getInstrInfo()); + TRI = &TII->getRegisterInfo(); + + bool Changed = false; + for (auto &MBB : F) + Changed |= processBlock(MBB); + + return Changed; +} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp new file mode 100644 index 0000000..e1b20d0 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -0,0 +1,529 @@ +//===-- SystemZFrameLowering.cpp - Frame lowering for SystemZ -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "SystemZFrameLowering.h" +#include "SystemZCallingConv.h" +#include "SystemZInstrBuilder.h" +#include "SystemZInstrInfo.h" +#include "SystemZMachineFunctionInfo.h" +#include "SystemZRegisterInfo.h" +#include "SystemZSubtarget.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/IR/Function.h" + +using namespace llvm; + +namespace { +// The ABI-defined register save slots, relative to the incoming stack +// pointer. +static const TargetFrameLowering::SpillSlot SpillOffsetTable[] = { + { SystemZ::R2D, 0x10 }, + { SystemZ::R3D, 0x18 }, + { SystemZ::R4D, 0x20 }, + { SystemZ::R5D, 0x28 }, + { SystemZ::R6D, 0x30 }, + { SystemZ::R7D, 0x38 }, + { SystemZ::R8D, 0x40 }, + { SystemZ::R9D, 0x48 }, + { SystemZ::R10D, 0x50 }, + { SystemZ::R11D, 0x58 }, + { SystemZ::R12D, 0x60 }, + { SystemZ::R13D, 0x68 }, + { SystemZ::R14D, 0x70 }, + { SystemZ::R15D, 0x78 }, + { SystemZ::F0D, 0x80 }, + { SystemZ::F2D, 0x88 }, + { SystemZ::F4D, 0x90 }, + { SystemZ::F6D, 0x98 } +}; +} // end anonymous namespace + +SystemZFrameLowering::SystemZFrameLowering() + : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8, + -SystemZMC::CallFrameSize, 8, + false /* StackRealignable */) { + // Create a mapping from register number to save slot offset. 
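// (so that, for example, RegSpillOffsets[SystemZ::R14D] == 0x70: the return
// address register's ABI save slot sits 112 bytes above the incoming %r15).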
+ RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS); + for (unsigned I = 0, E = array_lengthof(SpillOffsetTable); I != E; ++I) + RegSpillOffsets[SpillOffsetTable[I].Reg] = SpillOffsetTable[I].Offset; +} + +const TargetFrameLowering::SpillSlot * +SystemZFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const { + NumEntries = array_lengthof(SpillOffsetTable); + return SpillOffsetTable; +} + +void SystemZFrameLowering::determineCalleeSaves(MachineFunction &MF, + BitVector &SavedRegs, + RegScavenger *RS) const { + TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); + + MachineFrameInfo *MFFrame = MF.getFrameInfo(); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + bool HasFP = hasFP(MF); + SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>(); + bool IsVarArg = MF.getFunction()->isVarArg(); + + // va_start stores incoming FPR varargs in the normal way, but delegates + // the saving of incoming GPR varargs to spillCalleeSavedRegisters(). + // Record these pending uses, which typically include the call-saved + // argument register R6D. + if (IsVarArg) + for (unsigned I = MFI->getVarArgsFirstGPR(); I < SystemZ::NumArgGPRs; ++I) + SavedRegs.set(SystemZ::ArgGPRs[I]); + + // If the function requires a frame pointer, record that the hard + // frame pointer will be clobbered. + if (HasFP) + SavedRegs.set(SystemZ::R11D); + + // If the function calls other functions, record that the return + // address register will be clobbered. + if (MFFrame->hasCalls()) + SavedRegs.set(SystemZ::R14D); + + // If we are saving GPRs other than the stack pointer, we might as well + // save and restore the stack pointer at the same time, via STMG and LMG. + // This allows the deallocation to be done by the LMG, rather than needing + // a separate %r15 addition. + const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF); + for (unsigned I = 0; CSRegs[I]; ++I) { + unsigned Reg = CSRegs[I]; + if (SystemZ::GR64BitRegClass.contains(Reg) && SavedRegs.test(Reg)) { + SavedRegs.set(SystemZ::R15D); + break; + } + } +} + +// Add GPR64 to the save instruction being built by MIB, which is in basic +// block MBB. IsImplicit says whether this is an explicit operand to the +// instruction, or an implicit one that comes between the explicit start +// and end registers. +static void addSavedGPR(MachineBasicBlock &MBB, MachineInstrBuilder &MIB, + unsigned GPR64, bool IsImplicit) { + const TargetRegisterInfo *RI = + MBB.getParent()->getSubtarget().getRegisterInfo(); + unsigned GPR32 = RI->getSubReg(GPR64, SystemZ::subreg_l32); + bool IsLive = MBB.isLiveIn(GPR64) || MBB.isLiveIn(GPR32); + if (!IsLive || !IsImplicit) { + MIB.addReg(GPR64, getImplRegState(IsImplicit) | getKillRegState(!IsLive)); + if (!IsLive) + MBB.addLiveIn(GPR64); + } +} + +bool SystemZFrameLowering:: +spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { + if (CSI.empty()) + return false; + + MachineFunction &MF = *MBB.getParent(); + const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>(); + bool IsVarArg = MF.getFunction()->isVarArg(); + DebugLoc DL; + + // Scan the call-saved GPRs and find the bounds of the register spill area. 
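// In the common case this describes the classic prologue store
//
//   stmg %r6, %r15, 48(%r15)
//
// with LowGPR = %r6 (slot 0x30) and HighGPR = %r15.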
+ unsigned LowGPR = 0; + unsigned HighGPR = SystemZ::R15D; + unsigned StartOffset = -1U; + for (unsigned I = 0, E = CSI.size(); I != E; ++I) { + unsigned Reg = CSI[I].getReg(); + if (SystemZ::GR64BitRegClass.contains(Reg)) { + unsigned Offset = RegSpillOffsets[Reg]; + assert(Offset && "Unexpected GPR save"); + if (StartOffset > Offset) { + LowGPR = Reg; + StartOffset = Offset; + } + } + } + + // Save the range of call-saved registers, for use by the epilogue inserter. + ZFI->setLowSavedGPR(LowGPR); + ZFI->setHighSavedGPR(HighGPR); + + // Include the GPR varargs, if any. R6D is call-saved, so would + // be included by the loop above, but we also need to handle the + // call-clobbered argument registers. + if (IsVarArg) { + unsigned FirstGPR = ZFI->getVarArgsFirstGPR(); + if (FirstGPR < SystemZ::NumArgGPRs) { + unsigned Reg = SystemZ::ArgGPRs[FirstGPR]; + unsigned Offset = RegSpillOffsets[Reg]; + if (StartOffset > Offset) { + LowGPR = Reg; StartOffset = Offset; + } + } + } + + // Save GPRs + if (LowGPR) { + assert(LowGPR != HighGPR && "Should be saving %r15 and something else"); + + // Build an STMG instruction. + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(SystemZ::STMG)); + + // Add the explicit register operands. + addSavedGPR(MBB, MIB, LowGPR, false); + addSavedGPR(MBB, MIB, HighGPR, false); + + // Add the address. + MIB.addReg(SystemZ::R15D).addImm(StartOffset); + + // Make sure all call-saved GPRs are included as operands and are + // marked as live on entry. + for (unsigned I = 0, E = CSI.size(); I != E; ++I) { + unsigned Reg = CSI[I].getReg(); + if (SystemZ::GR64BitRegClass.contains(Reg)) + addSavedGPR(MBB, MIB, Reg, true); + } + + // ...likewise GPR varargs. + if (IsVarArg) + for (unsigned I = ZFI->getVarArgsFirstGPR(); I < SystemZ::NumArgGPRs; ++I) + addSavedGPR(MBB, MIB, SystemZ::ArgGPRs[I], true); + } + + // Save FPRs in the normal TargetInstrInfo way. + for (unsigned I = 0, E = CSI.size(); I != E; ++I) { + unsigned Reg = CSI[I].getReg(); + if (SystemZ::FP64BitRegClass.contains(Reg)) { + MBB.addLiveIn(Reg); + TII->storeRegToStackSlot(MBB, MBBI, Reg, true, CSI[I].getFrameIdx(), + &SystemZ::FP64BitRegClass, TRI); + } + } + + return true; +} + +bool SystemZFrameLowering:: +restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { + if (CSI.empty()) + return false; + + MachineFunction &MF = *MBB.getParent(); + const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>(); + bool HasFP = hasFP(MF); + DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + + // Restore FPRs in the normal TargetInstrInfo way. + for (unsigned I = 0, E = CSI.size(); I != E; ++I) { + unsigned Reg = CSI[I].getReg(); + if (SystemZ::FP64BitRegClass.contains(Reg)) + TII->loadRegFromStackSlot(MBB, MBBI, Reg, CSI[I].getFrameIdx(), + &SystemZ::FP64BitRegClass, TRI); + } + + // Restore call-saved GPRs (but not call-clobbered varargs, which at + // this point might hold return values). + unsigned LowGPR = ZFI->getLowSavedGPR(); + unsigned HighGPR = ZFI->getHighSavedGPR(); + unsigned StartOffset = RegSpillOffsets[LowGPR]; + if (LowGPR) { + // If we saved any of %r2-%r5 as varargs, we should also be saving + // and restoring %r6. If we're saving %r6 or above, we should be + // restoring it too. + assert(LowGPR != HighGPR && "Should be loading %r15 and something else"); + + // Build an LMG instruction. 
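// (once emitEpilogue() has folded the frame size into the displacement, this
// typically ends up as "lmg %r6, %r15, <StackSize+48>(%r15)", or with %r11 as
// the base register when a frame pointer is in use).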
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(SystemZ::LMG)); + + // Add the explicit register operands. + MIB.addReg(LowGPR, RegState::Define); + MIB.addReg(HighGPR, RegState::Define); + + // Add the address. + MIB.addReg(HasFP ? SystemZ::R11D : SystemZ::R15D); + MIB.addImm(StartOffset); + + // Do a second scan adding regs as being defined by instruction + for (unsigned I = 0, E = CSI.size(); I != E; ++I) { + unsigned Reg = CSI[I].getReg(); + if (Reg != LowGPR && Reg != HighGPR) + MIB.addReg(Reg, RegState::ImplicitDefine); + } + } + + return true; +} + +void SystemZFrameLowering:: +processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS) const { + MachineFrameInfo *MFFrame = MF.getFrameInfo(); + uint64_t MaxReach = (MFFrame->estimateStackSize(MF) + + SystemZMC::CallFrameSize * 2); + if (!isUInt<12>(MaxReach)) { + // We may need register scavenging slots if some parts of the frame + // are outside the reach of an unsigned 12-bit displacement. + // Create 2 for the case where both addresses in an MVC are + // out of range. + RS->addScavengingFrameIndex(MFFrame->CreateStackObject(8, 8, false)); + RS->addScavengingFrameIndex(MFFrame->CreateStackObject(8, 8, false)); + } +} + +// Emit instructions before MBBI (in MBB) to add NumBytes to Reg. +static void emitIncrement(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + const DebugLoc &DL, + unsigned Reg, int64_t NumBytes, + const TargetInstrInfo *TII) { + while (NumBytes) { + unsigned Opcode; + int64_t ThisVal = NumBytes; + if (isInt<16>(NumBytes)) + Opcode = SystemZ::AGHI; + else { + Opcode = SystemZ::AGFI; + // Make sure we maintain 8-byte stack alignment. + int64_t MinVal = -uint64_t(1) << 31; + int64_t MaxVal = (int64_t(1) << 31) - 8; + if (ThisVal < MinVal) + ThisVal = MinVal; + else if (ThisVal > MaxVal) + ThisVal = MaxVal; + } + MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII->get(Opcode), Reg) + .addReg(Reg).addImm(ThisVal); + // The CC implicit def is dead. + MI->getOperand(3).setIsDead(); + NumBytes -= ThisVal; + } +} + +void SystemZFrameLowering::emitPrologue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported"); + MachineFrameInfo *MFFrame = MF.getFrameInfo(); + auto *ZII = + static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo()); + SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>(); + MachineBasicBlock::iterator MBBI = MBB.begin(); + MachineModuleInfo &MMI = MF.getMMI(); + const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); + const std::vector<CalleeSavedInfo> &CSI = MFFrame->getCalleeSavedInfo(); + bool HasFP = hasFP(MF); + + // Debug location must be unknown since the first debug location is used + // to determine the end of the prologue. + DebugLoc DL; + + // The current offset of the stack pointer from the CFA. + int64_t SPOffsetFromCFA = -SystemZMC::CFAOffsetFromInitialSP; + + if (ZFI->getLowSavedGPR()) { + // Skip over the GPR saves. + if (MBBI != MBB.end() && MBBI->getOpcode() == SystemZ::STMG) + ++MBBI; + else + llvm_unreachable("Couldn't skip over GPR saves"); + + // Add CFI for the GPR saves. 
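// With the usual SPOffsetFromCFA of -160 (the ABI-defined 160-byte call
// frame), the save of %r14 at slot 0x70 becomes ".cfi_offset %r14, -48", and
// similarly for the other saved GPRs.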
+ for (auto &Save : CSI) { + unsigned Reg = Save.getReg(); + if (SystemZ::GR64BitRegClass.contains(Reg)) { + int64_t Offset = SPOffsetFromCFA + RegSpillOffsets[Reg]; + unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( + nullptr, MRI->getDwarfRegNum(Reg, true), Offset)); + BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } + } + } + + uint64_t StackSize = getAllocatedStackSize(MF); + if (StackSize) { + // Allocate StackSize bytes. + int64_t Delta = -int64_t(StackSize); + emitIncrement(MBB, MBBI, DL, SystemZ::R15D, Delta, ZII); + + // Add CFI for the allocation. + unsigned CFIIndex = MMI.addFrameInst( + MCCFIInstruction::createDefCfaOffset(nullptr, SPOffsetFromCFA + Delta)); + BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + SPOffsetFromCFA += Delta; + } + + if (HasFP) { + // Copy the base of the frame to R11. + BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::LGR), SystemZ::R11D) + .addReg(SystemZ::R15D); + + // Add CFI for the new frame location. + unsigned HardFP = MRI->getDwarfRegNum(SystemZ::R11D, true); + unsigned CFIIndex = MMI.addFrameInst( + MCCFIInstruction::createDefCfaRegister(nullptr, HardFP)); + BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + + // Mark the FramePtr as live at the beginning of every block except + // the entry block. (We'll have marked R11 as live on entry when + // saving the GPRs.) + for (auto I = std::next(MF.begin()), E = MF.end(); I != E; ++I) + I->addLiveIn(SystemZ::R11D); + } + + // Skip over the FPR saves. + SmallVector<unsigned, 8> CFIIndexes; + for (auto &Save : CSI) { + unsigned Reg = Save.getReg(); + if (SystemZ::FP64BitRegClass.contains(Reg)) { + if (MBBI != MBB.end() && + (MBBI->getOpcode() == SystemZ::STD || + MBBI->getOpcode() == SystemZ::STDY)) + ++MBBI; + else + llvm_unreachable("Couldn't skip over FPR save"); + + // Add CFI for the this save. + unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); + unsigned IgnoredFrameReg; + int64_t Offset = + getFrameIndexReference(MF, Save.getFrameIdx(), IgnoredFrameReg); + + unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( + nullptr, DwarfReg, SPOffsetFromCFA + Offset)); + CFIIndexes.push_back(CFIIndex); + } + } + // Complete the CFI for the FPR saves, modelling them as taking effect + // after the last save. + for (auto CFIIndex : CFIIndexes) { + BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } +} + +void SystemZFrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); + auto *ZII = + static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo()); + SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>(); + + // Skip the return instruction. + assert(MBBI->isReturn() && "Can only insert epilogue into returning blocks"); + + uint64_t StackSize = getAllocatedStackSize(MF); + if (ZFI->getLowSavedGPR()) { + --MBBI; + unsigned Opcode = MBBI->getOpcode(); + if (Opcode != SystemZ::LMG) + llvm_unreachable("Expected to see callee-save register restore code"); + + unsigned AddrOpNo = 2; + DebugLoc DL = MBBI->getDebugLoc(); + uint64_t Offset = StackSize + MBBI->getOperand(AddrOpNo + 1).getImm(); + unsigned NewOpcode = ZII->getOpcodeForOffset(Opcode, Offset); + + // If the offset is too large, use the largest stack-aligned offset + // and add the rest to the base register (the stack or frame pointer). 
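// (0x7fff8 below is the largest 8-byte-aligned value that still fits in
// LMG's signed 20-bit displacement, whose maximum is 0x7ffff).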
+ if (!NewOpcode) { + uint64_t NumBytes = Offset - 0x7fff8; + emitIncrement(MBB, MBBI, DL, MBBI->getOperand(AddrOpNo).getReg(), + NumBytes, ZII); + Offset -= NumBytes; + NewOpcode = ZII->getOpcodeForOffset(Opcode, Offset); + assert(NewOpcode && "No restore instruction available"); + } + + MBBI->setDesc(ZII->get(NewOpcode)); + MBBI->getOperand(AddrOpNo + 1).ChangeToImmediate(Offset); + } else if (StackSize) { + DebugLoc DL = MBBI->getDebugLoc(); + emitIncrement(MBB, MBBI, DL, SystemZ::R15D, StackSize, ZII); + } +} + +bool SystemZFrameLowering::hasFP(const MachineFunction &MF) const { + return (MF.getTarget().Options.DisableFramePointerElim(MF) || + MF.getFrameInfo()->hasVarSizedObjects() || + MF.getInfo<SystemZMachineFunctionInfo>()->getManipulatesSP()); +} + +int SystemZFrameLowering::getFrameIndexReference(const MachineFunction &MF, + int FI, + unsigned &FrameReg) const { + const MachineFrameInfo *MFFrame = MF.getFrameInfo(); + const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); + + // Fill in FrameReg output argument. + FrameReg = RI->getFrameRegister(MF); + + // Start with the offset of FI from the top of the caller-allocated frame + // (i.e. the top of the 160 bytes allocated by the caller). This initial + // offset is therefore negative. + int64_t Offset = (MFFrame->getObjectOffset(FI) + + MFFrame->getOffsetAdjustment()); + + // Make the offset relative to the incoming stack pointer. + Offset -= getOffsetOfLocalArea(); + + // Make the offset relative to the bottom of the frame. + Offset += getAllocatedStackSize(MF); + + return Offset; +} + +uint64_t SystemZFrameLowering:: +getAllocatedStackSize(const MachineFunction &MF) const { + const MachineFrameInfo *MFFrame = MF.getFrameInfo(); + + // Start with the size of the local variables and spill slots. + uint64_t StackSize = MFFrame->getStackSize(); + + // We need to allocate the ABI-defined 160-byte base area whenever + // we allocate stack space for our own use and whenever we call another + // function. + if (StackSize || MFFrame->hasVarSizedObjects() || MFFrame->hasCalls()) + StackSize += SystemZMC::CallFrameSize; + + return StackSize; +} + +bool +SystemZFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { + // The ABI requires us to allocate 160 bytes of stack space for the callee, + // with any outgoing stack arguments being placed above that. It seems + // better to make that area a permanent feature of the frame even if + // we're using a frame pointer. + return true; +} + +void SystemZFrameLowering:: +eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const { + switch (MI->getOpcode()) { + case SystemZ::ADJCALLSTACKDOWN: + case SystemZ::ADJCALLSTACKUP: + assert(hasReservedCallFrame(MF) && + "ADJSTACKDOWN and ADJSTACKUP should be no-ops"); + MBB.erase(MI); + break; + + default: + llvm_unreachable("Unexpected call frame instruction"); + } +} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h new file mode 100644 index 0000000..46bb6b7 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h @@ -0,0 +1,65 @@ +//===-- SystemZFrameLowering.h - Frame lowering for SystemZ -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZFRAMELOWERING_H +#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZFRAMELOWERING_H + +#include "llvm/ADT/IndexedMap.h" +#include "llvm/Target/TargetFrameLowering.h" + +namespace llvm { +class SystemZTargetMachine; +class SystemZSubtarget; + +class SystemZFrameLowering : public TargetFrameLowering { + IndexedMap<unsigned> RegSpillOffsets; + +public: + SystemZFrameLowering(); + + // Override TargetFrameLowering. + bool isFPCloseToIncomingSP() const override { return false; } + const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries) const + override; + void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, + RegScavenger *RS) const override; + bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const override; + bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBII, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const + override; + void processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS) const override; + void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; + bool hasFP(const MachineFunction &MF) const override; + int getFrameIndexReference(const MachineFunction &MF, int FI, + unsigned &FrameReg) const override; + bool hasReservedCallFrame(const MachineFunction &MF) const override; + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const + override; + + // Return the number of bytes in the callee-allocated part of the frame. + uint64_t getAllocatedStackSize(const MachineFunction &MF) const; + + // Return the byte offset from the incoming stack pointer of Reg's + // ABI-defined save slot. Return 0 if no slot is defined for Reg. + unsigned getRegSpillOffset(unsigned Reg) const { + return RegSpillOffsets[Reg]; + } +}; +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp new file mode 100644 index 0000000..a909309 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -0,0 +1,1311 @@ +//===-- SystemZISelDAGToDAG.cpp - A dag to dag inst selector for SystemZ --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines an instruction selector for the SystemZ target. +// +//===----------------------------------------------------------------------===// + +#include "SystemZTargetMachine.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define DEBUG_TYPE "systemz-isel" + +namespace { +// Used to build addressing modes. +struct SystemZAddressingMode { + // The shape of the address. 
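// For example, an access lowered to "l %r1, 8(%r2,%r3)" matches FormBDXNormal
// with Base = %r3, Index = %r2 and Disp = 8; operands that cannot take an
// index register at all use FormBD.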
+ enum AddrForm { + // base+displacement + FormBD, + + // base+displacement+index for load and store operands + FormBDXNormal, + + // base+displacement+index for load address operands + FormBDXLA, + + // base+displacement+index+ADJDYNALLOC + FormBDXDynAlloc + }; + AddrForm Form; + + // The type of displacement. The enum names here correspond directly + // to the definitions in SystemZOperand.td. We could split them into + // flags -- single/pair, 128-bit, etc. -- but it hardly seems worth it. + enum DispRange { + Disp12Only, + Disp12Pair, + Disp20Only, + Disp20Only128, + Disp20Pair + }; + DispRange DR; + + // The parts of the address. The address is equivalent to: + // + // Base + Disp + Index + (IncludesDynAlloc ? ADJDYNALLOC : 0) + SDValue Base; + int64_t Disp; + SDValue Index; + bool IncludesDynAlloc; + + SystemZAddressingMode(AddrForm form, DispRange dr) + : Form(form), DR(dr), Base(), Disp(0), Index(), + IncludesDynAlloc(false) {} + + // True if the address can have an index register. + bool hasIndexField() { return Form != FormBD; } + + // True if the address can (and must) include ADJDYNALLOC. + bool isDynAlloc() { return Form == FormBDXDynAlloc; } + + void dump() { + errs() << "SystemZAddressingMode " << this << '\n'; + + errs() << " Base "; + if (Base.getNode()) + Base.getNode()->dump(); + else + errs() << "null\n"; + + if (hasIndexField()) { + errs() << " Index "; + if (Index.getNode()) + Index.getNode()->dump(); + else + errs() << "null\n"; + } + + errs() << " Disp " << Disp; + if (IncludesDynAlloc) + errs() << " + ADJDYNALLOC"; + errs() << '\n'; + } +}; + +// Return a mask with Count low bits set. +static uint64_t allOnes(unsigned int Count) { + assert(Count <= 64); + if (Count > 63) + return UINT64_MAX; + return (uint64_t(1) << Count) - 1; +} + +// Represents operands 2 to 5 of the ROTATE AND ... SELECTED BITS operation +// given by Opcode. The operands are: Input (R2), Start (I3), End (I4) and +// Rotate (I5). The combined operand value is effectively: +// +// (or (rotl Input, Rotate), ~Mask) +// +// for RNSBG and: +// +// (and (rotl Input, Rotate), Mask) +// +// otherwise. The output value has BitSize bits, although Input may be +// narrower (in which case the upper bits are don't care). +struct RxSBGOperands { + RxSBGOperands(unsigned Op, SDValue N) + : Opcode(Op), BitSize(N.getValueType().getSizeInBits()), + Mask(allOnes(BitSize)), Input(N), Start(64 - BitSize), End(63), + Rotate(0) {} + + unsigned Opcode; + unsigned BitSize; + uint64_t Mask; + SDValue Input; + unsigned Start; + unsigned End; + unsigned Rotate; +}; + +class SystemZDAGToDAGISel : public SelectionDAGISel { + const SystemZSubtarget *Subtarget; + + // Used by SystemZOperands.td to create integer constants. + inline SDValue getImm(const SDNode *Node, uint64_t Imm) const { + return CurDAG->getTargetConstant(Imm, SDLoc(Node), Node->getValueType(0)); + } + + const SystemZTargetMachine &getTargetMachine() const { + return static_cast<const SystemZTargetMachine &>(TM); + } + + const SystemZInstrInfo *getInstrInfo() const { + return Subtarget->getInstrInfo(); + } + + // Try to fold more of the base or index of AM into AM, where IsBase + // selects between the base and index. + bool expandAddress(SystemZAddressingMode &AM, bool IsBase) const; + + // Try to describe N in AM, returning true on success. + bool selectAddress(SDValue N, SystemZAddressingMode &AM) const; + + // Extract individual target operands from matched address AM. 
+ void getAddressOperands(const SystemZAddressingMode &AM, EVT VT, + SDValue &Base, SDValue &Disp) const; + void getAddressOperands(const SystemZAddressingMode &AM, EVT VT, + SDValue &Base, SDValue &Disp, SDValue &Index) const; + + // Try to match Addr as a FormBD address with displacement type DR. + // Return true on success, storing the base and displacement in + // Base and Disp respectively. + bool selectBDAddr(SystemZAddressingMode::DispRange DR, SDValue Addr, + SDValue &Base, SDValue &Disp) const; + + // Try to match Addr as a FormBDX address with displacement type DR. + // Return true on success and if the result had no index. Store the + // base and displacement in Base and Disp respectively. + bool selectMVIAddr(SystemZAddressingMode::DispRange DR, SDValue Addr, + SDValue &Base, SDValue &Disp) const; + + // Try to match Addr as a FormBDX* address of form Form with + // displacement type DR. Return true on success, storing the base, + // displacement and index in Base, Disp and Index respectively. + bool selectBDXAddr(SystemZAddressingMode::AddrForm Form, + SystemZAddressingMode::DispRange DR, SDValue Addr, + SDValue &Base, SDValue &Disp, SDValue &Index) const; + + // PC-relative address matching routines used by SystemZOperands.td. + bool selectPCRelAddress(SDValue Addr, SDValue &Target) const { + if (SystemZISD::isPCREL(Addr.getOpcode())) { + Target = Addr.getOperand(0); + return true; + } + return false; + } + + // BD matching routines used by SystemZOperands.td. + bool selectBDAddr12Only(SDValue Addr, SDValue &Base, SDValue &Disp) const { + return selectBDAddr(SystemZAddressingMode::Disp12Only, Addr, Base, Disp); + } + bool selectBDAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp) const { + return selectBDAddr(SystemZAddressingMode::Disp12Pair, Addr, Base, Disp); + } + bool selectBDAddr20Only(SDValue Addr, SDValue &Base, SDValue &Disp) const { + return selectBDAddr(SystemZAddressingMode::Disp20Only, Addr, Base, Disp); + } + bool selectBDAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp) const { + return selectBDAddr(SystemZAddressingMode::Disp20Pair, Addr, Base, Disp); + } + + // MVI matching routines used by SystemZOperands.td. + bool selectMVIAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp) const { + return selectMVIAddr(SystemZAddressingMode::Disp12Pair, Addr, Base, Disp); + } + bool selectMVIAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp) const { + return selectMVIAddr(SystemZAddressingMode::Disp20Pair, Addr, Base, Disp); + } + + // BDX matching routines used by SystemZOperands.td. 
+ bool selectBDXAddr12Only(SDValue Addr, SDValue &Base, SDValue &Disp, + SDValue &Index) const { + return selectBDXAddr(SystemZAddressingMode::FormBDXNormal, + SystemZAddressingMode::Disp12Only, + Addr, Base, Disp, Index); + } + bool selectBDXAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp, + SDValue &Index) const { + return selectBDXAddr(SystemZAddressingMode::FormBDXNormal, + SystemZAddressingMode::Disp12Pair, + Addr, Base, Disp, Index); + } + bool selectDynAlloc12Only(SDValue Addr, SDValue &Base, SDValue &Disp, + SDValue &Index) const { + return selectBDXAddr(SystemZAddressingMode::FormBDXDynAlloc, + SystemZAddressingMode::Disp12Only, + Addr, Base, Disp, Index); + } + bool selectBDXAddr20Only(SDValue Addr, SDValue &Base, SDValue &Disp, + SDValue &Index) const { + return selectBDXAddr(SystemZAddressingMode::FormBDXNormal, + SystemZAddressingMode::Disp20Only, + Addr, Base, Disp, Index); + } + bool selectBDXAddr20Only128(SDValue Addr, SDValue &Base, SDValue &Disp, + SDValue &Index) const { + return selectBDXAddr(SystemZAddressingMode::FormBDXNormal, + SystemZAddressingMode::Disp20Only128, + Addr, Base, Disp, Index); + } + bool selectBDXAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp, + SDValue &Index) const { + return selectBDXAddr(SystemZAddressingMode::FormBDXNormal, + SystemZAddressingMode::Disp20Pair, + Addr, Base, Disp, Index); + } + bool selectLAAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp, + SDValue &Index) const { + return selectBDXAddr(SystemZAddressingMode::FormBDXLA, + SystemZAddressingMode::Disp12Pair, + Addr, Base, Disp, Index); + } + bool selectLAAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp, + SDValue &Index) const { + return selectBDXAddr(SystemZAddressingMode::FormBDXLA, + SystemZAddressingMode::Disp20Pair, + Addr, Base, Disp, Index); + } + + // Try to match Addr as an address with a base, 12-bit displacement + // and index, where the index is element Elem of a vector. + // Return true on success, storing the base, displacement and vector + // in Base, Disp and Index respectively. + bool selectBDVAddr12Only(SDValue Addr, SDValue Elem, SDValue &Base, + SDValue &Disp, SDValue &Index) const; + + // Check whether (or Op (and X InsertMask)) is effectively an insertion + // of X into bits InsertMask of some Y != Op. Return true if so and + // set Op to that Y. + bool detectOrAndInsertion(SDValue &Op, uint64_t InsertMask) const; + + // Try to update RxSBG so that only the bits of RxSBG.Input in Mask are used. + // Return true on success. + bool refineRxSBGMask(RxSBGOperands &RxSBG, uint64_t Mask) const; + + // Try to fold some of RxSBG.Input into other fields of RxSBG. + // Return true on success. + bool expandRxSBG(RxSBGOperands &RxSBG) const; + + // Return an undefined value of type VT. + SDValue getUNDEF(SDLoc DL, EVT VT) const; + + // Convert N to VT, if it isn't already. + SDValue convertTo(SDLoc DL, EVT VT, SDValue N) const; + + // Try to implement AND or shift node N using RISBG with the zero flag set. + // Return the selected node on success, otherwise return null. + SDNode *tryRISBGZero(SDNode *N); + + // Try to use RISBG or Opcode to implement OR or XOR node N. + // Return the selected node on success, otherwise return null. 
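// (RISBG is ROTATE THEN INSERT SELECTED BITS; the related ROSBG, RXSBG and
// RNSBG instructions OR, XOR and AND the selected bits instead, which is what
// the RxSBG naming refers to.)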
+ SDNode *tryRxSBG(SDNode *N, unsigned Opcode); + + // If Op0 is null, then Node is a constant that can be loaded using: + // + // (Opcode UpperVal LowerVal) + // + // If Op0 is nonnull, then Node can be implemented using: + // + // (Opcode (Opcode Op0 UpperVal) LowerVal) + SDNode *splitLargeImmediate(unsigned Opcode, SDNode *Node, SDValue Op0, + uint64_t UpperVal, uint64_t LowerVal); + + // Try to use gather instruction Opcode to implement vector insertion N. + SDNode *tryGather(SDNode *N, unsigned Opcode); + + // Try to use scatter instruction Opcode to implement store Store. + SDNode *tryScatter(StoreSDNode *Store, unsigned Opcode); + + // Return true if Load and Store are loads and stores of the same size + // and are guaranteed not to overlap. Such operations can be implemented + // using block (SS-format) instructions. + // + // Partial overlap would lead to incorrect code, since the block operations + // are logically bytewise, even though they have a fast path for the + // non-overlapping case. We also need to avoid full overlap (i.e. two + // addresses that might be equal at run time) because although that case + // would be handled correctly, it might be implemented by millicode. + bool canUseBlockOperation(StoreSDNode *Store, LoadSDNode *Load) const; + + // N is a (store (load Y), X) pattern. Return true if it can use an MVC + // from Y to X. + bool storeLoadCanUseMVC(SDNode *N) const; + + // N is a (store (op (load A[0]), (load A[1])), X) pattern. Return true + // if A[1 - I] == X and if N can use a block operation like NC from A[I] + // to X. + bool storeLoadCanUseBlockBinary(SDNode *N, unsigned I) const; + +public: + SystemZDAGToDAGISel(SystemZTargetMachine &TM, CodeGenOpt::Level OptLevel) + : SelectionDAGISel(TM, OptLevel) {} + + bool runOnMachineFunction(MachineFunction &MF) override { + Subtarget = &MF.getSubtarget<SystemZSubtarget>(); + return SelectionDAGISel::runOnMachineFunction(MF); + } + + // Override MachineFunctionPass. + const char *getPassName() const override { + return "SystemZ DAG->DAG Pattern Instruction Selection"; + } + + // Override SelectionDAGISel. + SDNode *Select(SDNode *Node) override; + bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, + std::vector<SDValue> &OutOps) override; + + // Include the pieces autogenerated from the target description. + #include "SystemZGenDAGISel.inc" +}; +} // end anonymous namespace + +FunctionPass *llvm::createSystemZISelDag(SystemZTargetMachine &TM, + CodeGenOpt::Level OptLevel) { + return new SystemZDAGToDAGISel(TM, OptLevel); +} + +// Return true if Val should be selected as a displacement for an address +// with range DR. Here we're interested in the range of both the instruction +// described by DR and of any pairing instruction. +static bool selectDisp(SystemZAddressingMode::DispRange DR, int64_t Val) { + switch (DR) { + case SystemZAddressingMode::Disp12Only: + return isUInt<12>(Val); + + case SystemZAddressingMode::Disp12Pair: + case SystemZAddressingMode::Disp20Only: + case SystemZAddressingMode::Disp20Pair: + return isInt<20>(Val); + + case SystemZAddressingMode::Disp20Only128: + return isInt<20>(Val) && isInt<20>(Val + 8); + } + llvm_unreachable("Unhandled displacement range"); +} + +// Change the base or index in AM to Value, where IsBase selects +// between the base and index. 
+static void changeComponent(SystemZAddressingMode &AM, bool IsBase, + SDValue Value) { + if (IsBase) + AM.Base = Value; + else + AM.Index = Value; +} + +// The base or index of AM is equivalent to Value + ADJDYNALLOC, +// where IsBase selects between the base and index. Try to fold the +// ADJDYNALLOC into AM. +static bool expandAdjDynAlloc(SystemZAddressingMode &AM, bool IsBase, + SDValue Value) { + if (AM.isDynAlloc() && !AM.IncludesDynAlloc) { + changeComponent(AM, IsBase, Value); + AM.IncludesDynAlloc = true; + return true; + } + return false; +} + +// The base of AM is equivalent to Base + Index. Try to use Index as +// the index register. +static bool expandIndex(SystemZAddressingMode &AM, SDValue Base, + SDValue Index) { + if (AM.hasIndexField() && !AM.Index.getNode()) { + AM.Base = Base; + AM.Index = Index; + return true; + } + return false; +} + +// The base or index of AM is equivalent to Op0 + Op1, where IsBase selects +// between the base and index. Try to fold Op1 into AM's displacement. +static bool expandDisp(SystemZAddressingMode &AM, bool IsBase, + SDValue Op0, uint64_t Op1) { + // First try adjusting the displacement. + int64_t TestDisp = AM.Disp + Op1; + if (selectDisp(AM.DR, TestDisp)) { + changeComponent(AM, IsBase, Op0); + AM.Disp = TestDisp; + return true; + } + + // We could consider forcing the displacement into a register and + // using it as an index, but it would need to be carefully tuned. + return false; +} + +bool SystemZDAGToDAGISel::expandAddress(SystemZAddressingMode &AM, + bool IsBase) const { + SDValue N = IsBase ? AM.Base : AM.Index; + unsigned Opcode = N.getOpcode(); + if (Opcode == ISD::TRUNCATE) { + N = N.getOperand(0); + Opcode = N.getOpcode(); + } + if (Opcode == ISD::ADD || CurDAG->isBaseWithConstantOffset(N)) { + SDValue Op0 = N.getOperand(0); + SDValue Op1 = N.getOperand(1); + + unsigned Op0Code = Op0->getOpcode(); + unsigned Op1Code = Op1->getOpcode(); + + if (Op0Code == SystemZISD::ADJDYNALLOC) + return expandAdjDynAlloc(AM, IsBase, Op1); + if (Op1Code == SystemZISD::ADJDYNALLOC) + return expandAdjDynAlloc(AM, IsBase, Op0); + + if (Op0Code == ISD::Constant) + return expandDisp(AM, IsBase, Op1, + cast<ConstantSDNode>(Op0)->getSExtValue()); + if (Op1Code == ISD::Constant) + return expandDisp(AM, IsBase, Op0, + cast<ConstantSDNode>(Op1)->getSExtValue()); + + if (IsBase && expandIndex(AM, Op0, Op1)) + return true; + } + if (Opcode == SystemZISD::PCREL_OFFSET) { + SDValue Full = N.getOperand(0); + SDValue Base = N.getOperand(1); + SDValue Anchor = Base.getOperand(0); + uint64_t Offset = (cast<GlobalAddressSDNode>(Full)->getOffset() - + cast<GlobalAddressSDNode>(Anchor)->getOffset()); + return expandDisp(AM, IsBase, Base, Offset); + } + return false; +} + +// Return true if an instruction with displacement range DR should be +// used for displacement value Val. selectDisp(DR, Val) must already hold. +static bool isValidDisp(SystemZAddressingMode::DispRange DR, int64_t Val) { + assert(selectDisp(DR, Val) && "Invalid displacement"); + switch (DR) { + case SystemZAddressingMode::Disp12Only: + case SystemZAddressingMode::Disp20Only: + case SystemZAddressingMode::Disp20Only128: + return true; + + case SystemZAddressingMode::Disp12Pair: + // Use the other instruction if the displacement is too large. + return isUInt<12>(Val); + + case SystemZAddressingMode::Disp20Pair: + // Use the other instruction if the displacement is small enough. 
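+ // (For example, prefer ST, a 4-byte RX instruction, to STY, its 6-byte
+ // RXY partner, whenever the displacement fits in 12 bits.)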
+ return !isUInt<12>(Val); + } + llvm_unreachable("Unhandled displacement range"); +} + +// Return true if Base + Disp + Index should be performed by LA(Y). +static bool shouldUseLA(SDNode *Base, int64_t Disp, SDNode *Index) { + // Don't use LA(Y) for constants. + if (!Base) + return false; + + // Always use LA(Y) for frame addresses, since we know that the destination + // register is almost always (perhaps always) going to be different from + // the frame register. + if (Base->getOpcode() == ISD::FrameIndex) + return true; + + if (Disp) { + // Always use LA(Y) if there is a base, displacement and index. + if (Index) + return true; + + // Always use LA if the displacement is small enough. It should always + // be no worse than AGHI (and better if it avoids a move). + if (isUInt<12>(Disp)) + return true; + + // For similar reasons, always use LAY if the constant is too big for AGHI. + // LAY should be no worse than AGFI. + if (!isInt<16>(Disp)) + return true; + } else { + // Don't use LA for plain registers. + if (!Index) + return false; + + // Don't use LA for plain addition if the index operand is only used + // once. It should be a natural two-operand addition in that case. + if (Index->hasOneUse()) + return false; + + // Prefer addition if the second operation is sign-extended, in the + // hope of using AGF. + unsigned IndexOpcode = Index->getOpcode(); + if (IndexOpcode == ISD::SIGN_EXTEND || + IndexOpcode == ISD::SIGN_EXTEND_INREG) + return false; + } + + // Don't use LA for two-operand addition if either operand is only + // used once. The addition instructions are better in that case. + if (Base->hasOneUse()) + return false; + + return true; +} + +// Return true if Addr is suitable for AM, updating AM if so. +bool SystemZDAGToDAGISel::selectAddress(SDValue Addr, + SystemZAddressingMode &AM) const { + // Start out assuming that the address will need to be loaded separately, + // then try to extend it as much as we can. + AM.Base = Addr; + + // First try treating the address as a constant. + if (Addr.getOpcode() == ISD::Constant && + expandDisp(AM, true, SDValue(), + cast<ConstantSDNode>(Addr)->getSExtValue())) + ; + else + // Otherwise try expanding each component. + while (expandAddress(AM, true) || + (AM.Index.getNode() && expandAddress(AM, false))) + continue; + + // Reject cases where it isn't profitable to use LA(Y). + if (AM.Form == SystemZAddressingMode::FormBDXLA && + !shouldUseLA(AM.Base.getNode(), AM.Disp, AM.Index.getNode())) + return false; + + // Reject cases where the other instruction in a pair should be used. + if (!isValidDisp(AM.DR, AM.Disp)) + return false; + + // Make sure that ADJDYNALLOC is included where necessary. + if (AM.isDynAlloc() && !AM.IncludesDynAlloc) + return false; + + DEBUG(AM.dump()); + return true; +} + +// Insert a node into the DAG at least before Pos. This will reposition +// the node as needed, and will assign it a node ID that is <= Pos's ID. +// Note that this does *not* preserve the uniqueness of node IDs! +// The selection DAG must no longer depend on their uniqueness when this +// function is used. 
+static void insertDAGNode(SelectionDAG *DAG, SDNode *Pos, SDValue N) { + if (N.getNode()->getNodeId() == -1 || + N.getNode()->getNodeId() > Pos->getNodeId()) { + DAG->RepositionNode(Pos->getIterator(), N.getNode()); + N.getNode()->setNodeId(Pos->getNodeId()); + } +} + +void SystemZDAGToDAGISel::getAddressOperands(const SystemZAddressingMode &AM, + EVT VT, SDValue &Base, + SDValue &Disp) const { + Base = AM.Base; + if (!Base.getNode()) + // Register 0 means "no base". This is mostly useful for shifts. + Base = CurDAG->getRegister(0, VT); + else if (Base.getOpcode() == ISD::FrameIndex) { + // Lower a FrameIndex to a TargetFrameIndex. + int64_t FrameIndex = cast<FrameIndexSDNode>(Base)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FrameIndex, VT); + } else if (Base.getValueType() != VT) { + // Truncate values from i64 to i32, for shifts. + assert(VT == MVT::i32 && Base.getValueType() == MVT::i64 && + "Unexpected truncation"); + SDLoc DL(Base); + SDValue Trunc = CurDAG->getNode(ISD::TRUNCATE, DL, VT, Base); + insertDAGNode(CurDAG, Base.getNode(), Trunc); + Base = Trunc; + } + + // Lower the displacement to a TargetConstant. + Disp = CurDAG->getTargetConstant(AM.Disp, SDLoc(Base), VT); +} + +void SystemZDAGToDAGISel::getAddressOperands(const SystemZAddressingMode &AM, + EVT VT, SDValue &Base, + SDValue &Disp, + SDValue &Index) const { + getAddressOperands(AM, VT, Base, Disp); + + Index = AM.Index; + if (!Index.getNode()) + // Register 0 means "no index". + Index = CurDAG->getRegister(0, VT); +} + +bool SystemZDAGToDAGISel::selectBDAddr(SystemZAddressingMode::DispRange DR, + SDValue Addr, SDValue &Base, + SDValue &Disp) const { + SystemZAddressingMode AM(SystemZAddressingMode::FormBD, DR); + if (!selectAddress(Addr, AM)) + return false; + + getAddressOperands(AM, Addr.getValueType(), Base, Disp); + return true; +} + +bool SystemZDAGToDAGISel::selectMVIAddr(SystemZAddressingMode::DispRange DR, + SDValue Addr, SDValue &Base, + SDValue &Disp) const { + SystemZAddressingMode AM(SystemZAddressingMode::FormBDXNormal, DR); + if (!selectAddress(Addr, AM) || AM.Index.getNode()) + return false; + + getAddressOperands(AM, Addr.getValueType(), Base, Disp); + return true; +} + +bool SystemZDAGToDAGISel::selectBDXAddr(SystemZAddressingMode::AddrForm Form, + SystemZAddressingMode::DispRange DR, + SDValue Addr, SDValue &Base, + SDValue &Disp, SDValue &Index) const { + SystemZAddressingMode AM(Form, DR); + if (!selectAddress(Addr, AM)) + return false; + + getAddressOperands(AM, Addr.getValueType(), Base, Disp, Index); + return true; +} + +bool SystemZDAGToDAGISel::selectBDVAddr12Only(SDValue Addr, SDValue Elem, + SDValue &Base, + SDValue &Disp, + SDValue &Index) const { + SDValue Regs[2]; + if (selectBDXAddr12Only(Addr, Regs[0], Disp, Regs[1]) && + Regs[0].getNode() && Regs[1].getNode()) { + for (unsigned int I = 0; I < 2; ++I) { + Base = Regs[I]; + Index = Regs[1 - I]; + // We can't tell here whether the index vector has the right type + // for the access; the caller needs to do that instead. + if (Index.getOpcode() == ISD::ZERO_EXTEND) + Index = Index.getOperand(0); + if (Index.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + Index.getOperand(1) == Elem) { + Index = Index.getOperand(0); + return true; + } + } + } + return false; +} + +bool SystemZDAGToDAGISel::detectOrAndInsertion(SDValue &Op, + uint64_t InsertMask) const { + // We're only interested in cases where the insertion is into some operand + // of Op, rather than into Op itself. The only useful case is an AND. 
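+ // (Illustrative example: in (or (and X, 0xffffff00), (and Y, 0xff)) the
+ // OR merely inserts the low byte of Y into X, so a ROSBG of Y into
+ // (and X, 0xffffff00) can become a RISBG into X and the AND disappears.)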
+ if (Op.getOpcode() != ISD::AND) + return false; + + // We need a constant mask. + auto *MaskNode = dyn_cast<ConstantSDNode>(Op.getOperand(1).getNode()); + if (!MaskNode) + return false; + + // It's not an insertion of Op.getOperand(0) if the two masks overlap. + uint64_t AndMask = MaskNode->getZExtValue(); + if (InsertMask & AndMask) + return false; + + // It's only an insertion if all bits are covered or are known to be zero. + // The inner check covers all cases but is more expensive. + uint64_t Used = allOnes(Op.getValueType().getSizeInBits()); + if (Used != (AndMask | InsertMask)) { + APInt KnownZero, KnownOne; + CurDAG->computeKnownBits(Op.getOperand(0), KnownZero, KnownOne); + if (Used != (AndMask | InsertMask | KnownZero.getZExtValue())) + return false; + } + + Op = Op.getOperand(0); + return true; +} + +bool SystemZDAGToDAGISel::refineRxSBGMask(RxSBGOperands &RxSBG, + uint64_t Mask) const { + const SystemZInstrInfo *TII = getInstrInfo(); + if (RxSBG.Rotate != 0) + Mask = (Mask << RxSBG.Rotate) | (Mask >> (64 - RxSBG.Rotate)); + Mask &= RxSBG.Mask; + if (TII->isRxSBGMask(Mask, RxSBG.BitSize, RxSBG.Start, RxSBG.End)) { + RxSBG.Mask = Mask; + return true; + } + return false; +} + +// Return true if any bits of (RxSBG.Input & Mask) are significant. +static bool maskMatters(RxSBGOperands &RxSBG, uint64_t Mask) { + // Rotate the mask in the same way as RxSBG.Input is rotated. + if (RxSBG.Rotate != 0) + Mask = ((Mask << RxSBG.Rotate) | (Mask >> (64 - RxSBG.Rotate))); + return (Mask & RxSBG.Mask) != 0; +} + +bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) const { + SDValue N = RxSBG.Input; + unsigned Opcode = N.getOpcode(); + switch (Opcode) { + case ISD::AND: { + if (RxSBG.Opcode == SystemZ::RNSBG) + return false; + + auto *MaskNode = dyn_cast<ConstantSDNode>(N.getOperand(1).getNode()); + if (!MaskNode) + return false; + + SDValue Input = N.getOperand(0); + uint64_t Mask = MaskNode->getZExtValue(); + if (!refineRxSBGMask(RxSBG, Mask)) { + // If some bits of Input are already known zeros, those bits will have + // been removed from the mask. See if adding them back in makes the + // mask suitable. + APInt KnownZero, KnownOne; + CurDAG->computeKnownBits(Input, KnownZero, KnownOne); + Mask |= KnownZero.getZExtValue(); + if (!refineRxSBGMask(RxSBG, Mask)) + return false; + } + RxSBG.Input = Input; + return true; + } + + case ISD::OR: { + if (RxSBG.Opcode != SystemZ::RNSBG) + return false; + + auto *MaskNode = dyn_cast<ConstantSDNode>(N.getOperand(1).getNode()); + if (!MaskNode) + return false; + + SDValue Input = N.getOperand(0); + uint64_t Mask = ~MaskNode->getZExtValue(); + if (!refineRxSBGMask(RxSBG, Mask)) { + // If some bits of Input are already known ones, those bits will have + // been removed from the mask. See if adding them back in makes the + // mask suitable. + APInt KnownZero, KnownOne; + CurDAG->computeKnownBits(Input, KnownZero, KnownOne); + Mask &= ~KnownOne.getZExtValue(); + if (!refineRxSBGMask(RxSBG, Mask)) + return false; + } + RxSBG.Input = Input; + return true; + } + + case ISD::ROTL: { + // Any 64-bit rotate left can be merged into the RxSBG. + if (RxSBG.BitSize != 64 || N.getValueType() != MVT::i64) + return false; + auto *CountNode = dyn_cast<ConstantSDNode>(N.getOperand(1).getNode()); + if (!CountNode) + return false; + + RxSBG.Rotate = (RxSBG.Rotate + CountNode->getZExtValue()) & 63; + RxSBG.Input = N.getOperand(0); + return true; + } + + case ISD::ANY_EXTEND: + // Bits above the extended operand are don't-care. 
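+ // The extension can therefore be stripped without adjusting the mask or
+ // the rotate amount.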
+ RxSBG.Input = N.getOperand(0); + return true; + + case ISD::ZERO_EXTEND: + if (RxSBG.Opcode != SystemZ::RNSBG) { + // Restrict the mask to the extended operand. + unsigned InnerBitSize = N.getOperand(0).getValueType().getSizeInBits(); + if (!refineRxSBGMask(RxSBG, allOnes(InnerBitSize))) + return false; + + RxSBG.Input = N.getOperand(0); + return true; + } + // Fall through. + + case ISD::SIGN_EXTEND: { + // Check that the extension bits are don't-care (i.e. are masked out + // by the final mask). + unsigned InnerBitSize = N.getOperand(0).getValueType().getSizeInBits(); + if (maskMatters(RxSBG, allOnes(RxSBG.BitSize) - allOnes(InnerBitSize))) + return false; + + RxSBG.Input = N.getOperand(0); + return true; + } + + case ISD::SHL: { + auto *CountNode = dyn_cast<ConstantSDNode>(N.getOperand(1).getNode()); + if (!CountNode) + return false; + + uint64_t Count = CountNode->getZExtValue(); + unsigned BitSize = N.getValueType().getSizeInBits(); + if (Count < 1 || Count >= BitSize) + return false; + + if (RxSBG.Opcode == SystemZ::RNSBG) { + // Treat (shl X, count) as (rotl X, size-count) as long as the bottom + // count bits from RxSBG.Input are ignored. + if (maskMatters(RxSBG, allOnes(Count))) + return false; + } else { + // Treat (shl X, count) as (and (rotl X, count), ~0<<count). + if (!refineRxSBGMask(RxSBG, allOnes(BitSize - Count) << Count)) + return false; + } + + RxSBG.Rotate = (RxSBG.Rotate + Count) & 63; + RxSBG.Input = N.getOperand(0); + return true; + } + + case ISD::SRL: + case ISD::SRA: { + auto *CountNode = dyn_cast<ConstantSDNode>(N.getOperand(1).getNode()); + if (!CountNode) + return false; + + uint64_t Count = CountNode->getZExtValue(); + unsigned BitSize = N.getValueType().getSizeInBits(); + if (Count < 1 || Count >= BitSize) + return false; + + if (RxSBG.Opcode == SystemZ::RNSBG || Opcode == ISD::SRA) { + // Treat (srl|sra X, count) as (rotl X, size-count) as long as the top + // count bits from RxSBG.Input are ignored. + if (maskMatters(RxSBG, allOnes(Count) << (BitSize - Count))) + return false; + } else { + // Treat (srl X, count), mask) as (and (rotl X, size-count), ~0>>count), + // which is similar to SLL above. + if (!refineRxSBGMask(RxSBG, allOnes(BitSize - Count))) + return false; + } + + RxSBG.Rotate = (RxSBG.Rotate - Count) & 63; + RxSBG.Input = N.getOperand(0); + return true; + } + default: + return false; + } +} + +SDValue SystemZDAGToDAGISel::getUNDEF(SDLoc DL, EVT VT) const { + SDNode *N = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT); + return SDValue(N, 0); +} + +SDValue SystemZDAGToDAGISel::convertTo(SDLoc DL, EVT VT, SDValue N) const { + if (N.getValueType() == MVT::i32 && VT == MVT::i64) + return CurDAG->getTargetInsertSubreg(SystemZ::subreg_l32, + DL, VT, getUNDEF(DL, MVT::i64), N); + if (N.getValueType() == MVT::i64 && VT == MVT::i32) + return CurDAG->getTargetExtractSubreg(SystemZ::subreg_l32, DL, VT, N); + assert(N.getValueType() == VT && "Unexpected value types"); + return N; +} + +SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) { + SDLoc DL(N); + EVT VT = N->getValueType(0); + if (!VT.isInteger() || VT.getSizeInBits() > 64) + return nullptr; + RxSBGOperands RISBG(SystemZ::RISBG, SDValue(N, 0)); + unsigned Count = 0; + while (expandRxSBG(RISBG)) + if (RISBG.Input.getOpcode() != ISD::ANY_EXTEND) + Count += 1; + if (Count == 0) + return nullptr; + if (Count == 1) { + // Prefer to use normal shift instructions over RISBG, since they can handle + // all cases and are sometimes shorter. 
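+ // (For example, a 32-bit SRL is a 4-byte RS-format instruction, whereas
+ // RISBG is a 6-byte RIE-format instruction.)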
+ if (N->getOpcode() != ISD::AND) + return nullptr; + + // Prefer register extensions like LLC over RISBG. Also prefer to start + // out with normal ANDs if one instruction would be enough. We can convert + // these ANDs into an RISBG later if a three-address instruction is useful. + if (VT == MVT::i32 || + RISBG.Mask == 0xff || + RISBG.Mask == 0xffff || + SystemZ::isImmLF(~RISBG.Mask) || + SystemZ::isImmHF(~RISBG.Mask)) { + // Force the new mask into the DAG, since it may include known-one bits. + auto *MaskN = cast<ConstantSDNode>(N->getOperand(1).getNode()); + if (MaskN->getZExtValue() != RISBG.Mask) { + SDValue NewMask = CurDAG->getConstant(RISBG.Mask, DL, VT); + N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), NewMask); + return SelectCode(N); + } + return nullptr; + } + } + + // If the RISBG operands require no rotation and just masks the bottom + // 8/16 bits, attempt to convert this to a LLC zero extension. + if (RISBG.Rotate == 0 && (RISBG.Mask == 0xff || RISBG.Mask == 0xffff)) { + unsigned OpCode = (RISBG.Mask == 0xff ? SystemZ::LLGCR : SystemZ::LLGHR); + if (VT == MVT::i32) { + if (Subtarget->hasHighWord()) + OpCode = (RISBG.Mask == 0xff ? SystemZ::LLCRMux : SystemZ::LLHRMux); + else + OpCode = (RISBG.Mask == 0xff ? SystemZ::LLCR : SystemZ::LLHR); + } + + SDValue In = convertTo(DL, VT, RISBG.Input); + N = CurDAG->getMachineNode(OpCode, DL, VT, In); + return convertTo(DL, VT, SDValue(N, 0)).getNode(); + } + + unsigned Opcode = SystemZ::RISBG; + // Prefer RISBGN if available, since it does not clobber CC. + if (Subtarget->hasMiscellaneousExtensions()) + Opcode = SystemZ::RISBGN; + EVT OpcodeVT = MVT::i64; + if (VT == MVT::i32 && Subtarget->hasHighWord()) { + Opcode = SystemZ::RISBMux; + OpcodeVT = MVT::i32; + RISBG.Start &= 31; + RISBG.End &= 31; + } + SDValue Ops[5] = { + getUNDEF(DL, OpcodeVT), + convertTo(DL, OpcodeVT, RISBG.Input), + CurDAG->getTargetConstant(RISBG.Start, DL, MVT::i32), + CurDAG->getTargetConstant(RISBG.End | 128, DL, MVT::i32), + CurDAG->getTargetConstant(RISBG.Rotate, DL, MVT::i32) + }; + N = CurDAG->getMachineNode(Opcode, DL, OpcodeVT, Ops); + return convertTo(DL, VT, SDValue(N, 0)).getNode(); +} + +SDNode *SystemZDAGToDAGISel::tryRxSBG(SDNode *N, unsigned Opcode) { + SDLoc DL(N); + EVT VT = N->getValueType(0); + if (!VT.isInteger() || VT.getSizeInBits() > 64) + return nullptr; + // Try treating each operand of N as the second operand of the RxSBG + // and see which goes deepest. + RxSBGOperands RxSBG[] = { + RxSBGOperands(Opcode, N->getOperand(0)), + RxSBGOperands(Opcode, N->getOperand(1)) + }; + unsigned Count[] = { 0, 0 }; + for (unsigned I = 0; I < 2; ++I) + while (expandRxSBG(RxSBG[I])) + if (RxSBG[I].Input.getOpcode() != ISD::ANY_EXTEND) + Count[I] += 1; + + // Do nothing if neither operand is suitable. + if (Count[0] == 0 && Count[1] == 0) + return nullptr; + + // Pick the deepest second operand. + unsigned I = Count[0] > Count[1] ? 0 : 1; + SDValue Op0 = N->getOperand(I ^ 1); + + // Prefer IC for character insertions from memory. + if (Opcode == SystemZ::ROSBG && (RxSBG[I].Mask & 0xff) == 0) + if (auto *Load = dyn_cast<LoadSDNode>(Op0.getNode())) + if (Load->getMemoryVT() == MVT::i8) + return nullptr; + + // See whether we can avoid an AND in the first operand by converting + // ROSBG to RISBG. + if (Opcode == SystemZ::ROSBG && detectOrAndInsertion(Op0, RxSBG[I].Mask)) { + Opcode = SystemZ::RISBG; + // Prefer RISBGN if available, since it does not clobber CC. 
+ if (Subtarget->hasMiscellaneousExtensions()) + Opcode = SystemZ::RISBGN; + } + + SDValue Ops[5] = { + convertTo(DL, MVT::i64, Op0), + convertTo(DL, MVT::i64, RxSBG[I].Input), + CurDAG->getTargetConstant(RxSBG[I].Start, DL, MVT::i32), + CurDAG->getTargetConstant(RxSBG[I].End, DL, MVT::i32), + CurDAG->getTargetConstant(RxSBG[I].Rotate, DL, MVT::i32) + }; + N = CurDAG->getMachineNode(Opcode, DL, MVT::i64, Ops); + return convertTo(DL, VT, SDValue(N, 0)).getNode(); +} + +SDNode *SystemZDAGToDAGISel::splitLargeImmediate(unsigned Opcode, SDNode *Node, + SDValue Op0, uint64_t UpperVal, + uint64_t LowerVal) { + EVT VT = Node->getValueType(0); + SDLoc DL(Node); + SDValue Upper = CurDAG->getConstant(UpperVal, DL, VT); + if (Op0.getNode()) + Upper = CurDAG->getNode(Opcode, DL, VT, Op0, Upper); + Upper = SDValue(Select(Upper.getNode()), 0); + + SDValue Lower = CurDAG->getConstant(LowerVal, DL, VT); + SDValue Or = CurDAG->getNode(Opcode, DL, VT, Upper, Lower); + return Or.getNode(); +} + +SDNode *SystemZDAGToDAGISel::tryGather(SDNode *N, unsigned Opcode) { + SDValue ElemV = N->getOperand(2); + auto *ElemN = dyn_cast<ConstantSDNode>(ElemV); + if (!ElemN) + return 0; + + unsigned Elem = ElemN->getZExtValue(); + EVT VT = N->getValueType(0); + if (Elem >= VT.getVectorNumElements()) + return 0; + + auto *Load = dyn_cast<LoadSDNode>(N->getOperand(1)); + if (!Load || !Load->hasOneUse()) + return 0; + if (Load->getMemoryVT().getSizeInBits() != + Load->getValueType(0).getSizeInBits()) + return 0; + + SDValue Base, Disp, Index; + if (!selectBDVAddr12Only(Load->getBasePtr(), ElemV, Base, Disp, Index) || + Index.getValueType() != VT.changeVectorElementTypeToInteger()) + return 0; + + SDLoc DL(Load); + SDValue Ops[] = { + N->getOperand(0), Base, Disp, Index, + CurDAG->getTargetConstant(Elem, DL, MVT::i32), Load->getChain() + }; + SDNode *Res = CurDAG->getMachineNode(Opcode, DL, VT, MVT::Other, Ops); + ReplaceUses(SDValue(Load, 1), SDValue(Res, 1)); + return Res; +} + +SDNode *SystemZDAGToDAGISel::tryScatter(StoreSDNode *Store, unsigned Opcode) { + SDValue Value = Store->getValue(); + if (Value.getOpcode() != ISD::EXTRACT_VECTOR_ELT) + return 0; + if (Store->getMemoryVT().getSizeInBits() != + Value.getValueType().getSizeInBits()) + return 0; + + SDValue ElemV = Value.getOperand(1); + auto *ElemN = dyn_cast<ConstantSDNode>(ElemV); + if (!ElemN) + return 0; + + SDValue Vec = Value.getOperand(0); + EVT VT = Vec.getValueType(); + unsigned Elem = ElemN->getZExtValue(); + if (Elem >= VT.getVectorNumElements()) + return 0; + + SDValue Base, Disp, Index; + if (!selectBDVAddr12Only(Store->getBasePtr(), ElemV, Base, Disp, Index) || + Index.getValueType() != VT.changeVectorElementTypeToInteger()) + return 0; + + SDLoc DL(Store); + SDValue Ops[] = { + Vec, Base, Disp, Index, CurDAG->getTargetConstant(Elem, DL, MVT::i32), + Store->getChain() + }; + return CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops); +} + +bool SystemZDAGToDAGISel::canUseBlockOperation(StoreSDNode *Store, + LoadSDNode *Load) const { + // Check that the two memory operands have the same size. + if (Load->getMemoryVT() != Store->getMemoryVT()) + return false; + + // Volatility stops an access from being decomposed. + if (Load->isVolatile() || Store->isVolatile()) + return false; + + // There's no chance of overlap if the load is invariant. + if (Load->isInvariant()) + return true; + + // Otherwise we need to check whether there's an alias. 
+ const Value *V1 = Load->getMemOperand()->getValue(); + const Value *V2 = Store->getMemOperand()->getValue(); + if (!V1 || !V2) + return false; + + // Reject equality. + uint64_t Size = Load->getMemoryVT().getStoreSize(); + int64_t End1 = Load->getSrcValueOffset() + Size; + int64_t End2 = Store->getSrcValueOffset() + Size; + if (V1 == V2 && End1 == End2) + return false; + + return !AA->alias(MemoryLocation(V1, End1, Load->getAAInfo()), + MemoryLocation(V2, End2, Store->getAAInfo())); +} + +bool SystemZDAGToDAGISel::storeLoadCanUseMVC(SDNode *N) const { + auto *Store = cast<StoreSDNode>(N); + auto *Load = cast<LoadSDNode>(Store->getValue()); + + // Prefer not to use MVC if either address can use ... RELATIVE LONG + // instructions. + uint64_t Size = Load->getMemoryVT().getStoreSize(); + if (Size > 1 && Size <= 8) { + // Prefer LHRL, LRL and LGRL. + if (SystemZISD::isPCREL(Load->getBasePtr().getOpcode())) + return false; + // Prefer STHRL, STRL and STGRL. + if (SystemZISD::isPCREL(Store->getBasePtr().getOpcode())) + return false; + } + + return canUseBlockOperation(Store, Load); +} + +bool SystemZDAGToDAGISel::storeLoadCanUseBlockBinary(SDNode *N, + unsigned I) const { + auto *StoreA = cast<StoreSDNode>(N); + auto *LoadA = cast<LoadSDNode>(StoreA->getValue().getOperand(1 - I)); + auto *LoadB = cast<LoadSDNode>(StoreA->getValue().getOperand(I)); + return !LoadA->isVolatile() && canUseBlockOperation(StoreA, LoadB); +} + +SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { + // Dump information about the Node being selected + DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n"); + + // If we have a custom node, we already have selected! + if (Node->isMachineOpcode()) { + DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); + Node->setNodeId(-1); + return nullptr; + } + + unsigned Opcode = Node->getOpcode(); + SDNode *ResNode = nullptr; + switch (Opcode) { + case ISD::OR: + if (Node->getOperand(1).getOpcode() != ISD::Constant) + ResNode = tryRxSBG(Node, SystemZ::ROSBG); + goto or_xor; + + case ISD::XOR: + if (Node->getOperand(1).getOpcode() != ISD::Constant) + ResNode = tryRxSBG(Node, SystemZ::RXSBG); + // Fall through. + or_xor: + // If this is a 64-bit operation in which both 32-bit halves are nonzero, + // split the operation into two. + if (!ResNode && Node->getValueType(0) == MVT::i64) + if (auto *Op1 = dyn_cast<ConstantSDNode>(Node->getOperand(1))) { + uint64_t Val = Op1->getZExtValue(); + if (!SystemZ::isImmLF(Val) && !SystemZ::isImmHF(Val)) + Node = splitLargeImmediate(Opcode, Node, Node->getOperand(0), + Val - uint32_t(Val), uint32_t(Val)); + } + break; + + case ISD::AND: + if (Node->getOperand(1).getOpcode() != ISD::Constant) + ResNode = tryRxSBG(Node, SystemZ::RNSBG); + // Fall through. + case ISD::ROTL: + case ISD::SHL: + case ISD::SRL: + case ISD::ZERO_EXTEND: + if (!ResNode) + ResNode = tryRISBGZero(Node); + break; + + case ISD::Constant: + // If this is a 64-bit constant that is out of the range of LLILF, + // LLIHF and LGFI, split it into two 32-bit pieces. + if (Node->getValueType(0) == MVT::i64) { + uint64_t Val = cast<ConstantSDNode>(Node)->getZExtValue(); + if (!SystemZ::isImmLF(Val) && !SystemZ::isImmHF(Val) && !isInt<32>(Val)) + Node = splitLargeImmediate(ISD::OR, Node, SDValue(), + Val - uint32_t(Val), uint32_t(Val)); + } + break; + + case SystemZISD::SELECT_CCMASK: { + SDValue Op0 = Node->getOperand(0); + SDValue Op1 = Node->getOperand(1); + // Prefer to put any load first, so that it can be matched as a + // conditional load. 
+ if (Op1.getOpcode() == ISD::LOAD && Op0.getOpcode() != ISD::LOAD) { + SDValue CCValid = Node->getOperand(2); + SDValue CCMask = Node->getOperand(3); + uint64_t ConstCCValid = + cast<ConstantSDNode>(CCValid.getNode())->getZExtValue(); + uint64_t ConstCCMask = + cast<ConstantSDNode>(CCMask.getNode())->getZExtValue(); + // Invert the condition. + CCMask = CurDAG->getConstant(ConstCCValid ^ ConstCCMask, SDLoc(Node), + CCMask.getValueType()); + SDValue Op4 = Node->getOperand(4); + Node = CurDAG->UpdateNodeOperands(Node, Op1, Op0, CCValid, CCMask, Op4); + } + break; + } + + case ISD::INSERT_VECTOR_ELT: { + EVT VT = Node->getValueType(0); + unsigned ElemBitSize = VT.getVectorElementType().getSizeInBits(); + if (ElemBitSize == 32) + ResNode = tryGather(Node, SystemZ::VGEF); + else if (ElemBitSize == 64) + ResNode = tryGather(Node, SystemZ::VGEG); + break; + } + + case ISD::STORE: { + auto *Store = cast<StoreSDNode>(Node); + unsigned ElemBitSize = Store->getValue().getValueType().getSizeInBits(); + if (ElemBitSize == 32) + ResNode = tryScatter(Store, SystemZ::VSCEF); + else if (ElemBitSize == 64) + ResNode = tryScatter(Store, SystemZ::VSCEG); + break; + } + } + + // Select the default instruction + if (!ResNode) + ResNode = SelectCode(Node); + + DEBUG(errs() << "=> "; + if (ResNode == nullptr || ResNode == Node) + Node->dump(CurDAG); + else + ResNode->dump(CurDAG); + errs() << "\n"; + ); + return ResNode; +} + +bool SystemZDAGToDAGISel:: +SelectInlineAsmMemoryOperand(const SDValue &Op, + unsigned ConstraintID, + std::vector<SDValue> &OutOps) { + switch(ConstraintID) { + default: + llvm_unreachable("Unexpected asm memory constraint"); + case InlineAsm::Constraint_i: + case InlineAsm::Constraint_m: + case InlineAsm::Constraint_Q: + case InlineAsm::Constraint_R: + case InlineAsm::Constraint_S: + case InlineAsm::Constraint_T: + // Accept addresses with short displacements, which are compatible + // with Q, R, S and T. But keep the index operand for future expansion. + SDValue Base, Disp, Index; + if (selectBDXAddr(SystemZAddressingMode::FormBD, + SystemZAddressingMode::Disp12Only, + Op, Base, Disp, Index)) { + OutOps.push_back(Base); + OutOps.push_back(Disp); + OutOps.push_back(Index); + return false; + } + break; + } + return true; +} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp new file mode 100644 index 0000000..ee73267 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -0,0 +1,5929 @@ +//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SystemZTargetLowering class. 
+// +//===----------------------------------------------------------------------===// + +#include "SystemZISelLowering.h" +#include "SystemZCallingConv.h" +#include "SystemZConstantPoolValue.h" +#include "SystemZMachineFunctionInfo.h" +#include "SystemZTargetMachine.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/IR/Intrinsics.h" +#include <cctype> + +using namespace llvm; + +#define DEBUG_TYPE "systemz-lower" + +namespace { +// Represents a sequence for extracting a 0/1 value from an IPM result: +// (((X ^ XORValue) + AddValue) >> Bit) +struct IPMConversion { + IPMConversion(unsigned xorValue, int64_t addValue, unsigned bit) + : XORValue(xorValue), AddValue(addValue), Bit(bit) {} + + int64_t XORValue; + int64_t AddValue; + unsigned Bit; +}; + +// Represents information about a comparison. +struct Comparison { + Comparison(SDValue Op0In, SDValue Op1In) + : Op0(Op0In), Op1(Op1In), Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {} + + // The operands to the comparison. + SDValue Op0, Op1; + + // The opcode that should be used to compare Op0 and Op1. + unsigned Opcode; + + // A SystemZICMP value. Only used for integer comparisons. + unsigned ICmpType; + + // The mask of CC values that Opcode can produce. + unsigned CCValid; + + // The mask of CC values for which the original condition is true. + unsigned CCMask; +}; +} // end anonymous namespace + +// Classify VT as either 32 or 64 bit. +static bool is32Bit(EVT VT) { + switch (VT.getSimpleVT().SimpleTy) { + case MVT::i32: + return true; + case MVT::i64: + return false; + default: + llvm_unreachable("Unsupported type"); + } +} + +// Return a version of MachineOperand that can be safely used before the +// final use. +static MachineOperand earlyUseOperand(MachineOperand Op) { + if (Op.isReg()) + Op.setIsKill(false); + return Op; +} + +SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, + const SystemZSubtarget &STI) + : TargetLowering(TM), Subtarget(STI) { + MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize()); + + // Set up the register classes. + if (Subtarget.hasHighWord()) + addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass); + else + addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass); + addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass); + if (Subtarget.hasVector()) { + addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass); + addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass); + } else { + addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass); + addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass); + } + addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass); + + if (Subtarget.hasVector()) { + addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass); + addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass); + addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass); + addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass); + addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass); + addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass); + } + + // Compute derived properties from the register classes + computeRegisterProperties(Subtarget.getRegisterInfo()); + + // Set up special registers. 
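+ // The s390x ELF ABI fixes %r15 as the stack pointer and %r14 as the
+ // return-address register.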
+ setStackPointerRegisterToSaveRestore(SystemZ::R15D); + + // TODO: It may be better to default to latency-oriented scheduling, however + // LLVM's current latency-oriented scheduler can't handle physreg definitions + // such as SystemZ has with CC, so set this to the register-pressure + // scheduler, because it can. + setSchedulingPreference(Sched::RegPressure); + + setBooleanContents(ZeroOrOneBooleanContent); + setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); + + // Instructions are strings of 2-byte aligned 2-byte values. + setMinFunctionAlignment(2); + + // Handle operations that are handled in a similar way for all types. + for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE; + I <= MVT::LAST_FP_VALUETYPE; + ++I) { + MVT VT = MVT::SimpleValueType(I); + if (isTypeLegal(VT)) { + // Lower SET_CC into an IPM-based sequence. + setOperationAction(ISD::SETCC, VT, Custom); + + // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE). + setOperationAction(ISD::SELECT, VT, Expand); + + // Lower SELECT_CC and BR_CC into separate comparisons and branches. + setOperationAction(ISD::SELECT_CC, VT, Custom); + setOperationAction(ISD::BR_CC, VT, Custom); + } + } + + // Expand jump table branches as address arithmetic followed by an + // indirect jump. + setOperationAction(ISD::BR_JT, MVT::Other, Expand); + + // Expand BRCOND into a BR_CC (see above). + setOperationAction(ISD::BRCOND, MVT::Other, Expand); + + // Handle integer types. + for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE; + I <= MVT::LAST_INTEGER_VALUETYPE; + ++I) { + MVT VT = MVT::SimpleValueType(I); + if (isTypeLegal(VT)) { + // Expand individual DIV and REMs into DIVREMs. + setOperationAction(ISD::SDIV, VT, Expand); + setOperationAction(ISD::UDIV, VT, Expand); + setOperationAction(ISD::SREM, VT, Expand); + setOperationAction(ISD::UREM, VT, Expand); + setOperationAction(ISD::SDIVREM, VT, Custom); + setOperationAction(ISD::UDIVREM, VT, Custom); + + // Lower ATOMIC_LOAD and ATOMIC_STORE into normal volatile loads and + // stores, putting a serialization instruction after the stores. + setOperationAction(ISD::ATOMIC_LOAD, VT, Custom); + setOperationAction(ISD::ATOMIC_STORE, VT, Custom); + + // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are + // available, or if the operand is constant. + setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom); + + // Use POPCNT on z196 and above. + if (Subtarget.hasPopulationCount()) + setOperationAction(ISD::CTPOP, VT, Custom); + else + setOperationAction(ISD::CTPOP, VT, Expand); + + // No special instructions for these. + setOperationAction(ISD::CTTZ, VT, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand); + setOperationAction(ISD::ROTR, VT, Expand); + + // Use *MUL_LOHI where possible instead of MULH*. + setOperationAction(ISD::MULHS, VT, Expand); + setOperationAction(ISD::MULHU, VT, Expand); + setOperationAction(ISD::SMUL_LOHI, VT, Custom); + setOperationAction(ISD::UMUL_LOHI, VT, Custom); + + // Only z196 and above have native support for conversions to unsigned. + if (!Subtarget.hasFPExtension()) + setOperationAction(ISD::FP_TO_UINT, VT, Expand); + } + } + + // Type legalization will convert 8- and 16-bit atomic operations into + // forms that operate on i32s (but still keeping the original memory VT). + // Lower them into full i32 operations. 
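+ // The custom lowering implements the sub-word forms with a
+ // compare-and-swap (CS) loop on the containing aligned 32-bit word.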
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom); + + // z10 has instructions for signed but not unsigned FP conversion. + // Handle unsigned 32-bit types as signed 64-bit types. + if (!Subtarget.hasFPExtension()) { + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); + setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); + } + + // We have native support for a 64-bit CTLZ, via FLOGR. + setOperationAction(ISD::CTLZ, MVT::i32, Promote); + setOperationAction(ISD::CTLZ, MVT::i64, Legal); + + // Give LowerOperation the chance to replace 64-bit ORs with subregs. + setOperationAction(ISD::OR, MVT::i64, Custom); + + // FIXME: Can we support these natively? + setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand); + setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand); + setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand); + + // We have native instructions for i8, i16 and i32 extensions, but not i1. + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + for (MVT VT : MVT::integer_valuetypes()) { + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote); + } + + // Handle the various types of symbolic address. + setOperationAction(ISD::ConstantPool, PtrVT, Custom); + setOperationAction(ISD::GlobalAddress, PtrVT, Custom); + setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom); + setOperationAction(ISD::BlockAddress, PtrVT, Custom); + setOperationAction(ISD::JumpTable, PtrVT, Custom); + + // We need to handle dynamic allocations specially because of the + // 160-byte area at the bottom of the stack. + setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom); + + // Use custom expanders so that we can force the function to use + // a frame pointer. + setOperationAction(ISD::STACKSAVE, MVT::Other, Custom); + setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom); + + // Handle prefetches with PFD or PFDRL. + setOperationAction(ISD::PREFETCH, MVT::Other, Custom); + + for (MVT VT : MVT::vector_valuetypes()) { + // Assume by default that all vector operations need to be expanded. + for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode) + if (getOperationAction(Opcode, VT) == Legal) + setOperationAction(Opcode, VT, Expand); + + // Likewise all truncating stores and extending loads. + for (MVT InnerVT : MVT::vector_valuetypes()) { + setTruncStoreAction(VT, InnerVT, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); + } + + if (isTypeLegal(VT)) { + // These operations are legal for anything that can be stored in a + // vector register, even if there is no native support for the format + // as such. 
In particular, we can do these for v4f32 even though there + // are no specific instructions for that format. + setOperationAction(ISD::LOAD, VT, Legal); + setOperationAction(ISD::STORE, VT, Legal); + setOperationAction(ISD::VSELECT, VT, Legal); + setOperationAction(ISD::BITCAST, VT, Legal); + setOperationAction(ISD::UNDEF, VT, Legal); + + // Likewise, except that we need to replace the nodes with something + // more specific. + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); + } + } + + // Handle integer vector types. + for (MVT VT : MVT::integer_vector_valuetypes()) { + if (isTypeLegal(VT)) { + // These operations have direct equivalents. + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); + setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal); + setOperationAction(ISD::ADD, VT, Legal); + setOperationAction(ISD::SUB, VT, Legal); + if (VT != MVT::v2i64) + setOperationAction(ISD::MUL, VT, Legal); + setOperationAction(ISD::AND, VT, Legal); + setOperationAction(ISD::OR, VT, Legal); + setOperationAction(ISD::XOR, VT, Legal); + setOperationAction(ISD::CTPOP, VT, Custom); + setOperationAction(ISD::CTTZ, VT, Legal); + setOperationAction(ISD::CTLZ, VT, Legal); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom); + + // Convert a GPR scalar to a vector by inserting it into element 0. + setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); + + // Use a series of unpacks for extensions. + setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom); + setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom); + + // Detect shifts by a scalar amount and convert them into + // V*_BY_SCALAR. + setOperationAction(ISD::SHL, VT, Custom); + setOperationAction(ISD::SRA, VT, Custom); + setOperationAction(ISD::SRL, VT, Custom); + + // At present ROTL isn't matched by DAGCombiner. ROTR should be + // converted into ROTL. + setOperationAction(ISD::ROTL, VT, Expand); + setOperationAction(ISD::ROTR, VT, Expand); + + // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands + // and inverting the result as necessary. + setOperationAction(ISD::SETCC, VT, Custom); + } + } + + if (Subtarget.hasVector()) { + // There should be no need to check for float types other than v2f64 + // since <2 x f32> isn't a legal type. + setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal); + } + + // Handle floating-point types. + for (unsigned I = MVT::FIRST_FP_VALUETYPE; + I <= MVT::LAST_FP_VALUETYPE; + ++I) { + MVT VT = MVT::SimpleValueType(I); + if (isTypeLegal(VT)) { + // We can use FI for FRINT. + setOperationAction(ISD::FRINT, VT, Legal); + + // We can use the extended form of FI for other rounding operations. + if (Subtarget.hasFPExtension()) { + setOperationAction(ISD::FNEARBYINT, VT, Legal); + setOperationAction(ISD::FFLOOR, VT, Legal); + setOperationAction(ISD::FCEIL, VT, Legal); + setOperationAction(ISD::FTRUNC, VT, Legal); + setOperationAction(ISD::FROUND, VT, Legal); + } + + // No special instructions for these. + setOperationAction(ISD::FSIN, VT, Expand); + setOperationAction(ISD::FCOS, VT, Expand); + setOperationAction(ISD::FSINCOS, VT, Expand); + setOperationAction(ISD::FREM, VT, Expand); + setOperationAction(ISD::FPOW, VT, Expand); + } + } + + // Handle floating-point vector types. 
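+ // Only the 64-bit element (v2f64) operations are marked Legal below;
+ // <4 x float> arithmetic falls back to being expanded element by element.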
+ if (Subtarget.hasVector()) { + // Scalar-to-vector conversion is just a subreg. + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal); + + // Some insertions and extractions can be done directly but others + // need to go via integers. + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom); + + // These operations have direct equivalents. + setOperationAction(ISD::FADD, MVT::v2f64, Legal); + setOperationAction(ISD::FNEG, MVT::v2f64, Legal); + setOperationAction(ISD::FSUB, MVT::v2f64, Legal); + setOperationAction(ISD::FMUL, MVT::v2f64, Legal); + setOperationAction(ISD::FMA, MVT::v2f64, Legal); + setOperationAction(ISD::FDIV, MVT::v2f64, Legal); + setOperationAction(ISD::FABS, MVT::v2f64, Legal); + setOperationAction(ISD::FSQRT, MVT::v2f64, Legal); + setOperationAction(ISD::FRINT, MVT::v2f64, Legal); + setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal); + setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal); + setOperationAction(ISD::FCEIL, MVT::v2f64, Legal); + setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal); + setOperationAction(ISD::FROUND, MVT::v2f64, Legal); + } + + // We have fused multiply-addition for f32 and f64 but not f128. + setOperationAction(ISD::FMA, MVT::f32, Legal); + setOperationAction(ISD::FMA, MVT::f64, Legal); + setOperationAction(ISD::FMA, MVT::f128, Expand); + + // Needed so that we don't try to implement f128 constant loads using + // a load-and-extend of a f80 constant (in cases where the constant + // would fit in an f80). + for (MVT VT : MVT::fp_valuetypes()) + setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand); + + // Floating-point truncation and stores need to be done separately. + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + setTruncStoreAction(MVT::f128, MVT::f32, Expand); + setTruncStoreAction(MVT::f128, MVT::f64, Expand); + + // We have 64-bit FPR<->GPR moves, but need special handling for + // 32-bit forms. + if (!Subtarget.hasVector()) { + setOperationAction(ISD::BITCAST, MVT::i32, Custom); + setOperationAction(ISD::BITCAST, MVT::f32, Custom); + } + + // VASTART and VACOPY need to deal with the SystemZ-specific varargs + // structure, but VAEND is a no-op. + setOperationAction(ISD::VASTART, MVT::Other, Custom); + setOperationAction(ISD::VACOPY, MVT::Other, Custom); + setOperationAction(ISD::VAEND, MVT::Other, Expand); + + // Codes for which we want to perform some z-specific combinations. + setTargetDAGCombine(ISD::SIGN_EXTEND); + setTargetDAGCombine(ISD::STORE); + setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); + setTargetDAGCombine(ISD::FP_ROUND); + + // Handle intrinsics. + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + + // We want to use MVC in preference to even a single load/store pair. + MaxStoresPerMemcpy = 0; + MaxStoresPerMemcpyOptSize = 0; + + // The main memset sequence is a byte store followed by an MVC. + // Two STC or MV..I stores win over that, but the kind of fused stores + // generated by target-independent code don't when the byte value is + // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better + // than "STC;MVC". Handle the choice in target-specific code instead. 
+ MaxStoresPerMemset = 0; + MaxStoresPerMemsetOptSize = 0; +} + +EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL, + LLVMContext &, EVT VT) const { + if (!VT.isVector()) + return MVT::i32; + return VT.changeVectorElementTypeToInteger(); +} + +bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { + VT = VT.getScalarType(); + + if (!VT.isSimple()) + return false; + + switch (VT.getSimpleVT().SimpleTy) { + case MVT::f32: + case MVT::f64: + return true; + case MVT::f128: + return false; + default: + break; + } + + return false; +} + +bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { + // We can load zero using LZ?R and negative zero using LZ?R;LC?BR. + return Imm.isZero() || Imm.isNegZero(); +} + +bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const { + // We can use CGFI or CLGFI. + return isInt<32>(Imm) || isUInt<32>(Imm); +} + +bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const { + // We can use ALGFI or SLGFI. + return isUInt<32>(Imm) || isUInt<32>(-Imm); +} + +bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, + unsigned, + unsigned, + bool *Fast) const { + // Unaligned accesses should never be slower than the expanded version. + // We check specifically for aligned accesses in the few cases where + // they are required. + if (Fast) + *Fast = true; + return true; +} + +bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL, + const AddrMode &AM, Type *Ty, + unsigned AS) const { + // Punt on globals for now, although they can be used in limited + // RELATIVE LONG cases. + if (AM.BaseGV) + return false; + + // Require a 20-bit signed offset. + if (!isInt<20>(AM.BaseOffs)) + return false; + + // Indexing is OK but no scale factor can be applied. + return AM.Scale == 0 || AM.Scale == 1; +} + +bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const { + if (!FromType->isIntegerTy() || !ToType->isIntegerTy()) + return false; + unsigned FromBits = FromType->getPrimitiveSizeInBits(); + unsigned ToBits = ToType->getPrimitiveSizeInBits(); + return FromBits > ToBits; +} + +bool SystemZTargetLowering::isTruncateFree(EVT FromVT, EVT ToVT) const { + if (!FromVT.isInteger() || !ToVT.isInteger()) + return false; + unsigned FromBits = FromVT.getSizeInBits(); + unsigned ToBits = ToVT.getSizeInBits(); + return FromBits > ToBits; +} + +//===----------------------------------------------------------------------===// +// Inline asm support +//===----------------------------------------------------------------------===// + +TargetLowering::ConstraintType +SystemZTargetLowering::getConstraintType(StringRef Constraint) const { + if (Constraint.size() == 1) { + switch (Constraint[0]) { + case 'a': // Address register + case 'd': // Data register (equivalent to 'r') + case 'f': // Floating-point register + case 'h': // High-part register + case 'r': // General-purpose register + return C_RegisterClass; + + case 'Q': // Memory with base and unsigned 12-bit displacement + case 'R': // Likewise, plus an index + case 'S': // Memory with base and signed 20-bit displacement + case 'T': // Likewise, plus an index + case 'm': // Equivalent to 'T'. 
+ return C_Memory; + + case 'I': // Unsigned 8-bit constant + case 'J': // Unsigned 12-bit constant + case 'K': // Signed 16-bit constant + case 'L': // Signed 20-bit displacement (on all targets we support) + case 'M': // 0x7fffffff + return C_Other; + + default: + break; + } + } + return TargetLowering::getConstraintType(Constraint); +} + +TargetLowering::ConstraintWeight SystemZTargetLowering:: +getSingleConstraintMatchWeight(AsmOperandInfo &info, + const char *constraint) const { + ConstraintWeight weight = CW_Invalid; + Value *CallOperandVal = info.CallOperandVal; + // If we don't have a value, we can't do a match, + // but allow it at the lowest weight. + if (!CallOperandVal) + return CW_Default; + Type *type = CallOperandVal->getType(); + // Look at the constraint type. + switch (*constraint) { + default: + weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); + break; + + case 'a': // Address register + case 'd': // Data register (equivalent to 'r') + case 'h': // High-part register + case 'r': // General-purpose register + if (CallOperandVal->getType()->isIntegerTy()) + weight = CW_Register; + break; + + case 'f': // Floating-point register + if (type->isFloatingPointTy()) + weight = CW_Register; + break; + + case 'I': // Unsigned 8-bit constant + if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) + if (isUInt<8>(C->getZExtValue())) + weight = CW_Constant; + break; + + case 'J': // Unsigned 12-bit constant + if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) + if (isUInt<12>(C->getZExtValue())) + weight = CW_Constant; + break; + + case 'K': // Signed 16-bit constant + if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) + if (isInt<16>(C->getSExtValue())) + weight = CW_Constant; + break; + + case 'L': // Signed 20-bit displacement (on all targets we support) + if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) + if (isInt<20>(C->getSExtValue())) + weight = CW_Constant; + break; + + case 'M': // 0x7fffffff + if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) + if (C->getZExtValue() == 0x7fffffff) + weight = CW_Constant; + break; + } + return weight; +} + +// Parse a "{tNNN}" register constraint for which the register type "t" +// has already been verified. MC is the class associated with "t" and +// Map maps 0-based register numbers to LLVM register numbers. 
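+// For example, "{r7}" with a 64-bit operand is looked up in
+// SystemZMC::GR64Regs and yields SystemZ::R7D.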
+static std::pair<unsigned, const TargetRegisterClass *> +parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC, + const unsigned *Map) { + assert(*(Constraint.end()-1) == '}' && "Missing '}'"); + if (isdigit(Constraint[2])) { + unsigned Index; + bool Failed = + Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index); + if (!Failed && Index < 16 && Map[Index]) + return std::make_pair(Map[Index], RC); + } + return std::make_pair(0U, nullptr); +} + +std::pair<unsigned, const TargetRegisterClass *> +SystemZTargetLowering::getRegForInlineAsmConstraint( + const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { + if (Constraint.size() == 1) { + // GCC Constraint Letters + switch (Constraint[0]) { + default: break; + case 'd': // Data register (equivalent to 'r') + case 'r': // General-purpose register + if (VT == MVT::i64) + return std::make_pair(0U, &SystemZ::GR64BitRegClass); + else if (VT == MVT::i128) + return std::make_pair(0U, &SystemZ::GR128BitRegClass); + return std::make_pair(0U, &SystemZ::GR32BitRegClass); + + case 'a': // Address register + if (VT == MVT::i64) + return std::make_pair(0U, &SystemZ::ADDR64BitRegClass); + else if (VT == MVT::i128) + return std::make_pair(0U, &SystemZ::ADDR128BitRegClass); + return std::make_pair(0U, &SystemZ::ADDR32BitRegClass); + + case 'h': // High-part register (an LLVM extension) + return std::make_pair(0U, &SystemZ::GRH32BitRegClass); + + case 'f': // Floating-point register + if (VT == MVT::f64) + return std::make_pair(0U, &SystemZ::FP64BitRegClass); + else if (VT == MVT::f128) + return std::make_pair(0U, &SystemZ::FP128BitRegClass); + return std::make_pair(0U, &SystemZ::FP32BitRegClass); + } + } + if (Constraint.size() > 0 && Constraint[0] == '{') { + // We need to override the default register parsing for GPRs and FPRs + // because the interpretation depends on VT. The internal names of + // the registers are also different from the external names + // (F0D and F0S instead of F0, etc.). + if (Constraint[1] == 'r') { + if (VT == MVT::i32) + return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass, + SystemZMC::GR32Regs); + if (VT == MVT::i128) + return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass, + SystemZMC::GR128Regs); + return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass, + SystemZMC::GR64Regs); + } + if (Constraint[1] == 'f') { + if (VT == MVT::f32) + return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass, + SystemZMC::FP32Regs); + if (VT == MVT::f128) + return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass, + SystemZMC::FP128Regs); + return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass, + SystemZMC::FP64Regs); + } + } + return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); +} + +void SystemZTargetLowering:: +LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, + std::vector<SDValue> &Ops, + SelectionDAG &DAG) const { + // Only support length 1 constraints for now. 
+ if (Constraint.length() == 1) { + switch (Constraint[0]) { + case 'I': // Unsigned 8-bit constant + if (auto *C = dyn_cast<ConstantSDNode>(Op)) + if (isUInt<8>(C->getZExtValue())) + Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), + Op.getValueType())); + return; + + case 'J': // Unsigned 12-bit constant + if (auto *C = dyn_cast<ConstantSDNode>(Op)) + if (isUInt<12>(C->getZExtValue())) + Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), + Op.getValueType())); + return; + + case 'K': // Signed 16-bit constant + if (auto *C = dyn_cast<ConstantSDNode>(Op)) + if (isInt<16>(C->getSExtValue())) + Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op), + Op.getValueType())); + return; + + case 'L': // Signed 20-bit displacement (on all targets we support) + if (auto *C = dyn_cast<ConstantSDNode>(Op)) + if (isInt<20>(C->getSExtValue())) + Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op), + Op.getValueType())); + return; + + case 'M': // 0x7fffffff + if (auto *C = dyn_cast<ConstantSDNode>(Op)) + if (C->getZExtValue() == 0x7fffffff) + Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), + Op.getValueType())); + return; + } + } + TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); +} + +//===----------------------------------------------------------------------===// +// Calling conventions +//===----------------------------------------------------------------------===// + +#include "SystemZGenCallingConv.inc" + +bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType, + Type *ToType) const { + return isTruncateFree(FromType, ToType); +} + +bool SystemZTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { + return CI->isTailCall(); +} + +// We do not yet support 128-bit single-element vector types. If the user +// attempts to use such types as function argument or return type, prefer +// to error out instead of emitting code violating the ABI. +static void VerifyVectorType(MVT VT, EVT ArgVT) { + if (ArgVT.isVector() && !VT.isVector()) + report_fatal_error("Unsupported vector argument or return type"); +} + +static void VerifyVectorTypes(const SmallVectorImpl<ISD::InputArg> &Ins) { + for (unsigned i = 0; i < Ins.size(); ++i) + VerifyVectorType(Ins[i].VT, Ins[i].ArgVT); +} + +static void VerifyVectorTypes(const SmallVectorImpl<ISD::OutputArg> &Outs) { + for (unsigned i = 0; i < Outs.size(); ++i) + VerifyVectorType(Outs[i].VT, Outs[i].ArgVT); +} + +// Value is a value that has been passed to us in the location described by VA +// (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining +// any loads onto Chain. +static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDLoc DL, + CCValAssign &VA, SDValue Chain, + SDValue Value) { + // If the argument has been promoted from a smaller type, insert an + // assertion to capture this. 
+ if (VA.getLocInfo() == CCValAssign::SExt) + Value = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Value, + DAG.getValueType(VA.getValVT())); + else if (VA.getLocInfo() == CCValAssign::ZExt) + Value = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Value, + DAG.getValueType(VA.getValVT())); + + if (VA.isExtInLoc()) + Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value); + else if (VA.getLocInfo() == CCValAssign::Indirect) + Value = DAG.getLoad(VA.getValVT(), DL, Chain, Value, + MachinePointerInfo(), false, false, false, 0); + else if (VA.getLocInfo() == CCValAssign::BCvt) { + // If this is a short vector argument loaded from the stack, + // extend from i64 to full vector size and then bitcast. + assert(VA.getLocVT() == MVT::i64); + assert(VA.getValVT().isVector()); + Value = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2i64, + Value, DAG.getUNDEF(MVT::i64)); + Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value); + } else + assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo"); + return Value; +} + +// Value is a value of type VA.getValVT() that we need to copy into +// the location described by VA. Return a copy of Value converted to +// VA.getValVT(). The caller is responsible for handling indirect values. +static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDLoc DL, + CCValAssign &VA, SDValue Value) { + switch (VA.getLocInfo()) { + case CCValAssign::SExt: + return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value); + case CCValAssign::ZExt: + return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value); + case CCValAssign::AExt: + return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value); + case CCValAssign::BCvt: + // If this is a short vector argument to be stored to the stack, + // bitcast to v2i64 and then extract first element. + assert(VA.getLocVT() == MVT::i64); + assert(VA.getValVT().isVector()); + Value = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Value); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value, + DAG.getConstant(0, DL, MVT::i32)); + case CCValAssign::Full: + return Value; + default: + llvm_unreachable("Unhandled getLocInfo()"); + } +} + +SDValue SystemZTargetLowering:: +LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + SDLoc DL, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const { + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + SystemZMachineFunctionInfo *FuncInfo = + MF.getInfo<SystemZMachineFunctionInfo>(); + auto *TFL = + static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering()); + + // Detect unsupported vector argument types. + if (Subtarget.hasVector()) + VerifyVectorTypes(Ins); + + // Assign locations to all of the incoming arguments. + SmallVector<CCValAssign, 16> ArgLocs; + SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); + CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ); + + unsigned NumFixedGPRs = 0; + unsigned NumFixedFPRs = 0; + for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { + SDValue ArgValue; + CCValAssign &VA = ArgLocs[I]; + EVT LocVT = VA.getLocVT(); + if (VA.isRegLoc()) { + // Arguments passed in registers + const TargetRegisterClass *RC; + switch (LocVT.getSimpleVT().SimpleTy) { + default: + // Integers smaller than i64 should be promoted to i64. 
+ llvm_unreachable("Unexpected argument type"); + case MVT::i32: + NumFixedGPRs += 1; + RC = &SystemZ::GR32BitRegClass; + break; + case MVT::i64: + NumFixedGPRs += 1; + RC = &SystemZ::GR64BitRegClass; + break; + case MVT::f32: + NumFixedFPRs += 1; + RC = &SystemZ::FP32BitRegClass; + break; + case MVT::f64: + NumFixedFPRs += 1; + RC = &SystemZ::FP64BitRegClass; + break; + case MVT::v16i8: + case MVT::v8i16: + case MVT::v4i32: + case MVT::v2i64: + case MVT::v4f32: + case MVT::v2f64: + RC = &SystemZ::VR128BitRegClass; + break; + } + + unsigned VReg = MRI.createVirtualRegister(RC); + MRI.addLiveIn(VA.getLocReg(), VReg); + ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); + } else { + assert(VA.isMemLoc() && "Argument not register or memory"); + + // Create the frame index object for this incoming parameter. + int FI = MFI->CreateFixedObject(LocVT.getSizeInBits() / 8, + VA.getLocMemOffset(), true); + + // Create the SelectionDAG nodes corresponding to a load + // from this parameter. Unpromoted ints and floats are + // passed as right-justified 8-byte values. + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + SDValue FIN = DAG.getFrameIndex(FI, PtrVT); + if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32) + FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, + DAG.getIntPtrConstant(4, DL)); + ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN, + MachinePointerInfo::getFixedStack(MF, FI), false, + false, false, 0); + } + + // Convert the value of the argument register into the value that's + // being passed. + InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue)); + } + + if (IsVarArg) { + // Save the number of non-varargs registers for later use by va_start, etc. + FuncInfo->setVarArgsFirstGPR(NumFixedGPRs); + FuncInfo->setVarArgsFirstFPR(NumFixedFPRs); + + // Likewise the address (in the form of a frame index) of where the + // first stack vararg would be. The 1-byte size here is arbitrary. + int64_t StackSize = CCInfo.getNextStackOffset(); + FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize, true)); + + // ...and a similar frame index for the caller-allocated save area + // that will be used to store the incoming registers. + int64_t RegSaveOffset = TFL->getOffsetOfLocalArea(); + unsigned RegSaveIndex = MFI->CreateFixedObject(1, RegSaveOffset, true); + FuncInfo->setRegSaveFrameIndex(RegSaveIndex); + + // Store the FPR varargs in the reserved frame slots. (We store the + // GPRs as part of the prologue.) + if (NumFixedFPRs < SystemZ::NumArgFPRs) { + SDValue MemOps[SystemZ::NumArgFPRs]; + for (unsigned I = NumFixedFPRs; I < SystemZ::NumArgFPRs; ++I) { + unsigned Offset = TFL->getRegSpillOffset(SystemZ::ArgFPRs[I]); + int FI = MFI->CreateFixedObject(8, RegSaveOffset + Offset, true); + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); + unsigned VReg = MF.addLiveIn(SystemZ::ArgFPRs[I], + &SystemZ::FP64BitRegClass); + SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64); + MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN, + MachinePointerInfo::getFixedStack(MF, FI), + false, false, 0); + } + // Join the stores, which are independent of one another. 
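+ // (A TokenFactor node merges the store chains without imposing any
+ // ordering between them, which is all that independent stores need.)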
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, + makeArrayRef(&MemOps[NumFixedFPRs], + SystemZ::NumArgFPRs-NumFixedFPRs)); + } + } + + return Chain; +} + +static bool canUseSiblingCall(const CCState &ArgCCInfo, + SmallVectorImpl<CCValAssign> &ArgLocs) { + // Punt if there are any indirect or stack arguments, or if the call + // needs the call-saved argument register R6. + for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { + CCValAssign &VA = ArgLocs[I]; + if (VA.getLocInfo() == CCValAssign::Indirect) + return false; + if (!VA.isRegLoc()) + return false; + unsigned Reg = VA.getLocReg(); + if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D) + return false; + } + return true; +} + +SDValue +SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const { + SelectionDAG &DAG = CLI.DAG; + SDLoc &DL = CLI.DL; + SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; + SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; + SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; + SDValue Chain = CLI.Chain; + SDValue Callee = CLI.Callee; + bool &IsTailCall = CLI.IsTailCall; + CallingConv::ID CallConv = CLI.CallConv; + bool IsVarArg = CLI.IsVarArg; + MachineFunction &MF = DAG.getMachineFunction(); + EVT PtrVT = getPointerTy(MF.getDataLayout()); + + // Detect unsupported vector argument and return types. + if (Subtarget.hasVector()) { + VerifyVectorTypes(Outs); + VerifyVectorTypes(Ins); + } + + // Analyze the operands of the call, assigning locations to each operand. + SmallVector<CCValAssign, 16> ArgLocs; + SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); + ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ); + + // We don't support GuaranteedTailCallOpt, only automatically-detected + // sibling calls. + if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs)) + IsTailCall = false; + + // Get a count of how many bytes are to be pushed on the stack. + unsigned NumBytes = ArgCCInfo.getNextStackOffset(); + + // Mark the start of the call. + if (!IsTailCall) + Chain = DAG.getCALLSEQ_START(Chain, + DAG.getConstant(NumBytes, DL, PtrVT, true), + DL); + + // Copy argument values to their designated locations. + SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass; + SmallVector<SDValue, 8> MemOpChains; + SDValue StackPtr; + for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { + CCValAssign &VA = ArgLocs[I]; + SDValue ArgValue = OutVals[I]; + + if (VA.getLocInfo() == CCValAssign::Indirect) { + // Store the argument in a stack slot and pass its address. + SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT()); + int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); + MemOpChains.push_back(DAG.getStore( + Chain, DL, ArgValue, SpillSlot, + MachinePointerInfo::getFixedStack(MF, FI), false, false, 0)); + ArgValue = SpillSlot; + } else + ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue); + + if (VA.isRegLoc()) + // Queue up the argument copies and emit them at the end. + RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue)); + else { + assert(VA.isMemLoc() && "Argument not register or memory"); + + // Work out the address of the stack slot. Unpromoted ints and + // floats are passed as right-justified 8-byte values. 
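+ // The slot address is formed from the caller's stack pointer (%r15), the
+ // fixed call frame size and the offset assigned by the calling convention;
+ // 4-byte values then go in the high-address half of their 8-byte slot.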
+ if (!StackPtr.getNode()) + StackPtr = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, PtrVT); + unsigned Offset = SystemZMC::CallFrameSize + VA.getLocMemOffset(); + if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32) + Offset += 4; + SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, + DAG.getIntPtrConstant(Offset, DL)); + + // Emit the store. + MemOpChains.push_back(DAG.getStore(Chain, DL, ArgValue, Address, + MachinePointerInfo(), + false, false, 0)); + } + } + + // Join the stores, which are independent of one another. + if (!MemOpChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); + + // Accept direct calls by converting symbolic call addresses to the + // associated Target* opcodes. Force %r1 to be used for indirect + // tail calls. + SDValue Glue; + if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) { + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT); + Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee); + } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) { + Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT); + Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee); + } else if (IsTailCall) { + Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue); + Glue = Chain.getValue(1); + Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType()); + } + + // Build a sequence of copy-to-reg nodes, chained and glued together. + for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) { + Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first, + RegsToPass[I].second, Glue); + Glue = Chain.getValue(1); + } + + // The first call operand is the chain and the second is the target address. + SmallVector<SDValue, 8> Ops; + Ops.push_back(Chain); + Ops.push_back(Callee); + + // Add argument registers to the end of the list so that they are + // known live into the call. + for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) + Ops.push_back(DAG.getRegister(RegsToPass[I].first, + RegsToPass[I].second.getValueType())); + + // Add a register mask operand representing the call-preserved registers. + const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); + const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); + assert(Mask && "Missing call preserved mask for calling convention"); + Ops.push_back(DAG.getRegisterMask(Mask)); + + // Glue the call to the argument copies, if any. + if (Glue.getNode()) + Ops.push_back(Glue); + + // Emit the call. + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + if (IsTailCall) + return DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops); + Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops); + Glue = Chain.getValue(1); + + // Mark the end of the call, which is glued to the call itself. + Chain = DAG.getCALLSEQ_END(Chain, + DAG.getConstant(NumBytes, DL, PtrVT, true), + DAG.getConstant(0, DL, PtrVT, true), + Glue, DL); + Glue = Chain.getValue(1); + + // Assign locations to each value returned by this call. + SmallVector<CCValAssign, 16> RetLocs; + CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext()); + RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ); + + // Copy all of the result registers out of their specified physreg. + for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) { + CCValAssign &VA = RetLocs[I]; + + // Copy the value out, gluing the copy to the end of the call sequence. 
+ SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), + VA.getLocVT(), Glue); + Chain = RetValue.getValue(1); + Glue = RetValue.getValue(2); + + // Convert the value of the return register into the value that's + // being returned. + InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue)); + } + + return Chain; +} + +bool SystemZTargetLowering:: +CanLowerReturn(CallingConv::ID CallConv, + MachineFunction &MF, bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + LLVMContext &Context) const { + // Detect unsupported vector return types. + if (Subtarget.hasVector()) + VerifyVectorTypes(Outs); + + SmallVector<CCValAssign, 16> RetLocs; + CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context); + return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ); +} + +SDValue +SystemZTargetLowering::LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool IsVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + SDLoc DL, SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + + // Detect unsupported vector return types. + if (Subtarget.hasVector()) + VerifyVectorTypes(Outs); + + // Assign locations to each returned value. + SmallVector<CCValAssign, 16> RetLocs; + CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext()); + RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ); + + // Quick exit for void returns + if (RetLocs.empty()) + return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, Chain); + + // Copy the result values into the output registers. + SDValue Glue; + SmallVector<SDValue, 4> RetOps; + RetOps.push_back(Chain); + for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) { + CCValAssign &VA = RetLocs[I]; + SDValue RetValue = OutVals[I]; + + // Make the return register live on exit. + assert(VA.isRegLoc() && "Can only return in registers!"); + + // Promote the value as required. + RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue); + + // Chain and glue the copies together. + unsigned Reg = VA.getLocReg(); + Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue); + Glue = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT())); + } + + // Update chain and glue. + RetOps[0] = Chain; + if (Glue.getNode()) + RetOps.push_back(Glue); + + return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, RetOps); +} + +SDValue SystemZTargetLowering:: +prepareVolatileOrAtomicLoad(SDValue Chain, SDLoc DL, SelectionDAG &DAG) const { + return DAG.getNode(SystemZISD::SERIALIZE, DL, MVT::Other, Chain); +} + +// Return true if Op is an intrinsic node with chain that returns the CC value +// as its only (other) argument. Provide the associated SystemZISD opcode and +// the mask of valid CC values if so. +static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode, + unsigned &CCValid) { + unsigned Id = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); + switch (Id) { + case Intrinsic::s390_tbegin: + Opcode = SystemZISD::TBEGIN; + CCValid = SystemZ::CCMASK_TBEGIN; + return true; + + case Intrinsic::s390_tbegin_nofloat: + Opcode = SystemZISD::TBEGIN_NOFLOAT; + CCValid = SystemZ::CCMASK_TBEGIN; + return true; + + case Intrinsic::s390_tend: + Opcode = SystemZISD::TEND; + CCValid = SystemZ::CCMASK_TEND; + return true; + + default: + return false; + } +} + +// Return true if Op is an intrinsic node without chain that returns the +// CC value as its final argument. Provide the associated SystemZISD +// opcode and the mask of valid CC values if so. 
+static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) { + unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + switch (Id) { + case Intrinsic::s390_vpkshs: + case Intrinsic::s390_vpksfs: + case Intrinsic::s390_vpksgs: + Opcode = SystemZISD::PACKS_CC; + CCValid = SystemZ::CCMASK_VCMP; + return true; + + case Intrinsic::s390_vpklshs: + case Intrinsic::s390_vpklsfs: + case Intrinsic::s390_vpklsgs: + Opcode = SystemZISD::PACKLS_CC; + CCValid = SystemZ::CCMASK_VCMP; + return true; + + case Intrinsic::s390_vceqbs: + case Intrinsic::s390_vceqhs: + case Intrinsic::s390_vceqfs: + case Intrinsic::s390_vceqgs: + Opcode = SystemZISD::VICMPES; + CCValid = SystemZ::CCMASK_VCMP; + return true; + + case Intrinsic::s390_vchbs: + case Intrinsic::s390_vchhs: + case Intrinsic::s390_vchfs: + case Intrinsic::s390_vchgs: + Opcode = SystemZISD::VICMPHS; + CCValid = SystemZ::CCMASK_VCMP; + return true; + + case Intrinsic::s390_vchlbs: + case Intrinsic::s390_vchlhs: + case Intrinsic::s390_vchlfs: + case Intrinsic::s390_vchlgs: + Opcode = SystemZISD::VICMPHLS; + CCValid = SystemZ::CCMASK_VCMP; + return true; + + case Intrinsic::s390_vtm: + Opcode = SystemZISD::VTM; + CCValid = SystemZ::CCMASK_VCMP; + return true; + + case Intrinsic::s390_vfaebs: + case Intrinsic::s390_vfaehs: + case Intrinsic::s390_vfaefs: + Opcode = SystemZISD::VFAE_CC; + CCValid = SystemZ::CCMASK_ANY; + return true; + + case Intrinsic::s390_vfaezbs: + case Intrinsic::s390_vfaezhs: + case Intrinsic::s390_vfaezfs: + Opcode = SystemZISD::VFAEZ_CC; + CCValid = SystemZ::CCMASK_ANY; + return true; + + case Intrinsic::s390_vfeebs: + case Intrinsic::s390_vfeehs: + case Intrinsic::s390_vfeefs: + Opcode = SystemZISD::VFEE_CC; + CCValid = SystemZ::CCMASK_ANY; + return true; + + case Intrinsic::s390_vfeezbs: + case Intrinsic::s390_vfeezhs: + case Intrinsic::s390_vfeezfs: + Opcode = SystemZISD::VFEEZ_CC; + CCValid = SystemZ::CCMASK_ANY; + return true; + + case Intrinsic::s390_vfenebs: + case Intrinsic::s390_vfenehs: + case Intrinsic::s390_vfenefs: + Opcode = SystemZISD::VFENE_CC; + CCValid = SystemZ::CCMASK_ANY; + return true; + + case Intrinsic::s390_vfenezbs: + case Intrinsic::s390_vfenezhs: + case Intrinsic::s390_vfenezfs: + Opcode = SystemZISD::VFENEZ_CC; + CCValid = SystemZ::CCMASK_ANY; + return true; + + case Intrinsic::s390_vistrbs: + case Intrinsic::s390_vistrhs: + case Intrinsic::s390_vistrfs: + Opcode = SystemZISD::VISTR_CC; + CCValid = SystemZ::CCMASK_0 | SystemZ::CCMASK_3; + return true; + + case Intrinsic::s390_vstrcbs: + case Intrinsic::s390_vstrchs: + case Intrinsic::s390_vstrcfs: + Opcode = SystemZISD::VSTRC_CC; + CCValid = SystemZ::CCMASK_ANY; + return true; + + case Intrinsic::s390_vstrczbs: + case Intrinsic::s390_vstrczhs: + case Intrinsic::s390_vstrczfs: + Opcode = SystemZISD::VSTRCZ_CC; + CCValid = SystemZ::CCMASK_ANY; + return true; + + case Intrinsic::s390_vfcedbs: + Opcode = SystemZISD::VFCMPES; + CCValid = SystemZ::CCMASK_VCMP; + return true; + + case Intrinsic::s390_vfchdbs: + Opcode = SystemZISD::VFCMPHS; + CCValid = SystemZ::CCMASK_VCMP; + return true; + + case Intrinsic::s390_vfchedbs: + Opcode = SystemZISD::VFCMPHES; + CCValid = SystemZ::CCMASK_VCMP; + return true; + + case Intrinsic::s390_vftcidb: + Opcode = SystemZISD::VFTCI; + CCValid = SystemZ::CCMASK_VCMP; + return true; + + default: + return false; + } +} + +// Emit an intrinsic with chain with a glued value instead of its CC result. 
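+// The glue result lets the CC user be attached directly to the new node;
+// uses of the original chain value are redirected to the new chain.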
+static SDValue emitIntrinsicWithChainAndGlue(SelectionDAG &DAG, SDValue Op, + unsigned Opcode) { + // Copy all operands except the intrinsic ID. + unsigned NumOps = Op.getNumOperands(); + SmallVector<SDValue, 6> Ops; + Ops.reserve(NumOps - 1); + Ops.push_back(Op.getOperand(0)); + for (unsigned I = 2; I < NumOps; ++I) + Ops.push_back(Op.getOperand(I)); + + assert(Op->getNumValues() == 2 && "Expected only CC result and chain"); + SDVTList RawVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops); + SDValue OldChain = SDValue(Op.getNode(), 1); + SDValue NewChain = SDValue(Intr.getNode(), 0); + DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain); + return Intr; +} + +// Emit an intrinsic with a glued value instead of its CC result. +static SDValue emitIntrinsicWithGlue(SelectionDAG &DAG, SDValue Op, + unsigned Opcode) { + // Copy all operands except the intrinsic ID. + unsigned NumOps = Op.getNumOperands(); + SmallVector<SDValue, 6> Ops; + Ops.reserve(NumOps - 1); + for (unsigned I = 1; I < NumOps; ++I) + Ops.push_back(Op.getOperand(I)); + + if (Op->getNumValues() == 1) + return DAG.getNode(Opcode, SDLoc(Op), MVT::Glue, Ops); + assert(Op->getNumValues() == 2 && "Expected exactly one non-CC result"); + SDVTList RawVTs = DAG.getVTList(Op->getValueType(0), MVT::Glue); + return DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops); +} + +// CC is a comparison that will be implemented using an integer or +// floating-point comparison. Return the condition code mask for +// a branch on true. In the integer case, CCMASK_CMP_UO is set for +// unsigned comparisons and clear for signed ones. In the floating-point +// case, CCMASK_CMP_UO has its normal mask meaning (unordered). +static unsigned CCMaskForCondCode(ISD::CondCode CC) { +#define CONV(X) \ + case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \ + case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \ + case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X + + switch (CC) { + default: + llvm_unreachable("Invalid integer condition!"); + + CONV(EQ); + CONV(NE); + CONV(GT); + CONV(GE); + CONV(LT); + CONV(LE); + + case ISD::SETO: return SystemZ::CCMASK_CMP_O; + case ISD::SETUO: return SystemZ::CCMASK_CMP_UO; + } +#undef CONV +} + +// Return a sequence for getting a 1 from an IPM result when CC has a +// value in CCMask and a 0 when CC has a value in CCValid & ~CCMask. +// The handling of CC values outside CCValid doesn't matter. +static IPMConversion getIPMConversion(unsigned CCValid, unsigned CCMask) { + // Deal with cases where the result can be taken directly from a bit + // of the IPM result. + if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_3))) + return IPMConversion(0, 0, SystemZ::IPM_CC); + if (CCMask == (CCValid & (SystemZ::CCMASK_2 | SystemZ::CCMASK_3))) + return IPMConversion(0, 0, SystemZ::IPM_CC + 1); + + // Deal with cases where we can add a value to force the sign bit + // to contain the right value. Putting the bit in 31 means we can + // use SRL rather than RISBG(L), and also makes it easier to get a + // 0/-1 value, so it has priority over the other tests below. + // + // These sequences rely on the fact that the upper two bits of the + // IPM result are zero. 
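+ // Each conversion below is (XOR value, add value, bit number); emitSETCC
+ // applies the XOR and the add to the raw IPM result and then extracts
+ // the named bit.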
+ uint64_t TopBit = uint64_t(1) << 31;
+ if (CCMask == (CCValid & SystemZ::CCMASK_0))
+ return IPMConversion(0, -(1 << SystemZ::IPM_CC), 31);
+ if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_1)))
+ return IPMConversion(0, -(2 << SystemZ::IPM_CC), 31);
+ if (CCMask == (CCValid & (SystemZ::CCMASK_0
+ | SystemZ::CCMASK_1
+ | SystemZ::CCMASK_2)))
+ return IPMConversion(0, -(3 << SystemZ::IPM_CC), 31);
+ if (CCMask == (CCValid & SystemZ::CCMASK_3))
+ return IPMConversion(0, TopBit - (3 << SystemZ::IPM_CC), 31);
+ if (CCMask == (CCValid & (SystemZ::CCMASK_1
+ | SystemZ::CCMASK_2
+ | SystemZ::CCMASK_3)))
+ return IPMConversion(0, TopBit - (1 << SystemZ::IPM_CC), 31);
+
+ // Next try inverting the value and testing a bit. 0/1 could be
+ // handled this way too, but we dealt with that case above.
+ if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_2)))
+ return IPMConversion(-1, 0, SystemZ::IPM_CC);
+
+ // Handle cases where adding a value forces a non-sign bit to contain
+ // the right value.
+ if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_2)))
+ return IPMConversion(0, 1 << SystemZ::IPM_CC, SystemZ::IPM_CC + 1);
+ if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_3)))
+ return IPMConversion(0, -(1 << SystemZ::IPM_CC), SystemZ::IPM_CC + 1);
+
+ // The remaining cases are 1, 2, 0/1/3 and 0/2/3. All these
+ // can be done by inverting the low CC bit and applying one of the
+ // sign-based extractions above.
+ if (CCMask == (CCValid & SystemZ::CCMASK_1))
+ return IPMConversion(1 << SystemZ::IPM_CC, -(1 << SystemZ::IPM_CC), 31);
+ if (CCMask == (CCValid & SystemZ::CCMASK_2))
+ return IPMConversion(1 << SystemZ::IPM_CC,
+ TopBit - (3 << SystemZ::IPM_CC), 31);
+ if (CCMask == (CCValid & (SystemZ::CCMASK_0
+ | SystemZ::CCMASK_1
+ | SystemZ::CCMASK_3)))
+ return IPMConversion(1 << SystemZ::IPM_CC, -(3 << SystemZ::IPM_CC), 31);
+ if (CCMask == (CCValid & (SystemZ::CCMASK_0
+ | SystemZ::CCMASK_2
+ | SystemZ::CCMASK_3)))
+ return IPMConversion(1 << SystemZ::IPM_CC,
+ TopBit - (1 << SystemZ::IPM_CC), 31);
+
+ llvm_unreachable("Unexpected CC combination");
+}
+
+// If C can be converted to a comparison against zero, adjust the operands
+// as necessary.
+static void adjustZeroCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
+ if (C.ICmpType == SystemZICMP::UnsignedOnly)
+ return;
+
+ auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
+ if (!ConstOp1)
+ return;
+
+ int64_t Value = ConstOp1->getSExtValue();
+ if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
+ (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
+ (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
+ (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
+ C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
+ C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
+ }
+}
+
+// If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
+// adjust the operands as necessary.
+static void adjustSubwordCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
+ // For us to make any changes, it must be a comparison between a single-use
+ // load and a constant.
+ if (!C.Op0.hasOneUse() ||
+ C.Op0.getOpcode() != ISD::LOAD ||
+ C.Op1.getOpcode() != ISD::Constant)
+ return;
+
+ // We must have an 8- or 16-bit load.
+ auto *Load = cast<LoadSDNode>(C.Op0);
+ unsigned NumBits = Load->getMemoryVT().getStoreSizeInBits();
+ if (NumBits != 8 && NumBits != 16)
+ return;
+
+ // The load must be an extending one and the constant must be within the
+ // range of the unextended value. 
+ auto *ConstOp1 = cast<ConstantSDNode>(C.Op1); + uint64_t Value = ConstOp1->getZExtValue(); + uint64_t Mask = (1 << NumBits) - 1; + if (Load->getExtensionType() == ISD::SEXTLOAD) { + // Make sure that ConstOp1 is in range of C.Op0. + int64_t SignedValue = ConstOp1->getSExtValue(); + if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask) + return; + if (C.ICmpType != SystemZICMP::SignedOnly) { + // Unsigned comparison between two sign-extended values is equivalent + // to unsigned comparison between two zero-extended values. + Value &= Mask; + } else if (NumBits == 8) { + // Try to treat the comparison as unsigned, so that we can use CLI. + // Adjust CCMask and Value as necessary. + if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT) + // Test whether the high bit of the byte is set. + Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT; + else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE) + // Test whether the high bit of the byte is clear. + Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT; + else + // No instruction exists for this combination. + return; + C.ICmpType = SystemZICMP::UnsignedOnly; + } + } else if (Load->getExtensionType() == ISD::ZEXTLOAD) { + if (Value > Mask) + return; + // If the constant is in range, we can use any comparison. + C.ICmpType = SystemZICMP::Any; + } else + return; + + // Make sure that the first operand is an i32 of the right extension type. + ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ? + ISD::SEXTLOAD : + ISD::ZEXTLOAD); + if (C.Op0.getValueType() != MVT::i32 || + Load->getExtensionType() != ExtType) + C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, + Load->getChain(), Load->getBasePtr(), + Load->getPointerInfo(), Load->getMemoryVT(), + Load->isVolatile(), Load->isNonTemporal(), + Load->isInvariant(), Load->getAlignment()); + + // Make sure that the second operand is an i32 with the right value. + if (C.Op1.getValueType() != MVT::i32 || + Value != ConstOp1->getZExtValue()) + C.Op1 = DAG.getConstant(Value, DL, MVT::i32); +} + +// Return true if Op is either an unextended load, or a load suitable +// for integer register-memory comparisons of type ICmpType. +static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) { + auto *Load = dyn_cast<LoadSDNode>(Op.getNode()); + if (Load) { + // There are no instructions to compare a register with a memory byte. + if (Load->getMemoryVT() == MVT::i8) + return false; + // Otherwise decide on extension type. + switch (Load->getExtensionType()) { + case ISD::NON_EXTLOAD: + return true; + case ISD::SEXTLOAD: + return ICmpType != SystemZICMP::UnsignedOnly; + case ISD::ZEXTLOAD: + return ICmpType != SystemZICMP::SignedOnly; + default: + break; + } + } + return false; +} + +// Return true if it is better to swap the operands of C. +static bool shouldSwapCmpOperands(const Comparison &C) { + // Leave f128 comparisons alone, since they have no memory forms. + if (C.Op0.getValueType() == MVT::f128) + return false; + + // Always keep a floating-point constant second, since comparisons with + // zero can use LOAD TEST and comparisons with other constants make a + // natural memory operand. + if (isa<ConstantFPSDNode>(C.Op1)) + return false; + + // Never swap comparisons with zero since there are many ways to optimize + // those later. + auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1); + if (ConstOp1 && ConstOp1->getZExtValue() == 0) + return false; + + // Also keep natural memory operands second if the loaded value is + // only used here. 
+ // Several comparisons have memory forms.
+ if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
+ return false;
+
+ // Look for cases where Cmp0 is a single-use load and Cmp1 isn't.
+ // In that case we generally prefer the memory to be second.
+ if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
+ // The only exceptions are when the second operand is a constant and
+ // we can use things like CHHSI.
+ if (!ConstOp1)
+ return true;
+ // The unsigned memory-immediate instructions can handle 16-bit
+ // unsigned integers.
+ if (C.ICmpType != SystemZICMP::SignedOnly &&
+ isUInt<16>(ConstOp1->getZExtValue()))
+ return false;
+ // The signed memory-immediate instructions can handle 16-bit
+ // signed integers.
+ if (C.ICmpType != SystemZICMP::UnsignedOnly &&
+ isInt<16>(ConstOp1->getSExtValue()))
+ return false;
+ return true;
+ }
+
+ // Try to promote the use of CGFR and CLGFR.
+ unsigned Opcode0 = C.Op0.getOpcode();
+ if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
+ return true;
+ if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
+ return true;
+ if (C.ICmpType != SystemZICMP::SignedOnly &&
+ Opcode0 == ISD::AND &&
+ C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(C.Op0.getOperand(1))->getZExtValue() == 0xffffffff)
+ return true;
+
+ return false;
+}
+
+// Return a version of comparison CC mask CCMask in which the LT and GT
+// actions are swapped.
+static unsigned reverseCCMask(unsigned CCMask) {
+ return ((CCMask & SystemZ::CCMASK_CMP_EQ) |
+ (CCMask & SystemZ::CCMASK_CMP_GT ? SystemZ::CCMASK_CMP_LT : 0) |
+ (CCMask & SystemZ::CCMASK_CMP_LT ? SystemZ::CCMASK_CMP_GT : 0) |
+ (CCMask & SystemZ::CCMASK_CMP_UO));
+}
+
+// Check whether C tests for equality between X and Y and whether X - Y
+// or Y - X is also computed. In that case it's better to compare the
+// result of the subtraction against zero.
+static void adjustForSubtraction(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
+ if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
+ C.CCMask == SystemZ::CCMASK_CMP_NE) {
+ for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
+ SDNode *N = *I;
+ if (N->getOpcode() == ISD::SUB &&
+ ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
+ (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
+ C.Op0 = SDValue(N, 0);
+ C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
+ return;
+ }
+ }
+ }
+}
+
+// Check whether C compares a floating-point value with zero and if that
+// floating-point value is also negated. In this case we can use the
+// negation to set CC, so avoiding separate LOAD AND TEST and
+// LOAD (NEGATIVE/COMPLEMENT) instructions.
+static void adjustForFNeg(Comparison &C) {
+ auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
+ if (C1 && C1->isZero()) {
+ for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
+ SDNode *N = *I;
+ if (N->getOpcode() == ISD::FNEG) {
+ C.Op0 = SDValue(N, 0);
+ C.CCMask = reverseCCMask(C.CCMask);
+ return;
+ }
+ }
+ }
+}
+
+// Check whether C compares (shl X, 32) with 0 and whether X is
+// also sign-extended. In that case it is better to test the result
+// of the sign extension using LTGFR.
+//
+// This case is important because InstCombine transforms a comparison
+// with (sext (trunc X)) into a comparison with (shl X, 32).
+static void adjustForLTGFR(Comparison &C) {
+ // Check for a comparison between (shl X, 32) and 0. 
+ if (C.Op0.getOpcode() == ISD::SHL && + C.Op0.getValueType() == MVT::i64 && + C.Op1.getOpcode() == ISD::Constant && + cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) { + auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1)); + if (C1 && C1->getZExtValue() == 32) { + SDValue ShlOp0 = C.Op0.getOperand(0); + // See whether X has any SIGN_EXTEND_INREG uses. + for (auto I = ShlOp0->use_begin(), E = ShlOp0->use_end(); I != E; ++I) { + SDNode *N = *I; + if (N->getOpcode() == ISD::SIGN_EXTEND_INREG && + cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) { + C.Op0 = SDValue(N, 0); + return; + } + } + } + } +} + +// If C compares the truncation of an extending load, try to compare +// the untruncated value instead. This exposes more opportunities to +// reuse CC. +static void adjustICmpTruncate(SelectionDAG &DAG, SDLoc DL, Comparison &C) { + if (C.Op0.getOpcode() == ISD::TRUNCATE && + C.Op0.getOperand(0).getOpcode() == ISD::LOAD && + C.Op1.getOpcode() == ISD::Constant && + cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) { + auto *L = cast<LoadSDNode>(C.Op0.getOperand(0)); + if (L->getMemoryVT().getStoreSizeInBits() + <= C.Op0.getValueType().getSizeInBits()) { + unsigned Type = L->getExtensionType(); + if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) || + (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) { + C.Op0 = C.Op0.getOperand(0); + C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType()); + } + } + } +} + +// Return true if shift operation N has an in-range constant shift value. +// Store it in ShiftVal if so. +static bool isSimpleShift(SDValue N, unsigned &ShiftVal) { + auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1)); + if (!Shift) + return false; + + uint64_t Amount = Shift->getZExtValue(); + if (Amount >= N.getValueType().getSizeInBits()) + return false; + + ShiftVal = Amount; + return true; +} + +// Check whether an AND with Mask is suitable for a TEST UNDER MASK +// instruction and whether the CC value is descriptive enough to handle +// a comparison of type Opcode between the AND result and CmpVal. +// CCMask says which comparison result is being tested and BitSize is +// the number of bits in the operands. If TEST UNDER MASK can be used, +// return the corresponding CC mask, otherwise return 0. +static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask, + uint64_t Mask, uint64_t CmpVal, + unsigned ICmpType) { + assert(Mask != 0 && "ANDs with zero should have been removed by now"); + + // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL. + if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) && + !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask)) + return 0; + + // Work out the masks for the lowest and highest bits. + unsigned HighShift = 63 - countLeadingZeros(Mask); + uint64_t High = uint64_t(1) << HighShift; + uint64_t Low = uint64_t(1) << countTrailingZeros(Mask); + + // Signed ordered comparisons are effectively unsigned if the sign + // bit is dropped. + bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly); + + // Check for equality comparisons with 0, or the equivalent. 
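+ // (The AND result is either 0 or at least Low, so unsigned bounds up to
+ // Low behave exactly like a test against zero.)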
+ if (CmpVal == 0) { + if (CCMask == SystemZ::CCMASK_CMP_EQ) + return SystemZ::CCMASK_TM_ALL_0; + if (CCMask == SystemZ::CCMASK_CMP_NE) + return SystemZ::CCMASK_TM_SOME_1; + } + if (EffectivelyUnsigned && CmpVal <= Low) { + if (CCMask == SystemZ::CCMASK_CMP_LT) + return SystemZ::CCMASK_TM_ALL_0; + if (CCMask == SystemZ::CCMASK_CMP_GE) + return SystemZ::CCMASK_TM_SOME_1; + } + if (EffectivelyUnsigned && CmpVal < Low) { + if (CCMask == SystemZ::CCMASK_CMP_LE) + return SystemZ::CCMASK_TM_ALL_0; + if (CCMask == SystemZ::CCMASK_CMP_GT) + return SystemZ::CCMASK_TM_SOME_1; + } + + // Check for equality comparisons with the mask, or the equivalent. + if (CmpVal == Mask) { + if (CCMask == SystemZ::CCMASK_CMP_EQ) + return SystemZ::CCMASK_TM_ALL_1; + if (CCMask == SystemZ::CCMASK_CMP_NE) + return SystemZ::CCMASK_TM_SOME_0; + } + if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) { + if (CCMask == SystemZ::CCMASK_CMP_GT) + return SystemZ::CCMASK_TM_ALL_1; + if (CCMask == SystemZ::CCMASK_CMP_LE) + return SystemZ::CCMASK_TM_SOME_0; + } + if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) { + if (CCMask == SystemZ::CCMASK_CMP_GE) + return SystemZ::CCMASK_TM_ALL_1; + if (CCMask == SystemZ::CCMASK_CMP_LT) + return SystemZ::CCMASK_TM_SOME_0; + } + + // Check for ordered comparisons with the top bit. + if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) { + if (CCMask == SystemZ::CCMASK_CMP_LE) + return SystemZ::CCMASK_TM_MSB_0; + if (CCMask == SystemZ::CCMASK_CMP_GT) + return SystemZ::CCMASK_TM_MSB_1; + } + if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) { + if (CCMask == SystemZ::CCMASK_CMP_LT) + return SystemZ::CCMASK_TM_MSB_0; + if (CCMask == SystemZ::CCMASK_CMP_GE) + return SystemZ::CCMASK_TM_MSB_1; + } + + // If there are just two bits, we can do equality checks for Low and High + // as well. + if (Mask == Low + High) { + if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low) + return SystemZ::CCMASK_TM_MIXED_MSB_0; + if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low) + return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY; + if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High) + return SystemZ::CCMASK_TM_MIXED_MSB_1; + if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High) + return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY; + } + + // Looks like we've exhausted our options. + return 0; +} + +// See whether C can be implemented as a TEST UNDER MASK instruction. +// Update the arguments with the TM version if so. +static void adjustForTestUnderMask(SelectionDAG &DAG, SDLoc DL, Comparison &C) { + // Check that we have a comparison with a constant. + auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1); + if (!ConstOp1) + return; + uint64_t CmpVal = ConstOp1->getZExtValue(); + + // Check whether the nonconstant input is an AND with a constant mask. + Comparison NewC(C); + uint64_t MaskVal; + ConstantSDNode *Mask = nullptr; + if (C.Op0.getOpcode() == ISD::AND) { + NewC.Op0 = C.Op0.getOperand(0); + NewC.Op1 = C.Op0.getOperand(1); + Mask = dyn_cast<ConstantSDNode>(NewC.Op1); + if (!Mask) + return; + MaskVal = Mask->getZExtValue(); + } else { + // There is no instruction to compare with a 64-bit immediate + // so use TMHH instead if possible. We need an unsigned ordered + // comparison with an i64 immediate. 
+ if (NewC.Op0.getValueType() != MVT::i64 ||
+ NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
+ NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
+ NewC.ICmpType == SystemZICMP::SignedOnly)
+ return;
+ // Convert LE and GT comparisons into LT and GE.
+ if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
+ NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
+ if (CmpVal == uint64_t(-1))
+ return;
+ CmpVal += 1;
+ NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
+ }
+ // If the low N bits of Op1 are zero then the low N bits of Op0 can
+ // be masked off without changing the result.
+ MaskVal = -(CmpVal & -CmpVal);
+ NewC.ICmpType = SystemZICMP::UnsignedOnly;
+ }
+ if (!MaskVal)
+ return;
+
+ // Check whether the combination of mask, comparison value and comparison
+ // type are suitable.
+ unsigned BitSize = NewC.Op0.getValueType().getSizeInBits();
+ unsigned NewCCMask, ShiftVal;
+ if (NewC.ICmpType != SystemZICMP::SignedOnly &&
+ NewC.Op0.getOpcode() == ISD::SHL &&
+ isSimpleShift(NewC.Op0, ShiftVal) &&
+ (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
+ MaskVal >> ShiftVal,
+ CmpVal >> ShiftVal,
+ SystemZICMP::Any))) {
+ NewC.Op0 = NewC.Op0.getOperand(0);
+ MaskVal >>= ShiftVal;
+ } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
+ NewC.Op0.getOpcode() == ISD::SRL &&
+ isSimpleShift(NewC.Op0, ShiftVal) &&
+ (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
+ MaskVal << ShiftVal,
+ CmpVal << ShiftVal,
+ SystemZICMP::UnsignedOnly))) {
+ NewC.Op0 = NewC.Op0.getOperand(0);
+ MaskVal <<= ShiftVal;
+ } else {
+ NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
+ NewC.ICmpType);
+ if (!NewCCMask)
+ return;
+ }
+
+ // Go ahead and make the change.
+ C.Opcode = SystemZISD::TM;
+ C.Op0 = NewC.Op0;
+ if (Mask && Mask->getZExtValue() == MaskVal)
+ C.Op1 = SDValue(Mask, 0);
+ else
+ C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
+ C.CCValid = SystemZ::CCMASK_TM;
+ C.CCMask = NewCCMask;
+}
+
+// Return a Comparison that tests the condition-code result of intrinsic
+// node Call against constant integer CC using comparison code Cond.
+// Opcode is the opcode of the SystemZISD operation for the intrinsic
+// and CCValid is the set of possible condition-code results.
+static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
+ SDValue Call, unsigned CCValid, uint64_t CC,
+ ISD::CondCode Cond) {
+ Comparison C(Call, SDValue());
+ C.Opcode = Opcode;
+ C.CCValid = CCValid;
+ if (Cond == ISD::SETEQ)
+ // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
+ C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
+ else if (Cond == ISD::SETNE)
+ // ...and the inverse of that.
+ C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
+ else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
+ // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
+ // always true for CC>3.
+ C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
+ else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
+ // ...and the inverse of that.
+ C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
+ else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
+ // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
+ // always true for CC>3.
+ C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
+ else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
+ // ...and the inverse of that.
+ C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
+ else
+ llvm_unreachable("Unexpected integer comparison type");
+ C.CCMask &= CCValid;
+ return C;
+}
+
+// Decide how to implement a comparison of type Cond between CmpOp0 and CmpOp1. 
+static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, + ISD::CondCode Cond, SDLoc DL) { + if (CmpOp1.getOpcode() == ISD::Constant) { + uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue(); + unsigned Opcode, CCValid; + if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN && + CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) && + isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid)) + return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond); + if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN && + CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 && + isIntrinsicWithCC(CmpOp0, Opcode, CCValid)) + return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond); + } + Comparison C(CmpOp0, CmpOp1); + C.CCMask = CCMaskForCondCode(Cond); + if (C.Op0.getValueType().isFloatingPoint()) { + C.CCValid = SystemZ::CCMASK_FCMP; + C.Opcode = SystemZISD::FCMP; + adjustForFNeg(C); + } else { + C.CCValid = SystemZ::CCMASK_ICMP; + C.Opcode = SystemZISD::ICMP; + // Choose the type of comparison. Equality and inequality tests can + // use either signed or unsigned comparisons. The choice also doesn't + // matter if both sign bits are known to be clear. In those cases we + // want to give the main isel code the freedom to choose whichever + // form fits best. + if (C.CCMask == SystemZ::CCMASK_CMP_EQ || + C.CCMask == SystemZ::CCMASK_CMP_NE || + (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1))) + C.ICmpType = SystemZICMP::Any; + else if (C.CCMask & SystemZ::CCMASK_CMP_UO) + C.ICmpType = SystemZICMP::UnsignedOnly; + else + C.ICmpType = SystemZICMP::SignedOnly; + C.CCMask &= ~SystemZ::CCMASK_CMP_UO; + adjustZeroCmp(DAG, DL, C); + adjustSubwordCmp(DAG, DL, C); + adjustForSubtraction(DAG, DL, C); + adjustForLTGFR(C); + adjustICmpTruncate(DAG, DL, C); + } + + if (shouldSwapCmpOperands(C)) { + std::swap(C.Op0, C.Op1); + C.CCMask = reverseCCMask(C.CCMask); + } + + adjustForTestUnderMask(DAG, DL, C); + return C; +} + +// Emit the comparison instruction described by C. +static SDValue emitCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) { + if (!C.Op1.getNode()) { + SDValue Op; + switch (C.Op0.getOpcode()) { + case ISD::INTRINSIC_W_CHAIN: + Op = emitIntrinsicWithChainAndGlue(DAG, C.Op0, C.Opcode); + break; + case ISD::INTRINSIC_WO_CHAIN: + Op = emitIntrinsicWithGlue(DAG, C.Op0, C.Opcode); + break; + default: + llvm_unreachable("Invalid comparison operands"); + } + return SDValue(Op.getNode(), Op->getNumValues() - 1); + } + if (C.Opcode == SystemZISD::ICMP) + return DAG.getNode(SystemZISD::ICMP, DL, MVT::Glue, C.Op0, C.Op1, + DAG.getConstant(C.ICmpType, DL, MVT::i32)); + if (C.Opcode == SystemZISD::TM) { + bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) != + bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1)); + return DAG.getNode(SystemZISD::TM, DL, MVT::Glue, C.Op0, C.Op1, + DAG.getConstant(RegisterOnly, DL, MVT::i32)); + } + return DAG.getNode(C.Opcode, DL, MVT::Glue, C.Op0, C.Op1); +} + +// Implement a 32-bit *MUL_LOHI operation by extending both operands to +// 64 bits. Extend is the extension type to use. Store the high part +// in Hi and the low part in Lo. 
+static void lowerMUL_LOHI32(SelectionDAG &DAG, SDLoc DL, + unsigned Extend, SDValue Op0, SDValue Op1, + SDValue &Hi, SDValue &Lo) { + Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0); + Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1); + SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1); + Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul, + DAG.getConstant(32, DL, MVT::i64)); + Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi); + Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul); +} + +// Lower a binary operation that produces two VT results, one in each +// half of a GR128 pair. Op0 and Op1 are the VT operands to the operation, +// Extend extends Op0 to a GR128, and Opcode performs the GR128 operation +// on the extended Op0 and (unextended) Op1. Store the even register result +// in Even and the odd register result in Odd. +static void lowerGR128Binary(SelectionDAG &DAG, SDLoc DL, EVT VT, + unsigned Extend, unsigned Opcode, + SDValue Op0, SDValue Op1, + SDValue &Even, SDValue &Odd) { + SDNode *In128 = DAG.getMachineNode(Extend, DL, MVT::Untyped, Op0); + SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, + SDValue(In128, 0), Op1); + bool Is32Bit = is32Bit(VT); + Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result); + Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result); +} + +// Return an i32 value that is 1 if the CC value produced by Glue is +// in the mask CCMask and 0 otherwise. CC is known to have a value +// in CCValid, so other values can be ignored. +static SDValue emitSETCC(SelectionDAG &DAG, SDLoc DL, SDValue Glue, + unsigned CCValid, unsigned CCMask) { + IPMConversion Conversion = getIPMConversion(CCValid, CCMask); + SDValue Result = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue); + + if (Conversion.XORValue) + Result = DAG.getNode(ISD::XOR, DL, MVT::i32, Result, + DAG.getConstant(Conversion.XORValue, DL, MVT::i32)); + + if (Conversion.AddValue) + Result = DAG.getNode(ISD::ADD, DL, MVT::i32, Result, + DAG.getConstant(Conversion.AddValue, DL, MVT::i32)); + + // The SHR/AND sequence should get optimized to an RISBG. + Result = DAG.getNode(ISD::SRL, DL, MVT::i32, Result, + DAG.getConstant(Conversion.Bit, DL, MVT::i32)); + if (Conversion.Bit != 31) + Result = DAG.getNode(ISD::AND, DL, MVT::i32, Result, + DAG.getConstant(1, DL, MVT::i32)); + return Result; +} + +// Return the SystemISD vector comparison operation for CC, or 0 if it cannot +// be done directly. IsFP is true if CC is for a floating-point rather than +// integer comparison. +static unsigned getVectorComparison(ISD::CondCode CC, bool IsFP) { + switch (CC) { + case ISD::SETOEQ: + case ISD::SETEQ: + return IsFP ? SystemZISD::VFCMPE : SystemZISD::VICMPE; + + case ISD::SETOGE: + case ISD::SETGE: + return IsFP ? SystemZISD::VFCMPHE : static_cast<SystemZISD::NodeType>(0); + + case ISD::SETOGT: + case ISD::SETGT: + return IsFP ? SystemZISD::VFCMPH : SystemZISD::VICMPH; + + case ISD::SETUGT: + return IsFP ? static_cast<SystemZISD::NodeType>(0) : SystemZISD::VICMPHL; + + default: + return 0; + } +} + +// Return the SystemZISD vector comparison operation for CC or its inverse, +// or 0 if neither can be done directly. Indicate in Invert whether the +// result is for the inverse of CC. IsFP is true if CC is for a +// floating-point rather than integer comparison. 
+static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, bool IsFP, + bool &Invert) { + if (unsigned Opcode = getVectorComparison(CC, IsFP)) { + Invert = false; + return Opcode; + } + + CC = ISD::getSetCCInverse(CC, !IsFP); + if (unsigned Opcode = getVectorComparison(CC, IsFP)) { + Invert = true; + return Opcode; + } + + return 0; +} + +// Return a v2f64 that contains the extended form of elements Start and Start+1 +// of v4f32 value Op. +static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, SDLoc DL, + SDValue Op) { + int Mask[] = { Start, -1, Start + 1, -1 }; + Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask); + return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op); +} + +// Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode, +// producing a result of type VT. +static SDValue getVectorCmp(SelectionDAG &DAG, unsigned Opcode, SDLoc DL, + EVT VT, SDValue CmpOp0, SDValue CmpOp1) { + // There is no hardware support for v4f32, so extend the vector into + // two v2f64s and compare those. + if (CmpOp0.getValueType() == MVT::v4f32) { + SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0); + SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0); + SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1); + SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1); + SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1); + SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1); + return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes); + } + return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1); +} + +// Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing +// an integer mask of type VT. +static SDValue lowerVectorSETCC(SelectionDAG &DAG, SDLoc DL, EVT VT, + ISD::CondCode CC, SDValue CmpOp0, + SDValue CmpOp1) { + bool IsFP = CmpOp0.getValueType().isFloatingPoint(); + bool Invert = false; + SDValue Cmp; + switch (CC) { + // Handle tests for order using (or (ogt y x) (oge x y)). + case ISD::SETUO: + Invert = true; + case ISD::SETO: { + assert(IsFP && "Unexpected integer comparison"); + SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0); + SDValue GE = getVectorCmp(DAG, SystemZISD::VFCMPHE, DL, VT, CmpOp0, CmpOp1); + Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE); + break; + } + + // Handle <> tests using (or (ogt y x) (ogt x y)). + case ISD::SETUEQ: + Invert = true; + case ISD::SETONE: { + assert(IsFP && "Unexpected integer comparison"); + SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0); + SDValue GT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp0, CmpOp1); + Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT); + break; + } + + // Otherwise a single comparison is enough. It doesn't really + // matter whether we try the inversion or the swap first, since + // there are no cases where both work. 
+ default: + if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert)) + Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1); + else { + CC = ISD::getSetCCSwappedOperands(CC); + if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert)) + Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0); + else + llvm_unreachable("Unhandled comparison"); + } + break; + } + if (Invert) { + SDValue Mask = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8, + DAG.getConstant(65535, DL, MVT::i32)); + Mask = DAG.getNode(ISD::BITCAST, DL, VT, Mask); + Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask); + } + return Cmp; +} + +SDValue SystemZTargetLowering::lowerSETCC(SDValue Op, + SelectionDAG &DAG) const { + SDValue CmpOp0 = Op.getOperand(0); + SDValue CmpOp1 = Op.getOperand(1); + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); + SDLoc DL(Op); + EVT VT = Op.getValueType(); + if (VT.isVector()) + return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1); + + Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL)); + SDValue Glue = emitCmp(DAG, DL, C); + return emitSETCC(DAG, DL, Glue, C.CCValid, C.CCMask); +} + +SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const { + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); + SDValue CmpOp0 = Op.getOperand(2); + SDValue CmpOp1 = Op.getOperand(3); + SDValue Dest = Op.getOperand(4); + SDLoc DL(Op); + + Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL)); + SDValue Glue = emitCmp(DAG, DL, C); + return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(), + Op.getOperand(0), DAG.getConstant(C.CCValid, DL, MVT::i32), + DAG.getConstant(C.CCMask, DL, MVT::i32), Dest, Glue); +} + +// Return true if Pos is CmpOp and Neg is the negative of CmpOp, +// allowing Pos and Neg to be wider than CmpOp. +static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) { + return (Neg.getOpcode() == ISD::SUB && + Neg.getOperand(0).getOpcode() == ISD::Constant && + cast<ConstantSDNode>(Neg.getOperand(0))->getZExtValue() == 0 && + Neg.getOperand(1) == Pos && + (Pos == CmpOp || + (Pos.getOpcode() == ISD::SIGN_EXTEND && + Pos.getOperand(0) == CmpOp))); +} + +// Return the absolute or negative absolute of Op; IsNegative decides which. +static SDValue getAbsolute(SelectionDAG &DAG, SDLoc DL, SDValue Op, + bool IsNegative) { + Op = DAG.getNode(SystemZISD::IABS, DL, Op.getValueType(), Op); + if (IsNegative) + Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(), + DAG.getConstant(0, DL, Op.getValueType()), Op); + return Op; +} + +SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op, + SelectionDAG &DAG) const { + SDValue CmpOp0 = Op.getOperand(0); + SDValue CmpOp1 = Op.getOperand(1); + SDValue TrueOp = Op.getOperand(2); + SDValue FalseOp = Op.getOperand(3); + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); + SDLoc DL(Op); + + Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL)); + + // Check for absolute and negative-absolute selections, including those + // where the comparison value is sign-extended (for LPGFR and LNGFR). + // This check supplements the one in DAGCombiner. 
+ if (C.Opcode == SystemZISD::ICMP && + C.CCMask != SystemZ::CCMASK_CMP_EQ && + C.CCMask != SystemZ::CCMASK_CMP_NE && + C.Op1.getOpcode() == ISD::Constant && + cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) { + if (isAbsolute(C.Op0, TrueOp, FalseOp)) + return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT); + if (isAbsolute(C.Op0, FalseOp, TrueOp)) + return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT); + } + + SDValue Glue = emitCmp(DAG, DL, C); + + // Special case for handling -1/0 results. The shifts we use here + // should get optimized with the IPM conversion sequence. + auto *TrueC = dyn_cast<ConstantSDNode>(TrueOp); + auto *FalseC = dyn_cast<ConstantSDNode>(FalseOp); + if (TrueC && FalseC) { + int64_t TrueVal = TrueC->getSExtValue(); + int64_t FalseVal = FalseC->getSExtValue(); + if ((TrueVal == -1 && FalseVal == 0) || (TrueVal == 0 && FalseVal == -1)) { + // Invert the condition if we want -1 on false. + if (TrueVal == 0) + C.CCMask ^= C.CCValid; + SDValue Result = emitSETCC(DAG, DL, Glue, C.CCValid, C.CCMask); + EVT VT = Op.getValueType(); + // Extend the result to VT. Upper bits are ignored. + if (!is32Bit(VT)) + Result = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Result); + // Sign-extend from the low bit. + SDValue ShAmt = DAG.getConstant(VT.getSizeInBits() - 1, DL, MVT::i32); + SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, Result, ShAmt); + return DAG.getNode(ISD::SRA, DL, VT, Shl, ShAmt); + } + } + + SDValue Ops[] = {TrueOp, FalseOp, DAG.getConstant(C.CCValid, DL, MVT::i32), + DAG.getConstant(C.CCMask, DL, MVT::i32), Glue}; + + SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue); + return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, Ops); +} + +SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node, + SelectionDAG &DAG) const { + SDLoc DL(Node); + const GlobalValue *GV = Node->getGlobal(); + int64_t Offset = Node->getOffset(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + Reloc::Model RM = DAG.getTarget().getRelocationModel(); + CodeModel::Model CM = DAG.getTarget().getCodeModel(); + + SDValue Result; + if (Subtarget.isPC32DBLSymbol(GV, RM, CM)) { + // Assign anchors at 1<<12 byte boundaries. + uint64_t Anchor = Offset & ~uint64_t(0xfff); + Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor); + Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); + + // The offset can be folded into the address if it is aligned to a halfword. + Offset -= Anchor; + if (Offset != 0 && (Offset & 1) == 0) { + SDValue Full = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset); + Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result); + Offset = 0; + } + } else { + Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT); + Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); + Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result, + MachinePointerInfo::getGOT(DAG.getMachineFunction()), + false, false, false, 0); + } + + // If there was a non-zero offset that we didn't fold, create an explicit + // addition for it. 
+ if (Offset != 0) + Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result, + DAG.getConstant(Offset, DL, PtrVT)); + + return Result; +} + +SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node, + SelectionDAG &DAG, + unsigned Opcode, + SDValue GOTOffset) const { + SDLoc DL(Node); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + SDValue Chain = DAG.getEntryNode(); + SDValue Glue; + + // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12. + SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); + Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue); + Glue = Chain.getValue(1); + Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue); + Glue = Chain.getValue(1); + + // The first call operand is the chain and the second is the TLS symbol. + SmallVector<SDValue, 8> Ops; + Ops.push_back(Chain); + Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL, + Node->getValueType(0), + 0, 0)); + + // Add argument registers to the end of the list so that they are + // known live into the call. + Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT)); + Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT)); + + // Add a register mask operand representing the call-preserved registers. + const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); + const uint32_t *Mask = + TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C); + assert(Mask && "Missing call preserved mask for calling convention"); + Ops.push_back(DAG.getRegisterMask(Mask)); + + // Glue the call to the argument copies. + Ops.push_back(Glue); + + // Emit the call. + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + Chain = DAG.getNode(Opcode, DL, NodeTys, Ops); + Glue = Chain.getValue(1); + + // Copy the return value from %r2. + return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue); +} + +SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, + SelectionDAG &DAG) const { + if (DAG.getTarget().Options.EmulatedTLS) + return LowerToTLSEmulatedModel(Node, DAG); + SDLoc DL(Node); + const GlobalValue *GV = Node->getGlobal(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + TLSModel::Model model = DAG.getTarget().getTLSModel(GV); + + // The high part of the thread pointer is in access register 0. + SDValue TPHi = DAG.getNode(SystemZISD::EXTRACT_ACCESS, DL, MVT::i32, + DAG.getConstant(0, DL, MVT::i32)); + TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi); + + // The low part of the thread pointer is in access register 1. + SDValue TPLo = DAG.getNode(SystemZISD::EXTRACT_ACCESS, DL, MVT::i32, + DAG.getConstant(1, DL, MVT::i32)); + TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo); + + // Merge them into a single 64-bit address. + SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi, + DAG.getConstant(32, DL, PtrVT)); + SDValue TP = DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo); + + // Get the offset of GA from the thread pointer, based on the TLS model. + SDValue Offset; + switch (model) { + case TLSModel::GeneralDynamic: { + // Load the GOT offset of the tls_index (module ID / per-symbol offset). + SystemZConstantPoolValue *CPV = + SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD); + + Offset = DAG.getConstantPool(CPV, PtrVT, 8); + Offset = DAG.getLoad( + PtrVT, DL, DAG.getEntryNode(), Offset, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, + false, false, 0); + + // Call __tls_get_offset to retrieve the offset. 
+ Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset); + break; + } + + case TLSModel::LocalDynamic: { + // Load the GOT offset of the module ID. + SystemZConstantPoolValue *CPV = + SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM); + + Offset = DAG.getConstantPool(CPV, PtrVT, 8); + Offset = DAG.getLoad( + PtrVT, DL, DAG.getEntryNode(), Offset, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, + false, false, 0); + + // Call __tls_get_offset to retrieve the module base offset. + Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset); + + // Note: The SystemZLDCleanupPass will remove redundant computations + // of the module base offset. Count total number of local-dynamic + // accesses to trigger execution of that pass. + SystemZMachineFunctionInfo* MFI = + DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>(); + MFI->incNumLocalDynamicTLSAccesses(); + + // Add the per-symbol offset. + CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF); + + SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, 8); + DTPOffset = DAG.getLoad( + PtrVT, DL, DAG.getEntryNode(), DTPOffset, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, + false, false, 0); + + Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset); + break; + } + + case TLSModel::InitialExec: { + // Load the offset from the GOT. + Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, + SystemZII::MO_INDNTPOFF); + Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset); + Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset, + MachinePointerInfo::getGOT(DAG.getMachineFunction()), + false, false, false, 0); + break; + } + + case TLSModel::LocalExec: { + // Force the offset into the constant pool and load it from there. + SystemZConstantPoolValue *CPV = + SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF); + + Offset = DAG.getConstantPool(CPV, PtrVT, 8); + Offset = DAG.getLoad( + PtrVT, DL, DAG.getEntryNode(), Offset, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, + false, false, 0); + break; + } + } + + // Add the base and offset together. + return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset); +} + +SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node, + SelectionDAG &DAG) const { + SDLoc DL(Node); + const BlockAddress *BA = Node->getBlockAddress(); + int64_t Offset = Node->getOffset(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + + SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset); + Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); + return Result; +} + +SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT, + SelectionDAG &DAG) const { + SDLoc DL(JT); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); + + // Use LARL to load the address of the table. + return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); +} + +SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP, + SelectionDAG &DAG) const { + SDLoc DL(CP); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + + SDValue Result; + if (CP->isMachineConstantPoolEntry()) + Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, + CP->getAlignment()); + else + Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, + CP->getAlignment(), CP->getOffset()); + + // Use LARL to load the address of the constant pool entry. 
+ return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); +} + +SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + SDValue In = Op.getOperand(0); + EVT InVT = In.getValueType(); + EVT ResVT = Op.getValueType(); + + // Convert loads directly. This is normally done by DAGCombiner, + // but we need this case for bitcasts that are created during lowering + // and which are then lowered themselves. + if (auto *LoadN = dyn_cast<LoadSDNode>(In)) + return DAG.getLoad(ResVT, DL, LoadN->getChain(), LoadN->getBasePtr(), + LoadN->getMemOperand()); + + if (InVT == MVT::i32 && ResVT == MVT::f32) { + SDValue In64; + if (Subtarget.hasHighWord()) { + SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, + MVT::i64); + In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL, + MVT::i64, SDValue(U64, 0), In); + } else { + In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In); + In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64, + DAG.getConstant(32, DL, MVT::i64)); + } + SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64); + return DAG.getTargetExtractSubreg(SystemZ::subreg_r32, + DL, MVT::f32, Out64); + } + if (InVT == MVT::f32 && ResVT == MVT::i32) { + SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64); + SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_r32, DL, + MVT::f64, SDValue(U64, 0), In); + SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64); + if (Subtarget.hasHighWord()) + return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL, + MVT::i32, Out64); + SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64, + DAG.getConstant(32, DL, MVT::i64)); + return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift); + } + llvm_unreachable("Unexpected bitcast combination"); +} + +SDValue SystemZTargetLowering::lowerVASTART(SDValue Op, + SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + SystemZMachineFunctionInfo *FuncInfo = + MF.getInfo<SystemZMachineFunctionInfo>(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + + SDValue Chain = Op.getOperand(0); + SDValue Addr = Op.getOperand(1); + const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); + SDLoc DL(Op); + + // The initial values of each field. + const unsigned NumFields = 4; + SDValue Fields[NumFields] = { + DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT), + DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT), + DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT), + DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT) + }; + + // Store each field into its respective slot. 
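+  // Each field is 8 bytes wide, so the stores below land at offsets 0, 8,
+  // 16 and 24, matching the s390x va_list fields (__gpr, __fpr,
+  // __overflow_arg_area and __reg_save_area).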
+ SDValue MemOps[NumFields]; + unsigned Offset = 0; + for (unsigned I = 0; I < NumFields; ++I) { + SDValue FieldAddr = Addr; + if (Offset != 0) + FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr, + DAG.getIntPtrConstant(Offset, DL)); + MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr, + MachinePointerInfo(SV, Offset), + false, false, 0); + Offset += 8; + } + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps); +} + +SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op, + SelectionDAG &DAG) const { + SDValue Chain = Op.getOperand(0); + SDValue DstPtr = Op.getOperand(1); + SDValue SrcPtr = Op.getOperand(2); + const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue(); + const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); + SDLoc DL(Op); + + return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32, DL), + /*Align*/8, /*isVolatile*/false, /*AlwaysInline*/false, + /*isTailCall*/false, + MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV)); +} + +SDValue SystemZTargetLowering:: +lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { + const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); + bool RealignOpt = !DAG.getMachineFunction().getFunction()-> + hasFnAttribute("no-realign-stack"); + + SDValue Chain = Op.getOperand(0); + SDValue Size = Op.getOperand(1); + SDValue Align = Op.getOperand(2); + SDLoc DL(Op); + + // If user has set the no alignment function attribute, ignore + // alloca alignments. + uint64_t AlignVal = (RealignOpt ? + dyn_cast<ConstantSDNode>(Align)->getZExtValue() : 0); + + uint64_t StackAlign = TFI->getStackAlignment(); + uint64_t RequiredAlign = std::max(AlignVal, StackAlign); + uint64_t ExtraAlignSpace = RequiredAlign - StackAlign; + + unsigned SPReg = getStackPointerRegisterToSaveRestore(); + SDValue NeededSpace = Size; + + // Get a reference to the stack pointer. + SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64); + + // Add extra space for alignment if needed. + if (ExtraAlignSpace) + NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace, + DAG.getConstant(ExtraAlignSpace, DL, MVT::i64)); + + // Get the new stack pointer value. + SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace); + + // Copy the new stack pointer back. + Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP); + + // The allocated data lives above the 160 bytes allocated for the standard + // frame, plus any outgoing stack arguments. We don't know how much that + // amounts to yet, so emit a special ADJDYNALLOC placeholder. + SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64); + SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust); + + // Dynamically realign if needed. + if (RequiredAlign > StackAlign) { + Result = + DAG.getNode(ISD::ADD, DL, MVT::i64, Result, + DAG.getConstant(ExtraAlignSpace, DL, MVT::i64)); + Result = + DAG.getNode(ISD::AND, DL, MVT::i64, Result, + DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64)); + } + + SDValue Ops[2] = { Result, Chain }; + return DAG.getMergeValues(Ops, DL); +} + +SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op, + SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + SDLoc DL(Op); + SDValue Ops[2]; + if (is32Bit(VT)) + // Just do a normal 64-bit multiplication and extract the results. + // We define this so that it can be used for constant division. 
+ lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0), + Op.getOperand(1), Ops[1], Ops[0]); + else { + // Do a full 128-bit multiplication based on UMUL_LOHI64: + // + // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64) + // + // but using the fact that the upper halves are either all zeros + // or all ones: + // + // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64) + // + // and grouping the right terms together since they are quicker than the + // multiplication: + // + // (ll * rl) - (((lh & rl) + (ll & rh)) << 64) + SDValue C63 = DAG.getConstant(63, DL, MVT::i64); + SDValue LL = Op.getOperand(0); + SDValue RL = Op.getOperand(1); + SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63); + SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63); + // UMUL_LOHI64 returns the low result in the odd register and the high + // result in the even register. SMUL_LOHI is defined to return the + // low half first, so the results are in reverse order. + lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64, + LL, RL, Ops[1], Ops[0]); + SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH); + SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL); + SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL); + Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum); + } + return DAG.getMergeValues(Ops, DL); +} + +SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op, + SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + SDLoc DL(Op); + SDValue Ops[2]; + if (is32Bit(VT)) + // Just do a normal 64-bit multiplication and extract the results. + // We define this so that it can be used for constant division. + lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0), + Op.getOperand(1), Ops[1], Ops[0]); + else + // UMUL_LOHI64 returns the low result in the odd register and the high + // result in the even register. UMUL_LOHI is defined to return the + // low half first, so the results are in reverse order. + lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64, + Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); + return DAG.getMergeValues(Ops, DL); +} + +SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op, + SelectionDAG &DAG) const { + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + EVT VT = Op.getValueType(); + SDLoc DL(Op); + unsigned Opcode; + + // We use DSGF for 32-bit division. + if (is32Bit(VT)) { + Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0); + Opcode = SystemZISD::SDIVREM32; + } else if (DAG.ComputeNumSignBits(Op1) > 32) { + Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1); + Opcode = SystemZISD::SDIVREM32; + } else + Opcode = SystemZISD::SDIVREM64; + + // DSG(F) takes a 64-bit dividend, so the even register in the GR128 + // input is "don't care". The instruction returns the remainder in + // the even register and the quotient in the odd register. + SDValue Ops[2]; + lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, Opcode, + Op0, Op1, Ops[1], Ops[0]); + return DAG.getMergeValues(Ops, DL); +} + +SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op, + SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + SDLoc DL(Op); + + // DL(G) uses a double-width dividend, so we need to clear the even + // register in the GR128 input. The instruction returns the remainder + // in the even register and the quotient in the odd register. 
+ SDValue Ops[2]; + if (is32Bit(VT)) + lowerGR128Binary(DAG, DL, VT, SystemZ::ZEXT128_32, SystemZISD::UDIVREM32, + Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); + else + lowerGR128Binary(DAG, DL, VT, SystemZ::ZEXT128_64, SystemZISD::UDIVREM64, + Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); + return DAG.getMergeValues(Ops, DL); +} + +SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const { + assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation"); + + // Get the known-zero masks for each operand. + SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1) }; + APInt KnownZero[2], KnownOne[2]; + DAG.computeKnownBits(Ops[0], KnownZero[0], KnownOne[0]); + DAG.computeKnownBits(Ops[1], KnownZero[1], KnownOne[1]); + + // See if the upper 32 bits of one operand and the lower 32 bits of the + // other are known zero. They are the low and high operands respectively. + uint64_t Masks[] = { KnownZero[0].getZExtValue(), + KnownZero[1].getZExtValue() }; + unsigned High, Low; + if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff) + High = 1, Low = 0; + else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff) + High = 0, Low = 1; + else + return Op; + + SDValue LowOp = Ops[Low]; + SDValue HighOp = Ops[High]; + + // If the high part is a constant, we're better off using IILH. + if (HighOp.getOpcode() == ISD::Constant) + return Op; + + // If the low part is a constant that is outside the range of LHI, + // then we're better off using IILF. + if (LowOp.getOpcode() == ISD::Constant) { + int64_t Value = int32_t(cast<ConstantSDNode>(LowOp)->getZExtValue()); + if (!isInt<16>(Value)) + return Op; + } + + // Check whether the high part is an AND that doesn't change the + // high 32 bits and just masks out low bits. We can skip it if so. + if (HighOp.getOpcode() == ISD::AND && + HighOp.getOperand(1).getOpcode() == ISD::Constant) { + SDValue HighOp0 = HighOp.getOperand(0); + uint64_t Mask = cast<ConstantSDNode>(HighOp.getOperand(1))->getZExtValue(); + if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff)))) + HighOp = HighOp0; + } + + // Take advantage of the fact that all GR32 operations only change the + // low 32 bits by truncating Low to an i32 and inserting it directly + // using a subreg. The interesting cases are those where the truncation + // can be folded. + SDLoc DL(Op); + SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp); + return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL, + MVT::i64, HighOp, Low32); +} + +SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op, + SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + SDLoc DL(Op); + Op = Op.getOperand(0); + + // Handle vector types via VPOPCT. 
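+  // VPOPCT counts the bits in each byte; the code below then combines
+  // adjacent byte counts (using shift/add or VSUM) to form the counts
+  // for wider element types.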
+ if (VT.isVector()) { + Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op); + Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op); + switch (VT.getVectorElementType().getSizeInBits()) { + case 8: + break; + case 16: { + Op = DAG.getNode(ISD::BITCAST, DL, VT, Op); + SDValue Shift = DAG.getConstant(8, DL, MVT::i32); + SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift); + Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp); + Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift); + break; + } + case 32: { + SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8, + DAG.getConstant(0, DL, MVT::i32)); + Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp); + break; + } + case 64: { + SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8, + DAG.getConstant(0, DL, MVT::i32)); + Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp); + Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp); + break; + } + default: + llvm_unreachable("Unexpected type"); + } + return Op; + } + + // Get the known-zero mask for the operand. + APInt KnownZero, KnownOne; + DAG.computeKnownBits(Op, KnownZero, KnownOne); + unsigned NumSignificantBits = (~KnownZero).getActiveBits(); + if (NumSignificantBits == 0) + return DAG.getConstant(0, DL, VT); + + // Skip known-zero high parts of the operand. + int64_t OrigBitSize = VT.getSizeInBits(); + int64_t BitSize = (int64_t)1 << Log2_32_Ceil(NumSignificantBits); + BitSize = std::min(BitSize, OrigBitSize); + + // The POPCNT instruction counts the number of bits in each byte. + Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op); + Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op); + Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op); + + // Add up per-byte counts in a binary tree. All bits of Op at + // position larger than BitSize remain zero throughout. + for (int64_t I = BitSize / 2; I >= 8; I = I / 2) { + SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT)); + if (BitSize != OrigBitSize) + Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp, + DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT)); + Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp); + } + + // Extract overall result from high byte. + if (BitSize > 8) + Op = DAG.getNode(ISD::SRL, DL, VT, Op, + DAG.getConstant(BitSize - 8, DL, VT)); + + return Op; +} + +// Op is an atomic load. Lower it into a normal volatile load. +SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op, + SelectionDAG &DAG) const { + auto *Node = cast<AtomicSDNode>(Op.getNode()); + return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), Op.getValueType(), + Node->getChain(), Node->getBasePtr(), + Node->getMemoryVT(), Node->getMemOperand()); +} + +// Op is an atomic store. Lower it into a normal volatile store followed +// by a serialization. +SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op, + SelectionDAG &DAG) const { + auto *Node = cast<AtomicSDNode>(Op.getNode()); + SDValue Chain = DAG.getTruncStore(Node->getChain(), SDLoc(Op), Node->getVal(), + Node->getBasePtr(), Node->getMemoryVT(), + Node->getMemOperand()); + return SDValue(DAG.getMachineNode(SystemZ::Serialize, SDLoc(Op), MVT::Other, + Chain), 0); +} + +// Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the first +// two into the fullword ATOMIC_LOADW_* operation given by Opcode. +SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op, + SelectionDAG &DAG, + unsigned Opcode) const { + auto *Node = cast<AtomicSDNode>(Op.getNode()); + + // 32-bit operations need no code outside the main loop. 
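+  // 8- and 16-bit operations are performed on the containing aligned word
+  // instead: the code below rotates the field to the top bits of a GR32,
+  // applies the ATOMIC_LOADW_* operation there, and rotates the result
+  // back down afterwards.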
+ EVT NarrowVT = Node->getMemoryVT(); + EVT WideVT = MVT::i32; + if (NarrowVT == WideVT) + return Op; + + int64_t BitSize = NarrowVT.getSizeInBits(); + SDValue ChainIn = Node->getChain(); + SDValue Addr = Node->getBasePtr(); + SDValue Src2 = Node->getVal(); + MachineMemOperand *MMO = Node->getMemOperand(); + SDLoc DL(Node); + EVT PtrVT = Addr.getValueType(); + + // Convert atomic subtracts of constants into additions. + if (Opcode == SystemZISD::ATOMIC_LOADW_SUB) + if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) { + Opcode = SystemZISD::ATOMIC_LOADW_ADD; + Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType()); + } + + // Get the address of the containing word. + SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr, + DAG.getConstant(-4, DL, PtrVT)); + + // Get the number of bits that the word must be rotated left in order + // to bring the field to the top bits of a GR32. + SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr, + DAG.getConstant(3, DL, PtrVT)); + BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift); + + // Get the complementing shift amount, for rotating a field in the top + // bits back to its proper position. + SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT, + DAG.getConstant(0, DL, WideVT), BitShift); + + // Extend the source operand to 32 bits and prepare it for the inner loop. + // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other + // operations require the source to be shifted in advance. (This shift + // can be folded if the source is constant.) For AND and NAND, the lower + // bits must be set, while for other opcodes they should be left clear. + if (Opcode != SystemZISD::ATOMIC_SWAPW) + Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2, + DAG.getConstant(32 - BitSize, DL, WideVT)); + if (Opcode == SystemZISD::ATOMIC_LOADW_AND || + Opcode == SystemZISD::ATOMIC_LOADW_NAND) + Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2, + DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT)); + + // Construct the ATOMIC_LOADW_* node. + SDVTList VTList = DAG.getVTList(WideVT, MVT::Other); + SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift, + DAG.getConstant(BitSize, DL, WideVT) }; + SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops, + NarrowVT, MMO); + + // Rotate the result of the final CS so that the field is in the lower + // bits of a GR32, then truncate it. + SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift, + DAG.getConstant(BitSize, DL, WideVT)); + SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift); + + SDValue RetOps[2] = { Result, AtomicOp.getValue(1) }; + return DAG.getMergeValues(RetOps, DL); +} + +// Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations +// into ATOMIC_LOADW_SUBs and decide whether to convert 32- and 64-bit +// operations into additions. +SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op, + SelectionDAG &DAG) const { + auto *Node = cast<AtomicSDNode>(Op.getNode()); + EVT MemVT = Node->getMemoryVT(); + if (MemVT == MVT::i32 || MemVT == MVT::i64) { + // A full-width operation. + assert(Op.getValueType() == MemVT && "Mismatched VTs"); + SDValue Src2 = Node->getVal(); + SDValue NegSrc2; + SDLoc DL(Src2); + + if (auto *Op2 = dyn_cast<ConstantSDNode>(Src2)) { + // Use an addition if the operand is constant and either LAA(G) is + // available or the negative value is in the range of A(G)FHI. 
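+      // For example, an atomic subtraction of 1 becomes an atomic
+      // addition of -1.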
+ int64_t Value = (-Op2->getAPIntValue()).getSExtValue(); + if (isInt<32>(Value) || Subtarget.hasInterlockedAccess1()) + NegSrc2 = DAG.getConstant(Value, DL, MemVT); + } else if (Subtarget.hasInterlockedAccess1()) + // Use LAA(G) if available. + NegSrc2 = DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT), + Src2); + + if (NegSrc2.getNode()) + return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT, + Node->getChain(), Node->getBasePtr(), NegSrc2, + Node->getMemOperand(), Node->getOrdering(), + Node->getSynchScope()); + + // Use the node as-is. + return Op; + } + + return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB); +} + +// Node is an 8- or 16-bit ATOMIC_CMP_SWAP operation. Lower the first two +// into a fullword ATOMIC_CMP_SWAPW operation. +SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op, + SelectionDAG &DAG) const { + auto *Node = cast<AtomicSDNode>(Op.getNode()); + + // We have native support for 32-bit compare and swap. + EVT NarrowVT = Node->getMemoryVT(); + EVT WideVT = MVT::i32; + if (NarrowVT == WideVT) + return Op; + + int64_t BitSize = NarrowVT.getSizeInBits(); + SDValue ChainIn = Node->getOperand(0); + SDValue Addr = Node->getOperand(1); + SDValue CmpVal = Node->getOperand(2); + SDValue SwapVal = Node->getOperand(3); + MachineMemOperand *MMO = Node->getMemOperand(); + SDLoc DL(Node); + EVT PtrVT = Addr.getValueType(); + + // Get the address of the containing word. + SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr, + DAG.getConstant(-4, DL, PtrVT)); + + // Get the number of bits that the word must be rotated left in order + // to bring the field to the top bits of a GR32. + SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr, + DAG.getConstant(3, DL, PtrVT)); + BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift); + + // Get the complementing shift amount, for rotating a field in the top + // bits back to its proper position. + SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT, + DAG.getConstant(0, DL, WideVT), BitShift); + + // Construct the ATOMIC_CMP_SWAPW node. + SDVTList VTList = DAG.getVTList(WideVT, MVT::Other); + SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift, + NegBitShift, DAG.getConstant(BitSize, DL, WideVT) }; + SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL, + VTList, Ops, NarrowVT, MMO); + return AtomicOp; +} + +SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op, + SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true); + return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op), + SystemZ::R15D, Op.getValueType()); +} + +SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op, + SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true); + return DAG.getCopyToReg(Op.getOperand(0), SDLoc(Op), + SystemZ::R15D, Op.getOperand(1)); +} + +SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op, + SelectionDAG &DAG) const { + bool IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue(); + if (!IsData) + // Just preserve the chain. + return Op.getOperand(0); + + SDLoc DL(Op); + bool IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue(); + unsigned Code = IsWrite ? 
SystemZ::PFD_WRITE : SystemZ::PFD_READ; + auto *Node = cast<MemIntrinsicSDNode>(Op.getNode()); + SDValue Ops[] = { + Op.getOperand(0), + DAG.getConstant(Code, DL, MVT::i32), + Op.getOperand(1) + }; + return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL, + Node->getVTList(), Ops, + Node->getMemoryVT(), Node->getMemOperand()); +} + +// Return an i32 that contains the value of CC immediately after After, +// whose final operand must be MVT::Glue. +static SDValue getCCResult(SelectionDAG &DAG, SDNode *After) { + SDLoc DL(After); + SDValue Glue = SDValue(After, After->getNumValues() - 1); + SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue); + return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM, + DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32)); +} + +SDValue +SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, + SelectionDAG &DAG) const { + unsigned Opcode, CCValid; + if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) { + assert(Op->getNumValues() == 2 && "Expected only CC result and chain"); + SDValue Glued = emitIntrinsicWithChainAndGlue(DAG, Op, Opcode); + SDValue CC = getCCResult(DAG, Glued.getNode()); + DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC); + return SDValue(); + } + + return SDValue(); +} + +SDValue +SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, + SelectionDAG &DAG) const { + unsigned Opcode, CCValid; + if (isIntrinsicWithCC(Op, Opcode, CCValid)) { + SDValue Glued = emitIntrinsicWithGlue(DAG, Op, Opcode); + SDValue CC = getCCResult(DAG, Glued.getNode()); + if (Op->getNumValues() == 1) + return CC; + assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result"); + return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(), Glued, + CC); + } + + unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + switch (Id) { + case Intrinsic::s390_vpdi: + return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(), + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + + case Intrinsic::s390_vperm: + return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(), + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + + case Intrinsic::s390_vuphb: + case Intrinsic::s390_vuphh: + case Intrinsic::s390_vuphf: + return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(), + Op.getOperand(1)); + + case Intrinsic::s390_vuplhb: + case Intrinsic::s390_vuplhh: + case Intrinsic::s390_vuplhf: + return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(), + Op.getOperand(1)); + + case Intrinsic::s390_vuplb: + case Intrinsic::s390_vuplhw: + case Intrinsic::s390_vuplf: + return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(), + Op.getOperand(1)); + + case Intrinsic::s390_vupllb: + case Intrinsic::s390_vupllh: + case Intrinsic::s390_vupllf: + return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(), + Op.getOperand(1)); + + case Intrinsic::s390_vsumb: + case Intrinsic::s390_vsumh: + case Intrinsic::s390_vsumgh: + case Intrinsic::s390_vsumgf: + case Intrinsic::s390_vsumqf: + case Intrinsic::s390_vsumqg: + return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + } + + return SDValue(); +} + +namespace { +// Says that SystemZISD operation Opcode can be used to perform the equivalent +// of a VPERM with permute vector Bytes. If Opcode takes three operands, +// Operand is the constant third operand, otherwise it is the number of +// bytes in each element of the result. 
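+// For example, the first entry below describes VMRHG: a MERGE_HIGH with
+// 8-byte elements, which takes the high doubleword of each operand.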
+struct Permute { + unsigned Opcode; + unsigned Operand; + unsigned char Bytes[SystemZ::VectorBytes]; +}; +} + +static const Permute PermuteForms[] = { + // VMRHG + { SystemZISD::MERGE_HIGH, 8, + { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } }, + // VMRHF + { SystemZISD::MERGE_HIGH, 4, + { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } }, + // VMRHH + { SystemZISD::MERGE_HIGH, 2, + { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } }, + // VMRHB + { SystemZISD::MERGE_HIGH, 1, + { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } }, + // VMRLG + { SystemZISD::MERGE_LOW, 8, + { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } }, + // VMRLF + { SystemZISD::MERGE_LOW, 4, + { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } }, + // VMRLH + { SystemZISD::MERGE_LOW, 2, + { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } }, + // VMRLB + { SystemZISD::MERGE_LOW, 1, + { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } }, + // VPKG + { SystemZISD::PACK, 4, + { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } }, + // VPKF + { SystemZISD::PACK, 2, + { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } }, + // VPKH + { SystemZISD::PACK, 1, + { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } }, + // VPDI V1, V2, 4 (low half of V1, high half of V2) + { SystemZISD::PERMUTE_DWORDS, 4, + { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } }, + // VPDI V1, V2, 1 (high half of V1, low half of V2) + { SystemZISD::PERMUTE_DWORDS, 1, + { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } } +}; + +// Called after matching a vector shuffle against a particular pattern. +// Both the original shuffle and the pattern have two vector operands. +// OpNos[0] is the operand of the original shuffle that should be used for +// operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything. +// OpNos[1] is the same for operand 1 of the pattern. Resolve these -1s and +// set OpNo0 and OpNo1 to the shuffle operands that should actually be used +// for operands 0 and 1 of the pattern. +static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) { + if (OpNos[0] < 0) { + if (OpNos[1] < 0) + return false; + OpNo0 = OpNo1 = OpNos[1]; + } else if (OpNos[1] < 0) { + OpNo0 = OpNo1 = OpNos[0]; + } else { + OpNo0 = OpNos[0]; + OpNo1 = OpNos[1]; + } + return true; +} + +// Bytes is a VPERM-like permute vector, except that -1 is used for +// undefined bytes. Return true if the VPERM can be implemented using P. +// When returning true set OpNo0 to the VPERM operand that should be +// used for operand 0 of P and likewise OpNo1 for operand 1 of P. +// +// For example, if swapping the VPERM operands allows P to match, OpNo0 +// will be 1 and OpNo1 will be 0. If instead Bytes only refers to one +// operand, but rewriting it to use two duplicated operands allows it to +// match P, then OpNo0 and OpNo1 will be the same. +static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P, + unsigned &OpNo0, unsigned &OpNo1) { + int OpNos[] = { -1, -1 }; + for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) { + int Elt = Bytes[I]; + if (Elt >= 0) { + // Make sure that the two permute vectors use the same suboperand + // byte number. Only the operand numbers (the high bits) are + // allowed to differ. 
+ if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1)) + return false; + int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes; + int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes; + // Make sure that the operand mappings are consistent with previous + // elements. + if (OpNos[ModelOpNo] == 1 - RealOpNo) + return false; + OpNos[ModelOpNo] = RealOpNo; + } + } + return chooseShuffleOpNos(OpNos, OpNo0, OpNo1); +} + +// As above, but search for a matching permute. +static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes, + unsigned &OpNo0, unsigned &OpNo1) { + for (auto &P : PermuteForms) + if (matchPermute(Bytes, P, OpNo0, OpNo1)) + return &P; + return nullptr; +} + +// Bytes is a VPERM-like permute vector, except that -1 is used for +// undefined bytes. This permute is an operand of an outer permute. +// See whether redistributing the -1 bytes gives a shuffle that can be +// implemented using P. If so, set Transform to a VPERM-like permute vector +// that, when applied to the result of P, gives the original permute in Bytes. +static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes, + const Permute &P, + SmallVectorImpl<int> &Transform) { + unsigned To = 0; + for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) { + int Elt = Bytes[From]; + if (Elt < 0) + // Byte number From of the result is undefined. + Transform[From] = -1; + else { + while (P.Bytes[To] != Elt) { + To += 1; + if (To == SystemZ::VectorBytes) + return false; + } + Transform[From] = To; + } + } + return true; +} + +// As above, but search for a matching permute. +static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes, + SmallVectorImpl<int> &Transform) { + for (auto &P : PermuteForms) + if (matchDoublePermute(Bytes, P, Transform)) + return &P; + return nullptr; +} + +// Convert the mask of the given VECTOR_SHUFFLE into a byte-level mask, +// as if it had type vNi8. +static void getVPermMask(ShuffleVectorSDNode *VSN, + SmallVectorImpl<int> &Bytes) { + EVT VT = VSN->getValueType(0); + unsigned NumElements = VT.getVectorNumElements(); + unsigned BytesPerElement = VT.getVectorElementType().getStoreSize(); + Bytes.resize(NumElements * BytesPerElement, -1); + for (unsigned I = 0; I < NumElements; ++I) { + int Index = VSN->getMaskElt(I); + if (Index >= 0) + for (unsigned J = 0; J < BytesPerElement; ++J) + Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J; + } +} + +// Bytes is a VPERM-like permute vector, except that -1 is used for +// undefined bytes. See whether bytes [Start, Start + BytesPerElement) of +// the result come from a contiguous sequence of bytes from one input. +// Set Base to the selector for the first byte if so. +static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start, + unsigned BytesPerElement, int &Base) { + Base = -1; + for (unsigned I = 0; I < BytesPerElement; ++I) { + if (Bytes[Start + I] >= 0) { + unsigned Elem = Bytes[Start + I]; + if (Base < 0) { + Base = Elem - I; + // Make sure the bytes would come from one input operand. + if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size()) + return false; + } else if (unsigned(Base) != Elem - I) + return false; + } + } + return true; +} + +// Bytes is a VPERM-like permute vector, except that -1 is used for +// undefined bytes. Return true if it can be performed using VSLDI. +// When returning true, set StartIndex to the shift amount and OpNo0 +// and OpNo1 to the VPERM operands that should be used as the first +// and second shift operand respectively. 
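+// For example, a permute vector that selects bytes 1..15 of one operand
+// followed by byte 0 of the other corresponds to a VSLDI with shift 1.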
+static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes, + unsigned &StartIndex, unsigned &OpNo0, + unsigned &OpNo1) { + int OpNos[] = { -1, -1 }; + int Shift = -1; + for (unsigned I = 0; I < 16; ++I) { + int Index = Bytes[I]; + if (Index >= 0) { + int ExpectedShift = (Index - I) % SystemZ::VectorBytes; + int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes; + int RealOpNo = unsigned(Index) / SystemZ::VectorBytes; + if (Shift < 0) + Shift = ExpectedShift; + else if (Shift != ExpectedShift) + return false; + // Make sure that the operand mappings are consistent with previous + // elements. + if (OpNos[ModelOpNo] == 1 - RealOpNo) + return false; + OpNos[ModelOpNo] = RealOpNo; + } + } + StartIndex = Shift; + return chooseShuffleOpNos(OpNos, OpNo0, OpNo1); +} + +// Create a node that performs P on operands Op0 and Op1, casting the +// operands to the appropriate type. The type of the result is determined by P. +static SDValue getPermuteNode(SelectionDAG &DAG, SDLoc DL, + const Permute &P, SDValue Op0, SDValue Op1) { + // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input + // elements of a PACK are twice as wide as the outputs. + unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 : + P.Opcode == SystemZISD::PACK ? P.Operand * 2 : + P.Operand); + // Cast both operands to the appropriate type. + MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8), + SystemZ::VectorBytes / InBytes); + Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0); + Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1); + SDValue Op; + if (P.Opcode == SystemZISD::PERMUTE_DWORDS) { + SDValue Op2 = DAG.getConstant(P.Operand, DL, MVT::i32); + Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2); + } else if (P.Opcode == SystemZISD::PACK) { + MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8), + SystemZ::VectorBytes / P.Operand); + Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1); + } else { + Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1); + } + return Op; +} + +// Bytes is a VPERM-like permute vector, except that -1 is used for +// undefined bytes. Implement it on operands Ops[0] and Ops[1] using +// VSLDI or VPERM. +static SDValue getGeneralPermuteNode(SelectionDAG &DAG, SDLoc DL, SDValue *Ops, + const SmallVectorImpl<int> &Bytes) { + for (unsigned I = 0; I < 2; ++I) + Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]); + + // First see whether VSLDI can be used. + unsigned StartIndex, OpNo0, OpNo1; + if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1)) + return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0], + Ops[OpNo1], DAG.getConstant(StartIndex, DL, MVT::i32)); + + // Fall back on VPERM. Construct an SDNode for the permute vector. + SDValue IndexNodes[SystemZ::VectorBytes]; + for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) + if (Bytes[I] >= 0) + IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32); + else + IndexNodes[I] = DAG.getUNDEF(MVT::i32); + SDValue Op2 = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, IndexNodes); + return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0], Ops[1], Op2); +} + +namespace { +// Describes a general N-operand vector shuffle. +struct GeneralShuffle { + GeneralShuffle(EVT vt) : VT(vt) {} + void addUndef(); + void add(SDValue, unsigned); + SDValue getNode(SelectionDAG &, SDLoc); + + // The operands of the shuffle. + SmallVector<SDValue, SystemZ::VectorBytes> Ops; + + // Index I is -1 if byte I of the result is undefined. 
Otherwise the + // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand + // Bytes[I] / SystemZ::VectorBytes. + SmallVector<int, SystemZ::VectorBytes> Bytes; + + // The type of the shuffle result. + EVT VT; +}; +} + +// Add an extra undefined element to the shuffle. +void GeneralShuffle::addUndef() { + unsigned BytesPerElement = VT.getVectorElementType().getStoreSize(); + for (unsigned I = 0; I < BytesPerElement; ++I) + Bytes.push_back(-1); +} + +// Add an extra element to the shuffle, taking it from element Elem of Op. +// A null Op indicates a vector input whose value will be calculated later; +// there is at most one such input per shuffle and it always has the same +// type as the result. +void GeneralShuffle::add(SDValue Op, unsigned Elem) { + unsigned BytesPerElement = VT.getVectorElementType().getStoreSize(); + + // The source vector can have wider elements than the result, + // either through an explicit TRUNCATE or because of type legalization. + // We want the least significant part. + EVT FromVT = Op.getNode() ? Op.getValueType() : VT; + unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize(); + assert(FromBytesPerElement >= BytesPerElement && + "Invalid EXTRACT_VECTOR_ELT"); + unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes + + (FromBytesPerElement - BytesPerElement)); + + // Look through things like shuffles and bitcasts. + while (Op.getNode()) { + if (Op.getOpcode() == ISD::BITCAST) + Op = Op.getOperand(0); + else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) { + // See whether the bytes we need come from a contiguous part of one + // operand. + SmallVector<int, SystemZ::VectorBytes> OpBytes; + getVPermMask(cast<ShuffleVectorSDNode>(Op), OpBytes); + int NewByte; + if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte)) + break; + if (NewByte < 0) { + addUndef(); + return; + } + Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes); + Byte = unsigned(NewByte) % SystemZ::VectorBytes; + } else if (Op.getOpcode() == ISD::UNDEF) { + addUndef(); + return; + } else + break; + } + + // Make sure that the source of the extraction is in Ops. + unsigned OpNo = 0; + for (; OpNo < Ops.size(); ++OpNo) + if (Ops[OpNo] == Op) + break; + if (OpNo == Ops.size()) + Ops.push_back(Op); + + // Add the element to Bytes. + unsigned Base = OpNo * SystemZ::VectorBytes + Byte; + for (unsigned I = 0; I < BytesPerElement; ++I) + Bytes.push_back(Base + I); +} + +// Return SDNodes for the completed shuffle. +SDValue GeneralShuffle::getNode(SelectionDAG &DAG, SDLoc DL) { + assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector"); + + if (Ops.size() == 0) + return DAG.getUNDEF(VT); + + // Make sure that there are at least two shuffle operands. + if (Ops.size() == 1) + Ops.push_back(DAG.getUNDEF(MVT::v16i8)); + + // Create a tree of shuffles, deferring root node until after the loop. + // Try to redistribute the undefined elements of non-root nodes so that + // the non-root shuffles match something like a pack or merge, then adjust + // the parent node's permute vector to compensate for the new order. + // Among other things, this copes with vectors like <2 x i16> that were + // padded with undefined elements during type legalization. + // + // In the best case this redistribution will lead to the whole tree + // using packs and merges. It should rarely be a loss in other cases. 
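+  // The loop below combines operands pairwise, so a shuffle with four
+  // inputs is first reduced to two intermediate results and then to a
+  // single root node.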
+ unsigned Stride = 1; + for (; Stride * 2 < Ops.size(); Stride *= 2) { + for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) { + SDValue SubOps[] = { Ops[I], Ops[I + Stride] }; + + // Create a mask for just these two operands. + SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes); + for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) { + unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes; + unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes; + if (OpNo == I) + NewBytes[J] = Byte; + else if (OpNo == I + Stride) + NewBytes[J] = SystemZ::VectorBytes + Byte; + else + NewBytes[J] = -1; + } + // See if it would be better to reorganize NewMask to avoid using VPERM. + SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes); + if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) { + Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]); + // Applying NewBytesMap to Ops[I] gets back to NewBytes. + for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) { + if (NewBytes[J] >= 0) { + assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes && + "Invalid double permute"); + Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J]; + } else + assert(NewBytesMap[J] < 0 && "Invalid double permute"); + } + } else { + // Just use NewBytes on the operands. + Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes); + for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) + if (NewBytes[J] >= 0) + Bytes[J] = I * SystemZ::VectorBytes + J; + } + } + } + + // Now we just have 2 inputs. Put the second operand in Ops[1]. + if (Stride > 1) { + Ops[1] = Ops[Stride]; + for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) + if (Bytes[I] >= int(SystemZ::VectorBytes)) + Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes; + } + + // Look for an instruction that can do the permute without resorting + // to VPERM. + unsigned OpNo0, OpNo1; + SDValue Op; + if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1)) + Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]); + else + Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes); + return DAG.getNode(ISD::BITCAST, DL, VT, Op); +} + +// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion. +static bool isScalarToVector(SDValue Op) { + for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I) + if (Op.getOperand(I).getOpcode() != ISD::UNDEF) + return false; + return true; +} + +// Return a vector of type VT that contains Value in the first element. +// The other elements don't matter. +static SDValue buildScalarToVector(SelectionDAG &DAG, SDLoc DL, EVT VT, + SDValue Value) { + // If we have a constant, replicate it to all elements and let the + // BUILD_VECTOR lowering take care of it. + if (Value.getOpcode() == ISD::Constant || + Value.getOpcode() == ISD::ConstantFP) { + SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value); + return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops); + } + if (Value.getOpcode() == ISD::UNDEF) + return DAG.getUNDEF(VT); + return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value); +} + +// Return a vector of type VT in which Op0 is in element 0 and Op1 is in +// element 1. Used for cases in which replication is cheap. 
+static SDValue buildMergeScalars(SelectionDAG &DAG, SDLoc DL, EVT VT, + SDValue Op0, SDValue Op1) { + if (Op0.getOpcode() == ISD::UNDEF) { + if (Op1.getOpcode() == ISD::UNDEF) + return DAG.getUNDEF(VT); + return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1); + } + if (Op1.getOpcode() == ISD::UNDEF) + return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0); + return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT, + buildScalarToVector(DAG, DL, VT, Op0), + buildScalarToVector(DAG, DL, VT, Op1)); +} + +// Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64 +// vector for them. +static SDValue joinDwords(SelectionDAG &DAG, SDLoc DL, SDValue Op0, + SDValue Op1) { + if (Op0.getOpcode() == ISD::UNDEF && Op1.getOpcode() == ISD::UNDEF) + return DAG.getUNDEF(MVT::v2i64); + // If one of the two inputs is undefined then replicate the other one, + // in order to avoid using another register unnecessarily. + if (Op0.getOpcode() == ISD::UNDEF) + Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1); + else if (Op1.getOpcode() == ISD::UNDEF) + Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); + else { + Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); + Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1); + } + return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1); +} + +// Try to represent constant BUILD_VECTOR node BVN using a +// SystemZISD::BYTE_MASK-style mask. Store the mask value in Mask +// on success. +static bool tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask) { + EVT ElemVT = BVN->getValueType(0).getVectorElementType(); + unsigned BytesPerElement = ElemVT.getStoreSize(); + for (unsigned I = 0, E = BVN->getNumOperands(); I != E; ++I) { + SDValue Op = BVN->getOperand(I); + if (Op.getOpcode() != ISD::UNDEF) { + uint64_t Value; + if (Op.getOpcode() == ISD::Constant) + Value = dyn_cast<ConstantSDNode>(Op)->getZExtValue(); + else if (Op.getOpcode() == ISD::ConstantFP) + Value = (dyn_cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt() + .getZExtValue()); + else + return false; + for (unsigned J = 0; J < BytesPerElement; ++J) { + uint64_t Byte = (Value >> (J * 8)) & 0xff; + if (Byte == 0xff) + Mask |= 1ULL << ((E - I - 1) * BytesPerElement + J); + else if (Byte != 0) + return false; + } + } + } + return true; +} + +// Try to load a vector constant in which BitsPerElement-bit value Value +// is replicated to fill the vector. VT is the type of the resulting +// constant, which may have elements of a different size from BitsPerElement. +// Return the SDValue of the constant on success, otherwise return +// an empty value. +static SDValue tryBuildVectorReplicate(SelectionDAG &DAG, + const SystemZInstrInfo *TII, + SDLoc DL, EVT VT, uint64_t Value, + unsigned BitsPerElement) { + // Signed 16-bit values can be replicated using VREPI. + int64_t SignedValue = SignExtend64(Value, BitsPerElement); + if (isInt<16>(SignedValue)) { + MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement), + SystemZ::VectorBits / BitsPerElement); + SDValue Op = DAG.getNode(SystemZISD::REPLICATE, DL, VecVT, + DAG.getConstant(SignedValue, DL, MVT::i32)); + return DAG.getNode(ISD::BITCAST, DL, VT, Op); + } + // See whether rotating the constant left some N places gives a value that + // is one less than a power of 2 (i.e. all zeros followed by all ones). + // If so we can use VGM. 
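+  // For example, the 32-bit value 0xff0000ff rotated left by 8 bits gives
+  // 0x0000ffff, so it can be generated this way.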
+ unsigned Start, End; + if (TII->isRxSBGMask(Value, BitsPerElement, Start, End)) { + // isRxSBGMask returns the bit numbers for a full 64-bit value, + // with 0 denoting 1 << 63 and 63 denoting 1. Convert them to + // bit numbers for an BitsPerElement value, so that 0 denotes + // 1 << (BitsPerElement-1). + Start -= 64 - BitsPerElement; + End -= 64 - BitsPerElement; + MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement), + SystemZ::VectorBits / BitsPerElement); + SDValue Op = DAG.getNode(SystemZISD::ROTATE_MASK, DL, VecVT, + DAG.getConstant(Start, DL, MVT::i32), + DAG.getConstant(End, DL, MVT::i32)); + return DAG.getNode(ISD::BITCAST, DL, VT, Op); + } + return SDValue(); +} + +// If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually +// better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for +// the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR +// would benefit from this representation and return it if so. +static SDValue tryBuildVectorShuffle(SelectionDAG &DAG, + BuildVectorSDNode *BVN) { + EVT VT = BVN->getValueType(0); + unsigned NumElements = VT.getVectorNumElements(); + + // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation + // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still + // need a BUILD_VECTOR, add an additional placeholder operand for that + // BUILD_VECTOR and store its operands in ResidueOps. + GeneralShuffle GS(VT); + SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps; + bool FoundOne = false; + for (unsigned I = 0; I < NumElements; ++I) { + SDValue Op = BVN->getOperand(I); + if (Op.getOpcode() == ISD::TRUNCATE) + Op = Op.getOperand(0); + if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + Op.getOperand(1).getOpcode() == ISD::Constant) { + unsigned Elem = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); + GS.add(Op.getOperand(0), Elem); + FoundOne = true; + } else if (Op.getOpcode() == ISD::UNDEF) { + GS.addUndef(); + } else { + GS.add(SDValue(), ResidueOps.size()); + ResidueOps.push_back(BVN->getOperand(I)); + } + } + + // Nothing to do if there are no EXTRACT_VECTOR_ELTs. + if (!FoundOne) + return SDValue(); + + // Create the BUILD_VECTOR for the remaining elements, if any. + if (!ResidueOps.empty()) { + while (ResidueOps.size() < NumElements) + ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType())); + for (auto &Op : GS.Ops) { + if (!Op.getNode()) { + Op = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BVN), VT, ResidueOps); + break; + } + } + } + return GS.getNode(DAG, SDLoc(BVN)); +} + +// Combine GPR scalar values Elems into a vector of type VT. +static SDValue buildVector(SelectionDAG &DAG, SDLoc DL, EVT VT, + SmallVectorImpl<SDValue> &Elems) { + // See whether there is a single replicated value. + SDValue Single; + unsigned int NumElements = Elems.size(); + unsigned int Count = 0; + for (auto Elem : Elems) { + if (Elem.getOpcode() != ISD::UNDEF) { + if (!Single.getNode()) + Single = Elem; + else if (Elem != Single) { + Single = SDValue(); + break; + } + Count += 1; + } + } + // There are three cases here: + // + // - if the only defined element is a loaded one, the best sequence + // is a replicating load. + // + // - otherwise, if the only defined element is an i64 value, we will + // end up with the same VLVGP sequence regardless of whether we short-cut + // for replication or fall through to the later code. 
+ // + // - otherwise, if the only defined element is an i32 or smaller value, + // we would need 2 instructions to replicate it: VLVGP followed by VREPx. + // This is only a win if the single defined element is used more than once. + // In other cases we're better off using a single VLVGx. + if (Single.getNode() && (Count > 1 || Single.getOpcode() == ISD::LOAD)) + return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single); + + // The best way of building a v2i64 from two i64s is to use VLVGP. + if (VT == MVT::v2i64) + return joinDwords(DAG, DL, Elems[0], Elems[1]); + + // Use a 64-bit merge high to combine two doubles. + if (VT == MVT::v2f64) + return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]); + + // Build v4f32 values directly from the FPRs: + // + // <Axxx> <Bxxx> <Cxxxx> <Dxxx> + // V V VMRHF + // <ABxx> <CDxx> + // V VMRHG + // <ABCD> + if (VT == MVT::v4f32) { + SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]); + SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]); + // Avoid unnecessary undefs by reusing the other operand. + if (Op01.getOpcode() == ISD::UNDEF) + Op01 = Op23; + else if (Op23.getOpcode() == ISD::UNDEF) + Op23 = Op01; + // Merging identical replications is a no-op. + if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23) + return Op01; + Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01); + Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23); + SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH, + DL, MVT::v2i64, Op01, Op23); + return DAG.getNode(ISD::BITCAST, DL, VT, Op); + } + + // Collect the constant terms. + SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue()); + SmallVector<bool, SystemZ::VectorBytes> Done(NumElements, false); + + unsigned NumConstants = 0; + for (unsigned I = 0; I < NumElements; ++I) { + SDValue Elem = Elems[I]; + if (Elem.getOpcode() == ISD::Constant || + Elem.getOpcode() == ISD::ConstantFP) { + NumConstants += 1; + Constants[I] = Elem; + Done[I] = true; + } + } + // If there was at least one constant, fill in the other elements of + // Constants with undefs to get a full vector constant and use that + // as the starting point. + SDValue Result; + if (NumConstants > 0) { + for (unsigned I = 0; I < NumElements; ++I) + if (!Constants[I].getNode()) + Constants[I] = DAG.getUNDEF(Elems[I].getValueType()); + Result = DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Constants); + } else { + // Otherwise try to use VLVGP to start the sequence in order to + // avoid a false dependency on any previous contents of the vector + // register. This only makes sense if one of the associated elements + // is defined. + unsigned I1 = NumElements / 2 - 1; + unsigned I2 = NumElements - 1; + bool Def1 = (Elems[I1].getOpcode() != ISD::UNDEF); + bool Def2 = (Elems[I2].getOpcode() != ISD::UNDEF); + if (Def1 || Def2) { + SDValue Elem1 = Elems[Def1 ? I1 : I2]; + SDValue Elem2 = Elems[Def2 ? I2 : I1]; + Result = DAG.getNode(ISD::BITCAST, DL, VT, + joinDwords(DAG, DL, Elem1, Elem2)); + Done[I1] = true; + Done[I2] = true; + } else + Result = DAG.getUNDEF(VT); + } + + // Use VLVGx to insert the other elements. 
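+ // For example, building the v4i32 value { X, 1, Y, 2 } with X and Y in
+ // GPRs roughly becomes: materialize the constant vector
+ // { undef, 1, undef, 2 } first, then insert X into element 0 and Y into
+ // element 2 with VLVGF. (Illustrative example; the exact sequence
+ // depends on what the constant handling above chooses.)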
+ for (unsigned I = 0; I < NumElements; ++I) + if (!Done[I] && Elems[I].getOpcode() != ISD::UNDEF) + Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I], + DAG.getConstant(I, DL, MVT::i32)); + return Result; +} + +SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op, + SelectionDAG &DAG) const { + const SystemZInstrInfo *TII = + static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); + auto *BVN = cast<BuildVectorSDNode>(Op.getNode()); + SDLoc DL(Op); + EVT VT = Op.getValueType(); + + if (BVN->isConstant()) { + // Try using VECTOR GENERATE BYTE MASK. This is the architecturally- + // preferred way of creating all-zero and all-one vectors so give it + // priority over other methods below. + uint64_t Mask = 0; + if (tryBuildVectorByteMask(BVN, Mask)) { + SDValue Op = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8, + DAG.getConstant(Mask, DL, MVT::i32)); + return DAG.getNode(ISD::BITCAST, DL, VT, Op); + } + + // Try using some form of replication. + APInt SplatBits, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, + 8, true) && + SplatBitSize <= 64) { + // First try assuming that any undefined bits above the highest set bit + // and below the lowest set bit are 1s. This increases the likelihood of + // being able to use a sign-extended element value in VECTOR REPLICATE + // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK. + uint64_t SplatBitsZ = SplatBits.getZExtValue(); + uint64_t SplatUndefZ = SplatUndef.getZExtValue(); + uint64_t Lower = (SplatUndefZ + & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1)); + uint64_t Upper = (SplatUndefZ + & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1)); + uint64_t Value = SplatBitsZ | Upper | Lower; + SDValue Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value, + SplatBitSize); + if (Op.getNode()) + return Op; + + // Now try assuming that any undefined bits between the first and + // last defined set bits are set. This increases the chances of + // using a non-wraparound mask. + uint64_t Middle = SplatUndefZ & ~Upper & ~Lower; + Value = SplatBitsZ | Middle; + Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value, SplatBitSize); + if (Op.getNode()) + return Op; + } + + // Fall back to loading it from memory. + return SDValue(); + } + + // See if we should use shuffles to construct the vector from other vectors. + SDValue Res = tryBuildVectorShuffle(DAG, BVN); + if (Res.getNode()) + return Res; + + // Detect SCALAR_TO_VECTOR conversions. + if (isOperationLegal(ISD::SCALAR_TO_VECTOR, VT) && isScalarToVector(Op)) + return buildScalarToVector(DAG, DL, VT, Op.getOperand(0)); + + // Otherwise use buildVector to build the vector up from GPRs. + unsigned NumElements = Op.getNumOperands(); + SmallVector<SDValue, SystemZ::VectorBytes> Ops(NumElements); + for (unsigned I = 0; I < NumElements; ++I) + Ops[I] = Op.getOperand(I); + return buildVector(DAG, DL, VT, Ops); +} + +SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, + SelectionDAG &DAG) const { + auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode()); + SDLoc DL(Op); + EVT VT = Op.getValueType(); + unsigned NumElements = VT.getVectorNumElements(); + + if (VSN->isSplat()) { + SDValue Op0 = Op.getOperand(0); + unsigned Index = VSN->getSplatIndex(); + assert(Index < VT.getVectorNumElements() && + "Splat index should be defined and in first operand"); + // See whether the value we're splatting is directly available as a scalar. 
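+ // For example, splatting element 2 of (build_vector A, B, C, D) can
+ // simply replicate the scalar C, instead of first building the vector
+ // and then broadcasting one of its lanes. (Illustrative example.)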
+ if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) || + Op0.getOpcode() == ISD::BUILD_VECTOR) + return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index)); + // Otherwise keep it as a vector-to-vector operation. + return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0), + DAG.getConstant(Index, DL, MVT::i32)); + } + + GeneralShuffle GS(VT); + for (unsigned I = 0; I < NumElements; ++I) { + int Elt = VSN->getMaskElt(I); + if (Elt < 0) + GS.addUndef(); + else + GS.add(Op.getOperand(unsigned(Elt) / NumElements), + unsigned(Elt) % NumElements); + } + return GS.getNode(DAG, SDLoc(VSN)); +} + +SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + // Just insert the scalar into element 0 of an undefined vector. + return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, + Op.getValueType(), DAG.getUNDEF(Op.getValueType()), + Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32)); +} + +SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { + // Handle insertions of floating-point values. + SDLoc DL(Op); + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + SDValue Op2 = Op.getOperand(2); + EVT VT = Op.getValueType(); + + // Insertions into constant indices of a v2f64 can be done using VPDI. + // However, if the inserted value is a bitcast or a constant then it's + // better to use GPRs, as below. + if (VT == MVT::v2f64 && + Op1.getOpcode() != ISD::BITCAST && + Op1.getOpcode() != ISD::ConstantFP && + Op2.getOpcode() == ISD::Constant) { + uint64_t Index = dyn_cast<ConstantSDNode>(Op2)->getZExtValue(); + unsigned Mask = VT.getVectorNumElements() - 1; + if (Index <= Mask) + return Op; + } + + // Otherwise bitcast to the equivalent integer form and insert via a GPR. + MVT IntVT = MVT::getIntegerVT(VT.getVectorElementType().getSizeInBits()); + MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements()); + SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT, + DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), + DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2); + return DAG.getNode(ISD::BITCAST, DL, VT, Res); +} + +SDValue +SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { + // Handle extractions of floating-point values. + SDLoc DL(Op); + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + EVT VT = Op.getValueType(); + EVT VecVT = Op0.getValueType(); + + // Extractions of constant indices can be done directly. + if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) { + uint64_t Index = CIndexN->getZExtValue(); + unsigned Mask = VecVT.getVectorNumElements() - 1; + if (Index <= Mask) + return Op; + } + + // Otherwise bitcast to the equivalent integer form and extract via a GPR. 
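+ // For example, an extraction with a variable index from a v4f32 is
+ // rewritten as an i32 extraction from the bitcast v4i32 value, with the
+ // result bitcast back to f32; the integer form is what the GR-based
+ // element-extraction patterns (e.g. VLGVF) expect. (Illustrative
+ // description of the code below.)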
+ MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits()); + MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements()); + SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT, + DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1); + return DAG.getNode(ISD::BITCAST, DL, VT, Res); +} + +SDValue +SystemZTargetLowering::lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG, + unsigned UnpackHigh) const { + SDValue PackedOp = Op.getOperand(0); + EVT OutVT = Op.getValueType(); + EVT InVT = PackedOp.getValueType(); + unsigned ToBits = OutVT.getVectorElementType().getSizeInBits(); + unsigned FromBits = InVT.getVectorElementType().getSizeInBits(); + do { + FromBits *= 2; + EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits), + SystemZ::VectorBits / FromBits); + PackedOp = DAG.getNode(UnpackHigh, SDLoc(PackedOp), OutVT, PackedOp); + } while (FromBits != ToBits); + return PackedOp; +} + +SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG, + unsigned ByScalar) const { + // Look for cases where a vector shift can use the *_BY_SCALAR form. + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + SDLoc DL(Op); + EVT VT = Op.getValueType(); + unsigned ElemBitSize = VT.getVectorElementType().getSizeInBits(); + + // See whether the shift vector is a splat represented as BUILD_VECTOR. + if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) { + APInt SplatBits, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + // Check for constant splats. Use ElemBitSize as the minimum element + // width and reject splats that need wider elements. + if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, + ElemBitSize, true) && + SplatBitSize == ElemBitSize) { + SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff, + DL, MVT::i32); + return DAG.getNode(ByScalar, DL, VT, Op0, Shift); + } + // Check for variable splats. + BitVector UndefElements; + SDValue Splat = BVN->getSplatValue(&UndefElements); + if (Splat) { + // Since i32 is the smallest legal type, we either need a no-op + // or a truncation. + SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat); + return DAG.getNode(ByScalar, DL, VT, Op0, Shift); + } + } + + // See whether the shift vector is a splat represented as SHUFFLE_VECTOR, + // and the shift amount is directly available in a GPR. + if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) { + if (VSN->isSplat()) { + SDValue VSNOp0 = VSN->getOperand(0); + unsigned Index = VSN->getSplatIndex(); + assert(Index < VT.getVectorNumElements() && + "Splat index should be defined and in first operand"); + if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) || + VSNOp0.getOpcode() == ISD::BUILD_VECTOR) { + // Since i32 is the smallest legal type, we either need a no-op + // or a truncation. + SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, + VSNOp0.getOperand(Index)); + return DAG.getNode(ByScalar, DL, VT, Op0, Shift); + } + } + } + + // Otherwise just treat the current form as legal. 
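+ // For example, a shift whose per-element amounts differ is kept in this
+ // generic form, whereas a splatted amount was already converted above
+ // into the *_BY_SCALAR form (e.g. VESLF with a single GPR amount).
+ // (Illustrative note.)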
+ return Op; +} + +SDValue SystemZTargetLowering::LowerOperation(SDValue Op, + SelectionDAG &DAG) const { + switch (Op.getOpcode()) { + case ISD::BR_CC: + return lowerBR_CC(Op, DAG); + case ISD::SELECT_CC: + return lowerSELECT_CC(Op, DAG); + case ISD::SETCC: + return lowerSETCC(Op, DAG); + case ISD::GlobalAddress: + return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG); + case ISD::GlobalTLSAddress: + return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG); + case ISD::BlockAddress: + return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG); + case ISD::JumpTable: + return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG); + case ISD::ConstantPool: + return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG); + case ISD::BITCAST: + return lowerBITCAST(Op, DAG); + case ISD::VASTART: + return lowerVASTART(Op, DAG); + case ISD::VACOPY: + return lowerVACOPY(Op, DAG); + case ISD::DYNAMIC_STACKALLOC: + return lowerDYNAMIC_STACKALLOC(Op, DAG); + case ISD::SMUL_LOHI: + return lowerSMUL_LOHI(Op, DAG); + case ISD::UMUL_LOHI: + return lowerUMUL_LOHI(Op, DAG); + case ISD::SDIVREM: + return lowerSDIVREM(Op, DAG); + case ISD::UDIVREM: + return lowerUDIVREM(Op, DAG); + case ISD::OR: + return lowerOR(Op, DAG); + case ISD::CTPOP: + return lowerCTPOP(Op, DAG); + case ISD::CTLZ_ZERO_UNDEF: + return DAG.getNode(ISD::CTLZ, SDLoc(Op), + Op.getValueType(), Op.getOperand(0)); + case ISD::CTTZ_ZERO_UNDEF: + return DAG.getNode(ISD::CTTZ, SDLoc(Op), + Op.getValueType(), Op.getOperand(0)); + case ISD::ATOMIC_SWAP: + return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW); + case ISD::ATOMIC_STORE: + return lowerATOMIC_STORE(Op, DAG); + case ISD::ATOMIC_LOAD: + return lowerATOMIC_LOAD(Op, DAG); + case ISD::ATOMIC_LOAD_ADD: + return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD); + case ISD::ATOMIC_LOAD_SUB: + return lowerATOMIC_LOAD_SUB(Op, DAG); + case ISD::ATOMIC_LOAD_AND: + return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND); + case ISD::ATOMIC_LOAD_OR: + return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR); + case ISD::ATOMIC_LOAD_XOR: + return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR); + case ISD::ATOMIC_LOAD_NAND: + return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND); + case ISD::ATOMIC_LOAD_MIN: + return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN); + case ISD::ATOMIC_LOAD_MAX: + return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX); + case ISD::ATOMIC_LOAD_UMIN: + return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN); + case ISD::ATOMIC_LOAD_UMAX: + return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX); + case ISD::ATOMIC_CMP_SWAP: + return lowerATOMIC_CMP_SWAP(Op, DAG); + case ISD::STACKSAVE: + return lowerSTACKSAVE(Op, DAG); + case ISD::STACKRESTORE: + return lowerSTACKRESTORE(Op, DAG); + case ISD::PREFETCH: + return lowerPREFETCH(Op, DAG); + case ISD::INTRINSIC_W_CHAIN: + return lowerINTRINSIC_W_CHAIN(Op, DAG); + case ISD::INTRINSIC_WO_CHAIN: + return lowerINTRINSIC_WO_CHAIN(Op, DAG); + case ISD::BUILD_VECTOR: + return lowerBUILD_VECTOR(Op, DAG); + case ISD::VECTOR_SHUFFLE: + return lowerVECTOR_SHUFFLE(Op, DAG); + case ISD::SCALAR_TO_VECTOR: + return lowerSCALAR_TO_VECTOR(Op, DAG); + case ISD::INSERT_VECTOR_ELT: + return lowerINSERT_VECTOR_ELT(Op, DAG); + case ISD::EXTRACT_VECTOR_ELT: + return lowerEXTRACT_VECTOR_ELT(Op, DAG); + case ISD::SIGN_EXTEND_VECTOR_INREG: + return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACK_HIGH); + case 
ISD::ZERO_EXTEND_VECTOR_INREG: + return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACKL_HIGH); + case ISD::SHL: + return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR); + case ISD::SRL: + return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR); + case ISD::SRA: + return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR); + default: + llvm_unreachable("Unexpected node to lower"); + } +} + +const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { +#define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME + switch ((SystemZISD::NodeType)Opcode) { + case SystemZISD::FIRST_NUMBER: break; + OPCODE(RET_FLAG); + OPCODE(CALL); + OPCODE(SIBCALL); + OPCODE(TLS_GDCALL); + OPCODE(TLS_LDCALL); + OPCODE(PCREL_WRAPPER); + OPCODE(PCREL_OFFSET); + OPCODE(IABS); + OPCODE(ICMP); + OPCODE(FCMP); + OPCODE(TM); + OPCODE(BR_CCMASK); + OPCODE(SELECT_CCMASK); + OPCODE(ADJDYNALLOC); + OPCODE(EXTRACT_ACCESS); + OPCODE(POPCNT); + OPCODE(UMUL_LOHI64); + OPCODE(SDIVREM32); + OPCODE(SDIVREM64); + OPCODE(UDIVREM32); + OPCODE(UDIVREM64); + OPCODE(MVC); + OPCODE(MVC_LOOP); + OPCODE(NC); + OPCODE(NC_LOOP); + OPCODE(OC); + OPCODE(OC_LOOP); + OPCODE(XC); + OPCODE(XC_LOOP); + OPCODE(CLC); + OPCODE(CLC_LOOP); + OPCODE(STPCPY); + OPCODE(STRCMP); + OPCODE(SEARCH_STRING); + OPCODE(IPM); + OPCODE(SERIALIZE); + OPCODE(TBEGIN); + OPCODE(TBEGIN_NOFLOAT); + OPCODE(TEND); + OPCODE(BYTE_MASK); + OPCODE(ROTATE_MASK); + OPCODE(REPLICATE); + OPCODE(JOIN_DWORDS); + OPCODE(SPLAT); + OPCODE(MERGE_HIGH); + OPCODE(MERGE_LOW); + OPCODE(SHL_DOUBLE); + OPCODE(PERMUTE_DWORDS); + OPCODE(PERMUTE); + OPCODE(PACK); + OPCODE(PACKS_CC); + OPCODE(PACKLS_CC); + OPCODE(UNPACK_HIGH); + OPCODE(UNPACKL_HIGH); + OPCODE(UNPACK_LOW); + OPCODE(UNPACKL_LOW); + OPCODE(VSHL_BY_SCALAR); + OPCODE(VSRL_BY_SCALAR); + OPCODE(VSRA_BY_SCALAR); + OPCODE(VSUM); + OPCODE(VICMPE); + OPCODE(VICMPH); + OPCODE(VICMPHL); + OPCODE(VICMPES); + OPCODE(VICMPHS); + OPCODE(VICMPHLS); + OPCODE(VFCMPE); + OPCODE(VFCMPH); + OPCODE(VFCMPHE); + OPCODE(VFCMPES); + OPCODE(VFCMPHS); + OPCODE(VFCMPHES); + OPCODE(VFTCI); + OPCODE(VEXTEND); + OPCODE(VROUND); + OPCODE(VTM); + OPCODE(VFAE_CC); + OPCODE(VFAEZ_CC); + OPCODE(VFEE_CC); + OPCODE(VFEEZ_CC); + OPCODE(VFENE_CC); + OPCODE(VFENEZ_CC); + OPCODE(VISTR_CC); + OPCODE(VSTRC_CC); + OPCODE(VSTRCZ_CC); + OPCODE(ATOMIC_SWAPW); + OPCODE(ATOMIC_LOADW_ADD); + OPCODE(ATOMIC_LOADW_SUB); + OPCODE(ATOMIC_LOADW_AND); + OPCODE(ATOMIC_LOADW_OR); + OPCODE(ATOMIC_LOADW_XOR); + OPCODE(ATOMIC_LOADW_NAND); + OPCODE(ATOMIC_LOADW_MIN); + OPCODE(ATOMIC_LOADW_MAX); + OPCODE(ATOMIC_LOADW_UMIN); + OPCODE(ATOMIC_LOADW_UMAX); + OPCODE(ATOMIC_CMP_SWAPW); + OPCODE(PREFETCH); + } + return nullptr; +#undef OPCODE +} + +// Return true if VT is a vector whose elements are a whole number of bytes +// in width. +static bool canTreatAsByteVector(EVT VT) { + return VT.isVector() && VT.getVectorElementType().getSizeInBits() % 8 == 0; +} + +// Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT +// producing a result of type ResVT. Op is a possibly bitcast version +// of the input vector and Index is the index (based on type VecVT) that +// should be extracted. Return the new extraction if a simplification +// was possible or if Force is true. +SDValue SystemZTargetLowering::combineExtract(SDLoc DL, EVT ResVT, EVT VecVT, + SDValue Op, unsigned Index, + DAGCombinerInfo &DCI, + bool Force) const { + SelectionDAG &DAG = DCI.DAG; + + // The number of bytes being extracted. 
+ unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize(); + + for (;;) { + unsigned Opcode = Op.getOpcode(); + if (Opcode == ISD::BITCAST) + // Look through bitcasts. + Op = Op.getOperand(0); + else if (Opcode == ISD::VECTOR_SHUFFLE && + canTreatAsByteVector(Op.getValueType())) { + // Get a VPERM-like permute mask and see whether the bytes covered + // by the extracted element are a contiguous sequence from one + // source operand. + SmallVector<int, SystemZ::VectorBytes> Bytes; + getVPermMask(cast<ShuffleVectorSDNode>(Op), Bytes); + int First; + if (!getShuffleInput(Bytes, Index * BytesPerElement, + BytesPerElement, First)) + break; + if (First < 0) + return DAG.getUNDEF(ResVT); + // Make sure the contiguous sequence starts at a multiple of the + // original element size. + unsigned Byte = unsigned(First) % Bytes.size(); + if (Byte % BytesPerElement != 0) + break; + // We can get the extracted value directly from an input. + Index = Byte / BytesPerElement; + Op = Op.getOperand(unsigned(First) / Bytes.size()); + Force = true; + } else if (Opcode == ISD::BUILD_VECTOR && + canTreatAsByteVector(Op.getValueType())) { + // We can only optimize this case if the BUILD_VECTOR elements are + // at least as wide as the extracted value. + EVT OpVT = Op.getValueType(); + unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize(); + if (OpBytesPerElement < BytesPerElement) + break; + // Make sure that the least-significant bit of the extracted value + // is the least significant bit of an input. + unsigned End = (Index + 1) * BytesPerElement; + if (End % OpBytesPerElement != 0) + break; + // We're extracting the low part of one operand of the BUILD_VECTOR. + Op = Op.getOperand(End / OpBytesPerElement - 1); + if (!Op.getValueType().isInteger()) { + EVT VT = MVT::getIntegerVT(Op.getValueType().getSizeInBits()); + Op = DAG.getNode(ISD::BITCAST, DL, VT, Op); + DCI.AddToWorklist(Op.getNode()); + } + EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits()); + Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op); + if (VT != ResVT) { + DCI.AddToWorklist(Op.getNode()); + Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op); + } + return Op; + } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG || + Opcode == ISD::ZERO_EXTEND_VECTOR_INREG || + Opcode == ISD::ANY_EXTEND_VECTOR_INREG) && + canTreatAsByteVector(Op.getValueType()) && + canTreatAsByteVector(Op.getOperand(0).getValueType())) { + // Make sure that only the unextended bits are significant. + EVT ExtVT = Op.getValueType(); + EVT OpVT = Op.getOperand(0).getValueType(); + unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize(); + unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize(); + unsigned Byte = Index * BytesPerElement; + unsigned SubByte = Byte % ExtBytesPerElement; + unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement; + if (SubByte < MinSubByte || + SubByte + BytesPerElement > ExtBytesPerElement) + break; + // Get the byte offset of the unextended element + Byte = Byte / ExtBytesPerElement * OpBytesPerElement; + // ...then add the byte offset relative to that element. 
+ Byte += SubByte - MinSubByte; + if (Byte % BytesPerElement != 0) + break; + Op = Op.getOperand(0); + Index = Byte / BytesPerElement; + Force = true; + } else + break; + } + if (Force) { + if (Op.getValueType() != VecVT) { + Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op); + DCI.AddToWorklist(Op.getNode()); + } + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op, + DAG.getConstant(Index, DL, MVT::i32)); + } + return SDValue(); +} + +// Optimize vector operations in scalar value Op on the basis that Op +// is truncated to TruncVT. +SDValue +SystemZTargetLowering::combineTruncateExtract(SDLoc DL, EVT TruncVT, SDValue Op, + DAGCombinerInfo &DCI) const { + // If we have (trunc (extract_vector_elt X, Y)), try to turn it into + // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements + // of type TruncVT. + if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + TruncVT.getSizeInBits() % 8 == 0) { + SDValue Vec = Op.getOperand(0); + EVT VecVT = Vec.getValueType(); + if (canTreatAsByteVector(VecVT)) { + if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize(); + unsigned TruncBytes = TruncVT.getStoreSize(); + if (BytesPerElement % TruncBytes == 0) { + // Calculate the value of Y' in the above description. We are + // splitting the original elements into Scale equal-sized pieces + // and for truncation purposes want the last (least-significant) + // of these pieces for IndexN. This is easiest to do by calculating + // the start index of the following element and then subtracting 1. + unsigned Scale = BytesPerElement / TruncBytes; + unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1; + + // Defer the creation of the bitcast from X to combineExtract, + // which might be able to optimize the extraction. + VecVT = MVT::getVectorVT(MVT::getIntegerVT(TruncBytes * 8), + VecVT.getStoreSize() / TruncBytes); + EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT); + return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true); + } + } + } + } + return SDValue(); +} + +SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + unsigned Opcode = N->getOpcode(); + if (Opcode == ISD::SIGN_EXTEND) { + // Convert (sext (ashr (shl X, C1), C2)) to + // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as + // cheap as narrower ones. 
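+ // For example, sign-extending the i32 value (ashr (shl X, 24), 24) to
+ // i64 becomes (ashr (shl (anyext X), 56), 56): the extra 32 bits of
+ // width are added to both shift amounts, and the result is still the
+ // low byte of X sign-extended, now directly to 64 bits. (Worked example
+ // of the transformation described above.)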
+ SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) { + auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + SDValue Inner = N0.getOperand(0); + if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) { + if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) { + unsigned Extra = (VT.getSizeInBits() - + N0.getValueType().getSizeInBits()); + unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra; + unsigned NewSraAmt = SraAmt->getZExtValue() + Extra; + EVT ShiftVT = N0.getOperand(1).getValueType(); + SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT, + Inner.getOperand(0)); + SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext, + DAG.getConstant(NewShlAmt, SDLoc(Inner), + ShiftVT)); + return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, + DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT)); + } + } + } + } + if (Opcode == SystemZISD::MERGE_HIGH || + Opcode == SystemZISD::MERGE_LOW) { + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + if (Op0.getOpcode() == ISD::BITCAST) + Op0 = Op0.getOperand(0); + if (Op0.getOpcode() == SystemZISD::BYTE_MASK && + cast<ConstantSDNode>(Op0.getOperand(0))->getZExtValue() == 0) { + // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF + // for v4f32. + if (Op1 == N->getOperand(0)) + return Op1; + // (z_merge_? 0, X) -> (z_unpackl_? 0, X). + EVT VT = Op1.getValueType(); + unsigned ElemBytes = VT.getVectorElementType().getStoreSize(); + if (ElemBytes <= 4) { + Opcode = (Opcode == SystemZISD::MERGE_HIGH ? + SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW); + EVT InVT = VT.changeVectorElementTypeToInteger(); + EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16), + SystemZ::VectorBytes / ElemBytes / 2); + if (VT != InVT) { + Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1); + DCI.AddToWorklist(Op1.getNode()); + } + SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1); + DCI.AddToWorklist(Op.getNode()); + return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op); + } + } + } + // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better + // for the extraction to be done on a vMiN value, so that we can use VSTE. + // If X has wider elements then convert it to: + // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z). + if (Opcode == ISD::STORE) { + auto *SN = cast<StoreSDNode>(N); + EVT MemVT = SN->getMemoryVT(); + if (MemVT.isInteger()) { + SDValue Value = combineTruncateExtract(SDLoc(N), MemVT, + SN->getValue(), DCI); + if (Value.getNode()) { + DCI.AddToWorklist(Value.getNode()); + + // Rewrite the store with the new form of stored value. + return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value, + SN->getBasePtr(), SN->getMemoryVT(), + SN->getMemOperand()); + } + } + } + // Try to simplify a vector extraction. 
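+ // For example, (extract_vector_elt (build_vector A, B, C, D), 2) can be
+ // folded by combineExtract to use C directly, and an extraction from a
+ // shuffle can often be redirected to the corresponding element of one
+ // of the shuffle inputs. (Illustrative examples.)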
+ if (Opcode == ISD::EXTRACT_VECTOR_ELT) { + if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) { + SDValue Op0 = N->getOperand(0); + EVT VecVT = Op0.getValueType(); + return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0, + IndexN->getZExtValue(), DCI, false); + } + } + // (join_dwords X, X) == (replicate X) + if (Opcode == SystemZISD::JOIN_DWORDS && + N->getOperand(0) == N->getOperand(1)) + return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0), + N->getOperand(0)); + // (fround (extract_vector_elt X 0)) + // (fround (extract_vector_elt X 1)) -> + // (extract_vector_elt (VROUND X) 0) + // (extract_vector_elt (VROUND X) 1) + // + // This is a special case since the target doesn't really support v2f32s. + if (Opcode == ISD::FP_ROUND) { + SDValue Op0 = N->getOperand(0); + if (N->getValueType(0) == MVT::f32 && + Op0.hasOneUse() && + Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + Op0.getOperand(0).getValueType() == MVT::v2f64 && + Op0.getOperand(1).getOpcode() == ISD::Constant && + cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue() == 0) { + SDValue Vec = Op0.getOperand(0); + for (auto *U : Vec->uses()) { + if (U != Op0.getNode() && + U->hasOneUse() && + U->getOpcode() == ISD::EXTRACT_VECTOR_ELT && + U->getOperand(0) == Vec && + U->getOperand(1).getOpcode() == ISD::Constant && + cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 1) { + SDValue OtherRound = SDValue(*U->use_begin(), 0); + if (OtherRound.getOpcode() == ISD::FP_ROUND && + OtherRound.getOperand(0) == SDValue(U, 0) && + OtherRound.getValueType() == MVT::f32) { + SDValue VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N), + MVT::v4f32, Vec); + DCI.AddToWorklist(VRound.getNode()); + SDValue Extract1 = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32, + VRound, DAG.getConstant(2, SDLoc(U), MVT::i32)); + DCI.AddToWorklist(Extract1.getNode()); + DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1); + SDValue Extract0 = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32, + VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32)); + return Extract0; + } + } + } + } + } + return SDValue(); +} + +//===----------------------------------------------------------------------===// +// Custom insertion +//===----------------------------------------------------------------------===// + +// Create a new basic block after MBB. +static MachineBasicBlock *emitBlockAfter(MachineBasicBlock *MBB) { + MachineFunction &MF = *MBB->getParent(); + MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock()); + MF.insert(std::next(MachineFunction::iterator(MBB)), NewMBB); + return NewMBB; +} + +// Split MBB after MI and return the new block (the one that contains +// instructions after MI). +static MachineBasicBlock *splitBlockAfter(MachineInstr *MI, + MachineBasicBlock *MBB) { + MachineBasicBlock *NewMBB = emitBlockAfter(MBB); + NewMBB->splice(NewMBB->begin(), MBB, + std::next(MachineBasicBlock::iterator(MI)), MBB->end()); + NewMBB->transferSuccessorsAndUpdatePHIs(MBB); + return NewMBB; +} + +// Split MBB before MI and return the new block (the one that contains MI). +static MachineBasicBlock *splitBlockBefore(MachineInstr *MI, + MachineBasicBlock *MBB) { + MachineBasicBlock *NewMBB = emitBlockAfter(MBB); + NewMBB->splice(NewMBB->begin(), MBB, MI, MBB->end()); + NewMBB->transferSuccessorsAndUpdatePHIs(MBB); + return NewMBB; +} + +// Force base value Base into a register before MI. Return the register. 
+static unsigned forceReg(MachineInstr *MI, MachineOperand &Base, + const SystemZInstrInfo *TII) { + if (Base.isReg()) + return Base.getReg(); + + MachineBasicBlock *MBB = MI->getParent(); + MachineFunction &MF = *MBB->getParent(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + + unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); + BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(SystemZ::LA), Reg) + .addOperand(Base).addImm(0).addReg(0); + return Reg; +} + +// Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI. +MachineBasicBlock * +SystemZTargetLowering::emitSelect(MachineInstr *MI, + MachineBasicBlock *MBB) const { + const SystemZInstrInfo *TII = + static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); + + unsigned DestReg = MI->getOperand(0).getReg(); + unsigned TrueReg = MI->getOperand(1).getReg(); + unsigned FalseReg = MI->getOperand(2).getReg(); + unsigned CCValid = MI->getOperand(3).getImm(); + unsigned CCMask = MI->getOperand(4).getImm(); + DebugLoc DL = MI->getDebugLoc(); + + MachineBasicBlock *StartMBB = MBB; + MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB); + MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB); + + // StartMBB: + // BRC CCMask, JoinMBB + // # fallthrough to FalseMBB + MBB = StartMBB; + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB); + MBB->addSuccessor(JoinMBB); + MBB->addSuccessor(FalseMBB); + + // FalseMBB: + // # fallthrough to JoinMBB + MBB = FalseMBB; + MBB->addSuccessor(JoinMBB); + + // JoinMBB: + // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ] + // ... + MBB = JoinMBB; + BuildMI(*MBB, MI, DL, TII->get(SystemZ::PHI), DestReg) + .addReg(TrueReg).addMBB(StartMBB) + .addReg(FalseReg).addMBB(FalseMBB); + + MI->eraseFromParent(); + return JoinMBB; +} + +// Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI. +// StoreOpcode is the store to use and Invert says whether the store should +// happen when the condition is false rather than true. If a STORE ON +// CONDITION is available, STOCOpcode is its opcode, otherwise it is 0. +MachineBasicBlock * +SystemZTargetLowering::emitCondStore(MachineInstr *MI, + MachineBasicBlock *MBB, + unsigned StoreOpcode, unsigned STOCOpcode, + bool Invert) const { + const SystemZInstrInfo *TII = + static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); + + unsigned SrcReg = MI->getOperand(0).getReg(); + MachineOperand Base = MI->getOperand(1); + int64_t Disp = MI->getOperand(2).getImm(); + unsigned IndexReg = MI->getOperand(3).getReg(); + unsigned CCValid = MI->getOperand(4).getImm(); + unsigned CCMask = MI->getOperand(5).getImm(); + DebugLoc DL = MI->getDebugLoc(); + + StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp); + + // Use STOCOpcode if possible. We could use different store patterns in + // order to avoid matching the index register, but the performance trade-offs + // might be more complicated in that case. + if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) { + if (Invert) + CCMask ^= CCValid; + BuildMI(*MBB, MI, DL, TII->get(STOCOpcode)) + .addReg(SrcReg).addOperand(Base).addImm(Disp) + .addImm(CCValid).addImm(CCMask); + MI->eraseFromParent(); + return MBB; + } + + // Get the condition needed to branch around the store. 
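+ // For example, a CondStore32 that reaches this branch-around path with
+ // mask CCMASK_CMP_EQ must perform the ST only when the comparison was
+ // equal, so the BRC emitted below branches past the store on the
+ // inverted (not-equal) mask. (Illustrative example.)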
+ if (!Invert) + CCMask ^= CCValid; + + MachineBasicBlock *StartMBB = MBB; + MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB); + MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB); + + // StartMBB: + // BRC CCMask, JoinMBB + // # fallthrough to FalseMBB + MBB = StartMBB; + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB); + MBB->addSuccessor(JoinMBB); + MBB->addSuccessor(FalseMBB); + + // FalseMBB: + // store %SrcReg, %Disp(%Index,%Base) + // # fallthrough to JoinMBB + MBB = FalseMBB; + BuildMI(MBB, DL, TII->get(StoreOpcode)) + .addReg(SrcReg).addOperand(Base).addImm(Disp).addReg(IndexReg); + MBB->addSuccessor(JoinMBB); + + MI->eraseFromParent(); + return JoinMBB; +} + +// Implement EmitInstrWithCustomInserter for pseudo ATOMIC_LOAD{,W}_* +// or ATOMIC_SWAP{,W} instruction MI. BinOpcode is the instruction that +// performs the binary operation elided by "*", or 0 for ATOMIC_SWAP{,W}. +// BitSize is the width of the field in bits, or 0 if this is a partword +// ATOMIC_LOADW_* or ATOMIC_SWAPW instruction, in which case the bitsize +// is one of the operands. Invert says whether the field should be +// inverted after performing BinOpcode (e.g. for NAND). +MachineBasicBlock * +SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI, + MachineBasicBlock *MBB, + unsigned BinOpcode, + unsigned BitSize, + bool Invert) const { + MachineFunction &MF = *MBB->getParent(); + const SystemZInstrInfo *TII = + static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); + MachineRegisterInfo &MRI = MF.getRegInfo(); + bool IsSubWord = (BitSize < 32); + + // Extract the operands. Base can be a register or a frame index. + // Src2 can be a register or immediate. + unsigned Dest = MI->getOperand(0).getReg(); + MachineOperand Base = earlyUseOperand(MI->getOperand(1)); + int64_t Disp = MI->getOperand(2).getImm(); + MachineOperand Src2 = earlyUseOperand(MI->getOperand(3)); + unsigned BitShift = (IsSubWord ? MI->getOperand(4).getReg() : 0); + unsigned NegBitShift = (IsSubWord ? MI->getOperand(5).getReg() : 0); + DebugLoc DL = MI->getDebugLoc(); + if (IsSubWord) + BitSize = MI->getOperand(6).getImm(); + + // Subword operations use 32-bit registers. + const TargetRegisterClass *RC = (BitSize <= 32 ? + &SystemZ::GR32BitRegClass : + &SystemZ::GR64BitRegClass); + unsigned LOpcode = BitSize <= 32 ? SystemZ::L : SystemZ::LG; + unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG; + + // Get the right opcodes for the displacement. + LOpcode = TII->getOpcodeForOffset(LOpcode, Disp); + CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp); + assert(LOpcode && CSOpcode && "Displacement out of range"); + + // Create virtual registers for temporary results. + unsigned OrigVal = MRI.createVirtualRegister(RC); + unsigned OldVal = MRI.createVirtualRegister(RC); + unsigned NewVal = (BinOpcode || IsSubWord ? + MRI.createVirtualRegister(RC) : Src2.getReg()); + unsigned RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal); + unsigned RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal); + + // Insert a basic block for the main loop. + MachineBasicBlock *StartMBB = MBB; + MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB); + MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB); + + // StartMBB: + // ... 
+ // %OrigVal = L Disp(%Base) + // # fall through to LoopMMB + MBB = StartMBB; + BuildMI(MBB, DL, TII->get(LOpcode), OrigVal) + .addOperand(Base).addImm(Disp).addReg(0); + MBB->addSuccessor(LoopMBB); + + // LoopMBB: + // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ] + // %RotatedOldVal = RLL %OldVal, 0(%BitShift) + // %RotatedNewVal = OP %RotatedOldVal, %Src2 + // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift) + // %Dest = CS %OldVal, %NewVal, Disp(%Base) + // JNE LoopMBB + // # fall through to DoneMMB + MBB = LoopMBB; + BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal) + .addReg(OrigVal).addMBB(StartMBB) + .addReg(Dest).addMBB(LoopMBB); + if (IsSubWord) + BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal) + .addReg(OldVal).addReg(BitShift).addImm(0); + if (Invert) { + // Perform the operation normally and then invert every bit of the field. + unsigned Tmp = MRI.createVirtualRegister(RC); + BuildMI(MBB, DL, TII->get(BinOpcode), Tmp) + .addReg(RotatedOldVal).addOperand(Src2); + if (BitSize <= 32) + // XILF with the upper BitSize bits set. + BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal) + .addReg(Tmp).addImm(-1U << (32 - BitSize)); + else { + // Use LCGR and add -1 to the result, which is more compact than + // an XILF, XILH pair. + unsigned Tmp2 = MRI.createVirtualRegister(RC); + BuildMI(MBB, DL, TII->get(SystemZ::LCGR), Tmp2).addReg(Tmp); + BuildMI(MBB, DL, TII->get(SystemZ::AGHI), RotatedNewVal) + .addReg(Tmp2).addImm(-1); + } + } else if (BinOpcode) + // A simply binary operation. + BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal) + .addReg(RotatedOldVal).addOperand(Src2); + else if (IsSubWord) + // Use RISBG to rotate Src2 into position and use it to replace the + // field in RotatedOldVal. + BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal) + .addReg(RotatedOldVal).addReg(Src2.getReg()) + .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize); + if (IsSubWord) + BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal) + .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0); + BuildMI(MBB, DL, TII->get(CSOpcode), Dest) + .addReg(OldVal).addReg(NewVal).addOperand(Base).addImm(Disp); + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB); + MBB->addSuccessor(LoopMBB); + MBB->addSuccessor(DoneMBB); + + MI->eraseFromParent(); + return DoneMBB; +} + +// Implement EmitInstrWithCustomInserter for pseudo +// ATOMIC_LOAD{,W}_{,U}{MIN,MAX} instruction MI. CompareOpcode is the +// instruction that should be used to compare the current field with the +// minimum or maximum value. KeepOldMask is the BRC condition-code mask +// for when the current field should be kept. BitSize is the width of +// the field in bits, or 0 if this is a partword ATOMIC_LOADW_* instruction. +MachineBasicBlock * +SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI, + MachineBasicBlock *MBB, + unsigned CompareOpcode, + unsigned KeepOldMask, + unsigned BitSize) const { + MachineFunction &MF = *MBB->getParent(); + const SystemZInstrInfo *TII = + static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); + MachineRegisterInfo &MRI = MF.getRegInfo(); + bool IsSubWord = (BitSize < 32); + + // Extract the operands. Base can be a register or a frame index. + unsigned Dest = MI->getOperand(0).getReg(); + MachineOperand Base = earlyUseOperand(MI->getOperand(1)); + int64_t Disp = MI->getOperand(2).getImm(); + unsigned Src2 = MI->getOperand(3).getReg(); + unsigned BitShift = (IsSubWord ? 
MI->getOperand(4).getReg() : 0); + unsigned NegBitShift = (IsSubWord ? MI->getOperand(5).getReg() : 0); + DebugLoc DL = MI->getDebugLoc(); + if (IsSubWord) + BitSize = MI->getOperand(6).getImm(); + + // Subword operations use 32-bit registers. + const TargetRegisterClass *RC = (BitSize <= 32 ? + &SystemZ::GR32BitRegClass : + &SystemZ::GR64BitRegClass); + unsigned LOpcode = BitSize <= 32 ? SystemZ::L : SystemZ::LG; + unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG; + + // Get the right opcodes for the displacement. + LOpcode = TII->getOpcodeForOffset(LOpcode, Disp); + CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp); + assert(LOpcode && CSOpcode && "Displacement out of range"); + + // Create virtual registers for temporary results. + unsigned OrigVal = MRI.createVirtualRegister(RC); + unsigned OldVal = MRI.createVirtualRegister(RC); + unsigned NewVal = MRI.createVirtualRegister(RC); + unsigned RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal); + unsigned RotatedAltVal = (IsSubWord ? MRI.createVirtualRegister(RC) : Src2); + unsigned RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal); + + // Insert 3 basic blocks for the loop. + MachineBasicBlock *StartMBB = MBB; + MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB); + MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB); + MachineBasicBlock *UseAltMBB = emitBlockAfter(LoopMBB); + MachineBasicBlock *UpdateMBB = emitBlockAfter(UseAltMBB); + + // StartMBB: + // ... + // %OrigVal = L Disp(%Base) + // # fall through to LoopMMB + MBB = StartMBB; + BuildMI(MBB, DL, TII->get(LOpcode), OrigVal) + .addOperand(Base).addImm(Disp).addReg(0); + MBB->addSuccessor(LoopMBB); + + // LoopMBB: + // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ] + // %RotatedOldVal = RLL %OldVal, 0(%BitShift) + // CompareOpcode %RotatedOldVal, %Src2 + // BRC KeepOldMask, UpdateMBB + MBB = LoopMBB; + BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal) + .addReg(OrigVal).addMBB(StartMBB) + .addReg(Dest).addMBB(UpdateMBB); + if (IsSubWord) + BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal) + .addReg(OldVal).addReg(BitShift).addImm(0); + BuildMI(MBB, DL, TII->get(CompareOpcode)) + .addReg(RotatedOldVal).addReg(Src2); + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB); + MBB->addSuccessor(UpdateMBB); + MBB->addSuccessor(UseAltMBB); + + // UseAltMBB: + // %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0 + // # fall through to UpdateMMB + MBB = UseAltMBB; + if (IsSubWord) + BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal) + .addReg(RotatedOldVal).addReg(Src2) + .addImm(32).addImm(31 + BitSize).addImm(0); + MBB->addSuccessor(UpdateMBB); + + // UpdateMBB: + // %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ], + // [ %RotatedAltVal, UseAltMBB ] + // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift) + // %Dest = CS %OldVal, %NewVal, Disp(%Base) + // JNE LoopMBB + // # fall through to DoneMMB + MBB = UpdateMBB; + BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal) + .addReg(RotatedOldVal).addMBB(LoopMBB) + .addReg(RotatedAltVal).addMBB(UseAltMBB); + if (IsSubWord) + BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal) + .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0); + BuildMI(MBB, DL, TII->get(CSOpcode), Dest) + .addReg(OldVal).addReg(NewVal).addOperand(Base).addImm(Disp); + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB); + MBB->addSuccessor(LoopMBB); + 
MBB->addSuccessor(DoneMBB); + + MI->eraseFromParent(); + return DoneMBB; +} + +// Implement EmitInstrWithCustomInserter for pseudo ATOMIC_CMP_SWAPW +// instruction MI. +MachineBasicBlock * +SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI, + MachineBasicBlock *MBB) const { + MachineFunction &MF = *MBB->getParent(); + const SystemZInstrInfo *TII = + static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); + MachineRegisterInfo &MRI = MF.getRegInfo(); + + // Extract the operands. Base can be a register or a frame index. + unsigned Dest = MI->getOperand(0).getReg(); + MachineOperand Base = earlyUseOperand(MI->getOperand(1)); + int64_t Disp = MI->getOperand(2).getImm(); + unsigned OrigCmpVal = MI->getOperand(3).getReg(); + unsigned OrigSwapVal = MI->getOperand(4).getReg(); + unsigned BitShift = MI->getOperand(5).getReg(); + unsigned NegBitShift = MI->getOperand(6).getReg(); + int64_t BitSize = MI->getOperand(7).getImm(); + DebugLoc DL = MI->getDebugLoc(); + + const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass; + + // Get the right opcodes for the displacement. + unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp); + unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp); + assert(LOpcode && CSOpcode && "Displacement out of range"); + + // Create virtual registers for temporary results. + unsigned OrigOldVal = MRI.createVirtualRegister(RC); + unsigned OldVal = MRI.createVirtualRegister(RC); + unsigned CmpVal = MRI.createVirtualRegister(RC); + unsigned SwapVal = MRI.createVirtualRegister(RC); + unsigned StoreVal = MRI.createVirtualRegister(RC); + unsigned RetryOldVal = MRI.createVirtualRegister(RC); + unsigned RetryCmpVal = MRI.createVirtualRegister(RC); + unsigned RetrySwapVal = MRI.createVirtualRegister(RC); + + // Insert 2 basic blocks for the loop. + MachineBasicBlock *StartMBB = MBB; + MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB); + MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB); + MachineBasicBlock *SetMBB = emitBlockAfter(LoopMBB); + + // StartMBB: + // ... + // %OrigOldVal = L Disp(%Base) + // # fall through to LoopMMB + MBB = StartMBB; + BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal) + .addOperand(Base).addImm(Disp).addReg(0); + MBB->addSuccessor(LoopMBB); + + // LoopMBB: + // %OldVal = phi [ %OrigOldVal, EntryBB ], [ %RetryOldVal, SetMBB ] + // %CmpVal = phi [ %OrigCmpVal, EntryBB ], [ %RetryCmpVal, SetMBB ] + // %SwapVal = phi [ %OrigSwapVal, EntryBB ], [ %RetrySwapVal, SetMBB ] + // %Dest = RLL %OldVal, BitSize(%BitShift) + // ^^ The low BitSize bits contain the field + // of interest. + // %RetryCmpVal = RISBG32 %CmpVal, %Dest, 32, 63-BitSize, 0 + // ^^ Replace the upper 32-BitSize bits of the + // comparison value with those that we loaded, + // so that we can use a full word comparison. 
+ // CR %Dest, %RetryCmpVal + // JNE DoneMBB + // # Fall through to SetMBB + MBB = LoopMBB; + BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal) + .addReg(OrigOldVal).addMBB(StartMBB) + .addReg(RetryOldVal).addMBB(SetMBB); + BuildMI(MBB, DL, TII->get(SystemZ::PHI), CmpVal) + .addReg(OrigCmpVal).addMBB(StartMBB) + .addReg(RetryCmpVal).addMBB(SetMBB); + BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal) + .addReg(OrigSwapVal).addMBB(StartMBB) + .addReg(RetrySwapVal).addMBB(SetMBB); + BuildMI(MBB, DL, TII->get(SystemZ::RLL), Dest) + .addReg(OldVal).addReg(BitShift).addImm(BitSize); + BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetryCmpVal) + .addReg(CmpVal).addReg(Dest).addImm(32).addImm(63 - BitSize).addImm(0); + BuildMI(MBB, DL, TII->get(SystemZ::CR)) + .addReg(Dest).addReg(RetryCmpVal); + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_ICMP) + .addImm(SystemZ::CCMASK_CMP_NE).addMBB(DoneMBB); + MBB->addSuccessor(DoneMBB); + MBB->addSuccessor(SetMBB); + + // SetMBB: + // %RetrySwapVal = RISBG32 %SwapVal, %Dest, 32, 63-BitSize, 0 + // ^^ Replace the upper 32-BitSize bits of the new + // value with those that we loaded. + // %StoreVal = RLL %RetrySwapVal, -BitSize(%NegBitShift) + // ^^ Rotate the new field to its proper position. + // %RetryOldVal = CS %Dest, %StoreVal, Disp(%Base) + // JNE LoopMBB + // # fall through to ExitMMB + MBB = SetMBB; + BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal) + .addReg(SwapVal).addReg(Dest).addImm(32).addImm(63 - BitSize).addImm(0); + BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal) + .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize); + BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal) + .addReg(OldVal).addReg(StoreVal).addOperand(Base).addImm(Disp); + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB); + MBB->addSuccessor(LoopMBB); + MBB->addSuccessor(DoneMBB); + + MI->eraseFromParent(); + return DoneMBB; +} + +// Emit an extension from a GR32 or GR64 to a GR128. ClearEven is true +// if the high register of the GR128 value must be cleared or false if +// it's "don't care". SubReg is subreg_l32 when extending a GR32 +// and subreg_l64 when extending a GR64. 
+MachineBasicBlock * +SystemZTargetLowering::emitExt128(MachineInstr *MI, + MachineBasicBlock *MBB, + bool ClearEven, unsigned SubReg) const { + MachineFunction &MF = *MBB->getParent(); + const SystemZInstrInfo *TII = + static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); + MachineRegisterInfo &MRI = MF.getRegInfo(); + DebugLoc DL = MI->getDebugLoc(); + + unsigned Dest = MI->getOperand(0).getReg(); + unsigned Src = MI->getOperand(1).getReg(); + unsigned In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass); + + BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128); + if (ClearEven) { + unsigned NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass); + unsigned Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass); + + BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64) + .addImm(0); + BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128) + .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_h64); + In128 = NewIn128; + } + BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest) + .addReg(In128).addReg(Src).addImm(SubReg); + + MI->eraseFromParent(); + return MBB; +} + +MachineBasicBlock * +SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI, + MachineBasicBlock *MBB, + unsigned Opcode) const { + MachineFunction &MF = *MBB->getParent(); + const SystemZInstrInfo *TII = + static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); + MachineRegisterInfo &MRI = MF.getRegInfo(); + DebugLoc DL = MI->getDebugLoc(); + + MachineOperand DestBase = earlyUseOperand(MI->getOperand(0)); + uint64_t DestDisp = MI->getOperand(1).getImm(); + MachineOperand SrcBase = earlyUseOperand(MI->getOperand(2)); + uint64_t SrcDisp = MI->getOperand(3).getImm(); + uint64_t Length = MI->getOperand(4).getImm(); + + // When generating more than one CLC, all but the last will need to + // branch to the end when a difference is found. + MachineBasicBlock *EndMBB = (Length > 256 && Opcode == SystemZ::CLC ? + splitBlockAfter(MI, MBB) : nullptr); + + // Check for the loop form, in which operand 5 is the trip count. + if (MI->getNumExplicitOperands() > 5) { + bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase); + + uint64_t StartCountReg = MI->getOperand(5).getReg(); + uint64_t StartSrcReg = forceReg(MI, SrcBase, TII); + uint64_t StartDestReg = (HaveSingleBase ? StartSrcReg : + forceReg(MI, DestBase, TII)); + + const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass; + uint64_t ThisSrcReg = MRI.createVirtualRegister(RC); + uint64_t ThisDestReg = (HaveSingleBase ? ThisSrcReg : + MRI.createVirtualRegister(RC)); + uint64_t NextSrcReg = MRI.createVirtualRegister(RC); + uint64_t NextDestReg = (HaveSingleBase ? NextSrcReg : + MRI.createVirtualRegister(RC)); + + RC = &SystemZ::GR64BitRegClass; + uint64_t ThisCountReg = MRI.createVirtualRegister(RC); + uint64_t NextCountReg = MRI.createVirtualRegister(RC); + + MachineBasicBlock *StartMBB = MBB; + MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB); + MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB); + MachineBasicBlock *NextMBB = (EndMBB ? 
emitBlockAfter(LoopMBB) : LoopMBB); + + // StartMBB: + // # fall through to LoopMMB + MBB->addSuccessor(LoopMBB); + + // LoopMBB: + // %ThisDestReg = phi [ %StartDestReg, StartMBB ], + // [ %NextDestReg, NextMBB ] + // %ThisSrcReg = phi [ %StartSrcReg, StartMBB ], + // [ %NextSrcReg, NextMBB ] + // %ThisCountReg = phi [ %StartCountReg, StartMBB ], + // [ %NextCountReg, NextMBB ] + // ( PFD 2, 768+DestDisp(%ThisDestReg) ) + // Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg) + // ( JLH EndMBB ) + // + // The prefetch is used only for MVC. The JLH is used only for CLC. + MBB = LoopMBB; + + BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg) + .addReg(StartDestReg).addMBB(StartMBB) + .addReg(NextDestReg).addMBB(NextMBB); + if (!HaveSingleBase) + BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg) + .addReg(StartSrcReg).addMBB(StartMBB) + .addReg(NextSrcReg).addMBB(NextMBB); + BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg) + .addReg(StartCountReg).addMBB(StartMBB) + .addReg(NextCountReg).addMBB(NextMBB); + if (Opcode == SystemZ::MVC) + BuildMI(MBB, DL, TII->get(SystemZ::PFD)) + .addImm(SystemZ::PFD_WRITE) + .addReg(ThisDestReg).addImm(DestDisp + 768).addReg(0); + BuildMI(MBB, DL, TII->get(Opcode)) + .addReg(ThisDestReg).addImm(DestDisp).addImm(256) + .addReg(ThisSrcReg).addImm(SrcDisp); + if (EndMBB) { + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE) + .addMBB(EndMBB); + MBB->addSuccessor(EndMBB); + MBB->addSuccessor(NextMBB); + } + + // NextMBB: + // %NextDestReg = LA 256(%ThisDestReg) + // %NextSrcReg = LA 256(%ThisSrcReg) + // %NextCountReg = AGHI %ThisCountReg, -1 + // CGHI %NextCountReg, 0 + // JLH LoopMBB + // # fall through to DoneMMB + // + // The AGHI, CGHI and JLH should be converted to BRCTG by later passes. + MBB = NextMBB; + + BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg) + .addReg(ThisDestReg).addImm(256).addReg(0); + if (!HaveSingleBase) + BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg) + .addReg(ThisSrcReg).addImm(256).addReg(0); + BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg) + .addReg(ThisCountReg).addImm(-1); + BuildMI(MBB, DL, TII->get(SystemZ::CGHI)) + .addReg(NextCountReg).addImm(0); + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE) + .addMBB(LoopMBB); + MBB->addSuccessor(LoopMBB); + MBB->addSuccessor(DoneMBB); + + DestBase = MachineOperand::CreateReg(NextDestReg, false); + SrcBase = MachineOperand::CreateReg(NextSrcReg, false); + Length &= 255; + MBB = DoneMBB; + } + // Handle any remaining bytes with straight-line code. + while (Length > 0) { + uint64_t ThisLength = std::min(Length, uint64_t(256)); + // The previous iteration might have created out-of-range displacements. + // Apply them using LAY if so. 
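+ // For example, copying 600 bytes starting at displacement 3900 first
+ // emits a 256-byte MVC at 3900; the running displacement then becomes
+ // 4156, which no longer fits the unsigned 12-bit D field, so the next
+ // iteration emits an LAY to fold it into a fresh base register and the
+ // displacement restarts at 0. (Illustrative numbers.)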
+ if (!isUInt<12>(DestDisp)) { + unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); + BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(SystemZ::LAY), Reg) + .addOperand(DestBase).addImm(DestDisp).addReg(0); + DestBase = MachineOperand::CreateReg(Reg, false); + DestDisp = 0; + } + if (!isUInt<12>(SrcDisp)) { + unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); + BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(SystemZ::LAY), Reg) + .addOperand(SrcBase).addImm(SrcDisp).addReg(0); + SrcBase = MachineOperand::CreateReg(Reg, false); + SrcDisp = 0; + } + BuildMI(*MBB, MI, DL, TII->get(Opcode)) + .addOperand(DestBase).addImm(DestDisp).addImm(ThisLength) + .addOperand(SrcBase).addImm(SrcDisp); + DestDisp += ThisLength; + SrcDisp += ThisLength; + Length -= ThisLength; + // If there's another CLC to go, branch to the end if a difference + // was found. + if (EndMBB && Length > 0) { + MachineBasicBlock *NextMBB = splitBlockBefore(MI, MBB); + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE) + .addMBB(EndMBB); + MBB->addSuccessor(EndMBB); + MBB->addSuccessor(NextMBB); + MBB = NextMBB; + } + } + if (EndMBB) { + MBB->addSuccessor(EndMBB); + MBB = EndMBB; + MBB->addLiveIn(SystemZ::CC); + } + + MI->eraseFromParent(); + return MBB; +} + +// Decompose string pseudo-instruction MI into a loop that continually performs +// Opcode until CC != 3. +MachineBasicBlock * +SystemZTargetLowering::emitStringWrapper(MachineInstr *MI, + MachineBasicBlock *MBB, + unsigned Opcode) const { + MachineFunction &MF = *MBB->getParent(); + const SystemZInstrInfo *TII = + static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); + MachineRegisterInfo &MRI = MF.getRegInfo(); + DebugLoc DL = MI->getDebugLoc(); + + uint64_t End1Reg = MI->getOperand(0).getReg(); + uint64_t Start1Reg = MI->getOperand(1).getReg(); + uint64_t Start2Reg = MI->getOperand(2).getReg(); + uint64_t CharReg = MI->getOperand(3).getReg(); + + const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass; + uint64_t This1Reg = MRI.createVirtualRegister(RC); + uint64_t This2Reg = MRI.createVirtualRegister(RC); + uint64_t End2Reg = MRI.createVirtualRegister(RC); + + MachineBasicBlock *StartMBB = MBB; + MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB); + MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB); + + // StartMBB: + // # fall through to LoopMMB + MBB->addSuccessor(LoopMBB); + + // LoopMBB: + // %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ] + // %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ] + // R0L = %CharReg + // %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L + // JO LoopMBB + // # fall through to DoneMMB + // + // The load of R0L can be hoisted by post-RA LICM. 
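+ // For example, when this wrapper expands CLST for strcmp, R0L holds the
+ // terminating character (0), and CC 3 means the instruction stopped
+ // after a CPU-determined number of bytes without reaching an answer, so
+ // the JO above simply re-executes it from where it left off.
+ // (Illustrative description.)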
+ MBB = LoopMBB; + + BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg) + .addReg(Start1Reg).addMBB(StartMBB) + .addReg(End1Reg).addMBB(LoopMBB); + BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg) + .addReg(Start2Reg).addMBB(StartMBB) + .addReg(End2Reg).addMBB(LoopMBB); + BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0L).addReg(CharReg); + BuildMI(MBB, DL, TII->get(Opcode)) + .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define) + .addReg(This1Reg).addReg(This2Reg); + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_ANY).addImm(SystemZ::CCMASK_3).addMBB(LoopMBB); + MBB->addSuccessor(LoopMBB); + MBB->addSuccessor(DoneMBB); + + DoneMBB->addLiveIn(SystemZ::CC); + + MI->eraseFromParent(); + return DoneMBB; +} + +// Update TBEGIN instruction with final opcode and register clobbers. +MachineBasicBlock * +SystemZTargetLowering::emitTransactionBegin(MachineInstr *MI, + MachineBasicBlock *MBB, + unsigned Opcode, + bool NoFloat) const { + MachineFunction &MF = *MBB->getParent(); + const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); + const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); + + // Update opcode. + MI->setDesc(TII->get(Opcode)); + + // We cannot handle a TBEGIN that clobbers the stack or frame pointer. + // Make sure to add the corresponding GRSM bits if they are missing. + uint64_t Control = MI->getOperand(2).getImm(); + static const unsigned GPRControlBit[16] = { + 0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000, + 0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100 + }; + Control |= GPRControlBit[15]; + if (TFI->hasFP(MF)) + Control |= GPRControlBit[11]; + MI->getOperand(2).setImm(Control); + + // Add GPR clobbers. + for (int I = 0; I < 16; I++) { + if ((Control & GPRControlBit[I]) == 0) { + unsigned Reg = SystemZMC::GR64Regs[I]; + MI->addOperand(MachineOperand::CreateReg(Reg, true, true)); + } + } + + // Add FPR/VR clobbers. + if (!NoFloat && (Control & 4) != 0) { + if (Subtarget.hasVector()) { + for (int I = 0; I < 32; I++) { + unsigned Reg = SystemZMC::VR128Regs[I]; + MI->addOperand(MachineOperand::CreateReg(Reg, true, true)); + } + } else { + for (int I = 0; I < 16; I++) { + unsigned Reg = SystemZMC::FP64Regs[I]; + MI->addOperand(MachineOperand::CreateReg(Reg, true, true)); + } + } + } + + return MBB; +} + +MachineBasicBlock * +SystemZTargetLowering::emitLoadAndTestCmp0(MachineInstr *MI, + MachineBasicBlock *MBB, + unsigned Opcode) const { + MachineFunction &MF = *MBB->getParent(); + MachineRegisterInfo *MRI = &MF.getRegInfo(); + const SystemZInstrInfo *TII = + static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); + DebugLoc DL = MI->getDebugLoc(); + + unsigned SrcReg = MI->getOperand(0).getReg(); + + // Create new virtual register of the same class as source. + const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); + unsigned DstReg = MRI->createVirtualRegister(RC); + + // Replace pseudo with a normal load-and-test that models the def as + // well. 
+ BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg) + .addReg(SrcReg); + MI->eraseFromParent(); + + return MBB; +} + +MachineBasicBlock *SystemZTargetLowering:: +EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const { + switch (MI->getOpcode()) { + case SystemZ::Select32Mux: + case SystemZ::Select32: + case SystemZ::SelectF32: + case SystemZ::Select64: + case SystemZ::SelectF64: + case SystemZ::SelectF128: + return emitSelect(MI, MBB); + + case SystemZ::CondStore8Mux: + return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false); + case SystemZ::CondStore8MuxInv: + return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true); + case SystemZ::CondStore16Mux: + return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false); + case SystemZ::CondStore16MuxInv: + return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true); + case SystemZ::CondStore8: + return emitCondStore(MI, MBB, SystemZ::STC, 0, false); + case SystemZ::CondStore8Inv: + return emitCondStore(MI, MBB, SystemZ::STC, 0, true); + case SystemZ::CondStore16: + return emitCondStore(MI, MBB, SystemZ::STH, 0, false); + case SystemZ::CondStore16Inv: + return emitCondStore(MI, MBB, SystemZ::STH, 0, true); + case SystemZ::CondStore32: + return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, false); + case SystemZ::CondStore32Inv: + return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, true); + case SystemZ::CondStore64: + return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, false); + case SystemZ::CondStore64Inv: + return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, true); + case SystemZ::CondStoreF32: + return emitCondStore(MI, MBB, SystemZ::STE, 0, false); + case SystemZ::CondStoreF32Inv: + return emitCondStore(MI, MBB, SystemZ::STE, 0, true); + case SystemZ::CondStoreF64: + return emitCondStore(MI, MBB, SystemZ::STD, 0, false); + case SystemZ::CondStoreF64Inv: + return emitCondStore(MI, MBB, SystemZ::STD, 0, true); + + case SystemZ::AEXT128_64: + return emitExt128(MI, MBB, false, SystemZ::subreg_l64); + case SystemZ::ZEXT128_32: + return emitExt128(MI, MBB, true, SystemZ::subreg_l32); + case SystemZ::ZEXT128_64: + return emitExt128(MI, MBB, true, SystemZ::subreg_l64); + + case SystemZ::ATOMIC_SWAPW: + return emitAtomicLoadBinary(MI, MBB, 0, 0); + case SystemZ::ATOMIC_SWAP_32: + return emitAtomicLoadBinary(MI, MBB, 0, 32); + case SystemZ::ATOMIC_SWAP_64: + return emitAtomicLoadBinary(MI, MBB, 0, 64); + + case SystemZ::ATOMIC_LOADW_AR: + return emitAtomicLoadBinary(MI, MBB, SystemZ::AR, 0); + case SystemZ::ATOMIC_LOADW_AFI: + return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI, 0); + case SystemZ::ATOMIC_LOAD_AR: + return emitAtomicLoadBinary(MI, MBB, SystemZ::AR, 32); + case SystemZ::ATOMIC_LOAD_AHI: + return emitAtomicLoadBinary(MI, MBB, SystemZ::AHI, 32); + case SystemZ::ATOMIC_LOAD_AFI: + return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI, 32); + case SystemZ::ATOMIC_LOAD_AGR: + return emitAtomicLoadBinary(MI, MBB, SystemZ::AGR, 64); + case SystemZ::ATOMIC_LOAD_AGHI: + return emitAtomicLoadBinary(MI, MBB, SystemZ::AGHI, 64); + case SystemZ::ATOMIC_LOAD_AGFI: + return emitAtomicLoadBinary(MI, MBB, SystemZ::AGFI, 64); + + case SystemZ::ATOMIC_LOADW_SR: + return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 0); + case SystemZ::ATOMIC_LOAD_SR: + return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 32); + case SystemZ::ATOMIC_LOAD_SGR: + return emitAtomicLoadBinary(MI, MBB, SystemZ::SGR, 64); + + case SystemZ::ATOMIC_LOADW_NR: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0); + case SystemZ::ATOMIC_LOADW_NILH: + 
return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 0); + case SystemZ::ATOMIC_LOAD_NR: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32); + case SystemZ::ATOMIC_LOAD_NILL: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 32); + case SystemZ::ATOMIC_LOAD_NILH: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 32); + case SystemZ::ATOMIC_LOAD_NILF: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 32); + case SystemZ::ATOMIC_LOAD_NGR: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64); + case SystemZ::ATOMIC_LOAD_NILL64: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL64, 64); + case SystemZ::ATOMIC_LOAD_NILH64: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH64, 64); + case SystemZ::ATOMIC_LOAD_NIHL64: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL64, 64); + case SystemZ::ATOMIC_LOAD_NIHH64: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH64, 64); + case SystemZ::ATOMIC_LOAD_NILF64: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF64, 64); + case SystemZ::ATOMIC_LOAD_NIHF64: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF64, 64); + + case SystemZ::ATOMIC_LOADW_OR: + return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 0); + case SystemZ::ATOMIC_LOADW_OILH: + return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 0); + case SystemZ::ATOMIC_LOAD_OR: + return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 32); + case SystemZ::ATOMIC_LOAD_OILL: + return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL, 32); + case SystemZ::ATOMIC_LOAD_OILH: + return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 32); + case SystemZ::ATOMIC_LOAD_OILF: + return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF, 32); + case SystemZ::ATOMIC_LOAD_OGR: + return emitAtomicLoadBinary(MI, MBB, SystemZ::OGR, 64); + case SystemZ::ATOMIC_LOAD_OILL64: + return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL64, 64); + case SystemZ::ATOMIC_LOAD_OILH64: + return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH64, 64); + case SystemZ::ATOMIC_LOAD_OIHL64: + return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHL64, 64); + case SystemZ::ATOMIC_LOAD_OIHH64: + return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHH64, 64); + case SystemZ::ATOMIC_LOAD_OILF64: + return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF64, 64); + case SystemZ::ATOMIC_LOAD_OIHF64: + return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHF64, 64); + + case SystemZ::ATOMIC_LOADW_XR: + return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 0); + case SystemZ::ATOMIC_LOADW_XILF: + return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 0); + case SystemZ::ATOMIC_LOAD_XR: + return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 32); + case SystemZ::ATOMIC_LOAD_XILF: + return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 32); + case SystemZ::ATOMIC_LOAD_XGR: + return emitAtomicLoadBinary(MI, MBB, SystemZ::XGR, 64); + case SystemZ::ATOMIC_LOAD_XILF64: + return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF64, 64); + case SystemZ::ATOMIC_LOAD_XIHF64: + return emitAtomicLoadBinary(MI, MBB, SystemZ::XIHF64, 64); + + case SystemZ::ATOMIC_LOADW_NRi: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0, true); + case SystemZ::ATOMIC_LOADW_NILHi: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 0, true); + case SystemZ::ATOMIC_LOAD_NRi: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32, true); + case SystemZ::ATOMIC_LOAD_NILLi: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 32, true); + case SystemZ::ATOMIC_LOAD_NILHi: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 32, true); + case SystemZ::ATOMIC_LOAD_NILFi: + return emitAtomicLoadBinary(MI, MBB, 
SystemZ::NILF, 32, true); + case SystemZ::ATOMIC_LOAD_NGRi: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64, true); + case SystemZ::ATOMIC_LOAD_NILL64i: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL64, 64, true); + case SystemZ::ATOMIC_LOAD_NILH64i: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH64, 64, true); + case SystemZ::ATOMIC_LOAD_NIHL64i: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL64, 64, true); + case SystemZ::ATOMIC_LOAD_NIHH64i: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH64, 64, true); + case SystemZ::ATOMIC_LOAD_NILF64i: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF64, 64, true); + case SystemZ::ATOMIC_LOAD_NIHF64i: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF64, 64, true); + + case SystemZ::ATOMIC_LOADW_MIN: + return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, + SystemZ::CCMASK_CMP_LE, 0); + case SystemZ::ATOMIC_LOAD_MIN_32: + return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, + SystemZ::CCMASK_CMP_LE, 32); + case SystemZ::ATOMIC_LOAD_MIN_64: + return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR, + SystemZ::CCMASK_CMP_LE, 64); + + case SystemZ::ATOMIC_LOADW_MAX: + return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, + SystemZ::CCMASK_CMP_GE, 0); + case SystemZ::ATOMIC_LOAD_MAX_32: + return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, + SystemZ::CCMASK_CMP_GE, 32); + case SystemZ::ATOMIC_LOAD_MAX_64: + return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR, + SystemZ::CCMASK_CMP_GE, 64); + + case SystemZ::ATOMIC_LOADW_UMIN: + return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, + SystemZ::CCMASK_CMP_LE, 0); + case SystemZ::ATOMIC_LOAD_UMIN_32: + return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, + SystemZ::CCMASK_CMP_LE, 32); + case SystemZ::ATOMIC_LOAD_UMIN_64: + return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR, + SystemZ::CCMASK_CMP_LE, 64); + + case SystemZ::ATOMIC_LOADW_UMAX: + return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, + SystemZ::CCMASK_CMP_GE, 0); + case SystemZ::ATOMIC_LOAD_UMAX_32: + return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, + SystemZ::CCMASK_CMP_GE, 32); + case SystemZ::ATOMIC_LOAD_UMAX_64: + return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR, + SystemZ::CCMASK_CMP_GE, 64); + + case SystemZ::ATOMIC_CMP_SWAPW: + return emitAtomicCmpSwapW(MI, MBB); + case SystemZ::MVCSequence: + case SystemZ::MVCLoop: + return emitMemMemWrapper(MI, MBB, SystemZ::MVC); + case SystemZ::NCSequence: + case SystemZ::NCLoop: + return emitMemMemWrapper(MI, MBB, SystemZ::NC); + case SystemZ::OCSequence: + case SystemZ::OCLoop: + return emitMemMemWrapper(MI, MBB, SystemZ::OC); + case SystemZ::XCSequence: + case SystemZ::XCLoop: + return emitMemMemWrapper(MI, MBB, SystemZ::XC); + case SystemZ::CLCSequence: + case SystemZ::CLCLoop: + return emitMemMemWrapper(MI, MBB, SystemZ::CLC); + case SystemZ::CLSTLoop: + return emitStringWrapper(MI, MBB, SystemZ::CLST); + case SystemZ::MVSTLoop: + return emitStringWrapper(MI, MBB, SystemZ::MVST); + case SystemZ::SRSTLoop: + return emitStringWrapper(MI, MBB, SystemZ::SRST); + case SystemZ::TBEGIN: + return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false); + case SystemZ::TBEGIN_nofloat: + return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true); + case SystemZ::TBEGINC: + return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true); + case SystemZ::LTEBRCompare_VecPseudo: + return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR); + case SystemZ::LTDBRCompare_VecPseudo: + return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR); + case SystemZ::LTXBRCompare_VecPseudo: + return emitLoadAndTestCmp0(MI, MBB, 
SystemZ::LTXBR); + + default: + llvm_unreachable("Unexpected instr type to insert"); + } +} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h new file mode 100644 index 0000000..391636e --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -0,0 +1,554 @@ +//===-- SystemZISelLowering.h - SystemZ DAG lowering interface --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interfaces that SystemZ uses to lower LLVM code into a +// selection DAG. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZISELLOWERING_H +#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZISELLOWERING_H + +#include "SystemZ.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Target/TargetLowering.h" + +namespace llvm { +namespace SystemZISD { +enum NodeType : unsigned { + FIRST_NUMBER = ISD::BUILTIN_OP_END, + + // Return with a flag operand. Operand 0 is the chain operand. + RET_FLAG, + + // Calls a function. Operand 0 is the chain operand and operand 1 + // is the target address. The arguments start at operand 2. + // There is an optional glue operand at the end. + CALL, + SIBCALL, + + // TLS calls. Like regular calls, except operand 1 is the TLS symbol. + // (The call target is implicitly __tls_get_offset.) + TLS_GDCALL, + TLS_LDCALL, + + // Wraps a TargetGlobalAddress that should be loaded using PC-relative + // accesses (LARL). Operand 0 is the address. + PCREL_WRAPPER, + + // Used in cases where an offset is applied to a TargetGlobalAddress. + // Operand 0 is the full TargetGlobalAddress and operand 1 is a + // PCREL_WRAPPER for an anchor point. This is used so that we can + // cheaply refer to either the full address or the anchor point + // as a register base. + PCREL_OFFSET, + + // Integer absolute. + IABS, + + // Integer comparisons. There are three operands: the two values + // to compare, and an integer of type SystemZICMP. + ICMP, + + // Floating-point comparisons. The two operands are the values to compare. + FCMP, + + // Test under mask. The first operand is ANDed with the second operand + // and the condition codes are set on the result. The third operand is + // a boolean that is true if the condition codes need to distinguish + // between CCMASK_TM_MIXED_MSB_0 and CCMASK_TM_MIXED_MSB_1 (which the + // register forms do but the memory forms don't). + TM, + + // Branches if a condition is true. Operand 0 is the chain operand; + // operand 1 is the 4-bit condition-code mask, with bit N in + // big-endian order meaning "branch if CC=N"; operand 2 is the + // target block and operand 3 is the flag operand. + BR_CCMASK, + + // Selects between operand 0 and operand 1. Operand 2 is the + // mask of condition-code values for which operand 0 should be + // chosen over operand 1; it has the same form as BR_CCMASK. + // Operand 3 is the flag operand. + SELECT_CCMASK, + + // Evaluates to the gap between the stack pointer and the + // base of the dynamically-allocatable area. + ADJDYNALLOC, + + // Extracts the value of a 32-bit access register. Operand 0 is + // the number of the register. + EXTRACT_ACCESS, + + // Count number of bits set in operand 0 per byte. 
+ POPCNT, + + // Wrappers around the ISD opcodes of the same name. The output and + // first input operands are GR128s. The trailing numbers are the + // widths of the second operand in bits. + UMUL_LOHI64, + SDIVREM32, + SDIVREM64, + UDIVREM32, + UDIVREM64, + + // Use a series of MVCs to copy bytes from one memory location to another. + // The operands are: + // - the target address + // - the source address + // - the constant length + // + // This isn't a memory opcode because we'd need to attach two + // MachineMemOperands rather than one. + MVC, + + // Like MVC, but implemented as a loop that handles X*256 bytes + // followed by straight-line code to handle the rest (if any). + // The value of X is passed as an additional operand. + MVC_LOOP, + + // Similar to MVC and MVC_LOOP, but for logic operations (AND, OR, XOR). + NC, + NC_LOOP, + OC, + OC_LOOP, + XC, + XC_LOOP, + + // Use CLC to compare two blocks of memory, with the same comments + // as for MVC and MVC_LOOP. + CLC, + CLC_LOOP, + + // Use an MVST-based sequence to implement stpcpy(). + STPCPY, + + // Use a CLST-based sequence to implement strcmp(). The two input operands + // are the addresses of the strings to compare. + STRCMP, + + // Use an SRST-based sequence to search a block of memory. The first + // operand is the end address, the second is the start, and the third + // is the character to search for. CC is set to 1 on success and 2 + // on failure. + SEARCH_STRING, + + // Store the CC value in bits 29 and 28 of an integer. + IPM, + + // Perform a serialization operation. (BCR 15,0 or BCR 14,0.) + SERIALIZE, + + // Transaction begin. The first operand is the chain, the second + // the TDB pointer, and the third the immediate control field. + // Returns chain and glue. + TBEGIN, + TBEGIN_NOFLOAT, + + // Transaction end. Just the chain operand. Returns chain and glue. + TEND, + + // Create a vector constant by filling byte N of the result with bit + // 15-N of the single operand. + BYTE_MASK, + + // Create a vector constant by replicating an element-sized RISBG-style mask. + // The first operand specifies the starting set bit and the second operand + // specifies the ending set bit. Both operands count from the MSB of the + // element. + ROTATE_MASK, + + // Replicate a GPR scalar value into all elements of a vector. + REPLICATE, + + // Create a vector from two i64 GPRs. + JOIN_DWORDS, + + // Replicate one element of a vector into all elements. The first operand + // is the vector and the second is the index of the element to replicate. + SPLAT, + + // Interleave elements from the high half of operand 0 and the high half + // of operand 1. + MERGE_HIGH, + + // Likewise for the low halves. + MERGE_LOW, + + // Concatenate the vectors in the first two operands, shift them left + // by the third operand, and take the first half of the result. + SHL_DOUBLE, + + // Take one element of the first v2i64 operand and the one element of + // the second v2i64 operand and concatenate them to form a v2i64 result. + // The third operand is a 4-bit value of the form 0A0B, where A and B + // are the element selectors for the first operand and second operands + // respectively. + PERMUTE_DWORDS, + + // Perform a general vector permute on vector operands 0 and 1. + // Each byte of operand 2 controls the corresponding byte of the result, + // in the same way as a byte-level VECTOR_SHUFFLE mask. + PERMUTE, + + // Pack vector operands 0 and 1 into a single vector with half-sized elements. 
+ PACK,
+
+ // Likewise, but saturate the result and set CC. PACKS_CC does signed
+ // saturation and PACKLS_CC does unsigned saturation.
+ PACKS_CC,
+ PACKLS_CC,
+
+ // Unpack the first half of vector operand 0 into double-sized elements.
+ // UNPACK_HIGH sign-extends and UNPACKL_HIGH zero-extends.
+ UNPACK_HIGH,
+ UNPACKL_HIGH,
+
+ // Likewise for the second half.
+ UNPACK_LOW,
+ UNPACKL_LOW,
+
+ // Shift each element of vector operand 0 by the number of bits specified
+ // by scalar operand 1.
+ VSHL_BY_SCALAR,
+ VSRL_BY_SCALAR,
+ VSRA_BY_SCALAR,
+
+ // For each element of the output type, sum across all sub-elements of
+ // operand 0 belonging to the corresponding element, and add in the
+ // rightmost sub-element of the corresponding element of operand 1.
+ VSUM,
+
+ // Compare integer vector operands 0 and 1 to produce the usual 0/-1
+ // vector result. VICMPE is for equality, VICMPH for "signed greater than"
+ // and VICMPHL for "unsigned greater than".
+ VICMPE,
+ VICMPH,
+ VICMPHL,
+
+ // Likewise, but also set the condition codes on the result.
+ VICMPES,
+ VICMPHS,
+ VICMPHLS,
+
+ // Compare floating-point vector operands 0 and 1 to produce the usual 0/-1
+ // vector result. VFCMPE is for "ordered and equal", VFCMPH for "ordered and
+ // greater than" and VFCMPHE for "ordered and greater than or equal to".
+ VFCMPE,
+ VFCMPH,
+ VFCMPHE,
+
+ // Likewise, but also set the condition codes on the result.
+ VFCMPES,
+ VFCMPHS,
+ VFCMPHES,
+
+ // Test floating-point data class for vectors.
+ VFTCI,
+
+ // Extend the even f32 elements of vector operand 0 to produce a vector
+ // of f64 elements.
+ VEXTEND,
+
+ // Round the f64 elements of vector operand 0 to f32s and store them in the
+ // even elements of the result.
+ VROUND,
+
+ // AND the two vector operands together and set CC based on the result.
+ VTM,
+
+ // String operations that set CC as a side-effect.
+ VFAE_CC,
+ VFAEZ_CC,
+ VFEE_CC,
+ VFEEZ_CC,
+ VFENE_CC,
+ VFENEZ_CC,
+ VISTR_CC,
+ VSTRC_CC,
+ VSTRCZ_CC,
+
+ // Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or
+ // ATOMIC_LOAD_<op>.
+ //
+ // Operand 0: the address of the containing 32-bit-aligned field
+ // Operand 1: the second operand of <op>, in the high bits of an i32
+ // for everything except ATOMIC_SWAPW
+ // Operand 2: how many bits to rotate the i32 left to bring the first
+ // operand into the high bits
+ // Operand 3: the negative of operand 2, for rotating the other way
+ // Operand 4: the width of the field in bits (8 or 16)
+ ATOMIC_SWAPW = ISD::FIRST_TARGET_MEMORY_OPCODE,
+ ATOMIC_LOADW_ADD,
+ ATOMIC_LOADW_SUB,
+ ATOMIC_LOADW_AND,
+ ATOMIC_LOADW_OR,
+ ATOMIC_LOADW_XOR,
+ ATOMIC_LOADW_NAND,
+ ATOMIC_LOADW_MIN,
+ ATOMIC_LOADW_MAX,
+ ATOMIC_LOADW_UMIN,
+ ATOMIC_LOADW_UMAX,
+
+ // A wrapper around the inner loop of an ATOMIC_CMP_SWAP.
+ //
+ // Operand 0: the address of the containing 32-bit-aligned field
+ // Operand 1: the compare value, in the low bits of an i32
+ // Operand 2: the swap value, in the low bits of an i32
+ // Operand 3: how many bits to rotate the i32 left to bring the first
+ // operand into the high bits
+ // Operand 4: the negative of operand 2, for rotating the other way
+ // Operand 5: the width of the field in bits (8 or 16)
+ ATOMIC_CMP_SWAPW,
+
+ // Prefetch from the second operand using the 4-bit control code in
+ // the first operand. The code is 1 for a load prefetch and 2 for
+ // a store prefetch.
+ PREFETCH
+};
+
+// Return true if OPCODE is some kind of PC-relative address.
+inline bool isPCREL(unsigned Opcode) { + return Opcode == PCREL_WRAPPER || Opcode == PCREL_OFFSET; +} +} // end namespace SystemZISD + +namespace SystemZICMP { +// Describes whether an integer comparison needs to be signed or unsigned, +// or whether either type is OK. +enum { + Any, + UnsignedOnly, + SignedOnly +}; +} // end namespace SystemZICMP + +class SystemZSubtarget; +class SystemZTargetMachine; + +class SystemZTargetLowering : public TargetLowering { +public: + explicit SystemZTargetLowering(const TargetMachine &TM, + const SystemZSubtarget &STI); + + // Override TargetLowering. + MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override { + return MVT::i32; + } + MVT getVectorIdxTy(const DataLayout &DL) const override { + // Only the lower 12 bits of an element index are used, so we don't + // want to clobber the upper 32 bits of a GPR unnecessarily. + return MVT::i32; + } + TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(EVT VT) + const override { + // Widen subvectors to the full width rather than promoting integer + // elements. This is better because: + // + // (a) it means that we can handle the ABI for passing and returning + // sub-128 vectors without having to handle them as legal types. + // + // (b) we don't have instructions to extend on load and truncate on store, + // so promoting the integers is less efficient. + // + // (c) there are no multiplication instructions for the widest integer + // type (v2i64). + if (VT.getVectorElementType().getSizeInBits() % 8 == 0) + return TypeWidenVector; + return TargetLoweringBase::getPreferredVectorAction(VT); + } + EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, + EVT) const override; + bool isFMAFasterThanFMulAndFAdd(EVT VT) const override; + bool isFPImmLegal(const APFloat &Imm, EVT VT) const override; + bool isLegalICmpImmediate(int64_t Imm) const override; + bool isLegalAddImmediate(int64_t Imm) const override; + bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, + unsigned AS) const override; + bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, + unsigned Align, + bool *Fast) const override; + bool isTruncateFree(Type *, Type *) const override; + bool isTruncateFree(EVT, EVT) const override; + const char *getTargetNodeName(unsigned Opcode) const override; + std::pair<unsigned, const TargetRegisterClass *> + getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, + StringRef Constraint, MVT VT) const override; + TargetLowering::ConstraintType + getConstraintType(StringRef Constraint) const override; + TargetLowering::ConstraintWeight + getSingleConstraintMatchWeight(AsmOperandInfo &info, + const char *constraint) const override; + void LowerAsmOperandForConstraint(SDValue Op, + std::string &Constraint, + std::vector<SDValue> &Ops, + SelectionDAG &DAG) const override; + + unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override { + if (ConstraintCode.size() == 1) { + switch(ConstraintCode[0]) { + default: + break; + case 'Q': + return InlineAsm::Constraint_Q; + case 'R': + return InlineAsm::Constraint_R; + case 'S': + return InlineAsm::Constraint_S; + case 'T': + return InlineAsm::Constraint_T; + } + } + return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); + } + + /// If a physical register, this returns the register that receives the + /// exception address on entry to an EH pad. 
+ unsigned + getExceptionPointerRegister(const Constant *PersonalityFn) const override { + return SystemZ::R6D; + } + + /// If a physical register, this returns the register that receives the + /// exception typeid on entry to a landing pad. + unsigned + getExceptionSelectorRegister(const Constant *PersonalityFn) const override { + return SystemZ::R7D; + } + + MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *BB) const + override; + SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; + bool allowTruncateForTailCall(Type *, Type *) const override; + bool mayBeEmittedAsTailCall(CallInst *CI) const override; + SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + SDLoc DL, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const override; + SDValue LowerCall(CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const override; + + bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, + bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + LLVMContext &Context) const override; + SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + SDLoc DL, SelectionDAG &DAG) const override; + SDValue prepareVolatileOrAtomicLoad(SDValue Chain, SDLoc DL, + SelectionDAG &DAG) const override; + SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; + +private: + const SystemZSubtarget &Subtarget; + + // Implement LowerOperation for individual opcodes. + SDValue lowerSETCC(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerBR_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerGlobalAddress(GlobalAddressSDNode *Node, + SelectionDAG &DAG) const; + SDValue lowerTLSGetOffset(GlobalAddressSDNode *Node, + SelectionDAG &DAG, unsigned Opcode, + SDValue GOTOffset) const; + SDValue lowerGlobalTLSAddress(GlobalAddressSDNode *Node, + SelectionDAG &DAG) const; + SDValue lowerBlockAddress(BlockAddressSDNode *Node, + SelectionDAG &DAG) const; + SDValue lowerJumpTable(JumpTableSDNode *JT, SelectionDAG &DAG) const; + SDValue lowerConstantPool(ConstantPoolSDNode *CP, SelectionDAG &DAG) const; + SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerVACOPY(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerSDIVREM(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerUDIVREM(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerBITCAST(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerOR(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerCTPOP(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerATOMIC_LOAD_OP(SDValue Op, SelectionDAG &DAG, + unsigned Opcode) const; + SDValue lowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerLOAD_SEQUENCE_POINT(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const; + SDValue 
lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG, + unsigned UnpackHigh) const; + SDValue lowerShift(SDValue Op, SelectionDAG &DAG, unsigned ByScalar) const; + + SDValue combineExtract(SDLoc DL, EVT ElemVT, EVT VecVT, SDValue OrigOp, + unsigned Index, DAGCombinerInfo &DCI, + bool Force) const; + SDValue combineTruncateExtract(SDLoc DL, EVT TruncVT, SDValue Op, + DAGCombinerInfo &DCI) const; + + // If the last instruction before MBBI in MBB was some form of COMPARE, + // try to replace it with a COMPARE AND BRANCH just before MBBI. + // CCMask and Target are the BRC-like operands for the branch. + // Return true if the change was made. + bool convertPrevCompareToBranch(MachineBasicBlock *MBB, + MachineBasicBlock::iterator MBBI, + unsigned CCMask, + MachineBasicBlock *Target) const; + + // Implement EmitInstrWithCustomInserter for individual operation types. + MachineBasicBlock *emitSelect(MachineInstr *MI, + MachineBasicBlock *BB) const; + MachineBasicBlock *emitCondStore(MachineInstr *MI, + MachineBasicBlock *BB, + unsigned StoreOpcode, unsigned STOCOpcode, + bool Invert) const; + MachineBasicBlock *emitExt128(MachineInstr *MI, + MachineBasicBlock *MBB, + bool ClearEven, unsigned SubReg) const; + MachineBasicBlock *emitAtomicLoadBinary(MachineInstr *MI, + MachineBasicBlock *BB, + unsigned BinOpcode, unsigned BitSize, + bool Invert = false) const; + MachineBasicBlock *emitAtomicLoadMinMax(MachineInstr *MI, + MachineBasicBlock *MBB, + unsigned CompareOpcode, + unsigned KeepOldMask, + unsigned BitSize) const; + MachineBasicBlock *emitAtomicCmpSwapW(MachineInstr *MI, + MachineBasicBlock *BB) const; + MachineBasicBlock *emitMemMemWrapper(MachineInstr *MI, + MachineBasicBlock *BB, + unsigned Opcode) const; + MachineBasicBlock *emitStringWrapper(MachineInstr *MI, + MachineBasicBlock *BB, + unsigned Opcode) const; + MachineBasicBlock *emitTransactionBegin(MachineInstr *MI, + MachineBasicBlock *MBB, + unsigned Opcode, + bool NoFloat) const; + MachineBasicBlock *emitLoadAndTestCmp0(MachineInstr *MI, + MachineBasicBlock *MBB, + unsigned Opcode) const; + +}; +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h b/contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h new file mode 100644 index 0000000..5a1c874 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h @@ -0,0 +1,46 @@ +//===-- SystemZInstrBuilder.h - Functions to aid building insts -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file exposes functions that may be used with BuildMI from the +// MachineInstrBuilder.h file to handle SystemZ'isms in a clean way. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZINSTRBUILDER_H +#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZINSTRBUILDER_H + +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/PseudoSourceValue.h" + +namespace llvm { + +/// Add a BDX memory reference for frame object FI to MIB. +static inline const MachineInstrBuilder & +addFrameReference(const MachineInstrBuilder &MIB, int FI) { + MachineInstr *MI = MIB; + MachineFunction &MF = *MI->getParent()->getParent(); + MachineFrameInfo *MFFrame = MF.getFrameInfo(); + const MCInstrDesc &MCID = MI->getDesc(); + unsigned Flags = 0; + if (MCID.mayLoad()) + Flags |= MachineMemOperand::MOLoad; + if (MCID.mayStore()) + Flags |= MachineMemOperand::MOStore; + int64_t Offset = 0; + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(MF, FI, Offset), Flags, + MFFrame->getObjectSize(FI), MFFrame->getObjectAlignment(FI)); + return MIB.addFrameIndex(FI).addImm(Offset).addReg(0).addMemOperand(MMO); +} + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td new file mode 100644 index 0000000..0cb2672 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td @@ -0,0 +1,452 @@ +//==- SystemZInstrFP.td - Floating-point SystemZ instructions --*- tblgen-*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Select instructions +//===----------------------------------------------------------------------===// + +// C's ?: operator for floating-point operands. +def SelectF32 : SelectWrapper<FP32>; +def SelectF64 : SelectWrapper<FP64>; +def SelectF128 : SelectWrapper<FP128>; + +defm CondStoreF32 : CondStores<FP32, nonvolatile_store, + nonvolatile_load, bdxaddr20only>; +defm CondStoreF64 : CondStores<FP64, nonvolatile_store, + nonvolatile_load, bdxaddr20only>; + +//===----------------------------------------------------------------------===// +// Move instructions +//===----------------------------------------------------------------------===// + +// Load zero. +let hasSideEffects = 0, isAsCheapAsAMove = 1, isMoveImm = 1 in { + def LZER : InherentRRE<"lzer", 0xB374, FP32, (fpimm0)>; + def LZDR : InherentRRE<"lzdr", 0xB375, FP64, (fpimm0)>; + def LZXR : InherentRRE<"lzxr", 0xB376, FP128, (fpimm0)>; +} + +// Moves between two floating-point registers. +let hasSideEffects = 0 in { + def LER : UnaryRR <"le", 0x38, null_frag, FP32, FP32>; + def LDR : UnaryRR <"ld", 0x28, null_frag, FP64, FP64>; + def LXR : UnaryRRE<"lx", 0xB365, null_frag, FP128, FP128>; +} + +// Moves between two floating-point registers that also set the condition +// codes. +let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in { + defm LTEBR : LoadAndTestRRE<"lteb", 0xB302, FP32>; + defm LTDBR : LoadAndTestRRE<"ltdb", 0xB312, FP64>; + defm LTXBR : LoadAndTestRRE<"ltxb", 0xB342, FP128>; +} +// Note that LTxBRCompare is not available if we have vector support, +// since load-and-test instructions will partially clobber the target +// (vector) register. 
+let Predicates = [FeatureNoVector] in { + defm : CompareZeroFP<LTEBRCompare, FP32>; + defm : CompareZeroFP<LTDBRCompare, FP64>; + defm : CompareZeroFP<LTXBRCompare, FP128>; +} + +// Use a normal load-and-test for compare against zero in case of +// vector support (via a pseudo to simplify instruction selection). +let Defs = [CC], usesCustomInserter = 1 in { + def LTEBRCompare_VecPseudo : Pseudo<(outs), (ins FP32:$R1, FP32:$R2), []>; + def LTDBRCompare_VecPseudo : Pseudo<(outs), (ins FP64:$R1, FP64:$R2), []>; + def LTXBRCompare_VecPseudo : Pseudo<(outs), (ins FP128:$R1, FP128:$R2), []>; +} +let Predicates = [FeatureVector] in { + defm : CompareZeroFP<LTEBRCompare_VecPseudo, FP32>; + defm : CompareZeroFP<LTDBRCompare_VecPseudo, FP64>; + defm : CompareZeroFP<LTXBRCompare_VecPseudo, FP128>; +} + +// Moves between 64-bit integer and floating-point registers. +def LGDR : UnaryRRE<"lgd", 0xB3CD, bitconvert, GR64, FP64>; +def LDGR : UnaryRRE<"ldg", 0xB3C1, bitconvert, FP64, GR64>; + +// fcopysign with an FP32 result. +let isCodeGenOnly = 1 in { + def CPSDRss : BinaryRRF<"cpsd", 0xB372, fcopysign, FP32, FP32>; + def CPSDRsd : BinaryRRF<"cpsd", 0xB372, fcopysign, FP32, FP64>; +} + +// The sign of an FP128 is in the high register. +def : Pat<(fcopysign FP32:$src1, FP128:$src2), + (CPSDRsd FP32:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>; + +// fcopysign with an FP64 result. +let isCodeGenOnly = 1 in + def CPSDRds : BinaryRRF<"cpsd", 0xB372, fcopysign, FP64, FP32>; +def CPSDRdd : BinaryRRF<"cpsd", 0xB372, fcopysign, FP64, FP64>; + +// The sign of an FP128 is in the high register. +def : Pat<(fcopysign FP64:$src1, FP128:$src2), + (CPSDRdd FP64:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>; + +// fcopysign with an FP128 result. Use "upper" as the high half and leave +// the low half as-is. +class CopySign128<RegisterOperand cls, dag upper> + : Pat<(fcopysign FP128:$src1, cls:$src2), + (INSERT_SUBREG FP128:$src1, upper, subreg_h64)>; + +def : CopySign128<FP32, (CPSDRds (EXTRACT_SUBREG FP128:$src1, subreg_h64), + FP32:$src2)>; +def : CopySign128<FP64, (CPSDRdd (EXTRACT_SUBREG FP128:$src1, subreg_h64), + FP64:$src2)>; +def : CopySign128<FP128, (CPSDRdd (EXTRACT_SUBREG FP128:$src1, subreg_h64), + (EXTRACT_SUBREG FP128:$src2, subreg_h64))>; + +defm LoadStoreF32 : MVCLoadStore<load, f32, MVCSequence, 4>; +defm LoadStoreF64 : MVCLoadStore<load, f64, MVCSequence, 8>; +defm LoadStoreF128 : MVCLoadStore<load, f128, MVCSequence, 16>; + +//===----------------------------------------------------------------------===// +// Load instructions +//===----------------------------------------------------------------------===// + +let canFoldAsLoad = 1, SimpleBDXLoad = 1 in { + defm LE : UnaryRXPair<"le", 0x78, 0xED64, load, FP32, 4>; + defm LD : UnaryRXPair<"ld", 0x68, 0xED65, load, FP64, 8>; + + // For z13 we prefer LDE over LE to avoid partial register dependencies. + def LDE32 : UnaryRXE<"lde", 0xED24, null_frag, FP32, 4>; + + // These instructions are split after register allocation, so we don't + // want a custom inserter. 
+ let Has20BitOffset = 1, HasIndex = 1, Is128Bit = 1 in {
+ def LX : Pseudo<(outs FP128:$dst), (ins bdxaddr20only128:$src),
+ [(set FP128:$dst, (load bdxaddr20only128:$src))]>;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Store instructions
+//===----------------------------------------------------------------------===//
+
+let SimpleBDXStore = 1 in {
+ defm STE : StoreRXPair<"ste", 0x70, 0xED66, store, FP32, 4>;
+ defm STD : StoreRXPair<"std", 0x60, 0xED67, store, FP64, 8>;
+
+ // These instructions are split after register allocation, so we don't
+ // want a custom inserter.
+ let Has20BitOffset = 1, HasIndex = 1, Is128Bit = 1 in {
+ def STX : Pseudo<(outs), (ins FP128:$src, bdxaddr20only128:$dst),
+ [(store FP128:$src, bdxaddr20only128:$dst)]>;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Conversion instructions
+//===----------------------------------------------------------------------===//
+
+// Convert floating-point values to narrower representations, rounding
+// according to the current mode. The destination of LEXBR and LDXBR
+// is a 128-bit value, but only the first register of the pair is used.
+def LEDBR : UnaryRRE<"ledb", 0xB344, fround, FP32, FP64>;
+def LEXBR : UnaryRRE<"lexb", 0xB346, null_frag, FP128, FP128>;
+def LDXBR : UnaryRRE<"ldxb", 0xB345, null_frag, FP128, FP128>;
+
+def LEDBRA : UnaryRRF4<"ledbra", 0xB344, FP32, FP64>,
+ Requires<[FeatureFPExtension]>;
+def LEXBRA : UnaryRRF4<"lexbra", 0xB346, FP128, FP128>,
+ Requires<[FeatureFPExtension]>;
+def LDXBRA : UnaryRRF4<"ldxbra", 0xB345, FP128, FP128>,
+ Requires<[FeatureFPExtension]>;
+
+def : Pat<(f32 (fround FP128:$src)),
+ (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_hr32)>;
+def : Pat<(f64 (fround FP128:$src)),
+ (EXTRACT_SUBREG (LDXBR FP128:$src), subreg_h64)>;
+
+// Extend register floating-point values to wider representations.
+def LDEBR : UnaryRRE<"ldeb", 0xB304, fextend, FP64, FP32>;
+def LXEBR : UnaryRRE<"lxeb", 0xB306, fextend, FP128, FP32>;
+def LXDBR : UnaryRRE<"lxdb", 0xB305, fextend, FP128, FP64>;
+
+// Extend memory floating-point values to wider representations.
+def LDEB : UnaryRXE<"ldeb", 0xED04, extloadf32, FP64, 4>;
+def LXEB : UnaryRXE<"lxeb", 0xED06, extloadf32, FP128, 4>;
+def LXDB : UnaryRXE<"lxdb", 0xED05, extloadf64, FP128, 8>;
+
+// Convert a signed integer register value to a floating-point one.
+def CEFBR : UnaryRRE<"cefb", 0xB394, sint_to_fp, FP32, GR32>;
+def CDFBR : UnaryRRE<"cdfb", 0xB395, sint_to_fp, FP64, GR32>;
+def CXFBR : UnaryRRE<"cxfb", 0xB396, sint_to_fp, FP128, GR32>;
+
+def CEGBR : UnaryRRE<"cegb", 0xB3A4, sint_to_fp, FP32, GR64>;
+def CDGBR : UnaryRRE<"cdgb", 0xB3A5, sint_to_fp, FP64, GR64>;
+def CXGBR : UnaryRRE<"cxgb", 0xB3A6, sint_to_fp, FP128, GR64>;
+
+// Convert an unsigned integer register value to a floating-point one.
+let Predicates = [FeatureFPExtension] in {
+ def CELFBR : UnaryRRF4<"celfbr", 0xB390, FP32, GR32>;
+ def CDLFBR : UnaryRRF4<"cdlfbr", 0xB391, FP64, GR32>;
+ def CXLFBR : UnaryRRF4<"cxlfbr", 0xB392, FP128, GR32>;
+
+ def CELGBR : UnaryRRF4<"celgbr", 0xB3A0, FP32, GR64>;
+ def CDLGBR : UnaryRRF4<"cdlgbr", 0xB3A1, FP64, GR64>;
+ def CXLGBR : UnaryRRF4<"cxlgbr", 0xB3A2, FP128, GR64>;
+
+ def : Pat<(f32 (uint_to_fp GR32:$src)), (CELFBR 0, GR32:$src, 0)>;
+ def : Pat<(f64 (uint_to_fp GR32:$src)), (CDLFBR 0, GR32:$src, 0)>;
+ def : Pat<(f128 (uint_to_fp GR32:$src)), (CXLFBR 0, GR32:$src, 0)>;
+
+ def : Pat<(f32 (uint_to_fp GR64:$src)), (CELGBR 0, GR64:$src, 0)>;
+ def : Pat<(f64 (uint_to_fp GR64:$src)), (CDLGBR 0, GR64:$src, 0)>;
+ def : Pat<(f128 (uint_to_fp GR64:$src)), (CXLGBR 0, GR64:$src, 0)>;
+}
+
+// Convert a floating-point register value to a signed integer value,
+// with the second operand (modifier M3) specifying the rounding mode.
+let Defs = [CC] in {
+ def CFEBR : UnaryRRF<"cfeb", 0xB398, GR32, FP32>;
+ def CFDBR : UnaryRRF<"cfdb", 0xB399, GR32, FP64>;
+ def CFXBR : UnaryRRF<"cfxb", 0xB39A, GR32, FP128>;
+
+ def CGEBR : UnaryRRF<"cgeb", 0xB3A8, GR64, FP32>;
+ def CGDBR : UnaryRRF<"cgdb", 0xB3A9, GR64, FP64>;
+ def CGXBR : UnaryRRF<"cgxb", 0xB3AA, GR64, FP128>;
+}
+
+// fp_to_sint always rounds towards zero, which is modifier value 5.
+def : Pat<(i32 (fp_to_sint FP32:$src)), (CFEBR 5, FP32:$src)>;
+def : Pat<(i32 (fp_to_sint FP64:$src)), (CFDBR 5, FP64:$src)>;
+def : Pat<(i32 (fp_to_sint FP128:$src)), (CFXBR 5, FP128:$src)>;
+
+def : Pat<(i64 (fp_to_sint FP32:$src)), (CGEBR 5, FP32:$src)>;
+def : Pat<(i64 (fp_to_sint FP64:$src)), (CGDBR 5, FP64:$src)>;
+def : Pat<(i64 (fp_to_sint FP128:$src)), (CGXBR 5, FP128:$src)>;
+
+// Convert a floating-point register value to an unsigned integer value.
+let Predicates = [FeatureFPExtension] in {
+ let Defs = [CC] in {
+ def CLFEBR : UnaryRRF4<"clfebr", 0xB39C, GR32, FP32>;
+ def CLFDBR : UnaryRRF4<"clfdbr", 0xB39D, GR32, FP64>;
+ def CLFXBR : UnaryRRF4<"clfxbr", 0xB39E, GR32, FP128>;
+
+ def CLGEBR : UnaryRRF4<"clgebr", 0xB3AC, GR64, FP32>;
+ def CLGDBR : UnaryRRF4<"clgdbr", 0xB3AD, GR64, FP64>;
+ def CLGXBR : UnaryRRF4<"clgxbr", 0xB3AE, GR64, FP128>;
+ }
+
+ def : Pat<(i32 (fp_to_uint FP32:$src)), (CLFEBR 5, FP32:$src, 0)>;
+ def : Pat<(i32 (fp_to_uint FP64:$src)), (CLFDBR 5, FP64:$src, 0)>;
+ def : Pat<(i32 (fp_to_uint FP128:$src)), (CLFXBR 5, FP128:$src, 0)>;
+
+ def : Pat<(i64 (fp_to_uint FP32:$src)), (CLGEBR 5, FP32:$src, 0)>;
+ def : Pat<(i64 (fp_to_uint FP64:$src)), (CLGDBR 5, FP64:$src, 0)>;
+ def : Pat<(i64 (fp_to_uint FP128:$src)), (CLGXBR 5, FP128:$src, 0)>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// We prefer generic instructions during isel, because they do not
+// clobber CC and therefore give the scheduler more freedom. In cases
+// where the CC is actually useful, the SystemZElimCompare pass will try to
+// convert generic instructions into opcodes that also set CC. Note
+// that lcdf / lpdf / lndf only affect the sign bit, and can therefore
+// be used with fp32 as well. This could be done for fp128, in which
+// case the operands would have to be tied.
+
+// Negation (Load Complement).
+let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in { + def LCEBR : UnaryRRE<"lceb", 0xB303, null_frag, FP32, FP32>; + def LCDBR : UnaryRRE<"lcdb", 0xB313, null_frag, FP64, FP64>; + def LCXBR : UnaryRRE<"lcxb", 0xB343, fneg, FP128, FP128>; +} +// Generic form, which does not set CC. +def LCDFR : UnaryRRE<"lcdf", 0xB373, fneg, FP64, FP64>; +let isCodeGenOnly = 1 in + def LCDFR_32 : UnaryRRE<"lcdf", 0xB373, fneg, FP32, FP32>; + +// Absolute value (Load Positive). +let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in { + def LPEBR : UnaryRRE<"lpeb", 0xB300, null_frag, FP32, FP32>; + def LPDBR : UnaryRRE<"lpdb", 0xB310, null_frag, FP64, FP64>; + def LPXBR : UnaryRRE<"lpxb", 0xB340, fabs, FP128, FP128>; +} +// Generic form, which does not set CC. +def LPDFR : UnaryRRE<"lpdf", 0xB370, fabs, FP64, FP64>; +let isCodeGenOnly = 1 in + def LPDFR_32 : UnaryRRE<"lpdf", 0xB370, fabs, FP32, FP32>; + +// Negative absolute value (Load Negative). +let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in { + def LNEBR : UnaryRRE<"lneb", 0xB301, null_frag, FP32, FP32>; + def LNDBR : UnaryRRE<"lndb", 0xB311, null_frag, FP64, FP64>; + def LNXBR : UnaryRRE<"lnxb", 0xB341, fnabs, FP128, FP128>; +} +// Generic form, which does not set CC. +def LNDFR : UnaryRRE<"lndf", 0xB371, fnabs, FP64, FP64>; +let isCodeGenOnly = 1 in + def LNDFR_32 : UnaryRRE<"lndf", 0xB371, fnabs, FP32, FP32>; + +// Square root. +def SQEBR : UnaryRRE<"sqeb", 0xB314, fsqrt, FP32, FP32>; +def SQDBR : UnaryRRE<"sqdb", 0xB315, fsqrt, FP64, FP64>; +def SQXBR : UnaryRRE<"sqxb", 0xB316, fsqrt, FP128, FP128>; + +def SQEB : UnaryRXE<"sqeb", 0xED14, loadu<fsqrt>, FP32, 4>; +def SQDB : UnaryRXE<"sqdb", 0xED15, loadu<fsqrt>, FP64, 8>; + +// Round to an integer, with the second operand (modifier M3) specifying +// the rounding mode. These forms always check for inexact conditions. +def FIEBR : UnaryRRF<"fieb", 0xB357, FP32, FP32>; +def FIDBR : UnaryRRF<"fidb", 0xB35F, FP64, FP64>; +def FIXBR : UnaryRRF<"fixb", 0xB347, FP128, FP128>; + +// frint rounds according to the current mode (modifier 0) and detects +// inexact conditions. +def : Pat<(frint FP32:$src), (FIEBR 0, FP32:$src)>; +def : Pat<(frint FP64:$src), (FIDBR 0, FP64:$src)>; +def : Pat<(frint FP128:$src), (FIXBR 0, FP128:$src)>; + +let Predicates = [FeatureFPExtension] in { + // Extended forms of the FIxBR instructions. M4 can be set to 4 + // to suppress detection of inexact conditions. + def FIEBRA : UnaryRRF4<"fiebra", 0xB357, FP32, FP32>; + def FIDBRA : UnaryRRF4<"fidbra", 0xB35F, FP64, FP64>; + def FIXBRA : UnaryRRF4<"fixbra", 0xB347, FP128, FP128>; + + // fnearbyint is like frint but does not detect inexact conditions. + def : Pat<(fnearbyint FP32:$src), (FIEBRA 0, FP32:$src, 4)>; + def : Pat<(fnearbyint FP64:$src), (FIDBRA 0, FP64:$src, 4)>; + def : Pat<(fnearbyint FP128:$src), (FIXBRA 0, FP128:$src, 4)>; + + // floor is no longer allowed to raise an inexact condition, + // so restrict it to the cases where the condition can be suppressed. + // Mode 7 is round towards -inf. + def : Pat<(ffloor FP32:$src), (FIEBRA 7, FP32:$src, 4)>; + def : Pat<(ffloor FP64:$src), (FIDBRA 7, FP64:$src, 4)>; + def : Pat<(ffloor FP128:$src), (FIXBRA 7, FP128:$src, 4)>; + + // Same idea for ceil, where mode 6 is round towards +inf. + def : Pat<(fceil FP32:$src), (FIEBRA 6, FP32:$src, 4)>; + def : Pat<(fceil FP64:$src), (FIDBRA 6, FP64:$src, 4)>; + def : Pat<(fceil FP128:$src), (FIXBRA 6, FP128:$src, 4)>; + + // Same idea for trunc, where mode 5 is round towards zero. 
+ def : Pat<(ftrunc FP32:$src), (FIEBRA 5, FP32:$src, 4)>; + def : Pat<(ftrunc FP64:$src), (FIDBRA 5, FP64:$src, 4)>; + def : Pat<(ftrunc FP128:$src), (FIXBRA 5, FP128:$src, 4)>; + + // Same idea for round, where mode 1 is round towards nearest with + // ties away from zero. + def : Pat<(frnd FP32:$src), (FIEBRA 1, FP32:$src, 4)>; + def : Pat<(frnd FP64:$src), (FIDBRA 1, FP64:$src, 4)>; + def : Pat<(frnd FP128:$src), (FIXBRA 1, FP128:$src, 4)>; +} + +//===----------------------------------------------------------------------===// +// Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition. +let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in { + let isCommutable = 1 in { + def AEBR : BinaryRRE<"aeb", 0xB30A, fadd, FP32, FP32>; + def ADBR : BinaryRRE<"adb", 0xB31A, fadd, FP64, FP64>; + def AXBR : BinaryRRE<"axb", 0xB34A, fadd, FP128, FP128>; + } + def AEB : BinaryRXE<"aeb", 0xED0A, fadd, FP32, load, 4>; + def ADB : BinaryRXE<"adb", 0xED1A, fadd, FP64, load, 8>; +} + +// Subtraction. +let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in { + def SEBR : BinaryRRE<"seb", 0xB30B, fsub, FP32, FP32>; + def SDBR : BinaryRRE<"sdb", 0xB31B, fsub, FP64, FP64>; + def SXBR : BinaryRRE<"sxb", 0xB34B, fsub, FP128, FP128>; + + def SEB : BinaryRXE<"seb", 0xED0B, fsub, FP32, load, 4>; + def SDB : BinaryRXE<"sdb", 0xED1B, fsub, FP64, load, 8>; +} + +// Multiplication. +let isCommutable = 1 in { + def MEEBR : BinaryRRE<"meeb", 0xB317, fmul, FP32, FP32>; + def MDBR : BinaryRRE<"mdb", 0xB31C, fmul, FP64, FP64>; + def MXBR : BinaryRRE<"mxb", 0xB34C, fmul, FP128, FP128>; +} +def MEEB : BinaryRXE<"meeb", 0xED17, fmul, FP32, load, 4>; +def MDB : BinaryRXE<"mdb", 0xED1C, fmul, FP64, load, 8>; + +// f64 multiplication of two FP32 registers. +def MDEBR : BinaryRRE<"mdeb", 0xB30C, null_frag, FP64, FP32>; +def : Pat<(fmul (f64 (fextend FP32:$src1)), (f64 (fextend FP32:$src2))), + (MDEBR (INSERT_SUBREG (f64 (IMPLICIT_DEF)), + FP32:$src1, subreg_r32), FP32:$src2)>; + +// f64 multiplication of an FP32 register and an f32 memory. +def MDEB : BinaryRXE<"mdeb", 0xED0C, null_frag, FP64, load, 4>; +def : Pat<(fmul (f64 (fextend FP32:$src1)), + (f64 (extloadf32 bdxaddr12only:$addr))), + (MDEB (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_r32), + bdxaddr12only:$addr)>; + +// f128 multiplication of two FP64 registers. +def MXDBR : BinaryRRE<"mxdb", 0xB307, null_frag, FP128, FP64>; +def : Pat<(fmul (f128 (fextend FP64:$src1)), (f128 (fextend FP64:$src2))), + (MXDBR (INSERT_SUBREG (f128 (IMPLICIT_DEF)), + FP64:$src1, subreg_h64), FP64:$src2)>; + +// f128 multiplication of an FP64 register and an f64 memory. +def MXDB : BinaryRXE<"mxdb", 0xED07, null_frag, FP128, load, 8>; +def : Pat<(fmul (f128 (fextend FP64:$src1)), + (f128 (extloadf64 bdxaddr12only:$addr))), + (MXDB (INSERT_SUBREG (f128 (IMPLICIT_DEF)), FP64:$src1, subreg_h64), + bdxaddr12only:$addr)>; + +// Fused multiply-add. +def MAEBR : TernaryRRD<"maeb", 0xB30E, z_fma, FP32>; +def MADBR : TernaryRRD<"madb", 0xB31E, z_fma, FP64>; + +def MAEB : TernaryRXF<"maeb", 0xED0E, z_fma, FP32, load, 4>; +def MADB : TernaryRXF<"madb", 0xED1E, z_fma, FP64, load, 8>; + +// Fused multiply-subtract. +def MSEBR : TernaryRRD<"mseb", 0xB30F, z_fms, FP32>; +def MSDBR : TernaryRRD<"msdb", 0xB31F, z_fms, FP64>; + +def MSEB : TernaryRXF<"mseb", 0xED0F, z_fms, FP32, load, 4>; +def MSDB : TernaryRXF<"msdb", 0xED1F, z_fms, FP64, load, 8>; + +// Division. 
+def DEBR : BinaryRRE<"deb", 0xB30D, fdiv, FP32, FP32>; +def DDBR : BinaryRRE<"ddb", 0xB31D, fdiv, FP64, FP64>; +def DXBR : BinaryRRE<"dxb", 0xB34D, fdiv, FP128, FP128>; + +def DEB : BinaryRXE<"deb", 0xED0D, fdiv, FP32, load, 4>; +def DDB : BinaryRXE<"ddb", 0xED1D, fdiv, FP64, load, 8>; + +//===----------------------------------------------------------------------===// +// Comparisons +//===----------------------------------------------------------------------===// + +let Defs = [CC], CCValues = 0xF in { + def CEBR : CompareRRE<"ceb", 0xB309, z_fcmp, FP32, FP32>; + def CDBR : CompareRRE<"cdb", 0xB319, z_fcmp, FP64, FP64>; + def CXBR : CompareRRE<"cxb", 0xB349, z_fcmp, FP128, FP128>; + + def CEB : CompareRXE<"ceb", 0xED09, z_fcmp, FP32, load, 4>; + def CDB : CompareRXE<"cdb", 0xED19, z_fcmp, FP64, load, 8>; +} + +//===----------------------------------------------------------------------===// +// Peepholes +//===----------------------------------------------------------------------===// + +def : Pat<(f32 fpimmneg0), (LCDFR_32 (LZER))>; +def : Pat<(f64 fpimmneg0), (LCDFR (LZDR))>; +def : Pat<(f128 fpimmneg0), (LCXBR (LZXR))>; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td new file mode 100644 index 0000000..01f4cde --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td @@ -0,0 +1,2458 @@ +//==- SystemZInstrFormats.td - SystemZ Instruction Formats --*- tablegen -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Basic SystemZ instruction definition +//===----------------------------------------------------------------------===// + +class InstSystemZ<int size, dag outs, dag ins, string asmstr, + list<dag> pattern> : Instruction { + let Namespace = "SystemZ"; + + dag OutOperandList = outs; + dag InOperandList = ins; + let Size = size; + let Pattern = pattern; + let AsmString = asmstr; + + // Some instructions come in pairs, one having a 12-bit displacement + // and the other having a 20-bit displacement. Both instructions in + // the pair have the same DispKey and their DispSizes are "12" and "20" + // respectively. + string DispKey = ""; + string DispSize = "none"; + + // Many register-based <INSN>R instructions have a memory-based <INSN> + // counterpart. OpKey uniquely identifies <INSN>, while OpType is + // "reg" for <INSN>R and "mem" for <INSN>. + string OpKey = ""; + string OpType = "none"; + + // Many distinct-operands instructions have older 2-operand equivalents. + // NumOpsKey uniquely identifies one of these 2-operand and 3-operand pairs, + // with NumOpsValue being "2" or "3" as appropriate. + string NumOpsKey = ""; + string NumOpsValue = "none"; + + // True if this instruction is a simple D(X,B) load of a register + // (with no sign or zero extension). + bit SimpleBDXLoad = 0; + + // True if this instruction is a simple D(X,B) store of a register + // (with no truncation). + bit SimpleBDXStore = 0; + + // True if this instruction has a 20-bit displacement field. + bit Has20BitOffset = 0; + + // True if addresses in this instruction have an index register. + bit HasIndex = 0; + + // True if this is a 128-bit pseudo instruction that combines two 64-bit + // operations. 
+  bit Is128Bit = 0;
+
+  // The access size of all memory operands in bytes, or 0 if not known.
+  bits<5> AccessBytes = 0;
+
+  // If the instruction sets CC to a useful value, this gives the mask
+  // of all possible CC results. The mask has the same form as
+  // SystemZ::CCMASK_*.
+  bits<4> CCValues = 0;
+
+  // The subset of CCValues that have the same meaning as they would after
+  // a comparison of the first operand against zero.
+  bits<4> CompareZeroCCMask = 0;
+
+  // True if the instruction is conditional and if the CC mask operand
+  // comes first (as for BRC, etc.).
+  bit CCMaskFirst = 0;
+
+  // Similar, but true if the CC mask operand comes last (as for LOC, etc.).
+  bit CCMaskLast = 0;
+
+  // True if the instruction is the "logical" rather than "arithmetic" form,
+  // in cases where a distinction exists.
+  bit IsLogical = 0;
+
+  let TSFlags{0} = SimpleBDXLoad;
+  let TSFlags{1} = SimpleBDXStore;
+  let TSFlags{2} = Has20BitOffset;
+  let TSFlags{3} = HasIndex;
+  let TSFlags{4} = Is128Bit;
+  let TSFlags{9-5} = AccessBytes;
+  let TSFlags{13-10} = CCValues;
+  let TSFlags{17-14} = CompareZeroCCMask;
+  let TSFlags{18} = CCMaskFirst;
+  let TSFlags{19} = CCMaskLast;
+  let TSFlags{20} = IsLogical;
+}
+
+//===----------------------------------------------------------------------===//
+// Mappings between instructions
+//===----------------------------------------------------------------------===//
+
+// Return the version of an instruction that has an unsigned 12-bit
+// displacement.
+def getDisp12Opcode : InstrMapping {
+  let FilterClass = "InstSystemZ";
+  let RowFields = ["DispKey"];
+  let ColFields = ["DispSize"];
+  let KeyCol = ["20"];
+  let ValueCols = [["12"]];
+}
+
+// Return the version of an instruction that has a signed 20-bit displacement.
+def getDisp20Opcode : InstrMapping {
+  let FilterClass = "InstSystemZ";
+  let RowFields = ["DispKey"];
+  let ColFields = ["DispSize"];
+  let KeyCol = ["12"];
+  let ValueCols = [["20"]];
+}
+
+// Return the memory form of a register instruction.
+def getMemOpcode : InstrMapping {
+  let FilterClass = "InstSystemZ";
+  let RowFields = ["OpKey"];
+  let ColFields = ["OpType"];
+  let KeyCol = ["reg"];
+  let ValueCols = [["mem"]];
+}
+
+// Return the 3-operand form of a 2-operand instruction.
+def getThreeOperandOpcode : InstrMapping {
+  let FilterClass = "InstSystemZ";
+  let RowFields = ["NumOpsKey"];
+  let ColFields = ["NumOpsValue"];
+  let KeyCol = ["2"];
+  let ValueCols = [["3"]];
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction formats
+//===----------------------------------------------------------------------===//
+//
+// Formats are specified using operand field declarations of the form:
+//
+//   bits<4> Rn   : register input or output for operand n
+//   bits<5> Vn   : vector register input or output for operand n
+//   bits<m> In   : immediate value of width m for operand n
+//   bits<4> BDn  : address operand n, which has a base and a displacement
+//   bits<m> XBDn : address operand n, which has an index, a base and a
+//                  displacement
+//   bits<m> VBDn : address operand n, which has a vector index, a base and a
+//                  displacement
+//   bits<4> Xn   : index register for address operand n
+//   bits<4> Mn   : mode value for operand n
+//
+// The operand numbers ("n" in the list above) follow the architecture manual.
+// Assembly operands sometimes have a different order; in particular, R3 is
+// often written between operands 1 and 2.
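To make the field declarations above concrete: each format class below assigns slices of an Inst bit-string from the operand fields, and the generated encoder emits that bit-string directly. A standalone C++ sketch of the packing performed by the RI format defined below; the function name and signature are illustrative only, not LLVM code:

    #include <cstdint>

    // Mirrors InstRI's assignments: Inst{31-24} = op{11-4}, Inst{23-20} = R1,
    // Inst{19-16} = op{3-0}, Inst{15-0} = I2.
    uint32_t encodeRI(uint16_t op12, unsigned r1, uint16_t i2) {
      uint32_t inst = 0;
      inst |= uint32_t((op12 >> 4) & 0xFF) << 24; // high 8 bits of the opcode
      inst |= uint32_t(r1 & 0xF) << 20;           // R1
      inst |= uint32_t(op12 & 0xF) << 16;         // low 4 bits of the opcode
      inst |= i2;                                 // 16-bit immediate I2
      return inst;
    }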
+// +//===----------------------------------------------------------------------===// + +class InstRI<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<4, outs, ins, asmstr, pattern> { + field bits<32> Inst; + field bits<32> SoftFail = 0; + + bits<4> R1; + bits<16> I2; + + let Inst{31-24} = op{11-4}; + let Inst{23-20} = R1; + let Inst{19-16} = op{3-0}; + let Inst{15-0} = I2; +} + +class InstRIEb<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> R1; + bits<4> R2; + bits<4> M3; + bits<16> RI4; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = R1; + let Inst{35-32} = R2; + let Inst{31-16} = RI4; + let Inst{15-12} = M3; + let Inst{11-8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstRIEc<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> R1; + bits<8> I2; + bits<4> M3; + bits<16> RI4; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = R1; + let Inst{35-32} = M3; + let Inst{31-16} = RI4; + let Inst{15-8} = I2; + let Inst{7-0} = op{7-0}; +} + +class InstRIEd<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> R1; + bits<4> R3; + bits<16> I2; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = R1; + let Inst{35-32} = R3; + let Inst{31-16} = I2; + let Inst{15-8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstRIEf<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> R1; + bits<4> R2; + bits<8> I3; + bits<8> I4; + bits<8> I5; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = R1; + let Inst{35-32} = R2; + let Inst{31-24} = I3; + let Inst{23-16} = I4; + let Inst{15-8} = I5; + let Inst{7-0} = op{7-0}; +} + +class InstRIL<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> R1; + bits<32> I2; + + let Inst{47-40} = op{11-4}; + let Inst{39-36} = R1; + let Inst{35-32} = op{3-0}; + let Inst{31-0} = I2; +} + +class InstRR<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<2, outs, ins, asmstr, pattern> { + field bits<16> Inst; + field bits<16> SoftFail = 0; + + bits<4> R1; + bits<4> R2; + + let Inst{15-8} = op; + let Inst{7-4} = R1; + let Inst{3-0} = R2; +} + +class InstRRD<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<4, outs, ins, asmstr, pattern> { + field bits<32> Inst; + field bits<32> SoftFail = 0; + + bits<4> R1; + bits<4> R3; + bits<4> R2; + + let Inst{31-16} = op; + let Inst{15-12} = R1; + let Inst{11-8} = 0; + let Inst{7-4} = R3; + let Inst{3-0} = R2; +} + +class InstRRE<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<4, outs, ins, asmstr, pattern> { + field bits<32> Inst; + field bits<32> SoftFail = 0; + + bits<4> R1; + bits<4> R2; + + let Inst{31-16} = op; + let Inst{15-8} = 0; + let Inst{7-4} = R1; + let Inst{3-0} = R2; +} + +class InstRRF<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<4, outs, ins, asmstr, pattern> { + field bits<32> Inst; + field bits<32> SoftFail = 0; + + bits<4> R1; + 
bits<4> R2; + bits<4> R3; + bits<4> R4; + + let Inst{31-16} = op; + let Inst{15-12} = R3; + let Inst{11-8} = R4; + let Inst{7-4} = R1; + let Inst{3-0} = R2; +} + +class InstRX<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<4, outs, ins, asmstr, pattern> { + field bits<32> Inst; + field bits<32> SoftFail = 0; + + bits<4> R1; + bits<20> XBD2; + + let Inst{31-24} = op; + let Inst{23-20} = R1; + let Inst{19-0} = XBD2; + + let HasIndex = 1; +} + +class InstRXE<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> R1; + bits<20> XBD2; + bits<4> M3; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = R1; + let Inst{35-16} = XBD2; + let Inst{15-12} = M3; + let Inst{11-8} = 0; + let Inst{7-0} = op{7-0}; + + let HasIndex = 1; +} + +class InstRXF<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> R1; + bits<4> R3; + bits<20> XBD2; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = R3; + let Inst{35-16} = XBD2; + let Inst{15-12} = R1; + let Inst{11-8} = 0; + let Inst{7-0} = op{7-0}; + + let HasIndex = 1; +} + +class InstRXY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> R1; + bits<28> XBD2; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = R1; + let Inst{35-8} = XBD2; + let Inst{7-0} = op{7-0}; + + let Has20BitOffset = 1; + let HasIndex = 1; +} + +class InstRS<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<4, outs, ins, asmstr, pattern> { + field bits<32> Inst; + field bits<32> SoftFail = 0; + + bits<4> R1; + bits<4> R3; + bits<16> BD2; + + let Inst{31-24} = op; + let Inst{23-20} = R1; + let Inst{19-16} = R3; + let Inst{15-0} = BD2; +} + +class InstRSY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> R1; + bits<4> R3; + bits<24> BD2; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = R1; + let Inst{35-32} = R3; + let Inst{31-8} = BD2; + let Inst{7-0} = op{7-0}; + + let Has20BitOffset = 1; +} + +class InstSI<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<4, outs, ins, asmstr, pattern> { + field bits<32> Inst; + field bits<32> SoftFail = 0; + + bits<16> BD1; + bits<8> I2; + + let Inst{31-24} = op; + let Inst{23-16} = I2; + let Inst{15-0} = BD1; +} + +class InstSIL<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<16> BD1; + bits<16> I2; + + let Inst{47-32} = op; + let Inst{31-16} = BD1; + let Inst{15-0} = I2; +} + +class InstSIY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<24> BD1; + bits<8> I2; + + let Inst{47-40} = op{15-8}; + let Inst{39-32} = I2; + let Inst{31-8} = BD1; + let Inst{7-0} = op{7-0}; + + let Has20BitOffset = 1; +} + +class InstSS<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<24> BDL1; 
+ bits<16> BD2; + + let Inst{47-40} = op; + let Inst{39-16} = BDL1; + let Inst{15-0} = BD2; +} + +class InstS<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<4, outs, ins, asmstr, pattern> { + field bits<32> Inst; + field bits<32> SoftFail = 0; + + bits<16> BD2; + + let Inst{31-16} = op; + let Inst{15-0} = BD2; +} + +class InstVRIa<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<16> I2; + bits<4> M3; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = 0; + let Inst{31-16} = I2; + let Inst{15-12} = M3; + let Inst{11} = V1{4}; + let Inst{10-8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstVRIb<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<8> I2; + bits<8> I3; + bits<4> M4; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = 0; + let Inst{31-24} = I2; + let Inst{23-16} = I3; + let Inst{15-12} = M4; + let Inst{11} = V1{4}; + let Inst{10-8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstVRIc<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<5> V3; + bits<16> I2; + bits<4> M4; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = V3{3-0}; + let Inst{31-16} = I2; + let Inst{15-12} = M4; + let Inst{11} = V1{4}; + let Inst{10} = V3{4}; + let Inst{9-8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstVRId<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<5> V2; + bits<5> V3; + bits<8> I4; + bits<4> M5; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = V2{3-0}; + let Inst{31-28} = V3{3-0}; + let Inst{27-24} = 0; + let Inst{23-16} = I4; + let Inst{15-12} = M5; + let Inst{11} = V1{4}; + let Inst{10} = V2{4}; + let Inst{9} = V3{4}; + let Inst{8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstVRIe<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<5> V2; + bits<12> I3; + bits<4> M4; + bits<4> M5; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = V2{3-0}; + let Inst{31-20} = I3; + let Inst{19-16} = M5; + let Inst{15-12} = M4; + let Inst{11} = V1{4}; + let Inst{10} = V2{4}; + let Inst{9-8} = 0; + let Inst{7-0} = op{7-0}; +} + +// Depending on the instruction mnemonic, certain bits may be or-ed into +// the M4 value provided as explicit operand. These are passed as m4or. 
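Concretely, the per-bit !if selections in InstVRRa below amount to a bitwise OR of the mnemonic-implied bits into the explicit operand value. A standalone C++ sketch of the equivalent computation, illustrative only and not LLVM code:

    #include <cstdint>

    // Equivalent of the four !if(...) lines in InstVRRa: a bit of the encoded
    // M4 field is 1 if the corresponding m4or bit is 1, otherwise it is taken
    // from the M4 operand supplied in the assembly.
    uint8_t encodeM4(uint8_t m4Operand, uint8_t m4Or) {
      return (m4Operand | m4Or) & 0xF; // M4 is a 4-bit field
    }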
+class InstVRRa<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern, + bits<4> m4or = 0> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<5> V2; + bits<4> M3; + bits<4> M4; + bits<4> M5; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = V2{3-0}; + let Inst{31-24} = 0; + let Inst{23-20} = M5; + let Inst{19} = !if (!eq (m4or{3}, 1), 1, M4{3}); + let Inst{18} = !if (!eq (m4or{2}, 1), 1, M4{2}); + let Inst{17} = !if (!eq (m4or{1}, 1), 1, M4{1}); + let Inst{16} = !if (!eq (m4or{0}, 1), 1, M4{0}); + let Inst{15-12} = M3; + let Inst{11} = V1{4}; + let Inst{10} = V2{4}; + let Inst{9-8} = 0; + let Inst{7-0} = op{7-0}; +} + +// Depending on the instruction mnemonic, certain bits may be or-ed into +// the M5 value provided as explicit operand. These are passed as m5or. +class InstVRRb<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern, + bits<4> m5or = 0> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<5> V2; + bits<5> V3; + bits<4> M4; + bits<4> M5; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = V2{3-0}; + let Inst{31-28} = V3{3-0}; + let Inst{27-24} = 0; + let Inst{23} = !if (!eq (m5or{3}, 1), 1, M5{3}); + let Inst{22} = !if (!eq (m5or{2}, 1), 1, M5{2}); + let Inst{21} = !if (!eq (m5or{1}, 1), 1, M5{1}); + let Inst{20} = !if (!eq (m5or{0}, 1), 1, M5{0}); + let Inst{19-16} = 0; + let Inst{15-12} = M4; + let Inst{11} = V1{4}; + let Inst{10} = V2{4}; + let Inst{9} = V3{4}; + let Inst{8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstVRRc<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<5> V2; + bits<5> V3; + bits<4> M4; + bits<4> M5; + bits<4> M6; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = V2{3-0}; + let Inst{31-28} = V3{3-0}; + let Inst{27-24} = 0; + let Inst{23-20} = M6; + let Inst{19-16} = M5; + let Inst{15-12} = M4; + let Inst{11} = V1{4}; + let Inst{10} = V2{4}; + let Inst{9} = V3{4}; + let Inst{8} = 0; + let Inst{7-0} = op{7-0}; +} + +// Depending on the instruction mnemonic, certain bits may be or-ed into +// the M6 value provided as explicit operand. These are passed as m6or. 
+class InstVRRd<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern, + bits<4> m6or = 0> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<5> V2; + bits<5> V3; + bits<5> V4; + bits<4> M5; + bits<4> M6; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = V2{3-0}; + let Inst{31-28} = V3{3-0}; + let Inst{27-24} = M5; + let Inst{23} = !if (!eq (m6or{3}, 1), 1, M6{3}); + let Inst{22} = !if (!eq (m6or{2}, 1), 1, M6{2}); + let Inst{21} = !if (!eq (m6or{1}, 1), 1, M6{1}); + let Inst{20} = !if (!eq (m6or{0}, 1), 1, M6{0}); + let Inst{19-16} = 0; + let Inst{15-12} = V4{3-0}; + let Inst{11} = V1{4}; + let Inst{10} = V2{4}; + let Inst{9} = V3{4}; + let Inst{8} = V4{4}; + let Inst{7-0} = op{7-0}; +} + +class InstVRRe<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<5> V2; + bits<5> V3; + bits<5> V4; + bits<4> M5; + bits<4> M6; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = V2{3-0}; + let Inst{31-28} = V3{3-0}; + let Inst{27-24} = M6; + let Inst{23-20} = 0; + let Inst{19-16} = M5; + let Inst{15-12} = V4{3-0}; + let Inst{11} = V1{4}; + let Inst{10} = V2{4}; + let Inst{9} = V3{4}; + let Inst{8} = V4{4}; + let Inst{7-0} = op{7-0}; +} + +class InstVRRf<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<4> R2; + bits<4> R3; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = R2; + let Inst{31-28} = R3; + let Inst{27-12} = 0; + let Inst{11} = V1{4}; + let Inst{10-8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstVRSa<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<16> BD2; + bits<5> V3; + bits<4> M4; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = V3{3-0}; + let Inst{31-16} = BD2; + let Inst{15-12} = M4; + let Inst{11} = V1{4}; + let Inst{10} = V3{4}; + let Inst{9-8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstVRSb<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<16> BD2; + bits<4> R3; + bits<4> M4; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = R3; + let Inst{31-16} = BD2; + let Inst{15-12} = M4; + let Inst{11} = V1{4}; + let Inst{10-8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstVRSc<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> R1; + bits<16> BD2; + bits<5> V3; + bits<4> M4; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = R1; + let Inst{35-32} = V3{3-0}; + let Inst{31-16} = BD2; + let Inst{15-12} = M4; + let Inst{11} = 0; + let Inst{10} = V3{4}; + let Inst{9-8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstVRV<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<21> VBD2; + bits<4> M3; + + let Inst{47-40} = op{15-8}; + 
let Inst{39-36} = V1{3-0};
+  let Inst{35-16} = VBD2{19-0};
+  let Inst{15-12} = M3;
+  let Inst{11} = V1{4};
+  let Inst{10} = VBD2{20};
+  let Inst{9-8} = 0;
+  let Inst{7-0} = op{7-0};
+}
+
+class InstVRX<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+  : InstSystemZ<6, outs, ins, asmstr, pattern> {
+  field bits<48> Inst;
+  field bits<48> SoftFail = 0;
+
+  bits<5> V1;
+  bits<20> XBD2;
+  bits<4> M3;
+
+  let Inst{47-40} = op{15-8};
+  let Inst{39-36} = V1{3-0};
+  let Inst{35-16} = XBD2;
+  let Inst{15-12} = M3;
+  let Inst{11} = V1{4};
+  let Inst{10-8} = 0;
+  let Inst{7-0} = op{7-0};
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction definitions with semantics
+//===----------------------------------------------------------------------===//
+//
+// These classes have the form [Cond]<Category><Format>, where <Format> is one
+// of the formats defined above and where <Category> describes the inputs
+// and outputs. "Cond" is used if the instruction is conditional,
+// in which case the 4-bit condition-code mask is added as a final operand.
+// <Category> can be one of:
+//
+//   Inherent:
+//     One register output operand and no input operands.
+//
+//   BranchUnary:
+//     One register output operand, one register input operand and
+//     one branch displacement. The instruction stores a modified
+//     form of the source register in the destination register and
+//     branches on the result.
+//
+//   LoadMultiple:
+//     One address input operand and two explicit output operands.
+//     The instruction loads a range of registers from the address,
+//     with the explicit operands giving the first and last register
+//     to load. Other loaded registers are added as implicit definitions.
+//
+//   StoreMultiple:
+//     Two explicit input register operands and an address operand.
+//     The instruction stores a range of registers to the address,
+//     with the explicit operands giving the first and last register
+//     to store. Other stored registers are added as implicit uses.
+//
+//   StoreLength:
+//     One value operand, one length operand and one address operand.
+//     The instruction stores the value operand to the address but
+//     doesn't write more than the number of bytes specified by the
+//     length operand.
+//
+//   Unary:
+//     One register output operand and one input operand.
+//
+//   Store:
+//     One address operand and one other input operand. The instruction
+//     stores to the address.
+//
+//   Binary:
+//     One register output operand and two input operands.
+//
+//   StoreBinary:
+//     One address operand and two other input operands. The instruction
+//     stores to the address.
+//
+//   Compare:
+//     Two input operands and an implicit CC output operand.
+//
+//   Ternary:
+//     One register output operand and three input operands.
+//
+//   Quaternary:
+//     One register output operand and four input operands.
+//
+//   LoadAndOp:
+//     One output operand and two input operands, one of which is an address.
+//     The instruction both reads from and writes to the address.
+//
+//   CmpSwap:
+//     One output operand and three input operands, one of which is an address.
+//     The instruction both reads from and writes to the address.
+//
+//   RotateSelect:
+//     One output operand and five input operands. The first two operands
+//     are registers and the other three are immediates.
+//
+//   Prefetch:
+//     One 4-bit immediate operand and one address operand. The immediate
+//     operand is 1 for a load prefetch and 2 for a store prefetch.
+// +// The format determines which input operands are tied to output operands, +// and also determines the shape of any address operand. +// +// Multiclasses of the form <Category><Format>Pair define two instructions, +// one with <Category><Format> and one with <Category><Format>Y. The name +// of the first instruction has no suffix, the name of the second has +// an extra "y". +// +//===----------------------------------------------------------------------===// + +class InherentRRE<string mnemonic, bits<16> opcode, RegisterOperand cls, + dag src> + : InstRRE<opcode, (outs cls:$R1), (ins), + mnemonic#"\t$R1", + [(set cls:$R1, src)]> { + let R2 = 0; +} + +class InherentVRIa<string mnemonic, bits<16> opcode, bits<16> value> + : InstVRIa<opcode, (outs VR128:$V1), (ins), mnemonic#"\t$V1", []> { + let I2 = value; + let M3 = 0; +} + +class BranchUnaryRI<string mnemonic, bits<12> opcode, RegisterOperand cls> + : InstRI<opcode, (outs cls:$R1), (ins cls:$R1src, brtarget16:$I2), + mnemonic##"\t$R1, $I2", []> { + let isBranch = 1; + let isTerminator = 1; + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; +} + +class LoadMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls> + : InstRSY<opcode, (outs cls:$R1, cls:$R3), (ins bdaddr20only:$BD2), + mnemonic#"\t$R1, $R3, $BD2", []> { + let mayLoad = 1; +} + +class LoadMultipleVRSa<string mnemonic, bits<16> opcode> + : InstVRSa<opcode, (outs VR128:$V1, VR128:$V3), (ins bdaddr12only:$BD2), + mnemonic#"\t$V1, $V3, $BD2", []> { + let M4 = 0; + let mayLoad = 1; +} + +class StoreRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator, + RegisterOperand cls> + : InstRIL<opcode, (outs), (ins cls:$R1, pcrel32:$I2), + mnemonic#"\t$R1, $I2", + [(operator cls:$R1, pcrel32:$I2)]> { + let mayStore = 1; + // We want PC-relative addresses to be tried ahead of BD and BDX addresses. + // However, BDXs have two extra operands and are therefore 6 units more + // complex. 
+  let AddedComplexity = 7;
+}
+
+class StoreRX<string mnemonic, bits<8> opcode, SDPatternOperator operator,
+              RegisterOperand cls, bits<5> bytes,
+              AddressingMode mode = bdxaddr12only>
+  : InstRX<opcode, (outs), (ins cls:$R1, mode:$XBD2),
+           mnemonic#"\t$R1, $XBD2",
+           [(operator cls:$R1, mode:$XBD2)]> {
+  let OpKey = mnemonic ## cls;
+  let OpType = "mem";
+  let mayStore = 1;
+  let AccessBytes = bytes;
+}
+
+class StoreRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+               RegisterOperand cls, bits<5> bytes,
+               AddressingMode mode = bdxaddr20only>
+  : InstRXY<opcode, (outs), (ins cls:$R1, mode:$XBD2),
+            mnemonic#"\t$R1, $XBD2",
+            [(operator cls:$R1, mode:$XBD2)]> {
+  let OpKey = mnemonic ## cls;
+  let OpType = "mem";
+  let mayStore = 1;
+  let AccessBytes = bytes;
+}
+
+multiclass StoreRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode,
+                       SDPatternOperator operator, RegisterOperand cls,
+                       bits<5> bytes> {
+  let DispKey = mnemonic ## #cls in {
+    let DispSize = "12" in
+      def "" : StoreRX<mnemonic, rxOpcode, operator, cls, bytes, bdxaddr12pair>;
+    let DispSize = "20" in
+      def Y : StoreRXY<mnemonic#"y", rxyOpcode, operator, cls, bytes,
+                       bdxaddr20pair>;
+  }
+}
+
+class StoreVRX<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+               TypedReg tr, bits<5> bytes, bits<4> type = 0>
+  : InstVRX<opcode, (outs), (ins tr.op:$V1, bdxaddr12only:$XBD2),
+            mnemonic#"\t$V1, $XBD2",
+            [(operator (tr.vt tr.op:$V1), bdxaddr12only:$XBD2)]> {
+  let M3 = type;
+  let mayStore = 1;
+  let AccessBytes = bytes;
+}
+
+class StoreLengthVRSb<string mnemonic, bits<16> opcode,
+                      SDPatternOperator operator, bits<5> bytes>
+  : InstVRSb<opcode, (outs), (ins VR128:$V1, GR32:$R3, bdaddr12only:$BD2),
+             mnemonic#"\t$V1, $R3, $BD2",
+             [(operator VR128:$V1, GR32:$R3, bdaddr12only:$BD2)]> {
+  let M4 = 0;
+  let mayStore = 1;
+  let AccessBytes = bytes;
+}
+
+class StoreMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls>
+  : InstRSY<opcode, (outs), (ins cls:$R1, cls:$R3, bdaddr20only:$BD2),
+            mnemonic#"\t$R1, $R3, $BD2", []> {
+  let mayStore = 1;
+}
+
+class StoreMultipleVRSa<string mnemonic, bits<16> opcode>
+  : InstVRSa<opcode, (outs), (ins VR128:$V1, VR128:$V3, bdaddr12only:$BD2),
+             mnemonic#"\t$V1, $V3, $BD2", []> {
+  let M4 = 0;
+  let mayStore = 1;
+}
+
+// StoreSI* instructions are used to store an integer to memory, but the
+// addresses are more restricted than for normal stores. If we are in the
+// situation of having to force either the address into a register or the
+// constant into a register, it's usually better to do the latter.
+// We therefore match the address in the same way as a normal store and
+// only use the StoreSI* instruction if the matched address is suitable.
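As a sketch of the trade-off described above (a hypothetical helper, not LLVM code): an SI-format store-immediate accepts only a base register plus an unsigned 12-bit displacement (20-bit signed for the SIY variant) and no index register, so when the matched address does not have that shape it is usually cheaper to materialize the constant in a register and use an ordinary store:

    #include <cstdint>

    enum class StoreImmChoice { StoreImmediate, ConstantInRegister };

    // Decide whether a store of an immediate can use the SI/SIY form directly
    // or should fall back to loading the constant into a register first.
    StoreImmChoice chooseStoreImm(int64_t disp, bool hasIndex, bool hasSIY) {
      bool fits12 = disp >= 0 && disp < (1 << 12);          // unsigned 12-bit
      bool fits20 = disp >= -(1 << 19) && disp < (1 << 19); // signed 20-bit
      if (!hasIndex && (fits12 || (hasSIY && fits20)))
        return StoreImmChoice::StoreImmediate;
      return StoreImmChoice::ConstantInRegister;
    }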
+class StoreSI<string mnemonic, bits<8> opcode, SDPatternOperator operator, + Immediate imm> + : InstSI<opcode, (outs), (ins mviaddr12pair:$BD1, imm:$I2), + mnemonic#"\t$BD1, $I2", + [(operator imm:$I2, mviaddr12pair:$BD1)]> { + let mayStore = 1; +} + +class StoreSIY<string mnemonic, bits<16> opcode, SDPatternOperator operator, + Immediate imm> + : InstSIY<opcode, (outs), (ins mviaddr20pair:$BD1, imm:$I2), + mnemonic#"\t$BD1, $I2", + [(operator imm:$I2, mviaddr20pair:$BD1)]> { + let mayStore = 1; +} + +class StoreSIL<string mnemonic, bits<16> opcode, SDPatternOperator operator, + Immediate imm> + : InstSIL<opcode, (outs), (ins mviaddr12pair:$BD1, imm:$I2), + mnemonic#"\t$BD1, $I2", + [(operator imm:$I2, mviaddr12pair:$BD1)]> { + let mayStore = 1; +} + +multiclass StoreSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode, + SDPatternOperator operator, Immediate imm> { + let DispKey = mnemonic in { + let DispSize = "12" in + def "" : StoreSI<mnemonic, siOpcode, operator, imm>; + let DispSize = "20" in + def Y : StoreSIY<mnemonic#"y", siyOpcode, operator, imm>; + } +} + +class CondStoreRSY<string mnemonic, bits<16> opcode, + RegisterOperand cls, bits<5> bytes, + AddressingMode mode = bdaddr20only> + : InstRSY<opcode, (outs), (ins cls:$R1, mode:$BD2, cond4:$valid, cond4:$R3), + mnemonic#"$R3\t$R1, $BD2", []>, + Requires<[FeatureLoadStoreOnCond]> { + let mayStore = 1; + let AccessBytes = bytes; + let CCMaskLast = 1; +} + +// Like CondStoreRSY, but used for the raw assembly form. The condition-code +// mask is the third operand rather than being part of the mnemonic. +class AsmCondStoreRSY<string mnemonic, bits<16> opcode, + RegisterOperand cls, bits<5> bytes, + AddressingMode mode = bdaddr20only> + : InstRSY<opcode, (outs), (ins cls:$R1, mode:$BD2, imm32zx4:$R3), + mnemonic#"\t$R1, $BD2, $R3", []>, + Requires<[FeatureLoadStoreOnCond]> { + let mayStore = 1; + let AccessBytes = bytes; +} + +// Like CondStoreRSY, but with a fixed CC mask. +class FixedCondStoreRSY<string mnemonic, bits<16> opcode, + RegisterOperand cls, bits<4> ccmask, bits<5> bytes, + AddressingMode mode = bdaddr20only> + : InstRSY<opcode, (outs), (ins cls:$R1, mode:$BD2), + mnemonic#"\t$R1, $BD2", []>, + Requires<[FeatureLoadStoreOnCond]> { + let mayStore = 1; + let AccessBytes = bytes; + let R3 = ccmask; +} + +class UnaryRR<string mnemonic, bits<8> opcode, SDPatternOperator operator, + RegisterOperand cls1, RegisterOperand cls2> + : InstRR<opcode, (outs cls1:$R1), (ins cls2:$R2), + mnemonic#"r\t$R1, $R2", + [(set cls1:$R1, (operator cls2:$R2))]> { + let OpKey = mnemonic ## cls1; + let OpType = "reg"; +} + +class UnaryRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRE<opcode, (outs cls1:$R1), (ins cls2:$R2), + mnemonic#"r\t$R1, $R2", + [(set cls1:$R1, (operator cls2:$R2))]> { + let OpKey = mnemonic ## cls1; + let OpType = "reg"; +} + +class UnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1, + RegisterOperand cls2> + : InstRRF<opcode, (outs cls1:$R1), (ins imm32zx4:$R3, cls2:$R2), + mnemonic#"r\t$R1, $R3, $R2", []> { + let OpKey = mnemonic ## cls1; + let OpType = "reg"; + let R4 = 0; +} + +class UnaryRRF4<string mnemonic, bits<16> opcode, RegisterOperand cls1, + RegisterOperand cls2> + : InstRRF<opcode, (outs cls1:$R1), (ins imm32zx4:$R3, cls2:$R2, imm32zx4:$R4), + mnemonic#"\t$R1, $R3, $R2, $R4", []>; + +// These instructions are generated by if conversion. The old value of R1 +// is added as an implicit use. 
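Roughly the kind of source that if conversion lowers to one of these conditional moves is shown below; the "else" value is simply the previous contents of the destination register, which is why the old value of R1 has to stay live as a use. A minimal illustration, not part of the patch:

    // A select whose false value is the destination's old contents maps onto
    // a LOCR-style conditional load: R1 is overwritten only when CC matches.
    int conditional_update(int oldValue, int newValue, bool cc) {
      return cc ? newValue : oldValue;
    }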
+class CondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1, + RegisterOperand cls2> + : InstRRF<opcode, (outs cls1:$R1), (ins cls2:$R2, cond4:$valid, cond4:$R3), + mnemonic#"r$R3\t$R1, $R2", []>, + Requires<[FeatureLoadStoreOnCond]> { + let CCMaskLast = 1; + let R4 = 0; +} + +// Like CondUnaryRRF, but used for the raw assembly form. The condition-code +// mask is the third operand rather than being part of the mnemonic. +class AsmCondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1, + RegisterOperand cls2> + : InstRRF<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2, imm32zx4:$R3), + mnemonic#"r\t$R1, $R2, $R3", []>, + Requires<[FeatureLoadStoreOnCond]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let R4 = 0; +} + +// Like CondUnaryRRF, but with a fixed CC mask. +class FixedCondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1, + RegisterOperand cls2, bits<4> ccmask> + : InstRRF<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2), + mnemonic#"\t$R1, $R2", []>, + Requires<[FeatureLoadStoreOnCond]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let R3 = ccmask; + let R4 = 0; +} + +class UnaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator, + RegisterOperand cls, Immediate imm> + : InstRI<opcode, (outs cls:$R1), (ins imm:$I2), + mnemonic#"\t$R1, $I2", + [(set cls:$R1, (operator imm:$I2))]>; + +class UnaryRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator, + RegisterOperand cls, Immediate imm> + : InstRIL<opcode, (outs cls:$R1), (ins imm:$I2), + mnemonic#"\t$R1, $I2", + [(set cls:$R1, (operator imm:$I2))]>; + +class UnaryRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator, + RegisterOperand cls> + : InstRIL<opcode, (outs cls:$R1), (ins pcrel32:$I2), + mnemonic#"\t$R1, $I2", + [(set cls:$R1, (operator pcrel32:$I2))]> { + let mayLoad = 1; + // We want PC-relative addresses to be tried ahead of BD and BDX addresses. + // However, BDXs have two extra operands and are therefore 6 units more + // complex. + let AddedComplexity = 7; +} + +class CondUnaryRSY<string mnemonic, bits<16> opcode, + SDPatternOperator operator, RegisterOperand cls, + bits<5> bytes, AddressingMode mode = bdaddr20only> + : InstRSY<opcode, (outs cls:$R1), + (ins cls:$R1src, mode:$BD2, cond4:$valid, cond4:$R3), + mnemonic#"$R3\t$R1, $BD2", + [(set cls:$R1, + (z_select_ccmask (load bdaddr20only:$BD2), cls:$R1src, + cond4:$valid, cond4:$R3))]>, + Requires<[FeatureLoadStoreOnCond]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let mayLoad = 1; + let AccessBytes = bytes; + let CCMaskLast = 1; +} + +// Like CondUnaryRSY, but used for the raw assembly form. The condition-code +// mask is the third operand rather than being part of the mnemonic. +class AsmCondUnaryRSY<string mnemonic, bits<16> opcode, + RegisterOperand cls, bits<5> bytes, + AddressingMode mode = bdaddr20only> + : InstRSY<opcode, (outs cls:$R1), (ins cls:$R1src, mode:$BD2, imm32zx4:$R3), + mnemonic#"\t$R1, $BD2, $R3", []>, + Requires<[FeatureLoadStoreOnCond]> { + let mayLoad = 1; + let AccessBytes = bytes; + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; +} + +// Like CondUnaryRSY, but with a fixed CC mask. 
+class FixedCondUnaryRSY<string mnemonic, bits<16> opcode, + RegisterOperand cls, bits<4> ccmask, bits<5> bytes, + AddressingMode mode = bdaddr20only> + : InstRSY<opcode, (outs cls:$R1), (ins cls:$R1src, mode:$BD2), + mnemonic#"\t$R1, $BD2", []>, + Requires<[FeatureLoadStoreOnCond]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let R3 = ccmask; + let mayLoad = 1; + let AccessBytes = bytes; +} + +class UnaryRX<string mnemonic, bits<8> opcode, SDPatternOperator operator, + RegisterOperand cls, bits<5> bytes, + AddressingMode mode = bdxaddr12only> + : InstRX<opcode, (outs cls:$R1), (ins mode:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(set cls:$R1, (operator mode:$XBD2))]> { + let OpKey = mnemonic ## cls; + let OpType = "mem"; + let mayLoad = 1; + let AccessBytes = bytes; +} + +class UnaryRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls, bits<5> bytes> + : InstRXE<opcode, (outs cls:$R1), (ins bdxaddr12only:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(set cls:$R1, (operator bdxaddr12only:$XBD2))]> { + let OpKey = mnemonic ## cls; + let OpType = "mem"; + let mayLoad = 1; + let AccessBytes = bytes; + let M3 = 0; +} + +class UnaryRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls, bits<5> bytes, + AddressingMode mode = bdxaddr20only> + : InstRXY<opcode, (outs cls:$R1), (ins mode:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(set cls:$R1, (operator mode:$XBD2))]> { + let OpKey = mnemonic ## cls; + let OpType = "mem"; + let mayLoad = 1; + let AccessBytes = bytes; +} + +multiclass UnaryRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode, + SDPatternOperator operator, RegisterOperand cls, + bits<5> bytes> { + let DispKey = mnemonic ## #cls in { + let DispSize = "12" in + def "" : UnaryRX<mnemonic, rxOpcode, operator, cls, bytes, bdxaddr12pair>; + let DispSize = "20" in + def Y : UnaryRXY<mnemonic#"y", rxyOpcode, operator, cls, bytes, + bdxaddr20pair>; + } +} + +class UnaryVRIa<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr, Immediate imm, bits<4> type = 0> + : InstVRIa<opcode, (outs tr.op:$V1), (ins imm:$I2), + mnemonic#"\t$V1, $I2", + [(set tr.op:$V1, (tr.vt (operator imm:$I2)))]> { + let M3 = type; +} + +class UnaryVRRa<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr1, TypedReg tr2, bits<4> type = 0, bits<4> m4 = 0, + bits<4> m5 = 0> + : InstVRRa<opcode, (outs tr1.op:$V1), (ins tr2.op:$V2), + mnemonic#"\t$V1, $V2", + [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2))))]> { + let M3 = type; + let M4 = m4; + let M5 = m5; +} + +multiclass UnaryVRRaSPair<string mnemonic, bits<16> opcode, + SDPatternOperator operator, + SDPatternOperator operator_cc, TypedReg tr1, + TypedReg tr2, bits<4> type, bits<4> modifier = 0, + bits<4> modifier_cc = 1> { + def "" : UnaryVRRa<mnemonic, opcode, operator, tr1, tr2, type, 0, modifier>; + let Defs = [CC] in + def S : UnaryVRRa<mnemonic##"s", opcode, operator_cc, tr1, tr2, type, 0, + modifier_cc>; +} + +class UnaryVRX<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr, bits<5> bytes, bits<4> type = 0> + : InstVRX<opcode, (outs tr.op:$V1), (ins bdxaddr12only:$XBD2), + mnemonic#"\t$V1, $XBD2", + [(set tr.op:$V1, (tr.vt (operator bdxaddr12only:$XBD2)))]> { + let M3 = type; + let mayLoad = 1; + let AccessBytes = bytes; +} + +class BinaryRR<string mnemonic, bits<8> opcode, SDPatternOperator operator, + RegisterOperand cls1, RegisterOperand cls2> + : InstRR<opcode, (outs cls1:$R1), (ins 
cls1:$R1src, cls2:$R2), + mnemonic#"r\t$R1, $R2", + [(set cls1:$R1, (operator cls1:$R1src, cls2:$R2))]> { + let OpKey = mnemonic ## cls1; + let OpType = "reg"; + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; +} + +class BinaryRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRE<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2), + mnemonic#"r\t$R1, $R2", + [(set cls1:$R1, (operator cls1:$R1src, cls2:$R2))]> { + let OpKey = mnemonic ## cls1; + let OpType = "reg"; + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; +} + +class BinaryRRF<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRF<opcode, (outs cls1:$R1), (ins cls1:$R3, cls2:$R2), + mnemonic#"r\t$R1, $R3, $R2", + [(set cls1:$R1, (operator cls1:$R3, cls2:$R2))]> { + let OpKey = mnemonic ## cls1; + let OpType = "reg"; + let R4 = 0; +} + +class BinaryRRFK<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRF<opcode, (outs cls1:$R1), (ins cls1:$R2, cls2:$R3), + mnemonic#"rk\t$R1, $R2, $R3", + [(set cls1:$R1, (operator cls1:$R2, cls2:$R3))]> { + let R4 = 0; +} + +multiclass BinaryRRAndK<string mnemonic, bits<8> opcode1, bits<16> opcode2, + SDPatternOperator operator, RegisterOperand cls1, + RegisterOperand cls2> { + let NumOpsKey = mnemonic in { + let NumOpsValue = "3" in + def K : BinaryRRFK<mnemonic, opcode2, null_frag, cls1, cls2>, + Requires<[FeatureDistinctOps]>; + let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in + def "" : BinaryRR<mnemonic, opcode1, operator, cls1, cls2>; + } +} + +multiclass BinaryRREAndK<string mnemonic, bits<16> opcode1, bits<16> opcode2, + SDPatternOperator operator, RegisterOperand cls1, + RegisterOperand cls2> { + let NumOpsKey = mnemonic in { + let NumOpsValue = "3" in + def K : BinaryRRFK<mnemonic, opcode2, null_frag, cls1, cls2>, + Requires<[FeatureDistinctOps]>; + let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in + def "" : BinaryRRE<mnemonic, opcode1, operator, cls1, cls2>; + } +} + +class BinaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator, + RegisterOperand cls, Immediate imm> + : InstRI<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2), + mnemonic#"\t$R1, $I2", + [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; +} + +class BinaryRIE<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls, Immediate imm> + : InstRIEd<opcode, (outs cls:$R1), (ins cls:$R3, imm:$I2), + mnemonic#"\t$R1, $R3, $I2", + [(set cls:$R1, (operator cls:$R3, imm:$I2))]>; + +multiclass BinaryRIAndK<string mnemonic, bits<12> opcode1, bits<16> opcode2, + SDPatternOperator operator, RegisterOperand cls, + Immediate imm> { + let NumOpsKey = mnemonic in { + let NumOpsValue = "3" in + def K : BinaryRIE<mnemonic##"k", opcode2, null_frag, cls, imm>, + Requires<[FeatureDistinctOps]>; + let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in + def "" : BinaryRI<mnemonic, opcode1, operator, cls, imm>; + } +} + +class BinaryRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator, + RegisterOperand cls, Immediate imm> + : InstRIL<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2), + mnemonic#"\t$R1, $I2", + [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; +} + +class BinaryRS<string 
mnemonic, bits<8> opcode, SDPatternOperator operator, + RegisterOperand cls> + : InstRS<opcode, (outs cls:$R1), (ins cls:$R1src, shift12only:$BD2), + mnemonic#"\t$R1, $BD2", + [(set cls:$R1, (operator cls:$R1src, shift12only:$BD2))]> { + let R3 = 0; + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; +} + +class BinaryRSY<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls> + : InstRSY<opcode, (outs cls:$R1), (ins cls:$R3, shift20only:$BD2), + mnemonic#"\t$R1, $R3, $BD2", + [(set cls:$R1, (operator cls:$R3, shift20only:$BD2))]>; + +multiclass BinaryRSAndK<string mnemonic, bits<8> opcode1, bits<16> opcode2, + SDPatternOperator operator, RegisterOperand cls> { + let NumOpsKey = mnemonic in { + let NumOpsValue = "3" in + def K : BinaryRSY<mnemonic##"k", opcode2, null_frag, cls>, + Requires<[FeatureDistinctOps]>; + let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in + def "" : BinaryRS<mnemonic, opcode1, operator, cls>; + } +} + +class BinaryRX<string mnemonic, bits<8> opcode, SDPatternOperator operator, + RegisterOperand cls, SDPatternOperator load, bits<5> bytes, + AddressingMode mode = bdxaddr12only> + : InstRX<opcode, (outs cls:$R1), (ins cls:$R1src, mode:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(set cls:$R1, (operator cls:$R1src, (load mode:$XBD2)))]> { + let OpKey = mnemonic ## cls; + let OpType = "mem"; + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let mayLoad = 1; + let AccessBytes = bytes; +} + +class BinaryRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls, SDPatternOperator load, bits<5> bytes> + : InstRXE<opcode, (outs cls:$R1), (ins cls:$R1src, bdxaddr12only:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(set cls:$R1, (operator cls:$R1src, + (load bdxaddr12only:$XBD2)))]> { + let OpKey = mnemonic ## cls; + let OpType = "mem"; + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let mayLoad = 1; + let AccessBytes = bytes; + let M3 = 0; +} + +class BinaryRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls, SDPatternOperator load, bits<5> bytes, + AddressingMode mode = bdxaddr20only> + : InstRXY<opcode, (outs cls:$R1), (ins cls:$R1src, mode:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(set cls:$R1, (operator cls:$R1src, (load mode:$XBD2)))]> { + let OpKey = mnemonic ## cls; + let OpType = "mem"; + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let mayLoad = 1; + let AccessBytes = bytes; +} + +multiclass BinaryRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode, + SDPatternOperator operator, RegisterOperand cls, + SDPatternOperator load, bits<5> bytes> { + let DispKey = mnemonic ## #cls in { + let DispSize = "12" in + def "" : BinaryRX<mnemonic, rxOpcode, operator, cls, load, bytes, + bdxaddr12pair>; + let DispSize = "20" in + def Y : BinaryRXY<mnemonic#"y", rxyOpcode, operator, cls, load, bytes, + bdxaddr20pair>; + } +} + +class BinarySI<string mnemonic, bits<8> opcode, SDPatternOperator operator, + Operand imm, AddressingMode mode = bdaddr12only> + : InstSI<opcode, (outs), (ins mode:$BD1, imm:$I2), + mnemonic#"\t$BD1, $I2", + [(store (operator (load mode:$BD1), imm:$I2), mode:$BD1)]> { + let mayLoad = 1; + let mayStore = 1; +} + +class BinarySIY<string mnemonic, bits<16> opcode, SDPatternOperator operator, + Operand imm, AddressingMode mode = bdaddr20only> + : InstSIY<opcode, (outs), (ins mode:$BD1, imm:$I2), + mnemonic#"\t$BD1, $I2", + [(store (operator (load mode:$BD1), imm:$I2), mode:$BD1)]> { + 
let mayLoad = 1; + let mayStore = 1; +} + +multiclass BinarySIPair<string mnemonic, bits<8> siOpcode, + bits<16> siyOpcode, SDPatternOperator operator, + Operand imm> { + let DispKey = mnemonic ## #cls in { + let DispSize = "12" in + def "" : BinarySI<mnemonic, siOpcode, operator, imm, bdaddr12pair>; + let DispSize = "20" in + def Y : BinarySIY<mnemonic#"y", siyOpcode, operator, imm, bdaddr20pair>; + } +} + +class BinaryVRIb<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr, bits<4> type> + : InstVRIb<opcode, (outs tr.op:$V1), (ins imm32zx8:$I2, imm32zx8:$I3), + mnemonic#"\t$V1, $I2, $I3", + [(set tr.op:$V1, (tr.vt (operator imm32zx8:$I2, imm32zx8:$I3)))]> { + let M4 = type; +} + +class BinaryVRIc<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr1, TypedReg tr2, bits<4> type> + : InstVRIc<opcode, (outs tr1.op:$V1), (ins tr2.op:$V3, imm32zx16:$I2), + mnemonic#"\t$V1, $V3, $I2", + [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V3), + imm32zx16:$I2)))]> { + let M4 = type; +} + +class BinaryVRIe<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr1, TypedReg tr2, bits<4> type, bits<4> m5> + : InstVRIe<opcode, (outs tr1.op:$V1), (ins tr2.op:$V2, imm32zx12:$I3), + mnemonic#"\t$V1, $V2, $I3", + [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2), + imm32zx12:$I3)))]> { + let M4 = type; + let M5 = m5; +} + +class BinaryVRRa<string mnemonic, bits<16> opcode> + : InstVRRa<opcode, (outs VR128:$V1), (ins VR128:$V2, imm32zx4:$M3), + mnemonic#"\t$V1, $V2, $M3", []> { + let M4 = 0; + let M5 = 0; +} + +class BinaryVRRb<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr1, TypedReg tr2, bits<4> type = 0, + bits<4> modifier = 0> + : InstVRRb<opcode, (outs tr1.op:$V1), (ins tr2.op:$V2, tr2.op:$V3), + mnemonic#"\t$V1, $V2, $V3", + [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2), + (tr2.vt tr2.op:$V3))))]> { + let M4 = type; + let M5 = modifier; +} + +// Declare a pair of instructions, one which sets CC and one which doesn't. +// The CC-setting form ends with "S" and sets the low bit of M5. 
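A standalone sketch of that convention, with illustrative names and values only: the plain form of an *SPair multiclass typically encodes M5 from its "modifier" argument with the low bit clear, while the CC-setting "S" form uses "modifier_cc", the same value with the low bit set:

    #include <cstdint>

    // The low bit of M5 selects whether the instruction also sets CC.
    uint8_t m5PlainForm(uint8_t modifier)     { return modifier & 0xE; }
    uint8_t m5CCSettingForm(uint8_t modifier) { return (modifier & 0xE) | 1; }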
+multiclass BinaryVRRbSPair<string mnemonic, bits<16> opcode, + SDPatternOperator operator, + SDPatternOperator operator_cc, TypedReg tr1, + TypedReg tr2, bits<4> type, + bits<4> modifier = 0, bits<4> modifier_cc = 1> { + def "" : BinaryVRRb<mnemonic, opcode, operator, tr1, tr2, type, modifier>; + let Defs = [CC] in + def S : BinaryVRRb<mnemonic##"s", opcode, operator_cc, tr1, tr2, type, + modifier_cc>; +} + +class BinaryVRRc<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr1, TypedReg tr2, bits<4> type = 0, bits<4> m5 = 0, + bits<4> m6 = 0> + : InstVRRc<opcode, (outs tr1.op:$V1), (ins tr2.op:$V2, tr2.op:$V3), + mnemonic#"\t$V1, $V2, $V3", + [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2), + (tr2.vt tr2.op:$V3))))]> { + let M4 = type; + let M5 = m5; + let M6 = m6; +} + +multiclass BinaryVRRcSPair<string mnemonic, bits<16> opcode, + SDPatternOperator operator, + SDPatternOperator operator_cc, TypedReg tr1, + TypedReg tr2, bits<4> type, bits<4> m5, + bits<4> modifier = 0, bits<4> modifier_cc = 1> { + def "" : BinaryVRRc<mnemonic, opcode, operator, tr1, tr2, type, m5, modifier>; + let Defs = [CC] in + def S : BinaryVRRc<mnemonic##"s", opcode, operator_cc, tr1, tr2, type, + m5, modifier_cc>; +} + +class BinaryVRRf<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr> + : InstVRRf<opcode, (outs tr.op:$V1), (ins GR64:$R2, GR64:$R3), + mnemonic#"\t$V1, $R2, $R3", + [(set tr.op:$V1, (tr.vt (operator GR64:$R2, GR64:$R3)))]>; + +class BinaryVRSa<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr1, TypedReg tr2, bits<4> type> + : InstVRSa<opcode, (outs tr1.op:$V1), (ins tr2.op:$V3, shift12only:$BD2), + mnemonic#"\t$V1, $V3, $BD2", + [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V3), + shift12only:$BD2)))]> { + let M4 = type; +} + +class BinaryVRSb<string mnemonic, bits<16> opcode, SDPatternOperator operator, + bits<5> bytes> + : InstVRSb<opcode, (outs VR128:$V1), (ins GR32:$R3, bdaddr12only:$BD2), + mnemonic#"\t$V1, $R3, $BD2", + [(set VR128:$V1, (operator GR32:$R3, bdaddr12only:$BD2))]> { + let M4 = 0; + let mayLoad = 1; + let AccessBytes = bytes; +} + +class BinaryVRSc<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr, bits<4> type> + : InstVRSc<opcode, (outs GR64:$R1), (ins tr.op:$V3, shift12only:$BD2), + mnemonic#"\t$R1, $V3, $BD2", + [(set GR64:$R1, (operator (tr.vt tr.op:$V3), shift12only:$BD2))]> { + let M4 = type; +} + +class BinaryVRX<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr, bits<5> bytes> + : InstVRX<opcode, (outs VR128:$V1), (ins bdxaddr12only:$XBD2, imm32zx4:$M3), + mnemonic#"\t$V1, $XBD2, $M3", + [(set tr.op:$V1, (tr.vt (operator bdxaddr12only:$XBD2, + imm32zx4:$M3)))]> { + let mayLoad = 1; + let AccessBytes = bytes; +} + +class StoreBinaryVRV<string mnemonic, bits<16> opcode, bits<5> bytes, + Immediate index> + : InstVRV<opcode, (outs), (ins VR128:$V1, bdvaddr12only:$VBD2, index:$M3), + mnemonic#"\t$V1, $VBD2, $M3", []> { + let mayStore = 1; + let AccessBytes = bytes; +} + +class StoreBinaryVRX<string mnemonic, bits<16> opcode, + SDPatternOperator operator, TypedReg tr, bits<5> bytes, + Immediate index> + : InstVRX<opcode, (outs), (ins tr.op:$V1, bdxaddr12only:$XBD2, index:$M3), + mnemonic#"\t$V1, $XBD2, $M3", + [(operator (tr.vt tr.op:$V1), bdxaddr12only:$XBD2, index:$M3)]> { + let mayStore = 1; + let AccessBytes = bytes; +} + +class CompareRR<string mnemonic, bits<8> opcode, SDPatternOperator operator, + RegisterOperand cls1, 
RegisterOperand cls2> + : InstRR<opcode, (outs), (ins cls1:$R1, cls2:$R2), + mnemonic#"r\t$R1, $R2", + [(operator cls1:$R1, cls2:$R2)]> { + let OpKey = mnemonic ## cls1; + let OpType = "reg"; + let isCompare = 1; +} + +class CompareRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRE<opcode, (outs), (ins cls1:$R1, cls2:$R2), + mnemonic#"r\t$R1, $R2", + [(operator cls1:$R1, cls2:$R2)]> { + let OpKey = mnemonic ## cls1; + let OpType = "reg"; + let isCompare = 1; +} + +class CompareRI<string mnemonic, bits<12> opcode, SDPatternOperator operator, + RegisterOperand cls, Immediate imm> + : InstRI<opcode, (outs), (ins cls:$R1, imm:$I2), + mnemonic#"\t$R1, $I2", + [(operator cls:$R1, imm:$I2)]> { + let isCompare = 1; +} + +class CompareRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator, + RegisterOperand cls, Immediate imm> + : InstRIL<opcode, (outs), (ins cls:$R1, imm:$I2), + mnemonic#"\t$R1, $I2", + [(operator cls:$R1, imm:$I2)]> { + let isCompare = 1; +} + +class CompareRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator, + RegisterOperand cls, SDPatternOperator load> + : InstRIL<opcode, (outs), (ins cls:$R1, pcrel32:$I2), + mnemonic#"\t$R1, $I2", + [(operator cls:$R1, (load pcrel32:$I2))]> { + let isCompare = 1; + let mayLoad = 1; + // We want PC-relative addresses to be tried ahead of BD and BDX addresses. + // However, BDXs have two extra operands and are therefore 6 units more + // complex. + let AddedComplexity = 7; +} + +class CompareRX<string mnemonic, bits<8> opcode, SDPatternOperator operator, + RegisterOperand cls, SDPatternOperator load, bits<5> bytes, + AddressingMode mode = bdxaddr12only> + : InstRX<opcode, (outs), (ins cls:$R1, mode:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(operator cls:$R1, (load mode:$XBD2))]> { + let OpKey = mnemonic ## cls; + let OpType = "mem"; + let isCompare = 1; + let mayLoad = 1; + let AccessBytes = bytes; +} + +class CompareRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls, SDPatternOperator load, bits<5> bytes> + : InstRXE<opcode, (outs), (ins cls:$R1, bdxaddr12only:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(operator cls:$R1, (load bdxaddr12only:$XBD2))]> { + let OpKey = mnemonic ## cls; + let OpType = "mem"; + let isCompare = 1; + let mayLoad = 1; + let AccessBytes = bytes; + let M3 = 0; +} + +class CompareRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls, SDPatternOperator load, bits<5> bytes, + AddressingMode mode = bdxaddr20only> + : InstRXY<opcode, (outs), (ins cls:$R1, mode:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(operator cls:$R1, (load mode:$XBD2))]> { + let OpKey = mnemonic ## cls; + let OpType = "mem"; + let isCompare = 1; + let mayLoad = 1; + let AccessBytes = bytes; +} + +multiclass CompareRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode, + SDPatternOperator operator, RegisterOperand cls, + SDPatternOperator load, bits<5> bytes> { + let DispKey = mnemonic ## #cls in { + let DispSize = "12" in + def "" : CompareRX<mnemonic, rxOpcode, operator, cls, + load, bytes, bdxaddr12pair>; + let DispSize = "20" in + def Y : CompareRXY<mnemonic#"y", rxyOpcode, operator, cls, + load, bytes, bdxaddr20pair>; + } +} + +class CompareSI<string mnemonic, bits<8> opcode, SDPatternOperator operator, + SDPatternOperator load, Immediate imm, + AddressingMode mode = bdaddr12only> + : InstSI<opcode, (outs), (ins mode:$BD1, imm:$I2), + mnemonic#"\t$BD1, $I2", + [(operator (load mode:$BD1), 
imm:$I2)]> { + let isCompare = 1; + let mayLoad = 1; +} + +class CompareSIL<string mnemonic, bits<16> opcode, SDPatternOperator operator, + SDPatternOperator load, Immediate imm> + : InstSIL<opcode, (outs), (ins bdaddr12only:$BD1, imm:$I2), + mnemonic#"\t$BD1, $I2", + [(operator (load bdaddr12only:$BD1), imm:$I2)]> { + let isCompare = 1; + let mayLoad = 1; +} + +class CompareSIY<string mnemonic, bits<16> opcode, SDPatternOperator operator, + SDPatternOperator load, Immediate imm, + AddressingMode mode = bdaddr20only> + : InstSIY<opcode, (outs), (ins mode:$BD1, imm:$I2), + mnemonic#"\t$BD1, $I2", + [(operator (load mode:$BD1), imm:$I2)]> { + let isCompare = 1; + let mayLoad = 1; +} + +multiclass CompareSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode, + SDPatternOperator operator, SDPatternOperator load, + Immediate imm> { + let DispKey = mnemonic in { + let DispSize = "12" in + def "" : CompareSI<mnemonic, siOpcode, operator, load, imm, bdaddr12pair>; + let DispSize = "20" in + def Y : CompareSIY<mnemonic#"y", siyOpcode, operator, load, imm, + bdaddr20pair>; + } +} + +class CompareVRRa<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr, bits<4> type> + : InstVRRa<opcode, (outs), (ins tr.op:$V1, tr.op:$V2), + mnemonic#"\t$V1, $V2", + [(operator (tr.vt tr.op:$V1), (tr.vt tr.op:$V2))]> { + let isCompare = 1; + let M3 = type; + let M4 = 0; + let M5 = 0; +} + +class TernaryRRD<string mnemonic, bits<16> opcode, + SDPatternOperator operator, RegisterOperand cls> + : InstRRD<opcode, (outs cls:$R1), (ins cls:$R1src, cls:$R3, cls:$R2), + mnemonic#"r\t$R1, $R3, $R2", + [(set cls:$R1, (operator cls:$R1src, cls:$R3, cls:$R2))]> { + let OpKey = mnemonic ## cls; + let OpType = "reg"; + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; +} + +class TernaryRXF<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls, SDPatternOperator load, bits<5> bytes> + : InstRXF<opcode, (outs cls:$R1), + (ins cls:$R1src, cls:$R3, bdxaddr12only:$XBD2), + mnemonic#"\t$R1, $R3, $XBD2", + [(set cls:$R1, (operator cls:$R1src, cls:$R3, + (load bdxaddr12only:$XBD2)))]> { + let OpKey = mnemonic ## cls; + let OpType = "mem"; + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let mayLoad = 1; + let AccessBytes = bytes; +} + +class TernaryVRIa<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr1, TypedReg tr2, Immediate imm, Immediate index> + : InstVRIa<opcode, (outs tr1.op:$V1), (ins tr2.op:$V1src, imm:$I2, index:$M3), + mnemonic#"\t$V1, $I2, $M3", + [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V1src), + imm:$I2, index:$M3)))]> { + let Constraints = "$V1 = $V1src"; + let DisableEncoding = "$V1src"; +} + +class TernaryVRId<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr1, TypedReg tr2, bits<4> type> + : InstVRId<opcode, (outs tr1.op:$V1), + (ins tr2.op:$V2, tr2.op:$V3, imm32zx8:$I4), + mnemonic#"\t$V1, $V2, $V3, $I4", + [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2), + (tr2.vt tr2.op:$V3), + imm32zx8:$I4)))]> { + let M5 = type; +} + +class TernaryVRRa<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr1, TypedReg tr2, bits<4> type, bits<4> m4or> + : InstVRRa<opcode, (outs tr1.op:$V1), + (ins tr2.op:$V2, imm32zx4:$M4, imm32zx4:$M5), + mnemonic#"\t$V1, $V2, $M4, $M5", + [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2), + imm32zx4:$M4, + imm32zx4:$M5)))], + m4or> { + let M3 = type; +} + +class TernaryVRRb<string mnemonic, bits<16> 
opcode, SDPatternOperator operator, + TypedReg tr1, TypedReg tr2, bits<4> type, + SDPatternOperator m5mask, bits<4> m5or> + : InstVRRb<opcode, (outs tr1.op:$V1), + (ins tr2.op:$V2, tr2.op:$V3, m5mask:$M5), + mnemonic#"\t$V1, $V2, $V3, $M5", + [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2), + (tr2.vt tr2.op:$V3), + m5mask:$M5)))], + m5or> { + let M4 = type; +} + +multiclass TernaryVRRbSPair<string mnemonic, bits<16> opcode, + SDPatternOperator operator, + SDPatternOperator operator_cc, TypedReg tr1, + TypedReg tr2, bits<4> type, bits<4> m5or> { + def "" : TernaryVRRb<mnemonic, opcode, operator, tr1, tr2, type, + imm32zx4even, !and (m5or, 14)>; + def : InstAlias<mnemonic#"\t$V1, $V2, $V3", + (!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2, + tr2.op:$V3, 0)>; + let Defs = [CC] in + def S : TernaryVRRb<mnemonic##"s", opcode, operator_cc, tr1, tr2, type, + imm32zx4even, !add(!and (m5or, 14), 1)>; + def : InstAlias<mnemonic#"s\t$V1, $V2, $V3", + (!cast<Instruction>(NAME#"S") tr1.op:$V1, tr2.op:$V2, + tr2.op:$V3, 0)>; +} + +class TernaryVRRc<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr1, TypedReg tr2> + : InstVRRc<opcode, (outs tr1.op:$V1), + (ins tr2.op:$V2, tr2.op:$V3, imm32zx4:$M4), + mnemonic#"\t$V1, $V2, $V3, $M4", + [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2), + (tr2.vt tr2.op:$V3), + imm32zx4:$M4)))]> { + let M5 = 0; + let M6 = 0; +} + +class TernaryVRRd<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr1, TypedReg tr2, bits<4> type = 0> + : InstVRRd<opcode, (outs tr1.op:$V1), + (ins tr2.op:$V2, tr2.op:$V3, tr1.op:$V4), + mnemonic#"\t$V1, $V2, $V3, $V4", + [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2), + (tr2.vt tr2.op:$V3), + (tr1.vt tr1.op:$V4))))]> { + let M5 = type; + let M6 = 0; +} + +class TernaryVRRe<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr1, TypedReg tr2, bits<4> m5 = 0, bits<4> type = 0> + : InstVRRe<opcode, (outs tr1.op:$V1), + (ins tr2.op:$V2, tr2.op:$V3, tr1.op:$V4), + mnemonic#"\t$V1, $V2, $V3, $V4", + [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2), + (tr2.vt tr2.op:$V3), + (tr1.vt tr1.op:$V4))))]> { + let M5 = m5; + let M6 = type; +} + +class TernaryVRSb<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr1, TypedReg tr2, RegisterOperand cls, bits<4> type> + : InstVRSb<opcode, (outs tr1.op:$V1), + (ins tr2.op:$V1src, cls:$R3, shift12only:$BD2), + mnemonic#"\t$V1, $R3, $BD2", + [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V1src), + cls:$R3, + shift12only:$BD2)))]> { + let Constraints = "$V1 = $V1src"; + let DisableEncoding = "$V1src"; + let M4 = type; +} + +class TernaryVRV<string mnemonic, bits<16> opcode, bits<5> bytes, + Immediate index> + : InstVRV<opcode, (outs VR128:$V1), + (ins VR128:$V1src, bdvaddr12only:$VBD2, index:$M3), + mnemonic#"\t$V1, $VBD2, $M3", []> { + let Constraints = "$V1 = $V1src"; + let DisableEncoding = "$V1src"; + let mayLoad = 1; + let AccessBytes = bytes; +} + +class TernaryVRX<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr1, TypedReg tr2, bits<5> bytes, Immediate index> + : InstVRX<opcode, (outs tr1.op:$V1), + (ins tr2.op:$V1src, bdxaddr12only:$XBD2, index:$M3), + mnemonic#"\t$V1, $XBD2, $M3", + [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V1src), + bdxaddr12only:$XBD2, + index:$M3)))]> { + let Constraints = "$V1 = $V1src"; + let DisableEncoding = "$V1src"; + let mayLoad = 1; + let AccessBytes = bytes; +} + +class QuaternaryVRId<string mnemonic, bits<16> 
opcode, SDPatternOperator operator, + TypedReg tr1, TypedReg tr2, bits<4> type> + : InstVRId<opcode, (outs tr1.op:$V1), + (ins tr2.op:$V1src, tr2.op:$V2, tr2.op:$V3, imm32zx8:$I4), + mnemonic#"\t$V1, $V2, $V3, $I4", + [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V1src), + (tr2.vt tr2.op:$V2), + (tr2.vt tr2.op:$V3), + imm32zx8:$I4)))]> { + let Constraints = "$V1 = $V1src"; + let DisableEncoding = "$V1src"; + let M5 = type; +} + +class QuaternaryVRRd<string mnemonic, bits<16> opcode, + SDPatternOperator operator, TypedReg tr1, TypedReg tr2, + bits<4> type, SDPatternOperator m6mask, bits<4> m6or> + : InstVRRd<opcode, (outs tr1.op:$V1), + (ins tr2.op:$V2, tr2.op:$V3, tr2.op:$V4, m6mask:$M6), + mnemonic#"\t$V1, $V2, $V3, $V4, $M6", + [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2), + (tr2.vt tr2.op:$V3), + (tr2.vt tr2.op:$V4), + m6mask:$M6)))], + m6or> { + let M5 = type; +} + +multiclass QuaternaryVRRdSPair<string mnemonic, bits<16> opcode, + SDPatternOperator operator, + SDPatternOperator operator_cc, TypedReg tr1, + TypedReg tr2, bits<4> type, bits<4> m6or> { + def "" : QuaternaryVRRd<mnemonic, opcode, operator, tr1, tr2, type, + imm32zx4even, !and (m6or, 14)>; + def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $V4", + (!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2, + tr2.op:$V3, tr2.op:$V4, 0)>; + let Defs = [CC] in + def S : QuaternaryVRRd<mnemonic##"s", opcode, operator_cc, tr1, tr2, type, + imm32zx4even, !add (!and (m6or, 14), 1)>; + def : InstAlias<mnemonic#"s\t$V1, $V2, $V3, $V4", + (!cast<Instruction>(NAME#"S") tr1.op:$V1, tr2.op:$V2, + tr2.op:$V3, tr2.op:$V4, 0)>; +} + +class LoadAndOpRSY<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls, AddressingMode mode = bdaddr20only> + : InstRSY<opcode, (outs cls:$R1), (ins cls:$R3, mode:$BD2), + mnemonic#"\t$R1, $R3, $BD2", + [(set cls:$R1, (operator mode:$BD2, cls:$R3))]> { + let mayLoad = 1; + let mayStore = 1; +} + +class CmpSwapRS<string mnemonic, bits<8> opcode, SDPatternOperator operator, + RegisterOperand cls, AddressingMode mode = bdaddr12only> + : InstRS<opcode, (outs cls:$R1), (ins cls:$R1src, cls:$R3, mode:$BD2), + mnemonic#"\t$R1, $R3, $BD2", + [(set cls:$R1, (operator mode:$BD2, cls:$R1src, cls:$R3))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let mayLoad = 1; + let mayStore = 1; +} + +class CmpSwapRSY<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls, AddressingMode mode = bdaddr20only> + : InstRSY<opcode, (outs cls:$R1), (ins cls:$R1src, cls:$R3, mode:$BD2), + mnemonic#"\t$R1, $R3, $BD2", + [(set cls:$R1, (operator mode:$BD2, cls:$R1src, cls:$R3))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let mayLoad = 1; + let mayStore = 1; +} + +multiclass CmpSwapRSPair<string mnemonic, bits<8> rsOpcode, bits<16> rsyOpcode, + SDPatternOperator operator, RegisterOperand cls> { + let DispKey = mnemonic ## #cls in { + let DispSize = "12" in + def "" : CmpSwapRS<mnemonic, rsOpcode, operator, cls, bdaddr12pair>; + let DispSize = "20" in + def Y : CmpSwapRSY<mnemonic#"y", rsyOpcode, operator, cls, bdaddr20pair>; + } +} + +class RotateSelectRIEf<string mnemonic, bits<16> opcode, RegisterOperand cls1, + RegisterOperand cls2> + : InstRIEf<opcode, (outs cls1:$R1), + (ins cls1:$R1src, cls2:$R2, imm32zx8:$I3, imm32zx8:$I4, + imm32zx6:$I5), + mnemonic#"\t$R1, $R2, $I3, $I4, $I5", []> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; +} + +class PrefetchRXY<string mnemonic, bits<16> opcode, 
SDPatternOperator operator> + : InstRXY<opcode, (outs), (ins imm32zx4:$R1, bdxaddr20only:$XBD2), + mnemonic##"\t$R1, $XBD2", + [(operator imm32zx4:$R1, bdxaddr20only:$XBD2)]>; + +class PrefetchRILPC<string mnemonic, bits<12> opcode, + SDPatternOperator operator> + : InstRIL<opcode, (outs), (ins imm32zx4:$R1, pcrel32:$I2), + mnemonic##"\t$R1, $I2", + [(operator imm32zx4:$R1, pcrel32:$I2)]> { + // We want PC-relative addresses to be tried ahead of BD and BDX addresses. + // However, BDXs have two extra operands and are therefore 6 units more + // complex. + let AddedComplexity = 7; +} + +// A floating-point load-and test operation. Create both a normal unary +// operation and one that acts as a comparison against zero. +// Note that the comparison against zero operation is not available if we +// have vector support, since load-and-test instructions will partially +// clobber the target (vector) register. +multiclass LoadAndTestRRE<string mnemonic, bits<16> opcode, + RegisterOperand cls> { + def "" : UnaryRRE<mnemonic, opcode, null_frag, cls, cls>; + let isCodeGenOnly = 1, Predicates = [FeatureNoVector] in + def Compare : CompareRRE<mnemonic, opcode, null_frag, cls, cls>; +} + +//===----------------------------------------------------------------------===// +// Pseudo instructions +//===----------------------------------------------------------------------===// +// +// Convenience instructions that get lowered to real instructions +// by either SystemZTargetLowering::EmitInstrWithCustomInserter() +// or SystemZInstrInfo::expandPostRAPseudo(). +// +//===----------------------------------------------------------------------===// + +class Pseudo<dag outs, dag ins, list<dag> pattern> + : InstSystemZ<0, outs, ins, "", pattern> { + let isPseudo = 1; + let isCodeGenOnly = 1; +} + +// Like UnaryRI, but expanded after RA depending on the choice of register. +class UnaryRIPseudo<SDPatternOperator operator, RegisterOperand cls, + Immediate imm> + : Pseudo<(outs cls:$R1), (ins imm:$I2), + [(set cls:$R1, (operator imm:$I2))]>; + +// Like UnaryRXY, but expanded after RA depending on the choice of register. +class UnaryRXYPseudo<string key, SDPatternOperator operator, + RegisterOperand cls, bits<5> bytes, + AddressingMode mode = bdxaddr20only> + : Pseudo<(outs cls:$R1), (ins mode:$XBD2), + [(set cls:$R1, (operator mode:$XBD2))]> { + let OpKey = key ## cls; + let OpType = "mem"; + let mayLoad = 1; + let Has20BitOffset = 1; + let HasIndex = 1; + let AccessBytes = bytes; +} + +// Like UnaryRR, but expanded after RA depending on the choice of registers. +class UnaryRRPseudo<string key, SDPatternOperator operator, + RegisterOperand cls1, RegisterOperand cls2> + : Pseudo<(outs cls1:$R1), (ins cls2:$R2), + [(set cls1:$R1, (operator cls2:$R2))]> { + let OpKey = key ## cls1; + let OpType = "reg"; +} + +// Like BinaryRI, but expanded after RA depending on the choice of register. +class BinaryRIPseudo<SDPatternOperator operator, RegisterOperand cls, + Immediate imm> + : Pseudo<(outs cls:$R1), (ins cls:$R1src, imm:$I2), + [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> { + let Constraints = "$R1 = $R1src"; +} + +// Like BinaryRIE, but expanded after RA depending on the choice of register. +class BinaryRIEPseudo<SDPatternOperator operator, RegisterOperand cls, + Immediate imm> + : Pseudo<(outs cls:$R1), (ins cls:$R3, imm:$I2), + [(set cls:$R1, (operator cls:$R3, imm:$I2))]>; + +// Like BinaryRIAndK, but expanded after RA depending on the choice of register. 
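+// For example, the AHIMux/AHIMuxK pair is created from a multiclass of this +// kind; after register allocation SystemZInstrInfo::expandRIEPseudo() turns +// AHIMuxK into AHIK when both operands end up in low GR32s, and otherwise +// into a register move followed by AHI or AIH.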
+multiclass BinaryRIAndKPseudo<string key, SDPatternOperator operator, + RegisterOperand cls, Immediate imm> { + let NumOpsKey = key in { + let NumOpsValue = "3" in + def K : BinaryRIEPseudo<null_frag, cls, imm>, + Requires<[FeatureHighWord, FeatureDistinctOps]>; + let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in + def "" : BinaryRIPseudo<operator, cls, imm>, + Requires<[FeatureHighWord]>; + } +} + +// Like CompareRI, but expanded after RA depending on the choice of register. +class CompareRIPseudo<SDPatternOperator operator, RegisterOperand cls, + Immediate imm> + : Pseudo<(outs), (ins cls:$R1, imm:$I2), [(operator cls:$R1, imm:$I2)]>; + +// Like CompareRXY, but expanded after RA depending on the choice of register. +class CompareRXYPseudo<SDPatternOperator operator, RegisterOperand cls, + SDPatternOperator load, bits<5> bytes, + AddressingMode mode = bdxaddr20only> + : Pseudo<(outs), (ins cls:$R1, mode:$XBD2), + [(operator cls:$R1, (load mode:$XBD2))]> { + let mayLoad = 1; + let Has20BitOffset = 1; + let HasIndex = 1; + let AccessBytes = bytes; +} + +// Like StoreRXY, but expanded after RA depending on the choice of register. +class StoreRXYPseudo<SDPatternOperator operator, RegisterOperand cls, + bits<5> bytes, AddressingMode mode = bdxaddr20only> + : Pseudo<(outs), (ins cls:$R1, mode:$XBD2), + [(operator cls:$R1, mode:$XBD2)]> { + let mayStore = 1; + let Has20BitOffset = 1; + let HasIndex = 1; + let AccessBytes = bytes; +} + +// Like RotateSelectRIEf, but expanded after RA depending on the choice +// of registers. +class RotateSelectRIEfPseudo<RegisterOperand cls1, RegisterOperand cls2> + : Pseudo<(outs cls1:$R1), + (ins cls1:$R1src, cls2:$R2, imm32zx8:$I3, imm32zx8:$I4, + imm32zx6:$I5), + []> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; +} + +// Implements "$dst = $cc & (8 >> CC) ? $src1 : $src2", where CC is +// the value of the PSW's 2-bit condition code field. +class SelectWrapper<RegisterOperand cls> + : Pseudo<(outs cls:$dst), + (ins cls:$src1, cls:$src2, imm32zx4:$valid, imm32zx4:$cc), + [(set cls:$dst, (z_select_ccmask cls:$src1, cls:$src2, + imm32zx4:$valid, imm32zx4:$cc))]> { + let usesCustomInserter = 1; + // Although the instructions used by these nodes do not in themselves + // change CC, the insertion requires new blocks, and CC cannot be live + // across them. + let Defs = [CC]; + let Uses = [CC]; +} + +// Stores $new to $addr if $cc is true ("" case) or false (Inv case). +multiclass CondStores<RegisterOperand cls, SDPatternOperator store, + SDPatternOperator load, AddressingMode mode> { + let Defs = [CC], Uses = [CC], usesCustomInserter = 1 in { + def "" : Pseudo<(outs), + (ins cls:$new, mode:$addr, imm32zx4:$valid, imm32zx4:$cc), + [(store (z_select_ccmask cls:$new, (load mode:$addr), + imm32zx4:$valid, imm32zx4:$cc), + mode:$addr)]>; + def Inv : Pseudo<(outs), + (ins cls:$new, mode:$addr, imm32zx4:$valid, imm32zx4:$cc), + [(store (z_select_ccmask (load mode:$addr), cls:$new, + imm32zx4:$valid, imm32zx4:$cc), + mode:$addr)]>; + } +} + +// OPERATOR is ATOMIC_SWAP or an ATOMIC_LOAD_* operation. PAT and OPERAND +// describe the second (non-memory) operand. 
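+// The pseudo has usesCustomInserter set, so it is expanded after instruction +// selection by SystemZTargetLowering::EmitInstrWithCustomInserter(), typically +// into a load / operate / COMPARE AND SWAP retry loop rather than a single +// instruction.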
+class AtomicLoadBinary<SDPatternOperator operator, RegisterOperand cls, + dag pat, DAGOperand operand> + : Pseudo<(outs cls:$dst), (ins bdaddr20only:$ptr, operand:$src2), + [(set cls:$dst, (operator bdaddr20only:$ptr, pat))]> { + let Defs = [CC]; + let Has20BitOffset = 1; + let mayLoad = 1; + let mayStore = 1; + let usesCustomInserter = 1; +} + +// Specializations of AtomicLoadBinary. +class AtomicLoadBinaryReg32<SDPatternOperator operator> + : AtomicLoadBinary<operator, GR32, (i32 GR32:$src2), GR32>; +class AtomicLoadBinaryImm32<SDPatternOperator operator, Immediate imm> + : AtomicLoadBinary<operator, GR32, (i32 imm:$src2), imm>; +class AtomicLoadBinaryReg64<SDPatternOperator operator> + : AtomicLoadBinary<operator, GR64, (i64 GR64:$src2), GR64>; +class AtomicLoadBinaryImm64<SDPatternOperator operator, Immediate imm> + : AtomicLoadBinary<operator, GR64, (i64 imm:$src2), imm>; + +// OPERATOR is ATOMIC_SWAPW or an ATOMIC_LOADW_* operation. PAT and OPERAND +// describe the second (non-memory) operand. +class AtomicLoadWBinary<SDPatternOperator operator, dag pat, + DAGOperand operand> + : Pseudo<(outs GR32:$dst), + (ins bdaddr20only:$ptr, operand:$src2, ADDR32:$bitshift, + ADDR32:$negbitshift, uimm32:$bitsize), + [(set GR32:$dst, (operator bdaddr20only:$ptr, pat, ADDR32:$bitshift, + ADDR32:$negbitshift, uimm32:$bitsize))]> { + let Defs = [CC]; + let Has20BitOffset = 1; + let mayLoad = 1; + let mayStore = 1; + let usesCustomInserter = 1; +} + +// Specializations of AtomicLoadWBinary. +class AtomicLoadWBinaryReg<SDPatternOperator operator> + : AtomicLoadWBinary<operator, (i32 GR32:$src2), GR32>; +class AtomicLoadWBinaryImm<SDPatternOperator operator, Immediate imm> + : AtomicLoadWBinary<operator, (i32 imm:$src2), imm>; + +// Define an instruction that operates on two fixed-length blocks of memory, +// and associated pseudo instructions for operating on blocks of any size. +// The Sequence form uses a straight-line sequence of instructions and +// the Loop form uses a loop of length-256 instructions followed by +// another instruction to handle the excess. +multiclass MemorySS<string mnemonic, bits<8> opcode, + SDPatternOperator sequence, SDPatternOperator loop> { + def "" : InstSS<opcode, (outs), (ins bdladdr12onlylen8:$BDL1, + bdaddr12only:$BD2), + mnemonic##"\t$BDL1, $BD2", []>; + let usesCustomInserter = 1 in { + def Sequence : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src, + imm64:$length), + [(sequence bdaddr12only:$dest, bdaddr12only:$src, + imm64:$length)]>; + def Loop : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src, + imm64:$length, GR64:$count256), + [(loop bdaddr12only:$dest, bdaddr12only:$src, + imm64:$length, GR64:$count256)]>; + } +} + +// Define an instruction that operates on two strings, both terminated +// by the character in R0. The instruction processes a CPU-determined +// number of bytes at a time and sets CC to 3 if the instruction needs +// to be repeated. Also define a pseudo instruction that represents +// the full loop (the main instruction plus the branch on CC==3). 
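+// For example, MVST is defined through a multiclass of this form; the Loop +// pseudo then stands for roughly "L: mvst %r2, %r3 ; jo L" (repeat while the +// instruction sets CC to 3), with the result register receiving the address at +// which the operation stopped.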
+multiclass StringRRE<string mnemonic, bits<16> opcode, + SDPatternOperator operator> { + def "" : InstRRE<opcode, (outs GR64:$R1, GR64:$R2), + (ins GR64:$R1src, GR64:$R2src), + mnemonic#"\t$R1, $R2", []> { + let Uses = [R0L]; + let Constraints = "$R1 = $R1src, $R2 = $R2src"; + let DisableEncoding = "$R1src, $R2src"; + } + let usesCustomInserter = 1 in + def Loop : Pseudo<(outs GR64:$end), + (ins GR64:$start1, GR64:$start2, GR32:$char), + [(set GR64:$end, (operator GR64:$start1, GR64:$start2, + GR32:$char))]>; +} + +// A pseudo instruction that is a direct alias of a real instruction. +// These aliases are used in cases where a particular register operand is +// fixed or where the same instruction is used with different register sizes. +// The size parameter is the size in bytes of the associated real instruction. +class Alias<int size, dag outs, dag ins, list<dag> pattern> + : InstSystemZ<size, outs, ins, "", pattern> { + let isPseudo = 1; + let isCodeGenOnly = 1; +} + +class UnaryAliasVRS<RegisterOperand cls1, RegisterOperand cls2> + : Alias<6, (outs cls1:$src1), (ins cls2:$src2), []>; + +// An alias of a UnaryVRR*, but with different register sizes. +class UnaryAliasVRR<SDPatternOperator operator, TypedReg tr1, TypedReg tr2> + : Alias<6, (outs tr1.op:$V1), (ins tr2.op:$V2), + [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2))))]>; + +// An alias of a UnaryVRX, but with different register sizes. +class UnaryAliasVRX<SDPatternOperator operator, TypedReg tr, + AddressingMode mode = bdxaddr12only> + : Alias<6, (outs tr.op:$V1), (ins mode:$XBD2), + [(set tr.op:$V1, (tr.vt (operator mode:$XBD2)))]>; + +// An alias of a StoreVRX, but with different register sizes. +class StoreAliasVRX<SDPatternOperator operator, TypedReg tr, + AddressingMode mode = bdxaddr12only> + : Alias<6, (outs), (ins tr.op:$V1, mode:$XBD2), + [(operator (tr.vt tr.op:$V1), mode:$XBD2)]>; + +// An alias of a BinaryRI, but with different register sizes. +class BinaryAliasRI<SDPatternOperator operator, RegisterOperand cls, + Immediate imm> + : Alias<4, (outs cls:$R1), (ins cls:$R1src, imm:$I2), + [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> { + let Constraints = "$R1 = $R1src"; +} + +// An alias of a BinaryRIL, but with different register sizes. +class BinaryAliasRIL<SDPatternOperator operator, RegisterOperand cls, + Immediate imm> + : Alias<6, (outs cls:$R1), (ins cls:$R1src, imm:$I2), + [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> { + let Constraints = "$R1 = $R1src"; +} + +// An alias of a BinaryVRRf, but with different register sizes. +class BinaryAliasVRRf<RegisterOperand cls> + : Alias<6, (outs VR128:$V1), (ins cls:$R2, cls:$R3), []>; + +// An alias of a CompareRI, but with different register sizes. +class CompareAliasRI<SDPatternOperator operator, RegisterOperand cls, + Immediate imm> + : Alias<4, (outs), (ins cls:$R1, imm:$I2), [(operator cls:$R1, imm:$I2)]> { + let isCompare = 1; +} + +// An alias of a RotateSelectRIEf, but with different register sizes. 
+class RotateSelectAliasRIEf<RegisterOperand cls1, RegisterOperand cls2> + : Alias<6, (outs cls1:$R1), + (ins cls1:$R1src, cls2:$R2, imm32zx8:$I3, imm32zx8:$I4, + imm32zx6:$I5), []> { + let Constraints = "$R1 = $R1src"; +} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp new file mode 100644 index 0000000..e6b5fc8 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -0,0 +1,1294 @@ +//===-- SystemZInstrInfo.cpp - SystemZ instruction information ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the SystemZ implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#include "SystemZInstrInfo.h" +#include "SystemZInstrBuilder.h" +#include "SystemZTargetMachine.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +using namespace llvm; + +#define GET_INSTRINFO_CTOR_DTOR +#define GET_INSTRMAP_INFO +#include "SystemZGenInstrInfo.inc" + +// Return a mask with Count low bits set. +static uint64_t allOnes(unsigned int Count) { + return Count == 0 ? 0 : (uint64_t(1) << (Count - 1) << 1) - 1; +} + +// Reg should be a 32-bit GPR. Return true if it is a high register rather +// than a low register. +static bool isHighReg(unsigned int Reg) { + if (SystemZ::GRH32BitRegClass.contains(Reg)) + return true; + assert(SystemZ::GR32BitRegClass.contains(Reg) && "Invalid GRX32"); + return false; +} + +// Pin the vtable to this file. +void SystemZInstrInfo::anchor() {} + +SystemZInstrInfo::SystemZInstrInfo(SystemZSubtarget &sti) + : SystemZGenInstrInfo(SystemZ::ADJCALLSTACKDOWN, SystemZ::ADJCALLSTACKUP), + RI(), STI(sti) { +} + +// MI is a 128-bit load or store. Split it into two 64-bit loads or stores, +// each having the opcode given by NewOpcode. +void SystemZInstrInfo::splitMove(MachineBasicBlock::iterator MI, + unsigned NewOpcode) const { + MachineBasicBlock *MBB = MI->getParent(); + MachineFunction &MF = *MBB->getParent(); + + // Get two load or store instructions. Use the original instruction for one + // of them (arbitrarily the second here) and create a clone for the other. + MachineInstr *EarlierMI = MF.CloneMachineInstr(MI); + MBB->insert(MI, EarlierMI); + + // Set up the two 64-bit registers. + MachineOperand &HighRegOp = EarlierMI->getOperand(0); + MachineOperand &LowRegOp = MI->getOperand(0); + HighRegOp.setReg(RI.getSubReg(HighRegOp.getReg(), SystemZ::subreg_h64)); + LowRegOp.setReg(RI.getSubReg(LowRegOp.getReg(), SystemZ::subreg_l64)); + + // The address in the first (high) instruction is already correct. + // Adjust the offset in the second (low) instruction. + MachineOperand &HighOffsetOp = EarlierMI->getOperand(2); + MachineOperand &LowOffsetOp = MI->getOperand(2); + LowOffsetOp.setImm(LowOffsetOp.getImm() + 8); + + // Clear the kill flags for the base and index registers in the first + // instruction. + EarlierMI->getOperand(1).setIsKill(false); + EarlierMI->getOperand(3).setIsKill(false); + + // Set the opcodes. 
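+ // getOpcodeForOffset() returns the 12-bit-displacement or 20-bit-displacement + // form of NewOpcode, depending on which range the offset fits into, or 0 if + // neither form works (the low half uses the original offset plus 8).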
+ unsigned HighOpcode = getOpcodeForOffset(NewOpcode, HighOffsetOp.getImm()); + unsigned LowOpcode = getOpcodeForOffset(NewOpcode, LowOffsetOp.getImm()); + assert(HighOpcode && LowOpcode && "Both offsets should be in range"); + + EarlierMI->setDesc(get(HighOpcode)); + MI->setDesc(get(LowOpcode)); +} + +// Split ADJDYNALLOC instruction MI. +void SystemZInstrInfo::splitAdjDynAlloc(MachineBasicBlock::iterator MI) const { + MachineBasicBlock *MBB = MI->getParent(); + MachineFunction &MF = *MBB->getParent(); + MachineFrameInfo *MFFrame = MF.getFrameInfo(); + MachineOperand &OffsetMO = MI->getOperand(2); + + uint64_t Offset = (MFFrame->getMaxCallFrameSize() + + SystemZMC::CallFrameSize + + OffsetMO.getImm()); + unsigned NewOpcode = getOpcodeForOffset(SystemZ::LA, Offset); + assert(NewOpcode && "No support for huge argument lists yet"); + MI->setDesc(get(NewOpcode)); + OffsetMO.setImm(Offset); +} + +// MI is an RI-style pseudo instruction. Replace it with LowOpcode +// if the first operand is a low GR32 and HighOpcode if the first operand +// is a high GR32. ConvertHigh is true if LowOpcode takes a signed operand +// and HighOpcode takes an unsigned 32-bit operand. In those cases, +// MI has the same kind of operand as LowOpcode, so needs to be converted +// if HighOpcode is used. +void SystemZInstrInfo::expandRIPseudo(MachineInstr *MI, unsigned LowOpcode, + unsigned HighOpcode, + bool ConvertHigh) const { + unsigned Reg = MI->getOperand(0).getReg(); + bool IsHigh = isHighReg(Reg); + MI->setDesc(get(IsHigh ? HighOpcode : LowOpcode)); + if (IsHigh && ConvertHigh) + MI->getOperand(1).setImm(uint32_t(MI->getOperand(1).getImm())); +} + +// MI is a three-operand RIE-style pseudo instruction. Replace it with +// LowOpcodeK if the registers are both low GR32s, otherwise use a move +// followed by HighOpcode or LowOpcode, depending on whether the target +// is a high or low GR32. +void SystemZInstrInfo::expandRIEPseudo(MachineInstr *MI, unsigned LowOpcode, + unsigned LowOpcodeK, + unsigned HighOpcode) const { + unsigned DestReg = MI->getOperand(0).getReg(); + unsigned SrcReg = MI->getOperand(1).getReg(); + bool DestIsHigh = isHighReg(DestReg); + bool SrcIsHigh = isHighReg(SrcReg); + if (!DestIsHigh && !SrcIsHigh) + MI->setDesc(get(LowOpcodeK)); + else { + emitGRX32Move(*MI->getParent(), MI, MI->getDebugLoc(), + DestReg, SrcReg, SystemZ::LR, 32, + MI->getOperand(1).isKill()); + MI->setDesc(get(DestIsHigh ? HighOpcode : LowOpcode)); + MI->getOperand(1).setReg(DestReg); + MI->tieOperands(0, 1); + } +} + +// MI is an RXY-style pseudo instruction. Replace it with LowOpcode +// if the first operand is a low GR32 and HighOpcode if the first operand +// is a high GR32. +void SystemZInstrInfo::expandRXYPseudo(MachineInstr *MI, unsigned LowOpcode, + unsigned HighOpcode) const { + unsigned Reg = MI->getOperand(0).getReg(); + unsigned Opcode = getOpcodeForOffset(isHighReg(Reg) ? HighOpcode : LowOpcode, + MI->getOperand(2).getImm()); + MI->setDesc(get(Opcode)); +} + +// MI is an RR-style pseudo instruction that zero-extends the low Size bits +// of one GRX32 into another. Replace it with LowOpcode if both operands +// are low registers, otherwise use RISB[LH]G. 
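+// For example, LLCRMux (Size = 8) with a high-half destination and a low-half +// source becomes RISBHL with immediates (24, 128 + 31, 32): bits 24-31 are +// inserted, the added 128 requests zeroing of the bits outside the inserted +// field, and the rotate of 32 moves the byte between the two register halves.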
+void SystemZInstrInfo::expandZExtPseudo(MachineInstr *MI, unsigned LowOpcode, + unsigned Size) const { + emitGRX32Move(*MI->getParent(), MI, MI->getDebugLoc(), + MI->getOperand(0).getReg(), MI->getOperand(1).getReg(), + LowOpcode, Size, MI->getOperand(1).isKill()); + MI->eraseFromParent(); +} + +// Emit a zero-extending move from 32-bit GPR SrcReg to 32-bit GPR +// DestReg before MBBI in MBB. Use LowLowOpcode when both DestReg and SrcReg +// are low registers, otherwise use RISB[LH]G. Size is the number of bits +// taken from the low end of SrcReg (8 for LLCR, 16 for LLHR and 32 for LR). +// KillSrc is true if this move is the last use of SrcReg. +void SystemZInstrInfo::emitGRX32Move(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc DL, unsigned DestReg, + unsigned SrcReg, unsigned LowLowOpcode, + unsigned Size, bool KillSrc) const { + unsigned Opcode; + bool DestIsHigh = isHighReg(DestReg); + bool SrcIsHigh = isHighReg(SrcReg); + if (DestIsHigh && SrcIsHigh) + Opcode = SystemZ::RISBHH; + else if (DestIsHigh && !SrcIsHigh) + Opcode = SystemZ::RISBHL; + else if (!DestIsHigh && SrcIsHigh) + Opcode = SystemZ::RISBLH; + else { + BuildMI(MBB, MBBI, DL, get(LowLowOpcode), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; + } + unsigned Rotate = (DestIsHigh != SrcIsHigh ? 32 : 0); + BuildMI(MBB, MBBI, DL, get(Opcode), DestReg) + .addReg(DestReg, RegState::Undef) + .addReg(SrcReg, getKillRegState(KillSrc)) + .addImm(32 - Size).addImm(128 + 31).addImm(Rotate); +} + +// If MI is a simple load or store for a frame object, return the register +// it loads or stores and set FrameIndex to the index of the frame object. +// Return 0 otherwise. +// +// Flag is SimpleBDXLoad for loads and SimpleBDXStore for stores. +static int isSimpleMove(const MachineInstr *MI, int &FrameIndex, + unsigned Flag) { + const MCInstrDesc &MCID = MI->getDesc(); + if ((MCID.TSFlags & Flag) && + MI->getOperand(1).isFI() && + MI->getOperand(2).getImm() == 0 && + MI->getOperand(3).getReg() == 0) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + return 0; +} + +unsigned SystemZInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + return isSimpleMove(MI, FrameIndex, SystemZII::SimpleBDXLoad); +} + +unsigned SystemZInstrInfo::isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + return isSimpleMove(MI, FrameIndex, SystemZII::SimpleBDXStore); +} + +bool SystemZInstrInfo::isStackSlotCopy(const MachineInstr *MI, + int &DestFrameIndex, + int &SrcFrameIndex) const { + // Check for MVC 0(Length,FI1),0(FI2) + const MachineFrameInfo *MFI = MI->getParent()->getParent()->getFrameInfo(); + if (MI->getOpcode() != SystemZ::MVC || + !MI->getOperand(0).isFI() || + MI->getOperand(1).getImm() != 0 || + !MI->getOperand(3).isFI() || + MI->getOperand(4).getImm() != 0) + return false; + + // Check that Length covers the full slots. + int64_t Length = MI->getOperand(2).getImm(); + unsigned FI1 = MI->getOperand(0).getIndex(); + unsigned FI2 = MI->getOperand(3).getIndex(); + if (MFI->getObjectSize(FI1) != Length || + MFI->getObjectSize(FI2) != Length) + return false; + + DestFrameIndex = FI1; + SrcFrameIndex = FI2; + return true; +} + +bool SystemZInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, + MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify) const { + // Most of the code and comments here are boilerplate. 
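+ // On SystemZ the condition returned in Cond is a pair of immediates: Cond[0] + // is the CCValid mask of the instruction that defined CC and Cond[1] is the + // CCMask that the branch tests; ReverseBranchCondition() simply XORs the two.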
+ + // Start from the bottom of the block and work up, examining the + // terminator instructions. + MachineBasicBlock::iterator I = MBB.end(); + while (I != MBB.begin()) { + --I; + if (I->isDebugValue()) + continue; + + // Working from the bottom, when we see a non-terminator instruction, we're + // done. + if (!isUnpredicatedTerminator(I)) + break; + + // A terminator that isn't a branch can't easily be handled by this + // analysis. + if (!I->isBranch()) + return true; + + // Can't handle indirect branches. + SystemZII::Branch Branch(getBranchInfo(I)); + if (!Branch.Target->isMBB()) + return true; + + // Punt on compound branches. + if (Branch.Type != SystemZII::BranchNormal) + return true; + + if (Branch.CCMask == SystemZ::CCMASK_ANY) { + // Handle unconditional branches. + if (!AllowModify) { + TBB = Branch.Target->getMBB(); + continue; + } + + // If the block has any instructions after a JMP, delete them. + while (std::next(I) != MBB.end()) + std::next(I)->eraseFromParent(); + + Cond.clear(); + FBB = nullptr; + + // Delete the JMP if it's equivalent to a fall-through. + if (MBB.isLayoutSuccessor(Branch.Target->getMBB())) { + TBB = nullptr; + I->eraseFromParent(); + I = MBB.end(); + continue; + } + + // TBB is used to indicate the unconditinal destination. + TBB = Branch.Target->getMBB(); + continue; + } + + // Working from the bottom, handle the first conditional branch. + if (Cond.empty()) { + // FIXME: add X86-style branch swap + FBB = TBB; + TBB = Branch.Target->getMBB(); + Cond.push_back(MachineOperand::CreateImm(Branch.CCValid)); + Cond.push_back(MachineOperand::CreateImm(Branch.CCMask)); + continue; + } + + // Handle subsequent conditional branches. + assert(Cond.size() == 2 && TBB && "Should have seen a conditional branch"); + + // Only handle the case where all conditional branches branch to the same + // destination. + if (TBB != Branch.Target->getMBB()) + return true; + + // If the conditions are the same, we can leave them alone. + unsigned OldCCValid = Cond[0].getImm(); + unsigned OldCCMask = Cond[1].getImm(); + if (OldCCValid == Branch.CCValid && OldCCMask == Branch.CCMask) + continue; + + // FIXME: Try combining conditions like X86 does. Should be easy on Z! + return false; + } + + return false; +} + +unsigned SystemZInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { + // Most of the code and comments here are boilerplate. + MachineBasicBlock::iterator I = MBB.end(); + unsigned Count = 0; + + while (I != MBB.begin()) { + --I; + if (I->isDebugValue()) + continue; + if (!I->isBranch()) + break; + if (!getBranchInfo(I).Target->isMBB()) + break; + // Remove the branch. + I->eraseFromParent(); + I = MBB.end(); + ++Count; + } + + return Count; +} + +bool SystemZInstrInfo:: +ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { + assert(Cond.size() == 2 && "Invalid condition"); + Cond[1].setImm(Cond[1].getImm() ^ Cond[0].getImm()); + return false; +} + +unsigned +SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + ArrayRef<MachineOperand> Cond, + DebugLoc DL) const { + // In this function we output 32-bit branches, which should always + // have enough range. They can be shortened and relaxed by later code + // in the pipeline, if desired. + + // Shouldn't be a fall through. + assert(TBB && "InsertBranch must not be told to insert a fallthrough"); + assert((Cond.size() == 2 || Cond.size() == 0) && + "SystemZ branch conditions have one component!"); + + if (Cond.empty()) { + // Unconditional branch? 
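+ // (An empty Cond from AnalyzeBranch denotes an unconditional branch, which is + // emitted as a plain J; the conditional case below emits BRC with the saved + // CCValid/CCMask pair.)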
+ assert(!FBB && "Unconditional branch with multiple successors!"); + BuildMI(&MBB, DL, get(SystemZ::J)).addMBB(TBB); + return 1; + } + + // Conditional branch. + unsigned Count = 0; + unsigned CCValid = Cond[0].getImm(); + unsigned CCMask = Cond[1].getImm(); + BuildMI(&MBB, DL, get(SystemZ::BRC)) + .addImm(CCValid).addImm(CCMask).addMBB(TBB); + ++Count; + + if (FBB) { + // Two-way Conditional branch. Insert the second branch. + BuildMI(&MBB, DL, get(SystemZ::J)).addMBB(FBB); + ++Count; + } + return Count; +} + +bool SystemZInstrInfo::analyzeCompare(const MachineInstr *MI, + unsigned &SrcReg, unsigned &SrcReg2, + int &Mask, int &Value) const { + assert(MI->isCompare() && "Caller should have checked for a comparison"); + + if (MI->getNumExplicitOperands() == 2 && + MI->getOperand(0).isReg() && + MI->getOperand(1).isImm()) { + SrcReg = MI->getOperand(0).getReg(); + SrcReg2 = 0; + Value = MI->getOperand(1).getImm(); + Mask = ~0; + return true; + } + + return false; +} + +// If Reg is a virtual register, return its definition, otherwise return null. +static MachineInstr *getDef(unsigned Reg, + const MachineRegisterInfo *MRI) { + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + return nullptr; + return MRI->getUniqueVRegDef(Reg); +} + +// Return true if MI is a shift of type Opcode by Imm bits. +static bool isShift(MachineInstr *MI, unsigned Opcode, int64_t Imm) { + return (MI->getOpcode() == Opcode && + !MI->getOperand(2).getReg() && + MI->getOperand(3).getImm() == Imm); +} + +// If the destination of MI has no uses, delete it as dead. +static void eraseIfDead(MachineInstr *MI, const MachineRegisterInfo *MRI) { + if (MRI->use_nodbg_empty(MI->getOperand(0).getReg())) + MI->eraseFromParent(); +} + +// Compare compares SrcReg against zero. Check whether SrcReg contains +// the result of an IPM sequence whose input CC survives until Compare, +// and whether Compare is therefore redundant. Delete it and return +// true if so. 
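+// Walking the definitions backwards, the chain recognized below is an optional +// LGFR, of an RLL by 31, of an SRL by SystemZ::IPM_CC, of an IPM; i.e. SrcReg +// holds a materialized copy of CC, so comparing it against zero is redundant +// while CC itself is still live and unmodified.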
+static bool removeIPMBasedCompare(MachineInstr *Compare, unsigned SrcReg, + const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI) { + MachineInstr *LGFR = nullptr; + MachineInstr *RLL = getDef(SrcReg, MRI); + if (RLL && RLL->getOpcode() == SystemZ::LGFR) { + LGFR = RLL; + RLL = getDef(LGFR->getOperand(1).getReg(), MRI); + } + if (!RLL || !isShift(RLL, SystemZ::RLL, 31)) + return false; + + MachineInstr *SRL = getDef(RLL->getOperand(1).getReg(), MRI); + if (!SRL || !isShift(SRL, SystemZ::SRL, SystemZ::IPM_CC)) + return false; + + MachineInstr *IPM = getDef(SRL->getOperand(1).getReg(), MRI); + if (!IPM || IPM->getOpcode() != SystemZ::IPM) + return false; + + // Check that there are no assignments to CC between the IPM and Compare, + if (IPM->getParent() != Compare->getParent()) + return false; + MachineBasicBlock::iterator MBBI = IPM, MBBE = Compare; + for (++MBBI; MBBI != MBBE; ++MBBI) { + MachineInstr *MI = MBBI; + if (MI->modifiesRegister(SystemZ::CC, TRI)) + return false; + } + + Compare->eraseFromParent(); + if (LGFR) + eraseIfDead(LGFR, MRI); + eraseIfDead(RLL, MRI); + eraseIfDead(SRL, MRI); + eraseIfDead(IPM, MRI); + + return true; +} + +bool +SystemZInstrInfo::optimizeCompareInstr(MachineInstr *Compare, + unsigned SrcReg, unsigned SrcReg2, + int Mask, int Value, + const MachineRegisterInfo *MRI) const { + assert(!SrcReg2 && "Only optimizing constant comparisons so far"); + bool IsLogical = (Compare->getDesc().TSFlags & SystemZII::IsLogical) != 0; + return Value == 0 && !IsLogical && + removeIPMBasedCompare(Compare, SrcReg, MRI, &RI); +} + +// If Opcode is a move that has a conditional variant, return that variant, +// otherwise return 0. +static unsigned getConditionalMove(unsigned Opcode) { + switch (Opcode) { + case SystemZ::LR: return SystemZ::LOCR; + case SystemZ::LGR: return SystemZ::LOCGR; + default: return 0; + } +} + +bool SystemZInstrInfo::isPredicable(MachineInstr *MI) const { + unsigned Opcode = MI->getOpcode(); + return STI.hasLoadStoreOnCond() && getConditionalMove(Opcode); +} + +bool SystemZInstrInfo:: +isProfitableToIfCvt(MachineBasicBlock &MBB, + unsigned NumCycles, unsigned ExtraPredCycles, + BranchProbability Probability) const { + // For now only convert single instructions. + return NumCycles == 1; +} + +bool SystemZInstrInfo:: +isProfitableToIfCvt(MachineBasicBlock &TMBB, + unsigned NumCyclesT, unsigned ExtraPredCyclesT, + MachineBasicBlock &FMBB, + unsigned NumCyclesF, unsigned ExtraPredCyclesF, + BranchProbability Probability) const { + // For now avoid converting mutually-exclusive cases. + return false; +} + +bool SystemZInstrInfo:: +PredicateInstruction(MachineInstr *MI, ArrayRef<MachineOperand> Pred) const { + assert(Pred.size() == 2 && "Invalid condition"); + unsigned CCValid = Pred[0].getImm(); + unsigned CCMask = Pred[1].getImm(); + assert(CCMask > 0 && CCMask < 15 && "Invalid predicate"); + unsigned Opcode = MI->getOpcode(); + if (STI.hasLoadStoreOnCond()) { + if (unsigned CondOpcode = getConditionalMove(Opcode)) { + MI->setDesc(get(CondOpcode)); + MachineInstrBuilder(*MI->getParent()->getParent(), MI) + .addImm(CCValid).addImm(CCMask) + .addReg(SystemZ::CC, RegState::Implicit); + return true; + } + } + return false; +} + +void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc DL, unsigned DestReg, + unsigned SrcReg, bool KillSrc) const { + // Split 128-bit GPR moves into two 64-bit moves. This handles ADDR128 too. 
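+ // (A 128-bit GPR is a register pair, so the copy recurses on the subreg_h64 + // and subreg_l64 halves, which are then handled by the GR64 case below.)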
+ if (SystemZ::GR128BitRegClass.contains(DestReg, SrcReg)) { + copyPhysReg(MBB, MBBI, DL, RI.getSubReg(DestReg, SystemZ::subreg_h64), + RI.getSubReg(SrcReg, SystemZ::subreg_h64), KillSrc); + copyPhysReg(MBB, MBBI, DL, RI.getSubReg(DestReg, SystemZ::subreg_l64), + RI.getSubReg(SrcReg, SystemZ::subreg_l64), KillSrc); + return; + } + + if (SystemZ::GRX32BitRegClass.contains(DestReg, SrcReg)) { + emitGRX32Move(MBB, MBBI, DL, DestReg, SrcReg, SystemZ::LR, 32, KillSrc); + return; + } + + // Everything else needs only one instruction. + unsigned Opcode; + if (SystemZ::GR64BitRegClass.contains(DestReg, SrcReg)) + Opcode = SystemZ::LGR; + else if (SystemZ::FP32BitRegClass.contains(DestReg, SrcReg)) + Opcode = SystemZ::LER; + else if (SystemZ::FP64BitRegClass.contains(DestReg, SrcReg)) + Opcode = SystemZ::LDR; + else if (SystemZ::FP128BitRegClass.contains(DestReg, SrcReg)) + Opcode = SystemZ::LXR; + else if (SystemZ::VR32BitRegClass.contains(DestReg, SrcReg)) + Opcode = SystemZ::VLR32; + else if (SystemZ::VR64BitRegClass.contains(DestReg, SrcReg)) + Opcode = SystemZ::VLR64; + else if (SystemZ::VR128BitRegClass.contains(DestReg, SrcReg)) + Opcode = SystemZ::VLR; + else + llvm_unreachable("Impossible reg-to-reg copy"); + + BuildMI(MBB, MBBI, DL, get(Opcode), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); +} + +void SystemZInstrInfo::storeRegToStackSlot( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, + bool isKill, int FrameIdx, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + + // Callers may expect a single instruction, so keep 128-bit moves + // together for now and lower them after register allocation. + unsigned LoadOpcode, StoreOpcode; + getLoadStoreOpcodes(RC, LoadOpcode, StoreOpcode); + addFrameReference(BuildMI(MBB, MBBI, DL, get(StoreOpcode)) + .addReg(SrcReg, getKillRegState(isKill)), + FrameIdx); +} + +void SystemZInstrInfo::loadRegFromStackSlot( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, + int FrameIdx, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + + // Callers may expect a single instruction, so keep 128-bit moves + // together for now and lower them after register allocation. + unsigned LoadOpcode, StoreOpcode; + getLoadStoreOpcodes(RC, LoadOpcode, StoreOpcode); + addFrameReference(BuildMI(MBB, MBBI, DL, get(LoadOpcode), DestReg), + FrameIdx); +} + +// Return true if MI is a simple load or store with a 12-bit displacement +// and no index. Flag is SimpleBDXLoad for loads and SimpleBDXStore for stores. 
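+// Only accesses of this shape can later be folded into an MVC by +// foldMemoryOperandImpl(), since MVC itself provides just a 12-bit unsigned +// displacement and no index register.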
+static bool isSimpleBD12Move(const MachineInstr *MI, unsigned Flag) { + const MCInstrDesc &MCID = MI->getDesc(); + return ((MCID.TSFlags & Flag) && + isUInt<12>(MI->getOperand(2).getImm()) && + MI->getOperand(3).getReg() == 0); +} + +namespace { +struct LogicOp { + LogicOp() : RegSize(0), ImmLSB(0), ImmSize(0) {} + LogicOp(unsigned regSize, unsigned immLSB, unsigned immSize) + : RegSize(regSize), ImmLSB(immLSB), ImmSize(immSize) {} + + explicit operator bool() const { return RegSize; } + + unsigned RegSize, ImmLSB, ImmSize; +}; +} // end anonymous namespace + +static LogicOp interpretAndImmediate(unsigned Opcode) { + switch (Opcode) { + case SystemZ::NILMux: return LogicOp(32, 0, 16); + case SystemZ::NIHMux: return LogicOp(32, 16, 16); + case SystemZ::NILL64: return LogicOp(64, 0, 16); + case SystemZ::NILH64: return LogicOp(64, 16, 16); + case SystemZ::NIHL64: return LogicOp(64, 32, 16); + case SystemZ::NIHH64: return LogicOp(64, 48, 16); + case SystemZ::NIFMux: return LogicOp(32, 0, 32); + case SystemZ::NILF64: return LogicOp(64, 0, 32); + case SystemZ::NIHF64: return LogicOp(64, 32, 32); + default: return LogicOp(); + } +} + +// Used to return from convertToThreeAddress after replacing two-address +// instruction OldMI with three-address instruction NewMI. +static MachineInstr *finishConvertToThreeAddress(MachineInstr *OldMI, + MachineInstr *NewMI, + LiveVariables *LV) { + if (LV) { + unsigned NumOps = OldMI->getNumOperands(); + for (unsigned I = 1; I < NumOps; ++I) { + MachineOperand &Op = OldMI->getOperand(I); + if (Op.isReg() && Op.isKill()) + LV->replaceKillInstruction(Op.getReg(), OldMI, NewMI); + } + } + return NewMI; +} + +MachineInstr * +SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, + MachineBasicBlock::iterator &MBBI, + LiveVariables *LV) const { + MachineInstr *MI = MBBI; + MachineBasicBlock *MBB = MI->getParent(); + MachineFunction *MF = MBB->getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + + unsigned Opcode = MI->getOpcode(); + unsigned NumOps = MI->getNumOperands(); + + // Try to convert something like SLL into SLLK, if supported. + // We prefer to keep the two-operand form where possible both + // because it tends to be shorter and because some instructions + // have memory forms that can be used during spilling. + if (STI.hasDistinctOps()) { + MachineOperand &Dest = MI->getOperand(0); + MachineOperand &Src = MI->getOperand(1); + unsigned DestReg = Dest.getReg(); + unsigned SrcReg = Src.getReg(); + // AHIMux is only really a three-operand instruction when both operands + // are low registers. Try to constrain both operands to be low if + // possible. + if (Opcode == SystemZ::AHIMux && + TargetRegisterInfo::isVirtualRegister(DestReg) && + TargetRegisterInfo::isVirtualRegister(SrcReg) && + MRI.getRegClass(DestReg)->contains(SystemZ::R1L) && + MRI.getRegClass(SrcReg)->contains(SystemZ::R1L)) { + MRI.constrainRegClass(DestReg, &SystemZ::GR32BitRegClass); + MRI.constrainRegClass(SrcReg, &SystemZ::GR32BitRegClass); + } + int ThreeOperandOpcode = SystemZ::getThreeOperandOpcode(Opcode); + if (ThreeOperandOpcode >= 0) { + // Create three address instruction without adding the implicit + // operands. Those will instead be copied over from the original + // instruction by the loop below. + MachineInstrBuilder MIB(*MF, + MF->CreateMachineInstr(get(ThreeOperandOpcode), + MI->getDebugLoc(), /*NoImplicit=*/true)); + MIB.addOperand(Dest); + // Keep the kill state, but drop the tied flag. 
+ MIB.addReg(Src.getReg(), getKillRegState(Src.isKill()), Src.getSubReg()); + // Keep the remaining operands as-is. + for (unsigned I = 2; I < NumOps; ++I) + MIB.addOperand(MI->getOperand(I)); + MBB->insert(MI, MIB); + return finishConvertToThreeAddress(MI, MIB, LV); + } + } + + // Try to convert an AND into an RISBG-type instruction. + if (LogicOp And = interpretAndImmediate(Opcode)) { + uint64_t Imm = MI->getOperand(2).getImm() << And.ImmLSB; + // AND IMMEDIATE leaves the other bits of the register unchanged. + Imm |= allOnes(And.RegSize) & ~(allOnes(And.ImmSize) << And.ImmLSB); + unsigned Start, End; + if (isRxSBGMask(Imm, And.RegSize, Start, End)) { + unsigned NewOpcode; + if (And.RegSize == 64) { + NewOpcode = SystemZ::RISBG; + // Prefer RISBGN if available, since it does not clobber CC. + if (STI.hasMiscellaneousExtensions()) + NewOpcode = SystemZ::RISBGN; + } else { + NewOpcode = SystemZ::RISBMux; + Start &= 31; + End &= 31; + } + MachineOperand &Dest = MI->getOperand(0); + MachineOperand &Src = MI->getOperand(1); + MachineInstrBuilder MIB = + BuildMI(*MBB, MI, MI->getDebugLoc(), get(NewOpcode)) + .addOperand(Dest).addReg(0) + .addReg(Src.getReg(), getKillRegState(Src.isKill()), Src.getSubReg()) + .addImm(Start).addImm(End + 128).addImm(0); + return finishConvertToThreeAddress(MI, MIB, LV); + } + } + return nullptr; +} + +MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl( + MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, int FrameIndex) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + unsigned Size = MFI->getObjectSize(FrameIndex); + unsigned Opcode = MI->getOpcode(); + + if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) { + if ((Opcode == SystemZ::LA || Opcode == SystemZ::LAY) && + isInt<8>(MI->getOperand(2).getImm()) && + !MI->getOperand(3).getReg()) { + // LA(Y) %reg, CONST(%reg) -> AGSI %mem, CONST + return BuildMI(*InsertPt->getParent(), InsertPt, MI->getDebugLoc(), + get(SystemZ::AGSI)) + .addFrameIndex(FrameIndex) + .addImm(0) + .addImm(MI->getOperand(2).getImm()); + } + return nullptr; + } + + // All other cases require a single operand. + if (Ops.size() != 1) + return nullptr; + + unsigned OpNum = Ops[0]; + assert(Size == MF.getRegInfo() + .getRegClass(MI->getOperand(OpNum).getReg())->getSize() && + "Invalid size combination"); + + if ((Opcode == SystemZ::AHI || Opcode == SystemZ::AGHI) && + OpNum == 0 && + isInt<8>(MI->getOperand(2).getImm())) { + // A(G)HI %reg, CONST -> A(G)SI %mem, CONST + Opcode = (Opcode == SystemZ::AHI ? SystemZ::ASI : SystemZ::AGSI); + return BuildMI(*InsertPt->getParent(), InsertPt, MI->getDebugLoc(), + get(Opcode)) + .addFrameIndex(FrameIndex) + .addImm(0) + .addImm(MI->getOperand(2).getImm()); + } + + if (Opcode == SystemZ::LGDR || Opcode == SystemZ::LDGR) { + bool Op0IsGPR = (Opcode == SystemZ::LGDR); + bool Op1IsGPR = (Opcode == SystemZ::LDGR); + // If we're spilling the destination of an LDGR or LGDR, store the + // source register instead. + if (OpNum == 0) { + unsigned StoreOpcode = Op1IsGPR ? SystemZ::STG : SystemZ::STD; + return BuildMI(*InsertPt->getParent(), InsertPt, MI->getDebugLoc(), + get(StoreOpcode)) + .addOperand(MI->getOperand(1)) + .addFrameIndex(FrameIndex) + .addImm(0) + .addReg(0); + } + // If we're spilling the source of an LDGR or LGDR, load the + // destination register instead. + if (OpNum == 1) { + unsigned LoadOpcode = Op0IsGPR ? 
SystemZ::LG : SystemZ::LD; + unsigned Dest = MI->getOperand(0).getReg(); + return BuildMI(*InsertPt->getParent(), InsertPt, MI->getDebugLoc(), + get(LoadOpcode), Dest) + .addFrameIndex(FrameIndex) + .addImm(0) + .addReg(0); + } + } + + // Look for cases where the source of a simple store or the destination + // of a simple load is being spilled. Try to use MVC instead. + // + // Although MVC is in practice a fast choice in these cases, it is still + // logically a bytewise copy. This means that we cannot use it if the + // load or store is volatile. We also wouldn't be able to use MVC if + // the two memories partially overlap, but that case cannot occur here, + // because we know that one of the memories is a full frame index. + // + // For performance reasons, we also want to avoid using MVC if the addresses + // might be equal. We don't worry about that case here, because spill slot + // coloring happens later, and because we have special code to remove + // MVCs that turn out to be redundant. + if (OpNum == 0 && MI->hasOneMemOperand()) { + MachineMemOperand *MMO = *MI->memoperands_begin(); + if (MMO->getSize() == Size && !MMO->isVolatile()) { + // Handle conversion of loads. + if (isSimpleBD12Move(MI, SystemZII::SimpleBDXLoad)) { + return BuildMI(*InsertPt->getParent(), InsertPt, MI->getDebugLoc(), + get(SystemZ::MVC)) + .addFrameIndex(FrameIndex) + .addImm(0) + .addImm(Size) + .addOperand(MI->getOperand(1)) + .addImm(MI->getOperand(2).getImm()) + .addMemOperand(MMO); + } + // Handle conversion of stores. + if (isSimpleBD12Move(MI, SystemZII::SimpleBDXStore)) { + return BuildMI(*InsertPt->getParent(), InsertPt, MI->getDebugLoc(), + get(SystemZ::MVC)) + .addOperand(MI->getOperand(1)) + .addImm(MI->getOperand(2).getImm()) + .addImm(Size) + .addFrameIndex(FrameIndex) + .addImm(0) + .addMemOperand(MMO); + } + } + } + + // If the spilled operand is the final one, try to change <INSN>R + // into <INSN>. 
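+ // For example, when the second operand of an AR is spilled, the AR can be + // replaced by an A of the spill slot; the register-to-memory opcode mapping + // comes from the OpKey/OpType information attached to the instruction + // definitions in the .td files.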
+ int MemOpcode = SystemZ::getMemOpcode(Opcode); + if (MemOpcode >= 0) { + unsigned NumOps = MI->getNumExplicitOperands(); + if (OpNum == NumOps - 1) { + const MCInstrDesc &MemDesc = get(MemOpcode); + uint64_t AccessBytes = SystemZII::getAccessSize(MemDesc.TSFlags); + assert(AccessBytes != 0 && "Size of access should be known"); + assert(AccessBytes <= Size && "Access outside the frame index"); + uint64_t Offset = Size - AccessBytes; + MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt, + MI->getDebugLoc(), get(MemOpcode)); + for (unsigned I = 0; I < OpNum; ++I) + MIB.addOperand(MI->getOperand(I)); + MIB.addFrameIndex(FrameIndex).addImm(Offset); + if (MemDesc.TSFlags & SystemZII::HasIndex) + MIB.addReg(0); + return MIB; + } + } + + return nullptr; +} + +MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl( + MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, MachineInstr *LoadMI) const { + return nullptr; +} + +bool +SystemZInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { + switch (MI->getOpcode()) { + case SystemZ::L128: + splitMove(MI, SystemZ::LG); + return true; + + case SystemZ::ST128: + splitMove(MI, SystemZ::STG); + return true; + + case SystemZ::LX: + splitMove(MI, SystemZ::LD); + return true; + + case SystemZ::STX: + splitMove(MI, SystemZ::STD); + return true; + + case SystemZ::LBMux: + expandRXYPseudo(MI, SystemZ::LB, SystemZ::LBH); + return true; + + case SystemZ::LHMux: + expandRXYPseudo(MI, SystemZ::LH, SystemZ::LHH); + return true; + + case SystemZ::LLCRMux: + expandZExtPseudo(MI, SystemZ::LLCR, 8); + return true; + + case SystemZ::LLHRMux: + expandZExtPseudo(MI, SystemZ::LLHR, 16); + return true; + + case SystemZ::LLCMux: + expandRXYPseudo(MI, SystemZ::LLC, SystemZ::LLCH); + return true; + + case SystemZ::LLHMux: + expandRXYPseudo(MI, SystemZ::LLH, SystemZ::LLHH); + return true; + + case SystemZ::LMux: + expandRXYPseudo(MI, SystemZ::L, SystemZ::LFH); + return true; + + case SystemZ::STCMux: + expandRXYPseudo(MI, SystemZ::STC, SystemZ::STCH); + return true; + + case SystemZ::STHMux: + expandRXYPseudo(MI, SystemZ::STH, SystemZ::STHH); + return true; + + case SystemZ::STMux: + expandRXYPseudo(MI, SystemZ::ST, SystemZ::STFH); + return true; + + case SystemZ::LHIMux: + expandRIPseudo(MI, SystemZ::LHI, SystemZ::IIHF, true); + return true; + + case SystemZ::IIFMux: + expandRIPseudo(MI, SystemZ::IILF, SystemZ::IIHF, false); + return true; + + case SystemZ::IILMux: + expandRIPseudo(MI, SystemZ::IILL, SystemZ::IIHL, false); + return true; + + case SystemZ::IIHMux: + expandRIPseudo(MI, SystemZ::IILH, SystemZ::IIHH, false); + return true; + + case SystemZ::NIFMux: + expandRIPseudo(MI, SystemZ::NILF, SystemZ::NIHF, false); + return true; + + case SystemZ::NILMux: + expandRIPseudo(MI, SystemZ::NILL, SystemZ::NIHL, false); + return true; + + case SystemZ::NIHMux: + expandRIPseudo(MI, SystemZ::NILH, SystemZ::NIHH, false); + return true; + + case SystemZ::OIFMux: + expandRIPseudo(MI, SystemZ::OILF, SystemZ::OIHF, false); + return true; + + case SystemZ::OILMux: + expandRIPseudo(MI, SystemZ::OILL, SystemZ::OIHL, false); + return true; + + case SystemZ::OIHMux: + expandRIPseudo(MI, SystemZ::OILH, SystemZ::OIHH, false); + return true; + + case SystemZ::XIFMux: + expandRIPseudo(MI, SystemZ::XILF, SystemZ::XIHF, false); + return true; + + case SystemZ::TMLMux: + expandRIPseudo(MI, SystemZ::TMLL, SystemZ::TMHL, false); + return true; + + case SystemZ::TMHMux: + expandRIPseudo(MI, SystemZ::TMLH, SystemZ::TMHH, 
false); + return true; + + case SystemZ::AHIMux: + expandRIPseudo(MI, SystemZ::AHI, SystemZ::AIH, false); + return true; + + case SystemZ::AHIMuxK: + expandRIEPseudo(MI, SystemZ::AHI, SystemZ::AHIK, SystemZ::AIH); + return true; + + case SystemZ::AFIMux: + expandRIPseudo(MI, SystemZ::AFI, SystemZ::AIH, false); + return true; + + case SystemZ::CFIMux: + expandRIPseudo(MI, SystemZ::CFI, SystemZ::CIH, false); + return true; + + case SystemZ::CLFIMux: + expandRIPseudo(MI, SystemZ::CLFI, SystemZ::CLIH, false); + return true; + + case SystemZ::CMux: + expandRXYPseudo(MI, SystemZ::C, SystemZ::CHF); + return true; + + case SystemZ::CLMux: + expandRXYPseudo(MI, SystemZ::CL, SystemZ::CLHF); + return true; + + case SystemZ::RISBMux: { + bool DestIsHigh = isHighReg(MI->getOperand(0).getReg()); + bool SrcIsHigh = isHighReg(MI->getOperand(2).getReg()); + if (SrcIsHigh == DestIsHigh) + MI->setDesc(get(DestIsHigh ? SystemZ::RISBHH : SystemZ::RISBLL)); + else { + MI->setDesc(get(DestIsHigh ? SystemZ::RISBHL : SystemZ::RISBLH)); + MI->getOperand(5).setImm(MI->getOperand(5).getImm() ^ 32); + } + return true; + } + + case SystemZ::ADJDYNALLOC: + splitAdjDynAlloc(MI); + return true; + + default: + return false; + } +} + +uint64_t SystemZInstrInfo::getInstSizeInBytes(const MachineInstr *MI) const { + if (MI->getOpcode() == TargetOpcode::INLINEASM) { + const MachineFunction *MF = MI->getParent()->getParent(); + const char *AsmStr = MI->getOperand(0).getSymbolName(); + return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo()); + } + return MI->getDesc().getSize(); +} + +SystemZII::Branch +SystemZInstrInfo::getBranchInfo(const MachineInstr *MI) const { + switch (MI->getOpcode()) { + case SystemZ::BR: + case SystemZ::J: + case SystemZ::JG: + return SystemZII::Branch(SystemZII::BranchNormal, SystemZ::CCMASK_ANY, + SystemZ::CCMASK_ANY, &MI->getOperand(0)); + + case SystemZ::BRC: + case SystemZ::BRCL: + return SystemZII::Branch(SystemZII::BranchNormal, + MI->getOperand(0).getImm(), + MI->getOperand(1).getImm(), &MI->getOperand(2)); + + case SystemZ::BRCT: + return SystemZII::Branch(SystemZII::BranchCT, SystemZ::CCMASK_ICMP, + SystemZ::CCMASK_CMP_NE, &MI->getOperand(2)); + + case SystemZ::BRCTG: + return SystemZII::Branch(SystemZII::BranchCTG, SystemZ::CCMASK_ICMP, + SystemZ::CCMASK_CMP_NE, &MI->getOperand(2)); + + case SystemZ::CIJ: + case SystemZ::CRJ: + return SystemZII::Branch(SystemZII::BranchC, SystemZ::CCMASK_ICMP, + MI->getOperand(2).getImm(), &MI->getOperand(3)); + + case SystemZ::CLIJ: + case SystemZ::CLRJ: + return SystemZII::Branch(SystemZII::BranchCL, SystemZ::CCMASK_ICMP, + MI->getOperand(2).getImm(), &MI->getOperand(3)); + + case SystemZ::CGIJ: + case SystemZ::CGRJ: + return SystemZII::Branch(SystemZII::BranchCG, SystemZ::CCMASK_ICMP, + MI->getOperand(2).getImm(), &MI->getOperand(3)); + + case SystemZ::CLGIJ: + case SystemZ::CLGRJ: + return SystemZII::Branch(SystemZII::BranchCLG, SystemZ::CCMASK_ICMP, + MI->getOperand(2).getImm(), &MI->getOperand(3)); + + default: + llvm_unreachable("Unrecognized branch opcode"); + } +} + +void SystemZInstrInfo::getLoadStoreOpcodes(const TargetRegisterClass *RC, + unsigned &LoadOpcode, + unsigned &StoreOpcode) const { + if (RC == &SystemZ::GR32BitRegClass || RC == &SystemZ::ADDR32BitRegClass) { + LoadOpcode = SystemZ::L; + StoreOpcode = SystemZ::ST; + } else if (RC == &SystemZ::GRH32BitRegClass) { + LoadOpcode = SystemZ::LFH; + StoreOpcode = SystemZ::STFH; + } else if (RC == &SystemZ::GRX32BitRegClass) { + LoadOpcode = SystemZ::LMux; + StoreOpcode = 
SystemZ::STMux;
+  } else if (RC == &SystemZ::GR64BitRegClass ||
+             RC == &SystemZ::ADDR64BitRegClass) {
+    LoadOpcode = SystemZ::LG;
+    StoreOpcode = SystemZ::STG;
+  } else if (RC == &SystemZ::GR128BitRegClass ||
+             RC == &SystemZ::ADDR128BitRegClass) {
+    LoadOpcode = SystemZ::L128;
+    StoreOpcode = SystemZ::ST128;
+  } else if (RC == &SystemZ::FP32BitRegClass) {
+    LoadOpcode = SystemZ::LE;
+    StoreOpcode = SystemZ::STE;
+  } else if (RC == &SystemZ::FP64BitRegClass) {
+    LoadOpcode = SystemZ::LD;
+    StoreOpcode = SystemZ::STD;
+  } else if (RC == &SystemZ::FP128BitRegClass) {
+    LoadOpcode = SystemZ::LX;
+    StoreOpcode = SystemZ::STX;
+  } else if (RC == &SystemZ::VR32BitRegClass) {
+    LoadOpcode = SystemZ::VL32;
+    StoreOpcode = SystemZ::VST32;
+  } else if (RC == &SystemZ::VR64BitRegClass) {
+    LoadOpcode = SystemZ::VL64;
+    StoreOpcode = SystemZ::VST64;
+  } else if (RC == &SystemZ::VF128BitRegClass ||
+             RC == &SystemZ::VR128BitRegClass) {
+    LoadOpcode = SystemZ::VL;
+    StoreOpcode = SystemZ::VST;
+  } else
+    llvm_unreachable("Unsupported regclass to load or store");
+}
+
+unsigned SystemZInstrInfo::getOpcodeForOffset(unsigned Opcode,
+                                              int64_t Offset) const {
+  const MCInstrDesc &MCID = get(Opcode);
+  int64_t Offset2 = (MCID.TSFlags & SystemZII::Is128Bit ? Offset + 8 : Offset);
+  if (isUInt<12>(Offset) && isUInt<12>(Offset2)) {
+    // Get the instruction to use for unsigned 12-bit displacements.
+    int Disp12Opcode = SystemZ::getDisp12Opcode(Opcode);
+    if (Disp12Opcode >= 0)
+      return Disp12Opcode;
+
+    // All address-related instructions can use unsigned 12-bit
+    // displacements.
+    return Opcode;
+  }
+  if (isInt<20>(Offset) && isInt<20>(Offset2)) {
+    // Get the instruction to use for signed 20-bit displacements.
+    int Disp20Opcode = SystemZ::getDisp20Opcode(Opcode);
+    if (Disp20Opcode >= 0)
+      return Disp20Opcode;
+
+    // Check whether Opcode allows signed 20-bit displacements.
+    if (MCID.TSFlags & SystemZII::Has20BitOffset)
+      return Opcode;
+  }
+  return 0;
+}
+
+unsigned SystemZInstrInfo::getLoadAndTest(unsigned Opcode) const {
+  switch (Opcode) {
+  case SystemZ::L: return SystemZ::LT;
+  case SystemZ::LY: return SystemZ::LT;
+  case SystemZ::LG: return SystemZ::LTG;
+  case SystemZ::LGF: return SystemZ::LTGF;
+  case SystemZ::LR: return SystemZ::LTR;
+  case SystemZ::LGFR: return SystemZ::LTGFR;
+  case SystemZ::LGR: return SystemZ::LTGR;
+  case SystemZ::LER: return SystemZ::LTEBR;
+  case SystemZ::LDR: return SystemZ::LTDBR;
+  case SystemZ::LXR: return SystemZ::LTXBR;
+  case SystemZ::LCDFR: return SystemZ::LCDBR;
+  case SystemZ::LPDFR: return SystemZ::LPDBR;
+  case SystemZ::LNDFR: return SystemZ::LNDBR;
+  case SystemZ::LCDFR_32: return SystemZ::LCEBR;
+  case SystemZ::LPDFR_32: return SystemZ::LPEBR;
+  case SystemZ::LNDFR_32: return SystemZ::LNEBR;
+  // On zEC12 we prefer to use RISBGN. But if there is a chance to
+  // actually use the condition code, we may turn it back into RISBG.
+  // Note that RISBG is not really a "load-and-test" instruction,
+  // but sets the same condition code values, so is OK to use here.
+  case SystemZ::RISBGN: return SystemZ::RISBG;
+  default: return 0;
+  }
+}
+
+// Return true if Mask matches the regexp 0*1+0*, given that zero masks
+// have already been filtered out. Store the first set bit in LSB and
+// the number of set bits in Length if so.
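+// For example (illustration of the contract below):
+//   unsigned LSB, Length;
+//   isStringOfOnes(0x00f0, LSB, Length);  // true, with LSB == 4 and Length == 4
+//   isStringOfOnes(0x0005, LSB, Length);  // false; the set bits are not contiguous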
+static bool isStringOfOnes(uint64_t Mask, unsigned &LSB, unsigned &Length) { + unsigned First = findFirstSet(Mask); + uint64_t Top = (Mask >> First) + 1; + if ((Top & -Top) == Top) { + LSB = First; + Length = findFirstSet(Top); + return true; + } + return false; +} + +bool SystemZInstrInfo::isRxSBGMask(uint64_t Mask, unsigned BitSize, + unsigned &Start, unsigned &End) const { + // Reject trivial all-zero masks. + Mask &= allOnes(BitSize); + if (Mask == 0) + return false; + + // Handle the 1+0+ or 0+1+0* cases. Start then specifies the index of + // the msb and End specifies the index of the lsb. + unsigned LSB, Length; + if (isStringOfOnes(Mask, LSB, Length)) { + Start = 63 - (LSB + Length - 1); + End = 63 - LSB; + return true; + } + + // Handle the wrap-around 1+0+1+ cases. Start then specifies the msb + // of the low 1s and End specifies the lsb of the high 1s. + if (isStringOfOnes(Mask ^ allOnes(BitSize), LSB, Length)) { + assert(LSB > 0 && "Bottom bit must be set"); + assert(LSB + Length < BitSize && "Top bit must be set"); + Start = 63 - (LSB - 1); + End = 63 - (LSB + Length); + return true; + } + + return false; +} + +unsigned SystemZInstrInfo::getCompareAndBranch(unsigned Opcode, + const MachineInstr *MI) const { + switch (Opcode) { + case SystemZ::CR: + return SystemZ::CRJ; + case SystemZ::CGR: + return SystemZ::CGRJ; + case SystemZ::CHI: + return MI && isInt<8>(MI->getOperand(1).getImm()) ? SystemZ::CIJ : 0; + case SystemZ::CGHI: + return MI && isInt<8>(MI->getOperand(1).getImm()) ? SystemZ::CGIJ : 0; + case SystemZ::CLR: + return SystemZ::CLRJ; + case SystemZ::CLGR: + return SystemZ::CLGRJ; + case SystemZ::CLFI: + return MI && isUInt<8>(MI->getOperand(1).getImm()) ? SystemZ::CLIJ : 0; + case SystemZ::CLGFI: + return MI && isUInt<8>(MI->getOperand(1).getImm()) ? SystemZ::CLGIJ : 0; + default: + return 0; + } +} + +void SystemZInstrInfo::loadImmediate(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned Reg, uint64_t Value) const { + DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + unsigned Opcode; + if (isInt<16>(Value)) + Opcode = SystemZ::LGHI; + else if (SystemZ::isImmLL(Value)) + Opcode = SystemZ::LLILL; + else if (SystemZ::isImmLH(Value)) { + Opcode = SystemZ::LLILH; + Value >>= 16; + } else { + assert(isInt<32>(Value) && "Huge values not handled yet"); + Opcode = SystemZ::LGFI; + } + BuildMI(MBB, MBBI, DL, get(Opcode), Reg).addImm(Value); +} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h new file mode 100644 index 0000000..d9094ba --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h @@ -0,0 +1,246 @@ +//===-- SystemZInstrInfo.h - SystemZ instruction information ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the SystemZ implementation of the TargetInstrInfo class. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZINSTRINFO_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZINSTRINFO_H
+
+#include "SystemZ.h"
+#include "SystemZRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "SystemZGenInstrInfo.inc"
+
+namespace llvm {
+
+class SystemZTargetMachine;
+
+namespace SystemZII {
+enum {
+  // See comments in SystemZInstrFormats.td.
+  SimpleBDXLoad = (1 << 0),
+  SimpleBDXStore = (1 << 1),
+  Has20BitOffset = (1 << 2),
+  HasIndex = (1 << 3),
+  Is128Bit = (1 << 4),
+  AccessSizeMask = (31 << 5),
+  AccessSizeShift = 5,
+  CCValuesMask = (15 << 10),
+  CCValuesShift = 10,
+  CompareZeroCCMaskMask = (15 << 14),
+  CompareZeroCCMaskShift = 14,
+  CCMaskFirst = (1 << 18),
+  CCMaskLast = (1 << 19),
+  IsLogical = (1 << 20)
+};
+static inline unsigned getAccessSize(unsigned int Flags) {
+  return (Flags & AccessSizeMask) >> AccessSizeShift;
+}
+static inline unsigned getCCValues(unsigned int Flags) {
+  return (Flags & CCValuesMask) >> CCValuesShift;
+}
+static inline unsigned getCompareZeroCCMask(unsigned int Flags) {
+  return (Flags & CompareZeroCCMaskMask) >> CompareZeroCCMaskShift;
+}
+
+// SystemZ MachineOperand target flags.
+enum {
+  // Masks out the bits for the access model.
+  MO_SYMBOL_MODIFIER = (3 << 0),
+
+  // @GOT (aka @GOTENT)
+  MO_GOT = (1 << 0),
+
+  // @INDNTPOFF
+  MO_INDNTPOFF = (2 << 0)
+};
+// Classifies a branch.
+enum BranchType {
+  // An instruction that branches on the current value of CC.
+  BranchNormal,
+
+  // An instruction that performs a 32-bit signed comparison and branches
+  // on the result.
+  BranchC,
+
+  // An instruction that performs a 32-bit unsigned comparison and branches
+  // on the result.
+  BranchCL,
+
+  // An instruction that performs a 64-bit signed comparison and branches
+  // on the result.
+  BranchCG,
+
+  // An instruction that performs a 64-bit unsigned comparison and branches
+  // on the result.
+  BranchCLG,
+
+  // An instruction that decrements a 32-bit register and branches if
+  // the result is nonzero.
+  BranchCT,
+
+  // An instruction that decrements a 64-bit register and branches if
+  // the result is nonzero.
+  BranchCTG
+};
+// Information about a branch instruction.
+struct Branch {
+  // The type of the branch.
+  BranchType Type;
+
+  // CCMASK_<N> is set if CC might be equal to N.
+  unsigned CCValid;
+
+  // CCMASK_<N> is set if the branch should be taken when CC == N.
+  unsigned CCMask;
+
+  // The target of the branch.
+ const MachineOperand *Target; + + Branch(BranchType type, unsigned ccValid, unsigned ccMask, + const MachineOperand *target) + : Type(type), CCValid(ccValid), CCMask(ccMask), Target(target) {} +}; +} // end namespace SystemZII + +class SystemZSubtarget; +class SystemZInstrInfo : public SystemZGenInstrInfo { + const SystemZRegisterInfo RI; + SystemZSubtarget &STI; + + void splitMove(MachineBasicBlock::iterator MI, unsigned NewOpcode) const; + void splitAdjDynAlloc(MachineBasicBlock::iterator MI) const; + void expandRIPseudo(MachineInstr *MI, unsigned LowOpcode, + unsigned HighOpcode, bool ConvertHigh) const; + void expandRIEPseudo(MachineInstr *MI, unsigned LowOpcode, + unsigned LowOpcodeK, unsigned HighOpcode) const; + void expandRXYPseudo(MachineInstr *MI, unsigned LowOpcode, + unsigned HighOpcode) const; + void expandZExtPseudo(MachineInstr *MI, unsigned LowOpcode, + unsigned Size) const; + void emitGRX32Move(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + DebugLoc DL, unsigned DestReg, unsigned SrcReg, + unsigned LowLowOpcode, unsigned Size, bool KillSrc) const; + virtual void anchor(); + +public: + explicit SystemZInstrInfo(SystemZSubtarget &STI); + + // Override TargetInstrInfo. + unsigned isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const override; + unsigned isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const override; + bool isStackSlotCopy(const MachineInstr *MI, int &DestFrameIndex, + int &SrcFrameIndex) const override; + bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify) const override; + unsigned RemoveBranch(MachineBasicBlock &MBB) const override; + unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond, + DebugLoc DL) const override; + bool analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, + unsigned &SrcReg2, int &Mask, int &Value) const override; + bool optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, + unsigned SrcReg2, int Mask, int Value, + const MachineRegisterInfo *MRI) const override; + bool isPredicable(MachineInstr *MI) const override; + bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, + unsigned ExtraPredCycles, + BranchProbability Probability) const override; + bool isProfitableToIfCvt(MachineBasicBlock &TMBB, + unsigned NumCyclesT, unsigned ExtraPredCyclesT, + MachineBasicBlock &FMBB, + unsigned NumCyclesF, unsigned ExtraPredCyclesF, + BranchProbability Probability) const override; + bool PredicateInstruction(MachineInstr *MI, + ArrayRef<MachineOperand> Pred) const override; + void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + DebugLoc DL, unsigned DestReg, unsigned SrcReg, + bool KillSrc) const override; + void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const override; + void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned DestReg, int FrameIdx, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const override; + MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI, + MachineBasicBlock::iterator &MBBI, + LiveVariables *LV) const override; + MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, + ArrayRef<unsigned> Ops, + 
MachineBasicBlock::iterator InsertPt, + int FrameIndex) const override; + MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, + ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, + MachineInstr *LoadMI) const override; + bool expandPostRAPseudo(MachineBasicBlock::iterator MBBI) const override; + bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const + override; + + // Return the SystemZRegisterInfo, which this class owns. + const SystemZRegisterInfo &getRegisterInfo() const { return RI; } + + // Return the size in bytes of MI. + uint64_t getInstSizeInBytes(const MachineInstr *MI) const; + + // Return true if MI is a conditional or unconditional branch. + // When returning true, set Cond to the mask of condition-code + // values on which the instruction will branch, and set Target + // to the operand that contains the branch target. This target + // can be a register or a basic block. + SystemZII::Branch getBranchInfo(const MachineInstr *MI) const; + + // Get the load and store opcodes for a given register class. + void getLoadStoreOpcodes(const TargetRegisterClass *RC, + unsigned &LoadOpcode, unsigned &StoreOpcode) const; + + // Opcode is the opcode of an instruction that has an address operand, + // and the caller wants to perform that instruction's operation on an + // address that has displacement Offset. Return the opcode of a suitable + // instruction (which might be Opcode itself) or 0 if no such instruction + // exists. + unsigned getOpcodeForOffset(unsigned Opcode, int64_t Offset) const; + + // If Opcode is a load instruction that has a LOAD AND TEST form, + // return the opcode for the testing form, otherwise return 0. + unsigned getLoadAndTest(unsigned Opcode) const; + + // Return true if ROTATE AND ... SELECTED BITS can be used to select bits + // Mask of the R2 operand, given that only the low BitSize bits of Mask are + // significant. Set Start and End to the I3 and I4 operands if so. + bool isRxSBGMask(uint64_t Mask, unsigned BitSize, + unsigned &Start, unsigned &End) const; + + // If Opcode is a COMPARE opcode for which an associated COMPARE AND + // BRANCH exists, return the opcode for the latter, otherwise return 0. + // MI, if nonnull, is the compare instruction. + unsigned getCompareAndBranch(unsigned Opcode, + const MachineInstr *MI = nullptr) const; + + // Emit code before MBBI in MI to move immediate value Value into + // physical register Reg. + void loadImmediate(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned Reg, uint64_t Value) const; +}; +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td new file mode 100644 index 0000000..b9f2eb5 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td @@ -0,0 +1,1529 @@ +//===-- SystemZInstrInfo.td - General SystemZ instructions ----*- tblgen-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Stack allocation +//===----------------------------------------------------------------------===// + +def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i64imm:$amt), + [(callseq_start timm:$amt)]>; +def ADJCALLSTACKUP : Pseudo<(outs), (ins i64imm:$amt1, i64imm:$amt2), + [(callseq_end timm:$amt1, timm:$amt2)]>; + +let hasSideEffects = 0 in { + // Takes as input the value of the stack pointer after a dynamic allocation + // has been made. Sets the output to the address of the dynamically- + // allocated area itself, skipping the outgoing arguments. + // + // This expands to an LA or LAY instruction. We restrict the offset + // to the range of LA and keep the LAY range in reserve for when + // the size of the outgoing arguments is added. + def ADJDYNALLOC : Pseudo<(outs GR64:$dst), (ins dynalloc12only:$src), + [(set GR64:$dst, dynalloc12only:$src)]>; +} + +//===----------------------------------------------------------------------===// +// Control flow instructions +//===----------------------------------------------------------------------===// + +// A return instruction (br %r14). +let isReturn = 1, isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in + def Return : Alias<2, (outs), (ins), [(z_retflag)]>; + +// Unconditional branches. R1 is the condition-code mask (all 1s). +let isBranch = 1, isTerminator = 1, isBarrier = 1, R1 = 15 in { + let isIndirectBranch = 1 in + def BR : InstRR<0x07, (outs), (ins ADDR64:$R2), + "br\t$R2", [(brind ADDR64:$R2)]>; + + // An assembler extended mnemonic for BRC. + def J : InstRI<0xA74, (outs), (ins brtarget16:$I2), "j\t$I2", + [(br bb:$I2)]>; + + // An assembler extended mnemonic for BRCL. (The extension is "G" + // rather than "L" because "JL" is "Jump if Less".) + def JG : InstRIL<0xC04, (outs), (ins brtarget32:$I2), "jg\t$I2", []>; +} + +// Conditional branches. It's easier for LLVM to handle these branches +// in their raw BRC/BRCL form, with the 4-bit condition-code mask being +// the first operand. It seems friendlier to use mnemonic forms like +// JE and JLH when writing out the assembly though. +let isBranch = 1, isTerminator = 1, Uses = [CC] in { + let isCodeGenOnly = 1, CCMaskFirst = 1 in { + def BRC : InstRI<0xA74, (outs), (ins cond4:$valid, cond4:$R1, + brtarget16:$I2), "j$R1\t$I2", + [(z_br_ccmask cond4:$valid, cond4:$R1, bb:$I2)]>; + def BRCL : InstRIL<0xC04, (outs), (ins cond4:$valid, cond4:$R1, + brtarget32:$I2), "jg$R1\t$I2", []>; + } + def AsmBRC : InstRI<0xA74, (outs), (ins imm32zx4:$R1, brtarget16:$I2), + "brc\t$R1, $I2", []>; + def AsmBRCL : InstRIL<0xC04, (outs), (ins imm32zx4:$R1, brtarget32:$I2), + "brcl\t$R1, $I2", []>; + def AsmBCR : InstRR<0x07, (outs), (ins imm32zx4:$R1, GR64:$R2), + "bcr\t$R1, $R2", []>; +} + +// Fused compare-and-branch instructions. As for normal branches, +// we handle these instructions internally in their raw CRJ-like form, +// but use assembly macros like CRJE when writing them out. +// +// These instructions do not use or clobber the condition codes. +// We nevertheless pretend that they clobber CC, so that we can lower +// them to separate comparisons and BRCLs if the branch ends up being +// out of range. 
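+//
+// For example, if the branch target of "crje %r1, %r2, label" turns out to
+// be out of range, the branch-relaxation code can rewrite it as a separate
+// "cr %r1, %r2" followed by "jge label".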
+multiclass CompareBranches<Operand ccmask, string pos1, string pos2> { + let isBranch = 1, isTerminator = 1, Defs = [CC] in { + def RJ : InstRIEb<0xEC76, (outs), (ins GR32:$R1, GR32:$R2, ccmask:$M3, + brtarget16:$RI4), + "crj"##pos1##"\t$R1, $R2, "##pos2##"$RI4", []>; + def GRJ : InstRIEb<0xEC64, (outs), (ins GR64:$R1, GR64:$R2, ccmask:$M3, + brtarget16:$RI4), + "cgrj"##pos1##"\t$R1, $R2, "##pos2##"$RI4", []>; + def IJ : InstRIEc<0xEC7E, (outs), (ins GR32:$R1, imm32sx8:$I2, ccmask:$M3, + brtarget16:$RI4), + "cij"##pos1##"\t$R1, $I2, "##pos2##"$RI4", []>; + def GIJ : InstRIEc<0xEC7C, (outs), (ins GR64:$R1, imm64sx8:$I2, ccmask:$M3, + brtarget16:$RI4), + "cgij"##pos1##"\t$R1, $I2, "##pos2##"$RI4", []>; + def LRJ : InstRIEb<0xEC77, (outs), (ins GR32:$R1, GR32:$R2, ccmask:$M3, + brtarget16:$RI4), + "clrj"##pos1##"\t$R1, $R2, "##pos2##"$RI4", []>; + def LGRJ : InstRIEb<0xEC65, (outs), (ins GR64:$R1, GR64:$R2, ccmask:$M3, + brtarget16:$RI4), + "clgrj"##pos1##"\t$R1, $R2, "##pos2##"$RI4", []>; + def LIJ : InstRIEc<0xEC7F, (outs), (ins GR32:$R1, imm32zx8:$I2, ccmask:$M3, + brtarget16:$RI4), + "clij"##pos1##"\t$R1, $I2, "##pos2##"$RI4", []>; + def LGIJ : InstRIEc<0xEC7D, (outs), (ins GR64:$R1, imm64zx8:$I2, ccmask:$M3, + brtarget16:$RI4), + "clgij"##pos1##"\t$R1, $I2, "##pos2##"$RI4", []>; + } +} +let isCodeGenOnly = 1 in + defm C : CompareBranches<cond4, "$M3", "">; +defm AsmC : CompareBranches<imm32zx4, "", "$M3, ">; + +// Define AsmParser mnemonics for each general condition-code mask +// (integer or floating-point) +multiclass CondExtendedMnemonic<bits<4> ccmask, string name> { + let R1 = ccmask in { + def J : InstRI<0xA74, (outs), (ins brtarget16:$I2), + "j"##name##"\t$I2", []>; + def JG : InstRIL<0xC04, (outs), (ins brtarget32:$I2), + "jg"##name##"\t$I2", []>; + def BR : InstRR<0x07, (outs), (ins ADDR64:$R2), "b"##name##"r\t$R2", []>; + } + def LOCR : FixedCondUnaryRRF<"locr"##name, 0xB9F2, GR32, GR32, ccmask>; + def LOCGR : FixedCondUnaryRRF<"locgr"##name, 0xB9E2, GR64, GR64, ccmask>; + def LOC : FixedCondUnaryRSY<"loc"##name, 0xEBF2, GR32, ccmask, 4>; + def LOCG : FixedCondUnaryRSY<"locg"##name, 0xEBE2, GR64, ccmask, 8>; + def STOC : FixedCondStoreRSY<"stoc"##name, 0xEBF3, GR32, ccmask, 4>; + def STOCG : FixedCondStoreRSY<"stocg"##name, 0xEBE3, GR64, ccmask, 8>; +} +defm AsmO : CondExtendedMnemonic<1, "o">; +defm AsmH : CondExtendedMnemonic<2, "h">; +defm AsmNLE : CondExtendedMnemonic<3, "nle">; +defm AsmL : CondExtendedMnemonic<4, "l">; +defm AsmNHE : CondExtendedMnemonic<5, "nhe">; +defm AsmLH : CondExtendedMnemonic<6, "lh">; +defm AsmNE : CondExtendedMnemonic<7, "ne">; +defm AsmE : CondExtendedMnemonic<8, "e">; +defm AsmNLH : CondExtendedMnemonic<9, "nlh">; +defm AsmHE : CondExtendedMnemonic<10, "he">; +defm AsmNL : CondExtendedMnemonic<11, "nl">; +defm AsmLE : CondExtendedMnemonic<12, "le">; +defm AsmNH : CondExtendedMnemonic<13, "nh">; +defm AsmNO : CondExtendedMnemonic<14, "no">; + +// Define AsmParser mnemonics for each integer condition-code mask. +// This is like the list above, except that condition 3 is not possible +// and that the low bit of the mask is therefore always 0. This means +// that each condition has two names. Conditions "o" and "no" are not used. +// +// We don't make one of the two names an alias of the other because +// we need the custom parsing routines to select the correct register class. 
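+//
+// For example, condition mask 10 can be written either as "crjhe"
+// ("high or equal") or as "crjnl" ("not low"); both spellings parse to the
+// same CRJ encoding.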
+multiclass IntCondExtendedMnemonicA<bits<4> ccmask, string name> { + let M3 = ccmask in { + def CR : InstRIEb<0xEC76, (outs), (ins GR32:$R1, GR32:$R2, + brtarget16:$RI4), + "crj"##name##"\t$R1, $R2, $RI4", []>; + def CGR : InstRIEb<0xEC64, (outs), (ins GR64:$R1, GR64:$R2, + brtarget16:$RI4), + "cgrj"##name##"\t$R1, $R2, $RI4", []>; + def CI : InstRIEc<0xEC7E, (outs), (ins GR32:$R1, imm32sx8:$I2, + brtarget16:$RI4), + "cij"##name##"\t$R1, $I2, $RI4", []>; + def CGI : InstRIEc<0xEC7C, (outs), (ins GR64:$R1, imm64sx8:$I2, + brtarget16:$RI4), + "cgij"##name##"\t$R1, $I2, $RI4", []>; + def CLR : InstRIEb<0xEC77, (outs), (ins GR32:$R1, GR32:$R2, + brtarget16:$RI4), + "clrj"##name##"\t$R1, $R2, $RI4", []>; + def CLGR : InstRIEb<0xEC65, (outs), (ins GR64:$R1, GR64:$R2, + brtarget16:$RI4), + "clgrj"##name##"\t$R1, $R2, $RI4", []>; + def CLI : InstRIEc<0xEC7F, (outs), (ins GR32:$R1, imm32zx8:$I2, + brtarget16:$RI4), + "clij"##name##"\t$R1, $I2, $RI4", []>; + def CLGI : InstRIEc<0xEC7D, (outs), (ins GR64:$R1, imm64zx8:$I2, + brtarget16:$RI4), + "clgij"##name##"\t$R1, $I2, $RI4", []>; + } +} +multiclass IntCondExtendedMnemonic<bits<4> ccmask, string name1, string name2> + : IntCondExtendedMnemonicA<ccmask, name1> { + let isAsmParserOnly = 1 in + defm Alt : IntCondExtendedMnemonicA<ccmask, name2>; +} +defm AsmJH : IntCondExtendedMnemonic<2, "h", "nle">; +defm AsmJL : IntCondExtendedMnemonic<4, "l", "nhe">; +defm AsmJLH : IntCondExtendedMnemonic<6, "lh", "ne">; +defm AsmJE : IntCondExtendedMnemonic<8, "e", "nlh">; +defm AsmJHE : IntCondExtendedMnemonic<10, "he", "nl">; +defm AsmJLE : IntCondExtendedMnemonic<12, "le", "nh">; + +// Decrement a register and branch if it is nonzero. These don't clobber CC, +// but we might need to split long branches into sequences that do. +let Defs = [CC] in { + def BRCT : BranchUnaryRI<"brct", 0xA76, GR32>; + def BRCTG : BranchUnaryRI<"brctg", 0xA77, GR64>; +} + +//===----------------------------------------------------------------------===// +// Select instructions +//===----------------------------------------------------------------------===// + +def Select32Mux : SelectWrapper<GRX32>, Requires<[FeatureHighWord]>; +def Select32 : SelectWrapper<GR32>; +def Select64 : SelectWrapper<GR64>; + +// We don't define 32-bit Mux stores because the low-only STOC should +// always be used if possible. 
+defm CondStore8Mux : CondStores<GRX32, nonvolatile_truncstorei8, + nonvolatile_anyextloadi8, bdxaddr20only>, + Requires<[FeatureHighWord]>; +defm CondStore16Mux : CondStores<GRX32, nonvolatile_truncstorei16, + nonvolatile_anyextloadi16, bdxaddr20only>, + Requires<[FeatureHighWord]>; +defm CondStore8 : CondStores<GR32, nonvolatile_truncstorei8, + nonvolatile_anyextloadi8, bdxaddr20only>; +defm CondStore16 : CondStores<GR32, nonvolatile_truncstorei16, + nonvolatile_anyextloadi16, bdxaddr20only>; +defm CondStore32 : CondStores<GR32, nonvolatile_store, + nonvolatile_load, bdxaddr20only>; + +defm : CondStores64<CondStore8, CondStore8Inv, nonvolatile_truncstorei8, + nonvolatile_anyextloadi8, bdxaddr20only>; +defm : CondStores64<CondStore16, CondStore16Inv, nonvolatile_truncstorei16, + nonvolatile_anyextloadi16, bdxaddr20only>; +defm : CondStores64<CondStore32, CondStore32Inv, nonvolatile_truncstorei32, + nonvolatile_anyextloadi32, bdxaddr20only>; +defm CondStore64 : CondStores<GR64, nonvolatile_store, + nonvolatile_load, bdxaddr20only>; + +//===----------------------------------------------------------------------===// +// Call instructions +//===----------------------------------------------------------------------===// + +let isCall = 1, Defs = [R14D, CC] in { + def CallBRASL : Alias<6, (outs), (ins pcrel32:$I2, variable_ops), + [(z_call pcrel32:$I2)]>; + def CallBASR : Alias<2, (outs), (ins ADDR64:$R2, variable_ops), + [(z_call ADDR64:$R2)]>; +} + +// Sibling calls. Indirect sibling calls must be via R1, since R2 upwards +// are argument registers and since branching to R0 is a no-op. +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { + def CallJG : Alias<6, (outs), (ins pcrel32:$I2), + [(z_sibcall pcrel32:$I2)]>; + let Uses = [R1D] in + def CallBR : Alias<2, (outs), (ins), [(z_sibcall R1D)]>; +} + +// TLS calls. These will be lowered into a call to __tls_get_offset, +// with an extra relocation specifying the TLS symbol. +let isCall = 1, Defs = [R14D, CC] in { + def TLS_GDCALL : Alias<6, (outs), (ins tlssym:$I2, variable_ops), + [(z_tls_gdcall tglobaltlsaddr:$I2)]>; + def TLS_LDCALL : Alias<6, (outs), (ins tlssym:$I2, variable_ops), + [(z_tls_ldcall tglobaltlsaddr:$I2)]>; +} + +// Define the general form of the call instructions for the asm parser. +// These instructions don't hard-code %r14 as the return address register. +// Allow an optional TLS marker symbol to generate TLS call relocations. +def BRAS : InstRI<0xA75, (outs), (ins GR64:$R1, brtarget16tls:$I2), + "bras\t$R1, $I2", []>; +def BRASL : InstRIL<0xC05, (outs), (ins GR64:$R1, brtarget32tls:$I2), + "brasl\t$R1, $I2", []>; +def BASR : InstRR<0x0D, (outs), (ins GR64:$R1, ADDR64:$R2), + "basr\t$R1, $R2", []>; + +//===----------------------------------------------------------------------===// +// Move instructions +//===----------------------------------------------------------------------===// + +// Register moves. +let hasSideEffects = 0 in { + // Expands to LR, RISBHG or RISBLG, depending on the choice of registers. + def LRMux : UnaryRRPseudo<"l", null_frag, GRX32, GRX32>, + Requires<[FeatureHighWord]>; + def LR : UnaryRR <"l", 0x18, null_frag, GR32, GR32>; + def LGR : UnaryRRE<"lg", 0xB904, null_frag, GR64, GR64>; +} +let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in { + def LTR : UnaryRR <"lt", 0x12, null_frag, GR32, GR32>; + def LTGR : UnaryRRE<"ltg", 0xB902, null_frag, GR64, GR64>; +} + +// Move on condition. 
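+// For example, "locre %r2, %r3" copies %r3 into %r2 only when the condition
+// code indicates "equal", and leaves %r2 unchanged otherwise.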
+let isCodeGenOnly = 1, Uses = [CC] in {
+  def LOCR : CondUnaryRRF<"loc", 0xB9F2, GR32, GR32>;
+  def LOCGR : CondUnaryRRF<"locg", 0xB9E2, GR64, GR64>;
+}
+let Uses = [CC] in {
+  def AsmLOCR : AsmCondUnaryRRF<"loc", 0xB9F2, GR32, GR32>;
+  def AsmLOCGR : AsmCondUnaryRRF<"locg", 0xB9E2, GR64, GR64>;
+}
+
+// Immediate moves.
+let hasSideEffects = 0, isAsCheapAsAMove = 1, isMoveImm = 1,
+    isReMaterializable = 1 in {
+  // 16-bit sign-extended immediates. LHIMux expands to LHI or IIHF,
+  // depending on the choice of register.
+  def LHIMux : UnaryRIPseudo<bitconvert, GRX32, imm32sx16>,
+               Requires<[FeatureHighWord]>;
+  def LHI : UnaryRI<"lhi", 0xA78, bitconvert, GR32, imm32sx16>;
+  def LGHI : UnaryRI<"lghi", 0xA79, bitconvert, GR64, imm64sx16>;
+
+  // Other 16-bit immediates.
+  def LLILL : UnaryRI<"llill", 0xA5F, bitconvert, GR64, imm64ll16>;
+  def LLILH : UnaryRI<"llilh", 0xA5E, bitconvert, GR64, imm64lh16>;
+  def LLIHL : UnaryRI<"llihl", 0xA5D, bitconvert, GR64, imm64hl16>;
+  def LLIHH : UnaryRI<"llihh", 0xA5C, bitconvert, GR64, imm64hh16>;
+
+  // 32-bit immediates.
+  def LGFI : UnaryRIL<"lgfi", 0xC01, bitconvert, GR64, imm64sx32>;
+  def LLILF : UnaryRIL<"llilf", 0xC0F, bitconvert, GR64, imm64lf32>;
+  def LLIHF : UnaryRIL<"llihf", 0xC0E, bitconvert, GR64, imm64hf32>;
+}
+
+// Register loads.
+let canFoldAsLoad = 1, SimpleBDXLoad = 1 in {
+  // Expands to L, LY or LFH, depending on the choice of register.
+  def LMux : UnaryRXYPseudo<"l", load, GRX32, 4>,
+             Requires<[FeatureHighWord]>;
+  defm L : UnaryRXPair<"l", 0x58, 0xE358, load, GR32, 4>;
+  def LFH : UnaryRXY<"lfh", 0xE3CA, load, GRH32, 4>,
+            Requires<[FeatureHighWord]>;
+  def LG : UnaryRXY<"lg", 0xE304, load, GR64, 8>;
+
+  // These instructions are split after register allocation, so we don't
+  // want a custom inserter.
+  let Has20BitOffset = 1, HasIndex = 1, Is128Bit = 1 in {
+    def L128 : Pseudo<(outs GR128:$dst), (ins bdxaddr20only128:$src),
+                      [(set GR128:$dst, (load bdxaddr20only128:$src))]>;
+  }
+}
+let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in {
+  def LT : UnaryRXY<"lt", 0xE312, load, GR32, 4>;
+  def LTG : UnaryRXY<"ltg", 0xE302, load, GR64, 8>;
+}
+
+let canFoldAsLoad = 1 in {
+  def LRL : UnaryRILPC<"lrl", 0xC4D, aligned_load, GR32>;
+  def LGRL : UnaryRILPC<"lgrl", 0xC48, aligned_load, GR64>;
+}
+
+// Load on condition.
+let isCodeGenOnly = 1, Uses = [CC] in {
+  def LOC : CondUnaryRSY<"loc", 0xEBF2, nonvolatile_load, GR32, 4>;
+  def LOCG : CondUnaryRSY<"locg", 0xEBE2, nonvolatile_load, GR64, 8>;
+}
+let Uses = [CC] in {
+  def AsmLOC : AsmCondUnaryRSY<"loc", 0xEBF2, GR32, 4>;
+  def AsmLOCG : AsmCondUnaryRSY<"locg", 0xEBE2, GR64, 8>;
+}
+
+// Register stores.
+let SimpleBDXStore = 1 in {
+  // Expands to ST, STY or STFH, depending on the choice of register.
+  def STMux : StoreRXYPseudo<store, GRX32, 4>,
+              Requires<[FeatureHighWord]>;
+  defm ST : StoreRXPair<"st", 0x50, 0xE350, store, GR32, 4>;
+  def STFH : StoreRXY<"stfh", 0xE3CB, store, GRH32, 4>,
+             Requires<[FeatureHighWord]>;
+  def STG : StoreRXY<"stg", 0xE324, store, GR64, 8>;
+
+  // These instructions are split after register allocation, so we don't
+  // want a custom inserter.
+  let Has20BitOffset = 1, HasIndex = 1, Is128Bit = 1 in {
+    def ST128 : Pseudo<(outs), (ins GR128:$src, bdxaddr20only128:$dst),
+                       [(store GR128:$src, bdxaddr20only128:$dst)]>;
+  }
+}
+def STRL : StoreRILPC<"strl", 0xC4F, aligned_store, GR32>;
+def STGRL : StoreRILPC<"stgrl", 0xC4B, aligned_store, GR64>;
+
+// Store on condition.
+let isCodeGenOnly = 1, Uses = [CC] in { + def STOC : CondStoreRSY<"stoc", 0xEBF3, GR32, 4>; + def STOCG : CondStoreRSY<"stocg", 0xEBE3, GR64, 8>; +} +let Uses = [CC] in { + def AsmSTOC : AsmCondStoreRSY<"stoc", 0xEBF3, GR32, 4>; + def AsmSTOCG : AsmCondStoreRSY<"stocg", 0xEBE3, GR64, 8>; +} + +// 8-bit immediate stores to 8-bit fields. +defm MVI : StoreSIPair<"mvi", 0x92, 0xEB52, truncstorei8, imm32zx8trunc>; + +// 16-bit immediate stores to 16-, 32- or 64-bit fields. +def MVHHI : StoreSIL<"mvhhi", 0xE544, truncstorei16, imm32sx16trunc>; +def MVHI : StoreSIL<"mvhi", 0xE54C, store, imm32sx16>; +def MVGHI : StoreSIL<"mvghi", 0xE548, store, imm64sx16>; + +// Memory-to-memory moves. +let mayLoad = 1, mayStore = 1 in + defm MVC : MemorySS<"mvc", 0xD2, z_mvc, z_mvc_loop>; + +// String moves. +let mayLoad = 1, mayStore = 1, Defs = [CC] in + defm MVST : StringRRE<"mvst", 0xB255, z_stpcpy>; + +//===----------------------------------------------------------------------===// +// Sign extensions +//===----------------------------------------------------------------------===// +// +// Note that putting these before zero extensions mean that we will prefer +// them for anyextload*. There's not really much to choose between the two +// either way, but signed-extending loads have a short LH and a long LHY, +// while zero-extending loads have only the long LLH. +// +//===----------------------------------------------------------------------===// + +// 32-bit extensions from registers. +let hasSideEffects = 0 in { + def LBR : UnaryRRE<"lb", 0xB926, sext8, GR32, GR32>; + def LHR : UnaryRRE<"lh", 0xB927, sext16, GR32, GR32>; +} + +// 64-bit extensions from registers. +let hasSideEffects = 0 in { + def LGBR : UnaryRRE<"lgb", 0xB906, sext8, GR64, GR64>; + def LGHR : UnaryRRE<"lgh", 0xB907, sext16, GR64, GR64>; + def LGFR : UnaryRRE<"lgf", 0xB914, sext32, GR64, GR32>; +} +let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in + def LTGFR : UnaryRRE<"ltgf", 0xB912, null_frag, GR64, GR32>; + +// Match 32-to-64-bit sign extensions in which the source is already +// in a 64-bit register. +def : Pat<(sext_inreg GR64:$src, i32), + (LGFR (EXTRACT_SUBREG GR64:$src, subreg_l32))>; + +// 32-bit extensions from 8-bit memory. LBMux expands to LB or LBH, +// depending on the choice of register. +def LBMux : UnaryRXYPseudo<"lb", asextloadi8, GRX32, 1>, + Requires<[FeatureHighWord]>; +def LB : UnaryRXY<"lb", 0xE376, asextloadi8, GR32, 1>; +def LBH : UnaryRXY<"lbh", 0xE3C0, asextloadi8, GRH32, 1>, + Requires<[FeatureHighWord]>; + +// 32-bit extensions from 16-bit memory. LHMux expands to LH or LHH, +// depending on the choice of register. +def LHMux : UnaryRXYPseudo<"lh", asextloadi16, GRX32, 2>, + Requires<[FeatureHighWord]>; +defm LH : UnaryRXPair<"lh", 0x48, 0xE378, asextloadi16, GR32, 2>; +def LHH : UnaryRXY<"lhh", 0xE3C4, asextloadi16, GRH32, 2>, + Requires<[FeatureHighWord]>; +def LHRL : UnaryRILPC<"lhrl", 0xC45, aligned_asextloadi16, GR32>; + +// 64-bit extensions from memory. 
+def LGB : UnaryRXY<"lgb", 0xE377, asextloadi8, GR64, 1>; +def LGH : UnaryRXY<"lgh", 0xE315, asextloadi16, GR64, 2>; +def LGF : UnaryRXY<"lgf", 0xE314, asextloadi32, GR64, 4>; +def LGHRL : UnaryRILPC<"lghrl", 0xC44, aligned_asextloadi16, GR64>; +def LGFRL : UnaryRILPC<"lgfrl", 0xC4C, aligned_asextloadi32, GR64>; +let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in + def LTGF : UnaryRXY<"ltgf", 0xE332, asextloadi32, GR64, 4>; + +//===----------------------------------------------------------------------===// +// Zero extensions +//===----------------------------------------------------------------------===// + +// 32-bit extensions from registers. +let hasSideEffects = 0 in { + // Expands to LLCR or RISB[LH]G, depending on the choice of registers. + def LLCRMux : UnaryRRPseudo<"llc", zext8, GRX32, GRX32>, + Requires<[FeatureHighWord]>; + def LLCR : UnaryRRE<"llc", 0xB994, zext8, GR32, GR32>; + // Expands to LLHR or RISB[LH]G, depending on the choice of registers. + def LLHRMux : UnaryRRPseudo<"llh", zext16, GRX32, GRX32>, + Requires<[FeatureHighWord]>; + def LLHR : UnaryRRE<"llh", 0xB995, zext16, GR32, GR32>; +} + +// 64-bit extensions from registers. +let hasSideEffects = 0 in { + def LLGCR : UnaryRRE<"llgc", 0xB984, zext8, GR64, GR64>; + def LLGHR : UnaryRRE<"llgh", 0xB985, zext16, GR64, GR64>; + def LLGFR : UnaryRRE<"llgf", 0xB916, zext32, GR64, GR32>; +} + +// Match 32-to-64-bit zero extensions in which the source is already +// in a 64-bit register. +def : Pat<(and GR64:$src, 0xffffffff), + (LLGFR (EXTRACT_SUBREG GR64:$src, subreg_l32))>; + +// 32-bit extensions from 8-bit memory. LLCMux expands to LLC or LLCH, +// depending on the choice of register. +def LLCMux : UnaryRXYPseudo<"llc", azextloadi8, GRX32, 1>, + Requires<[FeatureHighWord]>; +def LLC : UnaryRXY<"llc", 0xE394, azextloadi8, GR32, 1>; +def LLCH : UnaryRXY<"llch", 0xE3C2, azextloadi8, GRH32, 1>, + Requires<[FeatureHighWord]>; + +// 32-bit extensions from 16-bit memory. LLHMux expands to LLH or LLHH, +// depending on the choice of register. +def LLHMux : UnaryRXYPseudo<"llh", azextloadi16, GRX32, 2>, + Requires<[FeatureHighWord]>; +def LLH : UnaryRXY<"llh", 0xE395, azextloadi16, GR32, 2>; +def LLHH : UnaryRXY<"llhh", 0xE3C6, azextloadi16, GRH32, 2>, + Requires<[FeatureHighWord]>; +def LLHRL : UnaryRILPC<"llhrl", 0xC42, aligned_azextloadi16, GR32>; + +// 64-bit extensions from memory. +def LLGC : UnaryRXY<"llgc", 0xE390, azextloadi8, GR64, 1>; +def LLGH : UnaryRXY<"llgh", 0xE391, azextloadi16, GR64, 2>; +def LLGF : UnaryRXY<"llgf", 0xE316, azextloadi32, GR64, 4>; +def LLGHRL : UnaryRILPC<"llghrl", 0xC46, aligned_azextloadi16, GR64>; +def LLGFRL : UnaryRILPC<"llgfrl", 0xC4E, aligned_azextloadi32, GR64>; + +//===----------------------------------------------------------------------===// +// Truncations +//===----------------------------------------------------------------------===// + +// Truncations of 64-bit registers to 32-bit registers. +def : Pat<(i32 (trunc GR64:$src)), + (EXTRACT_SUBREG GR64:$src, subreg_l32)>; + +// Truncations of 32-bit registers to 8-bit memory. STCMux expands to +// STC, STCY or STCH, depending on the choice of register. +def STCMux : StoreRXYPseudo<truncstorei8, GRX32, 1>, + Requires<[FeatureHighWord]>; +defm STC : StoreRXPair<"stc", 0x42, 0xE372, truncstorei8, GR32, 1>; +def STCH : StoreRXY<"stch", 0xE3C3, truncstorei8, GRH32, 1>, + Requires<[FeatureHighWord]>; + +// Truncations of 32-bit registers to 16-bit memory. 
STHMux expands to +// STH, STHY or STHH, depending on the choice of register. +def STHMux : StoreRXYPseudo<truncstorei16, GRX32, 1>, + Requires<[FeatureHighWord]>; +defm STH : StoreRXPair<"sth", 0x40, 0xE370, truncstorei16, GR32, 2>; +def STHH : StoreRXY<"sthh", 0xE3C7, truncstorei16, GRH32, 2>, + Requires<[FeatureHighWord]>; +def STHRL : StoreRILPC<"sthrl", 0xC47, aligned_truncstorei16, GR32>; + +// Truncations of 64-bit registers to memory. +defm : StoreGR64Pair<STC, STCY, truncstorei8>; +defm : StoreGR64Pair<STH, STHY, truncstorei16>; +def : StoreGR64PC<STHRL, aligned_truncstorei16>; +defm : StoreGR64Pair<ST, STY, truncstorei32>; +def : StoreGR64PC<STRL, aligned_truncstorei32>; + +//===----------------------------------------------------------------------===// +// Multi-register moves +//===----------------------------------------------------------------------===// + +// Multi-register loads. +def LMG : LoadMultipleRSY<"lmg", 0xEB04, GR64>; + +// Multi-register stores. +def STMG : StoreMultipleRSY<"stmg", 0xEB24, GR64>; + +//===----------------------------------------------------------------------===// +// Byte swaps +//===----------------------------------------------------------------------===// + +// Byte-swapping register moves. +let hasSideEffects = 0 in { + def LRVR : UnaryRRE<"lrv", 0xB91F, bswap, GR32, GR32>; + def LRVGR : UnaryRRE<"lrvg", 0xB90F, bswap, GR64, GR64>; +} + +// Byte-swapping loads. Unlike normal loads, these instructions are +// allowed to access storage more than once. +def LRV : UnaryRXY<"lrv", 0xE31E, loadu<bswap, nonvolatile_load>, GR32, 4>; +def LRVG : UnaryRXY<"lrvg", 0xE30F, loadu<bswap, nonvolatile_load>, GR64, 8>; + +// Likewise byte-swapping stores. +def STRV : StoreRXY<"strv", 0xE33E, storeu<bswap, nonvolatile_store>, GR32, 4>; +def STRVG : StoreRXY<"strvg", 0xE32F, storeu<bswap, nonvolatile_store>, + GR64, 8>; + +//===----------------------------------------------------------------------===// +// Load address instructions +//===----------------------------------------------------------------------===// + +// Load BDX-style addresses. +let hasSideEffects = 0, isAsCheapAsAMove = 1, isReMaterializable = 1, + DispKey = "la" in { + let DispSize = "12" in + def LA : InstRX<0x41, (outs GR64:$R1), (ins laaddr12pair:$XBD2), + "la\t$R1, $XBD2", + [(set GR64:$R1, laaddr12pair:$XBD2)]>; + let DispSize = "20" in + def LAY : InstRXY<0xE371, (outs GR64:$R1), (ins laaddr20pair:$XBD2), + "lay\t$R1, $XBD2", + [(set GR64:$R1, laaddr20pair:$XBD2)]>; +} + +// Load a PC-relative address. There's no version of this instruction +// with a 16-bit offset, so there's no relaxation. +let hasSideEffects = 0, isAsCheapAsAMove = 1, isMoveImm = 1, + isReMaterializable = 1 in { + def LARL : InstRIL<0xC00, (outs GR64:$R1), (ins pcrel32:$I2), + "larl\t$R1, $I2", + [(set GR64:$R1, pcrel32:$I2)]>; +} + +// Load the Global Offset Table address. This will be lowered into a +// larl $R1, _GLOBAL_OFFSET_TABLE_ +// instruction. 
+def GOT : Alias<6, (outs GR64:$R1), (ins), + [(set GR64:$R1, (global_offset_table))]>; + +//===----------------------------------------------------------------------===// +// Absolute and Negation +//===----------------------------------------------------------------------===// + +let Defs = [CC] in { + let CCValues = 0xF, CompareZeroCCMask = 0x8 in { + def LPR : UnaryRR <"lp", 0x10, z_iabs, GR32, GR32>; + def LPGR : UnaryRRE<"lpg", 0xB900, z_iabs, GR64, GR64>; + } + let CCValues = 0xE, CompareZeroCCMask = 0xE in + def LPGFR : UnaryRRE<"lpgf", 0xB910, null_frag, GR64, GR32>; +} +def : Pat<(z_iabs32 GR32:$src), (LPR GR32:$src)>; +def : Pat<(z_iabs64 GR64:$src), (LPGR GR64:$src)>; +defm : SXU<z_iabs, LPGFR>; +defm : SXU<z_iabs64, LPGFR>; + +let Defs = [CC] in { + let CCValues = 0xF, CompareZeroCCMask = 0x8 in { + def LNR : UnaryRR <"ln", 0x11, z_inegabs, GR32, GR32>; + def LNGR : UnaryRRE<"lng", 0xB901, z_inegabs, GR64, GR64>; + } + let CCValues = 0xE, CompareZeroCCMask = 0xE in + def LNGFR : UnaryRRE<"lngf", 0xB911, null_frag, GR64, GR32>; +} +def : Pat<(z_inegabs32 GR32:$src), (LNR GR32:$src)>; +def : Pat<(z_inegabs64 GR64:$src), (LNGR GR64:$src)>; +defm : SXU<z_inegabs, LNGFR>; +defm : SXU<z_inegabs64, LNGFR>; + +let Defs = [CC] in { + let CCValues = 0xF, CompareZeroCCMask = 0x8 in { + def LCR : UnaryRR <"lc", 0x13, ineg, GR32, GR32>; + def LCGR : UnaryRRE<"lcg", 0xB903, ineg, GR64, GR64>; + } + let CCValues = 0xE, CompareZeroCCMask = 0xE in + def LCGFR : UnaryRRE<"lcgf", 0xB913, null_frag, GR64, GR32>; +} +defm : SXU<ineg, LCGFR>; + +//===----------------------------------------------------------------------===// +// Insertion +//===----------------------------------------------------------------------===// + +let isCodeGenOnly = 1 in + defm IC32 : BinaryRXPair<"ic", 0x43, 0xE373, inserti8, GR32, azextloadi8, 1>; +defm IC : BinaryRXPair<"ic", 0x43, 0xE373, inserti8, GR64, azextloadi8, 1>; + +defm : InsertMem<"inserti8", IC32, GR32, azextloadi8, bdxaddr12pair>; +defm : InsertMem<"inserti8", IC32Y, GR32, azextloadi8, bdxaddr20pair>; + +defm : InsertMem<"inserti8", IC, GR64, azextloadi8, bdxaddr12pair>; +defm : InsertMem<"inserti8", ICY, GR64, azextloadi8, bdxaddr20pair>; + +// Insertions of a 16-bit immediate, leaving other bits unaffected. +// We don't have or_as_insert equivalents of these operations because +// OI is available instead. +// +// IIxMux expands to II[LH]x, depending on the choice of register. +def IILMux : BinaryRIPseudo<insertll, GRX32, imm32ll16>, + Requires<[FeatureHighWord]>; +def IIHMux : BinaryRIPseudo<insertlh, GRX32, imm32lh16>, + Requires<[FeatureHighWord]>; +def IILL : BinaryRI<"iill", 0xA53, insertll, GR32, imm32ll16>; +def IILH : BinaryRI<"iilh", 0xA52, insertlh, GR32, imm32lh16>; +def IIHL : BinaryRI<"iihl", 0xA51, insertll, GRH32, imm32ll16>; +def IIHH : BinaryRI<"iihh", 0xA50, insertlh, GRH32, imm32lh16>; +def IILL64 : BinaryAliasRI<insertll, GR64, imm64ll16>; +def IILH64 : BinaryAliasRI<insertlh, GR64, imm64lh16>; +def IIHL64 : BinaryAliasRI<inserthl, GR64, imm64hl16>; +def IIHH64 : BinaryAliasRI<inserthh, GR64, imm64hh16>; + +// ...likewise for 32-bit immediates. For GR32s this is a general +// full-width move. (We use IILF rather than something like LLILF +// for 32-bit moves because IILF leaves the upper 32 bits of the +// GR64 unchanged.) 
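+//
+// For example, if %r3 contains 0x123456789abcdef0, then "iilf %r3, 0"
+// leaves 0x1234567800000000 in %r3, whereas "llilf %r3, 0" would clear
+// the high word as well.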
+let isAsCheapAsAMove = 1, isMoveImm = 1, isReMaterializable = 1 in { + def IIFMux : UnaryRIPseudo<bitconvert, GRX32, uimm32>, + Requires<[FeatureHighWord]>; + def IILF : UnaryRIL<"iilf", 0xC09, bitconvert, GR32, uimm32>; + def IIHF : UnaryRIL<"iihf", 0xC08, bitconvert, GRH32, uimm32>; +} +def IILF64 : BinaryAliasRIL<insertlf, GR64, imm64lf32>; +def IIHF64 : BinaryAliasRIL<inserthf, GR64, imm64hf32>; + +// An alternative model of inserthf, with the first operand being +// a zero-extended value. +def : Pat<(or (zext32 GR32:$src), imm64hf32:$imm), + (IIHF64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32), + imm64hf32:$imm)>; + +//===----------------------------------------------------------------------===// +// Addition +//===----------------------------------------------------------------------===// + +// Plain addition. +let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in { + // Addition of a register. + let isCommutable = 1 in { + defm AR : BinaryRRAndK<"a", 0x1A, 0xB9F8, add, GR32, GR32>; + defm AGR : BinaryRREAndK<"ag", 0xB908, 0xB9E8, add, GR64, GR64>; + } + def AGFR : BinaryRRE<"agf", 0xB918, null_frag, GR64, GR32>; + + // Addition of signed 16-bit immediates. + defm AHIMux : BinaryRIAndKPseudo<"ahimux", add, GRX32, imm32sx16>; + defm AHI : BinaryRIAndK<"ahi", 0xA7A, 0xECD8, add, GR32, imm32sx16>; + defm AGHI : BinaryRIAndK<"aghi", 0xA7B, 0xECD9, add, GR64, imm64sx16>; + + // Addition of signed 32-bit immediates. + def AFIMux : BinaryRIPseudo<add, GRX32, simm32>, + Requires<[FeatureHighWord]>; + def AFI : BinaryRIL<"afi", 0xC29, add, GR32, simm32>; + def AIH : BinaryRIL<"aih", 0xCC8, add, GRH32, simm32>, + Requires<[FeatureHighWord]>; + def AGFI : BinaryRIL<"agfi", 0xC28, add, GR64, imm64sx32>; + + // Addition of memory. + defm AH : BinaryRXPair<"ah", 0x4A, 0xE37A, add, GR32, asextloadi16, 2>; + defm A : BinaryRXPair<"a", 0x5A, 0xE35A, add, GR32, load, 4>; + def AGF : BinaryRXY<"agf", 0xE318, add, GR64, asextloadi32, 4>; + def AG : BinaryRXY<"ag", 0xE308, add, GR64, load, 8>; + + // Addition to memory. + def ASI : BinarySIY<"asi", 0xEB6A, add, imm32sx8>; + def AGSI : BinarySIY<"agsi", 0xEB7A, add, imm64sx8>; +} +defm : SXB<add, GR64, AGFR>; + +// Addition producing a carry. +let Defs = [CC] in { + // Addition of a register. + let isCommutable = 1 in { + defm ALR : BinaryRRAndK<"al", 0x1E, 0xB9FA, addc, GR32, GR32>; + defm ALGR : BinaryRREAndK<"alg", 0xB90A, 0xB9EA, addc, GR64, GR64>; + } + def ALGFR : BinaryRRE<"algf", 0xB91A, null_frag, GR64, GR32>; + + // Addition of signed 16-bit immediates. + def ALHSIK : BinaryRIE<"alhsik", 0xECDA, addc, GR32, imm32sx16>, + Requires<[FeatureDistinctOps]>; + def ALGHSIK : BinaryRIE<"alghsik", 0xECDB, addc, GR64, imm64sx16>, + Requires<[FeatureDistinctOps]>; + + // Addition of unsigned 32-bit immediates. + def ALFI : BinaryRIL<"alfi", 0xC2B, addc, GR32, uimm32>; + def ALGFI : BinaryRIL<"algfi", 0xC2A, addc, GR64, imm64zx32>; + + // Addition of memory. + defm AL : BinaryRXPair<"al", 0x5E, 0xE35E, addc, GR32, load, 4>; + def ALGF : BinaryRXY<"algf", 0xE31A, addc, GR64, azextloadi32, 4>; + def ALG : BinaryRXY<"alg", 0xE30A, addc, GR64, load, 8>; +} +defm : ZXB<addc, GR64, ALGFR>; + +// Addition producing and using a carry. +let Defs = [CC], Uses = [CC] in { + // Addition of a register. + def ALCR : BinaryRRE<"alc", 0xB998, adde, GR32, GR32>; + def ALCGR : BinaryRRE<"alcg", 0xB988, adde, GR64, GR64>; + + // Addition of memory. 
+ def ALC : BinaryRXY<"alc", 0xE398, adde, GR32, load, 4>; + def ALCG : BinaryRXY<"alcg", 0xE388, adde, GR64, load, 8>; +} + +//===----------------------------------------------------------------------===// +// Subtraction +//===----------------------------------------------------------------------===// + +// Plain subtraction. Although immediate forms exist, we use the +// add-immediate instruction instead. +let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in { + // Subtraction of a register. + defm SR : BinaryRRAndK<"s", 0x1B, 0xB9F9, sub, GR32, GR32>; + def SGFR : BinaryRRE<"sgf", 0xB919, null_frag, GR64, GR32>; + defm SGR : BinaryRREAndK<"sg", 0xB909, 0xB9E9, sub, GR64, GR64>; + + // Subtraction of memory. + defm SH : BinaryRXPair<"sh", 0x4B, 0xE37B, sub, GR32, asextloadi16, 2>; + defm S : BinaryRXPair<"s", 0x5B, 0xE35B, sub, GR32, load, 4>; + def SGF : BinaryRXY<"sgf", 0xE319, sub, GR64, asextloadi32, 4>; + def SG : BinaryRXY<"sg", 0xE309, sub, GR64, load, 8>; +} +defm : SXB<sub, GR64, SGFR>; + +// Subtraction producing a carry. +let Defs = [CC] in { + // Subtraction of a register. + defm SLR : BinaryRRAndK<"sl", 0x1F, 0xB9FB, subc, GR32, GR32>; + def SLGFR : BinaryRRE<"slgf", 0xB91B, null_frag, GR64, GR32>; + defm SLGR : BinaryRREAndK<"slg", 0xB90B, 0xB9EB, subc, GR64, GR64>; + + // Subtraction of unsigned 32-bit immediates. These don't match + // subc because we prefer addc for constants. + def SLFI : BinaryRIL<"slfi", 0xC25, null_frag, GR32, uimm32>; + def SLGFI : BinaryRIL<"slgfi", 0xC24, null_frag, GR64, imm64zx32>; + + // Subtraction of memory. + defm SL : BinaryRXPair<"sl", 0x5F, 0xE35F, subc, GR32, load, 4>; + def SLGF : BinaryRXY<"slgf", 0xE31B, subc, GR64, azextloadi32, 4>; + def SLG : BinaryRXY<"slg", 0xE30B, subc, GR64, load, 8>; +} +defm : ZXB<subc, GR64, SLGFR>; + +// Subtraction producing and using a carry. +let Defs = [CC], Uses = [CC] in { + // Subtraction of a register. + def SLBR : BinaryRRE<"slb", 0xB999, sube, GR32, GR32>; + def SLGBR : BinaryRRE<"slbg", 0xB989, sube, GR64, GR64>; + + // Subtraction of memory. + def SLB : BinaryRXY<"slb", 0xE399, sube, GR32, load, 4>; + def SLBG : BinaryRXY<"slbg", 0xE389, sube, GR64, load, 8>; +} + +//===----------------------------------------------------------------------===// +// AND +//===----------------------------------------------------------------------===// + +let Defs = [CC] in { + // ANDs of a register. + let isCommutable = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in { + defm NR : BinaryRRAndK<"n", 0x14, 0xB9F4, and, GR32, GR32>; + defm NGR : BinaryRREAndK<"ng", 0xB980, 0xB9E4, and, GR64, GR64>; + } + + let isConvertibleToThreeAddress = 1 in { + // ANDs of a 16-bit immediate, leaving other bits unaffected. + // The CC result only reflects the 16-bit field, not the full register. + // + // NIxMux expands to NI[LH]x, depending on the choice of register. 
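+    //
+    // For example, "nill %r1, 0xfffe" clears just the least-significant bit
+    // of %r1 and leaves every other bit, including the high 32, untouched.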
+ def NILMux : BinaryRIPseudo<and, GRX32, imm32ll16c>, + Requires<[FeatureHighWord]>; + def NIHMux : BinaryRIPseudo<and, GRX32, imm32lh16c>, + Requires<[FeatureHighWord]>; + def NILL : BinaryRI<"nill", 0xA57, and, GR32, imm32ll16c>; + def NILH : BinaryRI<"nilh", 0xA56, and, GR32, imm32lh16c>; + def NIHL : BinaryRI<"nihl", 0xA55, and, GRH32, imm32ll16c>; + def NIHH : BinaryRI<"nihh", 0xA54, and, GRH32, imm32lh16c>; + def NILL64 : BinaryAliasRI<and, GR64, imm64ll16c>; + def NILH64 : BinaryAliasRI<and, GR64, imm64lh16c>; + def NIHL64 : BinaryAliasRI<and, GR64, imm64hl16c>; + def NIHH64 : BinaryAliasRI<and, GR64, imm64hh16c>; + + // ANDs of a 32-bit immediate, leaving other bits unaffected. + // The CC result only reflects the 32-bit field, which means we can + // use it as a zero indicator for i32 operations but not otherwise. + let CCValues = 0xC, CompareZeroCCMask = 0x8 in { + // Expands to NILF or NIHF, depending on the choice of register. + def NIFMux : BinaryRIPseudo<and, GRX32, uimm32>, + Requires<[FeatureHighWord]>; + def NILF : BinaryRIL<"nilf", 0xC0B, and, GR32, uimm32>; + def NIHF : BinaryRIL<"nihf", 0xC0A, and, GRH32, uimm32>; + } + def NILF64 : BinaryAliasRIL<and, GR64, imm64lf32c>; + def NIHF64 : BinaryAliasRIL<and, GR64, imm64hf32c>; + } + + // ANDs of memory. + let CCValues = 0xC, CompareZeroCCMask = 0x8 in { + defm N : BinaryRXPair<"n", 0x54, 0xE354, and, GR32, load, 4>; + def NG : BinaryRXY<"ng", 0xE380, and, GR64, load, 8>; + } + + // AND to memory + defm NI : BinarySIPair<"ni", 0x94, 0xEB54, null_frag, imm32zx8>; + + // Block AND. + let mayLoad = 1, mayStore = 1 in + defm NC : MemorySS<"nc", 0xD4, z_nc, z_nc_loop>; +} +defm : RMWIByte<and, bdaddr12pair, NI>; +defm : RMWIByte<and, bdaddr20pair, NIY>; + +//===----------------------------------------------------------------------===// +// OR +//===----------------------------------------------------------------------===// + +let Defs = [CC] in { + // ORs of a register. + let isCommutable = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in { + defm OR : BinaryRRAndK<"o", 0x16, 0xB9F6, or, GR32, GR32>; + defm OGR : BinaryRREAndK<"og", 0xB981, 0xB9E6, or, GR64, GR64>; + } + + // ORs of a 16-bit immediate, leaving other bits unaffected. + // The CC result only reflects the 16-bit field, not the full register. + // + // OIxMux expands to OI[LH]x, depending on the choice of register. + def OILMux : BinaryRIPseudo<or, GRX32, imm32ll16>, + Requires<[FeatureHighWord]>; + def OIHMux : BinaryRIPseudo<or, GRX32, imm32lh16>, + Requires<[FeatureHighWord]>; + def OILL : BinaryRI<"oill", 0xA5B, or, GR32, imm32ll16>; + def OILH : BinaryRI<"oilh", 0xA5A, or, GR32, imm32lh16>; + def OIHL : BinaryRI<"oihl", 0xA59, or, GRH32, imm32ll16>; + def OIHH : BinaryRI<"oihh", 0xA58, or, GRH32, imm32lh16>; + def OILL64 : BinaryAliasRI<or, GR64, imm64ll16>; + def OILH64 : BinaryAliasRI<or, GR64, imm64lh16>; + def OIHL64 : BinaryAliasRI<or, GR64, imm64hl16>; + def OIHH64 : BinaryAliasRI<or, GR64, imm64hh16>; + + // ORs of a 32-bit immediate, leaving other bits unaffected. + // The CC result only reflects the 32-bit field, which means we can + // use it as a zero indicator for i32 operations but not otherwise. + let CCValues = 0xC, CompareZeroCCMask = 0x8 in { + // Expands to OILF or OIHF, depending on the choice of register. 
+ def OIFMux : BinaryRIPseudo<or, GRX32, uimm32>, + Requires<[FeatureHighWord]>; + def OILF : BinaryRIL<"oilf", 0xC0D, or, GR32, uimm32>; + def OIHF : BinaryRIL<"oihf", 0xC0C, or, GRH32, uimm32>; + } + def OILF64 : BinaryAliasRIL<or, GR64, imm64lf32>; + def OIHF64 : BinaryAliasRIL<or, GR64, imm64hf32>; + + // ORs of memory. + let CCValues = 0xC, CompareZeroCCMask = 0x8 in { + defm O : BinaryRXPair<"o", 0x56, 0xE356, or, GR32, load, 4>; + def OG : BinaryRXY<"og", 0xE381, or, GR64, load, 8>; + } + + // OR to memory + defm OI : BinarySIPair<"oi", 0x96, 0xEB56, null_frag, imm32zx8>; + + // Block OR. + let mayLoad = 1, mayStore = 1 in + defm OC : MemorySS<"oc", 0xD6, z_oc, z_oc_loop>; +} +defm : RMWIByte<or, bdaddr12pair, OI>; +defm : RMWIByte<or, bdaddr20pair, OIY>; + +//===----------------------------------------------------------------------===// +// XOR +//===----------------------------------------------------------------------===// + +let Defs = [CC] in { + // XORs of a register. + let isCommutable = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in { + defm XR : BinaryRRAndK<"x", 0x17, 0xB9F7, xor, GR32, GR32>; + defm XGR : BinaryRREAndK<"xg", 0xB982, 0xB9E7, xor, GR64, GR64>; + } + + // XORs of a 32-bit immediate, leaving other bits unaffected. + // The CC result only reflects the 32-bit field, which means we can + // use it as a zero indicator for i32 operations but not otherwise. + let CCValues = 0xC, CompareZeroCCMask = 0x8 in { + // Expands to XILF or XIHF, depending on the choice of register. + def XIFMux : BinaryRIPseudo<xor, GRX32, uimm32>, + Requires<[FeatureHighWord]>; + def XILF : BinaryRIL<"xilf", 0xC07, xor, GR32, uimm32>; + def XIHF : BinaryRIL<"xihf", 0xC06, xor, GRH32, uimm32>; + } + def XILF64 : BinaryAliasRIL<xor, GR64, imm64lf32>; + def XIHF64 : BinaryAliasRIL<xor, GR64, imm64hf32>; + + // XORs of memory. + let CCValues = 0xC, CompareZeroCCMask = 0x8 in { + defm X : BinaryRXPair<"x",0x57, 0xE357, xor, GR32, load, 4>; + def XG : BinaryRXY<"xg", 0xE382, xor, GR64, load, 8>; + } + + // XOR to memory + defm XI : BinarySIPair<"xi", 0x97, 0xEB57, null_frag, imm32zx8>; + + // Block XOR. + let mayLoad = 1, mayStore = 1 in + defm XC : MemorySS<"xc", 0xD7, z_xc, z_xc_loop>; +} +defm : RMWIByte<xor, bdaddr12pair, XI>; +defm : RMWIByte<xor, bdaddr20pair, XIY>; + +//===----------------------------------------------------------------------===// +// Multiplication +//===----------------------------------------------------------------------===// + +// Multiplication of a register. +let isCommutable = 1 in { + def MSR : BinaryRRE<"ms", 0xB252, mul, GR32, GR32>; + def MSGR : BinaryRRE<"msg", 0xB90C, mul, GR64, GR64>; +} +def MSGFR : BinaryRRE<"msgf", 0xB91C, null_frag, GR64, GR32>; +defm : SXB<mul, GR64, MSGFR>; + +// Multiplication of a signed 16-bit immediate. +def MHI : BinaryRI<"mhi", 0xA7C, mul, GR32, imm32sx16>; +def MGHI : BinaryRI<"mghi", 0xA7D, mul, GR64, imm64sx16>; + +// Multiplication of a signed 32-bit immediate. +def MSFI : BinaryRIL<"msfi", 0xC21, mul, GR32, simm32>; +def MSGFI : BinaryRIL<"msgfi", 0xC20, mul, GR64, imm64sx32>; + +// Multiplication of memory. +defm MH : BinaryRXPair<"mh", 0x4C, 0xE37C, mul, GR32, asextloadi16, 2>; +defm MS : BinaryRXPair<"ms", 0x71, 0xE351, mul, GR32, load, 4>; +def MSGF : BinaryRXY<"msgf", 0xE31C, mul, GR64, asextloadi32, 4>; +def MSG : BinaryRXY<"msg", 0xE30C, mul, GR64, load, 8>; + +// Multiplication of a register, producing two results. 
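+// For example, "mlgr %r0, %r5" multiplies %r1 (the odd half of the %r0/%r1
+// pair, modelled here as a single GR128) by %r5 and leaves the high 64 bits
+// of the product in %r0 and the low 64 bits in %r1.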
+def MLGR : BinaryRRE<"mlg", 0xB986, z_umul_lohi64, GR128, GR64>;
+
+// Multiplication of memory, producing two results.
+def MLG : BinaryRXY<"mlg", 0xE386, z_umul_lohi64, GR128, load, 8>;
+
+//===----------------------------------------------------------------------===//
+// Division and remainder
+//===----------------------------------------------------------------------===//
+
+// Division and remainder, from registers.
+def DSGFR : BinaryRRE<"dsgf", 0xB91D, z_sdivrem32, GR128, GR32>;
+def DSGR : BinaryRRE<"dsg", 0xB90D, z_sdivrem64, GR128, GR64>;
+def DLR : BinaryRRE<"dl", 0xB997, z_udivrem32, GR128, GR32>;
+def DLGR : BinaryRRE<"dlg", 0xB987, z_udivrem64, GR128, GR64>;
+
+// Division and remainder, from memory.
+def DSGF : BinaryRXY<"dsgf", 0xE31D, z_sdivrem32, GR128, load, 4>;
+def DSG : BinaryRXY<"dsg", 0xE30D, z_sdivrem64, GR128, load, 8>;
+def DL : BinaryRXY<"dl", 0xE397, z_udivrem32, GR128, load, 4>;
+def DLG : BinaryRXY<"dlg", 0xE387, z_udivrem64, GR128, load, 8>;
+
+//===----------------------------------------------------------------------===//
+// Shifts
+//===----------------------------------------------------------------------===//
+
+// Shift left.
+let hasSideEffects = 0 in {
+ defm SLL : BinaryRSAndK<"sll", 0x89, 0xEBDF, shl, GR32>;
+ def SLLG : BinaryRSY<"sllg", 0xEB0D, shl, GR64>;
+}
+
+// Logical shift right.
+let hasSideEffects = 0 in {
+ defm SRL : BinaryRSAndK<"srl", 0x88, 0xEBDE, srl, GR32>;
+ def SRLG : BinaryRSY<"srlg", 0xEB0C, srl, GR64>;
+}
+
+// Arithmetic shift right.
+let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in {
+ defm SRA : BinaryRSAndK<"sra", 0x8A, 0xEBDC, sra, GR32>;
+ def SRAG : BinaryRSY<"srag", 0xEB0A, sra, GR64>;
+}
+
+// Rotate left.
+let hasSideEffects = 0 in {
+ def RLL : BinaryRSY<"rll", 0xEB1D, rotl, GR32>;
+ def RLLG : BinaryRSY<"rllg", 0xEB1C, rotl, GR64>;
+}
+
+// Rotate second operand left and insert selected bits into first operand.
+// These can act like 32-bit operations provided that the constant start and
+// end bits (operands 2 and 3) are in the range [32, 64).
+let Defs = [CC] in {
+ let isCodeGenOnly = 1 in
+ def RISBG32 : RotateSelectRIEf<"risbg", 0xEC55, GR32, GR32>;
+ let CCValues = 0xE, CompareZeroCCMask = 0xE in
+ def RISBG : RotateSelectRIEf<"risbg", 0xEC55, GR64, GR64>;
+}
+
+// On zEC12 we have a variant of RISBG that does not set CC.
+let Predicates = [FeatureMiscellaneousExtensions] in
+ def RISBGN : RotateSelectRIEf<"risbgn", 0xEC59, GR64, GR64>;
+
+// Forms of RISBG that only affect one word of the destination register.
+// They do not set CC.
+let Predicates = [FeatureHighWord] in {
+ def RISBMux : RotateSelectRIEfPseudo<GRX32, GRX32>;
+ def RISBLL : RotateSelectAliasRIEf<GR32, GR32>;
+ def RISBLH : RotateSelectAliasRIEf<GR32, GRH32>;
+ def RISBHL : RotateSelectAliasRIEf<GRH32, GR32>;
+ def RISBHH : RotateSelectAliasRIEf<GRH32, GRH32>;
+ def RISBLG : RotateSelectRIEf<"risblg", 0xEC51, GR32, GR64>;
+ def RISBHG : RotateSelectRIEf<"risbhg", 0xEC5D, GRH32, GR64>;
+}
+
+// Rotate second operand left and perform a logical operation with selected
+// bits of the first operand. The CC result only describes the selected bits,
+// so it isn't useful for a full comparison against zero.
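Editor's aside: the RISBG comments above describe a rotate-then-insert-under-mask operation. The C++ sketch below is only an illustrative model (the helper name and the restriction to non-wrapping masks are assumptions; the real instruction also allows wrap-around bit ranges and a zero-remaining-bits flag): rotate the second operand left, then replace the selected bit range of the first operand, counting bit 0 as the most significant bit as z/Architecture does. With the start and end bits in [32, 64), only the low word of the first operand can change, which is why RISBG can stand in for a 32-bit operation.

#include <cassert>
#include <cstdint>

// Hypothetical model of rotate-and-insert-selected-bits (non-wrapping case).
static uint64_t rotateAndInsertSelectedBits(uint64_t First, uint64_t Second,
                                            unsigned Start, unsigned End,
                                            unsigned Rotate) {
  assert(Start <= End && End <= 63 && "wrap-around masks not modelled here");
  Rotate %= 64;
  uint64_t Rotated =
      Rotate ? (Second << Rotate) | (Second >> (64 - Rotate)) : Second;
  // Mask covering bits Start..End, with bit 0 being the most significant bit.
  uint64_t Mask = (~0ULL >> Start) & (~0ULL << (63 - End));
  return (First & ~Mask) | (Rotated & Mask);
}

int main() {
  // Selecting bits 48..63 only rewrites the low 16 bits of the first operand.
  uint64_t R = rotateAndInsertSelectedBits(0x1111222233334444ULL,
                                           0x000000000000ABCDULL, 48, 63, 0);
  assert(R == 0x111122223333ABCDULL);
  return 0;
}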
+let Defs = [CC] in { + def RNSBG : RotateSelectRIEf<"rnsbg", 0xEC54, GR64, GR64>; + def ROSBG : RotateSelectRIEf<"rosbg", 0xEC56, GR64, GR64>; + def RXSBG : RotateSelectRIEf<"rxsbg", 0xEC57, GR64, GR64>; +} + +//===----------------------------------------------------------------------===// +// Comparison +//===----------------------------------------------------------------------===// + +// Signed comparisons. We put these before the unsigned comparisons because +// some of the signed forms have COMPARE AND BRANCH equivalents whereas none +// of the unsigned forms do. +let Defs = [CC], CCValues = 0xE in { + // Comparison with a register. + def CR : CompareRR <"c", 0x19, z_scmp, GR32, GR32>; + def CGFR : CompareRRE<"cgf", 0xB930, null_frag, GR64, GR32>; + def CGR : CompareRRE<"cg", 0xB920, z_scmp, GR64, GR64>; + + // Comparison with a signed 16-bit immediate. + def CHI : CompareRI<"chi", 0xA7E, z_scmp, GR32, imm32sx16>; + def CGHI : CompareRI<"cghi", 0xA7F, z_scmp, GR64, imm64sx16>; + + // Comparison with a signed 32-bit immediate. CFIMux expands to CFI or CIH, + // depending on the choice of register. + def CFIMux : CompareRIPseudo<z_scmp, GRX32, simm32>, + Requires<[FeatureHighWord]>; + def CFI : CompareRIL<"cfi", 0xC2D, z_scmp, GR32, simm32>; + def CIH : CompareRIL<"cih", 0xCCD, z_scmp, GRH32, simm32>, + Requires<[FeatureHighWord]>; + def CGFI : CompareRIL<"cgfi", 0xC2C, z_scmp, GR64, imm64sx32>; + + // Comparison with memory. + defm CH : CompareRXPair<"ch", 0x49, 0xE379, z_scmp, GR32, asextloadi16, 2>; + def CMux : CompareRXYPseudo<z_scmp, GRX32, load, 4>, + Requires<[FeatureHighWord]>; + defm C : CompareRXPair<"c", 0x59, 0xE359, z_scmp, GR32, load, 4>; + def CHF : CompareRXY<"chf", 0xE3CD, z_scmp, GRH32, load, 4>, + Requires<[FeatureHighWord]>; + def CGH : CompareRXY<"cgh", 0xE334, z_scmp, GR64, asextloadi16, 2>; + def CGF : CompareRXY<"cgf", 0xE330, z_scmp, GR64, asextloadi32, 4>; + def CG : CompareRXY<"cg", 0xE320, z_scmp, GR64, load, 8>; + def CHRL : CompareRILPC<"chrl", 0xC65, z_scmp, GR32, aligned_asextloadi16>; + def CRL : CompareRILPC<"crl", 0xC6D, z_scmp, GR32, aligned_load>; + def CGHRL : CompareRILPC<"cghrl", 0xC64, z_scmp, GR64, aligned_asextloadi16>; + def CGFRL : CompareRILPC<"cgfrl", 0xC6C, z_scmp, GR64, aligned_asextloadi32>; + def CGRL : CompareRILPC<"cgrl", 0xC68, z_scmp, GR64, aligned_load>; + + // Comparison between memory and a signed 16-bit immediate. + def CHHSI : CompareSIL<"chhsi", 0xE554, z_scmp, asextloadi16, imm32sx16>; + def CHSI : CompareSIL<"chsi", 0xE55C, z_scmp, load, imm32sx16>; + def CGHSI : CompareSIL<"cghsi", 0xE558, z_scmp, load, imm64sx16>; +} +defm : SXB<z_scmp, GR64, CGFR>; + +// Unsigned comparisons. +let Defs = [CC], CCValues = 0xE, IsLogical = 1 in { + // Comparison with a register. + def CLR : CompareRR <"cl", 0x15, z_ucmp, GR32, GR32>; + def CLGFR : CompareRRE<"clgf", 0xB931, null_frag, GR64, GR32>; + def CLGR : CompareRRE<"clg", 0xB921, z_ucmp, GR64, GR64>; + + // Comparison with an unsigned 32-bit immediate. CLFIMux expands to CLFI + // or CLIH, depending on the choice of register. + def CLFIMux : CompareRIPseudo<z_ucmp, GRX32, uimm32>, + Requires<[FeatureHighWord]>; + def CLFI : CompareRIL<"clfi", 0xC2F, z_ucmp, GR32, uimm32>; + def CLIH : CompareRIL<"clih", 0xCCF, z_ucmp, GRH32, uimm32>, + Requires<[FeatureHighWord]>; + def CLGFI : CompareRIL<"clgfi", 0xC2E, z_ucmp, GR64, imm64zx32>; + + // Comparison with memory. 
+ def CLMux : CompareRXYPseudo<z_ucmp, GRX32, load, 4>, + Requires<[FeatureHighWord]>; + defm CL : CompareRXPair<"cl", 0x55, 0xE355, z_ucmp, GR32, load, 4>; + def CLHF : CompareRXY<"clhf", 0xE3CF, z_ucmp, GRH32, load, 4>, + Requires<[FeatureHighWord]>; + def CLGF : CompareRXY<"clgf", 0xE331, z_ucmp, GR64, azextloadi32, 4>; + def CLG : CompareRXY<"clg", 0xE321, z_ucmp, GR64, load, 8>; + def CLHRL : CompareRILPC<"clhrl", 0xC67, z_ucmp, GR32, + aligned_azextloadi16>; + def CLRL : CompareRILPC<"clrl", 0xC6F, z_ucmp, GR32, + aligned_load>; + def CLGHRL : CompareRILPC<"clghrl", 0xC66, z_ucmp, GR64, + aligned_azextloadi16>; + def CLGFRL : CompareRILPC<"clgfrl", 0xC6E, z_ucmp, GR64, + aligned_azextloadi32>; + def CLGRL : CompareRILPC<"clgrl", 0xC6A, z_ucmp, GR64, + aligned_load>; + + // Comparison between memory and an unsigned 8-bit immediate. + defm CLI : CompareSIPair<"cli", 0x95, 0xEB55, z_ucmp, azextloadi8, imm32zx8>; + + // Comparison between memory and an unsigned 16-bit immediate. + def CLHHSI : CompareSIL<"clhhsi", 0xE555, z_ucmp, azextloadi16, imm32zx16>; + def CLFHSI : CompareSIL<"clfhsi", 0xE55D, z_ucmp, load, imm32zx16>; + def CLGHSI : CompareSIL<"clghsi", 0xE559, z_ucmp, load, imm64zx16>; +} +defm : ZXB<z_ucmp, GR64, CLGFR>; + +// Memory-to-memory comparison. +let mayLoad = 1, Defs = [CC] in + defm CLC : MemorySS<"clc", 0xD5, z_clc, z_clc_loop>; + +// String comparison. +let mayLoad = 1, Defs = [CC] in + defm CLST : StringRRE<"clst", 0xB25D, z_strcmp>; + +// Test under mask. +let Defs = [CC] in { + // TMxMux expands to TM[LH]x, depending on the choice of register. + def TMLMux : CompareRIPseudo<z_tm_reg, GRX32, imm32ll16>, + Requires<[FeatureHighWord]>; + def TMHMux : CompareRIPseudo<z_tm_reg, GRX32, imm32lh16>, + Requires<[FeatureHighWord]>; + def TMLL : CompareRI<"tmll", 0xA71, z_tm_reg, GR32, imm32ll16>; + def TMLH : CompareRI<"tmlh", 0xA70, z_tm_reg, GR32, imm32lh16>; + def TMHL : CompareRI<"tmhl", 0xA73, z_tm_reg, GRH32, imm32ll16>; + def TMHH : CompareRI<"tmhh", 0xA72, z_tm_reg, GRH32, imm32lh16>; + + def TMLL64 : CompareAliasRI<z_tm_reg, GR64, imm64ll16>; + def TMLH64 : CompareAliasRI<z_tm_reg, GR64, imm64lh16>; + def TMHL64 : CompareAliasRI<z_tm_reg, GR64, imm64hl16>; + def TMHH64 : CompareAliasRI<z_tm_reg, GR64, imm64hh16>; + + defm TM : CompareSIPair<"tm", 0x91, 0xEB51, z_tm_mem, anyextloadi8, imm32zx8>; +} + +//===----------------------------------------------------------------------===// +// Prefetch +//===----------------------------------------------------------------------===// + +def PFD : PrefetchRXY<"pfd", 0xE336, z_prefetch>; +def PFDRL : PrefetchRILPC<"pfdrl", 0xC62, z_prefetch>; + +//===----------------------------------------------------------------------===// +// Atomic operations +//===----------------------------------------------------------------------===// + +def Serialize : Alias<2, (outs), (ins), [(z_serialize)]>; + +let Predicates = [FeatureInterlockedAccess1], Defs = [CC] in { + def LAA : LoadAndOpRSY<"laa", 0xEBF8, atomic_load_add_32, GR32>; + def LAAG : LoadAndOpRSY<"laag", 0xEBE8, atomic_load_add_64, GR64>; + def LAAL : LoadAndOpRSY<"laal", 0xEBFA, null_frag, GR32>; + def LAALG : LoadAndOpRSY<"laalg", 0xEBEA, null_frag, GR64>; + def LAN : LoadAndOpRSY<"lan", 0xEBF4, atomic_load_and_32, GR32>; + def LANG : LoadAndOpRSY<"lang", 0xEBE4, atomic_load_and_64, GR64>; + def LAO : LoadAndOpRSY<"lao", 0xEBF6, atomic_load_or_32, GR32>; + def LAOG : LoadAndOpRSY<"laog", 0xEBE6, atomic_load_or_64, GR64>; + def LAX : LoadAndOpRSY<"lax", 0xEBF7, 
atomic_load_xor_32, GR32>; + def LAXG : LoadAndOpRSY<"laxg", 0xEBE7, atomic_load_xor_64, GR64>; +} + +def ATOMIC_SWAPW : AtomicLoadWBinaryReg<z_atomic_swapw>; +def ATOMIC_SWAP_32 : AtomicLoadBinaryReg32<atomic_swap_32>; +def ATOMIC_SWAP_64 : AtomicLoadBinaryReg64<atomic_swap_64>; + +def ATOMIC_LOADW_AR : AtomicLoadWBinaryReg<z_atomic_loadw_add>; +def ATOMIC_LOADW_AFI : AtomicLoadWBinaryImm<z_atomic_loadw_add, simm32>; +let Predicates = [FeatureNoInterlockedAccess1] in { + def ATOMIC_LOAD_AR : AtomicLoadBinaryReg32<atomic_load_add_32>; + def ATOMIC_LOAD_AHI : AtomicLoadBinaryImm32<atomic_load_add_32, imm32sx16>; + def ATOMIC_LOAD_AFI : AtomicLoadBinaryImm32<atomic_load_add_32, simm32>; + def ATOMIC_LOAD_AGR : AtomicLoadBinaryReg64<atomic_load_add_64>; + def ATOMIC_LOAD_AGHI : AtomicLoadBinaryImm64<atomic_load_add_64, imm64sx16>; + def ATOMIC_LOAD_AGFI : AtomicLoadBinaryImm64<atomic_load_add_64, imm64sx32>; +} + +def ATOMIC_LOADW_SR : AtomicLoadWBinaryReg<z_atomic_loadw_sub>; +def ATOMIC_LOAD_SR : AtomicLoadBinaryReg32<atomic_load_sub_32>; +def ATOMIC_LOAD_SGR : AtomicLoadBinaryReg64<atomic_load_sub_64>; + +def ATOMIC_LOADW_NR : AtomicLoadWBinaryReg<z_atomic_loadw_and>; +def ATOMIC_LOADW_NILH : AtomicLoadWBinaryImm<z_atomic_loadw_and, imm32lh16c>; +let Predicates = [FeatureNoInterlockedAccess1] in { + def ATOMIC_LOAD_NR : AtomicLoadBinaryReg32<atomic_load_and_32>; + def ATOMIC_LOAD_NILL : AtomicLoadBinaryImm32<atomic_load_and_32, + imm32ll16c>; + def ATOMIC_LOAD_NILH : AtomicLoadBinaryImm32<atomic_load_and_32, + imm32lh16c>; + def ATOMIC_LOAD_NILF : AtomicLoadBinaryImm32<atomic_load_and_32, uimm32>; + def ATOMIC_LOAD_NGR : AtomicLoadBinaryReg64<atomic_load_and_64>; + def ATOMIC_LOAD_NILL64 : AtomicLoadBinaryImm64<atomic_load_and_64, + imm64ll16c>; + def ATOMIC_LOAD_NILH64 : AtomicLoadBinaryImm64<atomic_load_and_64, + imm64lh16c>; + def ATOMIC_LOAD_NIHL64 : AtomicLoadBinaryImm64<atomic_load_and_64, + imm64hl16c>; + def ATOMIC_LOAD_NIHH64 : AtomicLoadBinaryImm64<atomic_load_and_64, + imm64hh16c>; + def ATOMIC_LOAD_NILF64 : AtomicLoadBinaryImm64<atomic_load_and_64, + imm64lf32c>; + def ATOMIC_LOAD_NIHF64 : AtomicLoadBinaryImm64<atomic_load_and_64, + imm64hf32c>; +} + +def ATOMIC_LOADW_OR : AtomicLoadWBinaryReg<z_atomic_loadw_or>; +def ATOMIC_LOADW_OILH : AtomicLoadWBinaryImm<z_atomic_loadw_or, imm32lh16>; +let Predicates = [FeatureNoInterlockedAccess1] in { + def ATOMIC_LOAD_OR : AtomicLoadBinaryReg32<atomic_load_or_32>; + def ATOMIC_LOAD_OILL : AtomicLoadBinaryImm32<atomic_load_or_32, imm32ll16>; + def ATOMIC_LOAD_OILH : AtomicLoadBinaryImm32<atomic_load_or_32, imm32lh16>; + def ATOMIC_LOAD_OILF : AtomicLoadBinaryImm32<atomic_load_or_32, uimm32>; + def ATOMIC_LOAD_OGR : AtomicLoadBinaryReg64<atomic_load_or_64>; + def ATOMIC_LOAD_OILL64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64ll16>; + def ATOMIC_LOAD_OILH64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64lh16>; + def ATOMIC_LOAD_OIHL64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hl16>; + def ATOMIC_LOAD_OIHH64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hh16>; + def ATOMIC_LOAD_OILF64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64lf32>; + def ATOMIC_LOAD_OIHF64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hf32>; +} + +def ATOMIC_LOADW_XR : AtomicLoadWBinaryReg<z_atomic_loadw_xor>; +def ATOMIC_LOADW_XILF : AtomicLoadWBinaryImm<z_atomic_loadw_xor, uimm32>; +let Predicates = [FeatureNoInterlockedAccess1] in { + def ATOMIC_LOAD_XR : AtomicLoadBinaryReg32<atomic_load_xor_32>; + def ATOMIC_LOAD_XILF : 
AtomicLoadBinaryImm32<atomic_load_xor_32, uimm32>; + def ATOMIC_LOAD_XGR : AtomicLoadBinaryReg64<atomic_load_xor_64>; + def ATOMIC_LOAD_XILF64 : AtomicLoadBinaryImm64<atomic_load_xor_64, imm64lf32>; + def ATOMIC_LOAD_XIHF64 : AtomicLoadBinaryImm64<atomic_load_xor_64, imm64hf32>; +} + +def ATOMIC_LOADW_NRi : AtomicLoadWBinaryReg<z_atomic_loadw_nand>; +def ATOMIC_LOADW_NILHi : AtomicLoadWBinaryImm<z_atomic_loadw_nand, + imm32lh16c>; +def ATOMIC_LOAD_NRi : AtomicLoadBinaryReg32<atomic_load_nand_32>; +def ATOMIC_LOAD_NILLi : AtomicLoadBinaryImm32<atomic_load_nand_32, + imm32ll16c>; +def ATOMIC_LOAD_NILHi : AtomicLoadBinaryImm32<atomic_load_nand_32, + imm32lh16c>; +def ATOMIC_LOAD_NILFi : AtomicLoadBinaryImm32<atomic_load_nand_32, uimm32>; +def ATOMIC_LOAD_NGRi : AtomicLoadBinaryReg64<atomic_load_nand_64>; +def ATOMIC_LOAD_NILL64i : AtomicLoadBinaryImm64<atomic_load_nand_64, + imm64ll16c>; +def ATOMIC_LOAD_NILH64i : AtomicLoadBinaryImm64<atomic_load_nand_64, + imm64lh16c>; +def ATOMIC_LOAD_NIHL64i : AtomicLoadBinaryImm64<atomic_load_nand_64, + imm64hl16c>; +def ATOMIC_LOAD_NIHH64i : AtomicLoadBinaryImm64<atomic_load_nand_64, + imm64hh16c>; +def ATOMIC_LOAD_NILF64i : AtomicLoadBinaryImm64<atomic_load_nand_64, + imm64lf32c>; +def ATOMIC_LOAD_NIHF64i : AtomicLoadBinaryImm64<atomic_load_nand_64, + imm64hf32c>; + +def ATOMIC_LOADW_MIN : AtomicLoadWBinaryReg<z_atomic_loadw_min>; +def ATOMIC_LOAD_MIN_32 : AtomicLoadBinaryReg32<atomic_load_min_32>; +def ATOMIC_LOAD_MIN_64 : AtomicLoadBinaryReg64<atomic_load_min_64>; + +def ATOMIC_LOADW_MAX : AtomicLoadWBinaryReg<z_atomic_loadw_max>; +def ATOMIC_LOAD_MAX_32 : AtomicLoadBinaryReg32<atomic_load_max_32>; +def ATOMIC_LOAD_MAX_64 : AtomicLoadBinaryReg64<atomic_load_max_64>; + +def ATOMIC_LOADW_UMIN : AtomicLoadWBinaryReg<z_atomic_loadw_umin>; +def ATOMIC_LOAD_UMIN_32 : AtomicLoadBinaryReg32<atomic_load_umin_32>; +def ATOMIC_LOAD_UMIN_64 : AtomicLoadBinaryReg64<atomic_load_umin_64>; + +def ATOMIC_LOADW_UMAX : AtomicLoadWBinaryReg<z_atomic_loadw_umax>; +def ATOMIC_LOAD_UMAX_32 : AtomicLoadBinaryReg32<atomic_load_umax_32>; +def ATOMIC_LOAD_UMAX_64 : AtomicLoadBinaryReg64<atomic_load_umax_64>; + +def ATOMIC_CMP_SWAPW + : Pseudo<(outs GR32:$dst), (ins bdaddr20only:$addr, GR32:$cmp, GR32:$swap, + ADDR32:$bitshift, ADDR32:$negbitshift, + uimm32:$bitsize), + [(set GR32:$dst, + (z_atomic_cmp_swapw bdaddr20only:$addr, GR32:$cmp, GR32:$swap, + ADDR32:$bitshift, ADDR32:$negbitshift, + uimm32:$bitsize))]> { + let Defs = [CC]; + let mayLoad = 1; + let mayStore = 1; + let usesCustomInserter = 1; +} + +let Defs = [CC] in { + defm CS : CmpSwapRSPair<"cs", 0xBA, 0xEB14, atomic_cmp_swap_32, GR32>; + def CSG : CmpSwapRSY<"csg", 0xEB30, atomic_cmp_swap_64, GR64>; +} + +//===----------------------------------------------------------------------===// +// Transactional execution +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureTransactionalExecution] in { + // Transaction Begin + let hasSideEffects = 1, mayStore = 1, + usesCustomInserter = 1, Defs = [CC] in { + def TBEGIN : InstSIL<0xE560, + (outs), (ins bdaddr12only:$BD1, imm32zx16:$I2), + "tbegin\t$BD1, $I2", + [(z_tbegin bdaddr12only:$BD1, imm32zx16:$I2)]>; + def TBEGIN_nofloat : Pseudo<(outs), (ins bdaddr12only:$BD1, imm32zx16:$I2), + [(z_tbegin_nofloat bdaddr12only:$BD1, + imm32zx16:$I2)]>; + def TBEGINC : InstSIL<0xE561, + (outs), (ins bdaddr12only:$BD1, imm32zx16:$I2), + "tbeginc\t$BD1, $I2", + [(int_s390_tbeginc bdaddr12only:$BD1, + imm32zx16:$I2)]>; + } + + // 
Transaction End + let hasSideEffects = 1, Defs = [CC], BD2 = 0 in + def TEND : InstS<0xB2F8, (outs), (ins), "tend", [(z_tend)]>; + + // Transaction Abort + let hasSideEffects = 1, isTerminator = 1, isBarrier = 1 in + def TABORT : InstS<0xB2FC, (outs), (ins bdaddr12only:$BD2), + "tabort\t$BD2", + [(int_s390_tabort bdaddr12only:$BD2)]>; + + // Nontransactional Store + let hasSideEffects = 1 in + def NTSTG : StoreRXY<"ntstg", 0xE325, int_s390_ntstg, GR64, 8>; + + // Extract Transaction Nesting Depth + let hasSideEffects = 1 in + def ETND : InherentRRE<"etnd", 0xB2EC, GR32, (int_s390_etnd)>; +} + +//===----------------------------------------------------------------------===// +// Processor assist +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureProcessorAssist] in { + let hasSideEffects = 1, R4 = 0 in + def PPA : InstRRF<0xB2E8, (outs), (ins GR64:$R1, GR64:$R2, imm32zx4:$R3), + "ppa\t$R1, $R2, $R3", []>; + def : Pat<(int_s390_ppa_txassist GR32:$src), + (PPA (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32), + 0, 1)>; +} + +//===----------------------------------------------------------------------===// +// Miscellaneous Instructions. +//===----------------------------------------------------------------------===// + +// Extract CC into bits 29 and 28 of a register. +let Uses = [CC] in + def IPM : InherentRRE<"ipm", 0xB222, GR32, (z_ipm)>; + +// Read a 32-bit access register into a GR32. As with all GR32 operations, +// the upper 32 bits of the enclosing GR64 remain unchanged, which is useful +// when a 64-bit address is stored in a pair of access registers. +def EAR : InstRRE<0xB24F, (outs GR32:$R1), (ins access_reg:$R2), + "ear\t$R1, $R2", + [(set GR32:$R1, (z_extract_access access_reg:$R2))]>; + +// Find leftmost one, AKA count leading zeros. The instruction actually +// returns a pair of GR64s, the first giving the number of leading zeros +// and the second giving a copy of the source with the leftmost one bit +// cleared. We only use the first result here. +let Defs = [CC] in { + def FLOGR : UnaryRRE<"flog", 0xB983, null_frag, GR128, GR64>; +} +def : Pat<(ctlz GR64:$src), + (EXTRACT_SUBREG (FLOGR GR64:$src), subreg_h64)>; + +// Population count. Counts bits set per byte. +let Predicates = [FeaturePopulationCount], Defs = [CC] in { + def POPCNT : InstRRE<0xB9E1, (outs GR64:$R1), (ins GR64:$R2), + "popcnt\t$R1, $R2", + [(set GR64:$R1, (z_popcnt GR64:$R2))]>; +} + +// Use subregs to populate the "don't care" bits in a 32-bit to 64-bit anyext. +def : Pat<(i64 (anyext GR32:$src)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32)>; + +// Extend GR32s and GR64s to GR128s. +let usesCustomInserter = 1 in { + def AEXT128_64 : Pseudo<(outs GR128:$dst), (ins GR64:$src), []>; + def ZEXT128_32 : Pseudo<(outs GR128:$dst), (ins GR32:$src), []>; + def ZEXT128_64 : Pseudo<(outs GR128:$dst), (ins GR64:$src), []>; +} + +// Search a block of memory for a character. 
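Editor's aside: the FLOGR comment above describes a pair result, a leading-zero count plus a copy of the source with its leftmost one bit cleared, of which only the count is consumed by the ctlz pattern. A small C++ model of that behaviour (the helper name is invented; the value 64 for a zero input follows the comment, and the code is only an illustration):

#include <cassert>
#include <cstdint>
#include <utility>

// Rough model of the pair result: first = number of leading zeros,
// second = source with the leftmost one bit cleared.
static std::pair<uint64_t, uint64_t> findLeftmostOne(uint64_t Src) {
  for (int Bit = 63; Bit >= 0; --Bit)
    if (Src & (uint64_t(1) << Bit))
      return {uint64_t(63 - Bit), Src & ~(uint64_t(1) << Bit)};
  return {64, 0}; // no one bit found: 64 leading zeros
}

int main() {
  assert(findLeftmostOne(1).first == 63);
  assert(findLeftmostOne(0x8000000000000000ULL).first == 0);
  assert(findLeftmostOne(0x00F0000000000000ULL).second ==
         0x0070000000000000ULL);
  // The (ctlz GR64:$src) pattern above only extracts the first element.
  return 0;
}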
+let mayLoad = 1, Defs = [CC] in + defm SRST : StringRRE<"srst", 0xb25e, z_search_string>; + +// Other instructions for inline assembly +let hasSideEffects = 1, Defs = [CC], mayStore = 1 in + def STCK : InstS<0xB205, (outs), (ins bdaddr12only:$BD2), + "stck\t$BD2", + []>; +let hasSideEffects = 1, Defs = [CC], mayStore = 1 in + def STCKF : InstS<0xB27C, (outs), (ins bdaddr12only:$BD2), + "stckf\t$BD2", + []>; +let hasSideEffects = 1, Defs = [CC], mayStore = 1 in + def STCKE : InstS<0xB278, (outs), (ins bdaddr12only:$BD2), + "stcke\t$BD2", + []>; +let hasSideEffects = 1, Defs = [CC], mayStore = 1 in + def STFLE : InstS<0xB2B0, (outs), (ins bdaddr12only:$BD2), + "stfle\t$BD2", + []>; + + + +//===----------------------------------------------------------------------===// +// Peepholes. +//===----------------------------------------------------------------------===// + +// Use AL* for GR64 additions of unsigned 32-bit values. +defm : ZXB<add, GR64, ALGFR>; +def : Pat<(add GR64:$src1, imm64zx32:$src2), + (ALGFI GR64:$src1, imm64zx32:$src2)>; +def : Pat<(add GR64:$src1, (azextloadi32 bdxaddr20only:$addr)), + (ALGF GR64:$src1, bdxaddr20only:$addr)>; + +// Use SL* for GR64 subtractions of unsigned 32-bit values. +defm : ZXB<sub, GR64, SLGFR>; +def : Pat<(add GR64:$src1, imm64zx32n:$src2), + (SLGFI GR64:$src1, imm64zx32n:$src2)>; +def : Pat<(sub GR64:$src1, (azextloadi32 bdxaddr20only:$addr)), + (SLGF GR64:$src1, bdxaddr20only:$addr)>; + +// Optimize sign-extended 1/0 selects to -1/0 selects. This is important +// for vector legalization. +def : Pat<(sra (shl (i32 (z_select_ccmask 1, 0, imm32zx4:$valid, imm32zx4:$cc)), + (i32 31)), + (i32 31)), + (Select32 (LHI -1), (LHI 0), imm32zx4:$valid, imm32zx4:$cc)>; +def : Pat<(sra (shl (i64 (anyext (i32 (z_select_ccmask 1, 0, imm32zx4:$valid, + imm32zx4:$cc)))), + (i32 63)), + (i32 63)), + (Select64 (LGHI -1), (LGHI 0), imm32zx4:$valid, imm32zx4:$cc)>; + +// Peepholes for turning scalar operations into block operations. +defm : BlockLoadStore<anyextloadi8, i32, MVCSequence, NCSequence, OCSequence, + XCSequence, 1>; +defm : BlockLoadStore<anyextloadi16, i32, MVCSequence, NCSequence, OCSequence, + XCSequence, 2>; +defm : BlockLoadStore<load, i32, MVCSequence, NCSequence, OCSequence, + XCSequence, 4>; +defm : BlockLoadStore<anyextloadi8, i64, MVCSequence, NCSequence, + OCSequence, XCSequence, 1>; +defm : BlockLoadStore<anyextloadi16, i64, MVCSequence, NCSequence, OCSequence, + XCSequence, 2>; +defm : BlockLoadStore<anyextloadi32, i64, MVCSequence, NCSequence, OCSequence, + XCSequence, 4>; +defm : BlockLoadStore<load, i64, MVCSequence, NCSequence, OCSequence, + XCSequence, 8>; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrVector.td new file mode 100644 index 0000000..c101e43 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrVector.td @@ -0,0 +1,1097 @@ +//==- SystemZInstrVector.td - SystemZ Vector instructions ------*- tblgen-*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Move instructions +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureVector] in { + // Register move. 
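Editor's aside on the peephole near the end of SystemZInstrInfo.td above that turns sign-extended 1/0 selects into -1/0 selects: the matched expression shifts a 0-or-1 value into the sign bit and arithmetically shifts it back, yielding 0 or -1, which are exactly the (LHI 0) and (LHI -1) operands of the replacement select. A scalar C++ check of that equivalence (assuming two's-complement arithmetic right shifts, as on SystemZ; the helper name is invented):

#include <cassert>
#include <cstdint>

// Scalar model of the matched expression: sign-extend bit 0 of a 0/1 value.
static int32_t signExtendBit0(int32_t Select) {
  return int32_t(uint32_t(Select) << 31) >> 31;
}

int main() {
  assert(signExtendBit0(1) == -1); // becomes the (LHI -1) operand of Select32
  assert(signExtendBit0(0) == 0);  // becomes the (LHI 0) operand of Select32
  return 0;
}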
+ def VLR : UnaryVRRa<"vlr", 0xE756, null_frag, v128any, v128any>; + def VLR32 : UnaryAliasVRR<null_frag, v32eb, v32eb>; + def VLR64 : UnaryAliasVRR<null_frag, v64db, v64db>; + + // Load GR from VR element. + def VLGVB : BinaryVRSc<"vlgvb", 0xE721, null_frag, v128b, 0>; + def VLGVH : BinaryVRSc<"vlgvh", 0xE721, null_frag, v128h, 1>; + def VLGVF : BinaryVRSc<"vlgvf", 0xE721, null_frag, v128f, 2>; + def VLGVG : BinaryVRSc<"vlgvg", 0xE721, z_vector_extract, v128g, 3>; + + // Load VR element from GR. + def VLVGB : TernaryVRSb<"vlvgb", 0xE722, z_vector_insert, + v128b, v128b, GR32, 0>; + def VLVGH : TernaryVRSb<"vlvgh", 0xE722, z_vector_insert, + v128h, v128h, GR32, 1>; + def VLVGF : TernaryVRSb<"vlvgf", 0xE722, z_vector_insert, + v128f, v128f, GR32, 2>; + def VLVGG : TernaryVRSb<"vlvgg", 0xE722, z_vector_insert, + v128g, v128g, GR64, 3>; + + // Load VR from GRs disjoint. + def VLVGP : BinaryVRRf<"vlvgp", 0xE762, z_join_dwords, v128g>; + def VLVGP32 : BinaryAliasVRRf<GR32>; +} + +// Extractions always assign to the full GR64, even if the element would +// fit in the lower 32 bits. Sub-i64 extracts therefore need to take a +// subreg of the result. +class VectorExtractSubreg<ValueType type, Instruction insn> + : Pat<(i32 (z_vector_extract (type VR128:$vec), shift12only:$index)), + (EXTRACT_SUBREG (insn VR128:$vec, shift12only:$index), subreg_l32)>; + +def : VectorExtractSubreg<v16i8, VLGVB>; +def : VectorExtractSubreg<v8i16, VLGVH>; +def : VectorExtractSubreg<v4i32, VLGVF>; + +//===----------------------------------------------------------------------===// +// Immediate instructions +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureVector] in { + // Generate byte mask. + def VZERO : InherentVRIa<"vzero", 0xE744, 0>; + def VONE : InherentVRIa<"vone", 0xE744, 0xffff>; + def VGBM : UnaryVRIa<"vgbm", 0xE744, z_byte_mask, v128b, imm32zx16>; + + // Generate mask. + def VGMB : BinaryVRIb<"vgmb", 0xE746, z_rotate_mask, v128b, 0>; + def VGMH : BinaryVRIb<"vgmh", 0xE746, z_rotate_mask, v128h, 1>; + def VGMF : BinaryVRIb<"vgmf", 0xE746, z_rotate_mask, v128f, 2>; + def VGMG : BinaryVRIb<"vgmg", 0xE746, z_rotate_mask, v128g, 3>; + + // Load element immediate. + // + // We want these instructions to be used ahead of VLVG* where possible. + // However, VLVG* takes a variable BD-format index whereas VLEI takes + // a plain immediate index. This means that VLVG* has an extra "base" + // register operand and is 3 units more complex. Bumping the complexity + // of the VLEI* instructions by 4 means that they are strictly better + // than VLVG* in cases where both forms match. + let AddedComplexity = 4 in { + def VLEIB : TernaryVRIa<"vleib", 0xE740, z_vector_insert, + v128b, v128b, imm32sx16trunc, imm32zx4>; + def VLEIH : TernaryVRIa<"vleih", 0xE741, z_vector_insert, + v128h, v128h, imm32sx16trunc, imm32zx3>; + def VLEIF : TernaryVRIa<"vleif", 0xE743, z_vector_insert, + v128f, v128f, imm32sx16, imm32zx2>; + def VLEIG : TernaryVRIa<"vleig", 0xE742, z_vector_insert, + v128g, v128g, imm64sx16, imm32zx1>; + } + + // Replicate immediate. 
+ def VREPIB : UnaryVRIa<"vrepib", 0xE745, z_replicate, v128b, imm32sx16, 0>; + def VREPIH : UnaryVRIa<"vrepih", 0xE745, z_replicate, v128h, imm32sx16, 1>; + def VREPIF : UnaryVRIa<"vrepif", 0xE745, z_replicate, v128f, imm32sx16, 2>; + def VREPIG : UnaryVRIa<"vrepig", 0xE745, z_replicate, v128g, imm32sx16, 3>; +} + +//===----------------------------------------------------------------------===// +// Loads +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureVector] in { + // Load. + def VL : UnaryVRX<"vl", 0xE706, null_frag, v128any, 16>; + + // Load to block boundary. The number of loaded bytes is only known + // at run time. The instruction is really polymorphic, but v128b matches + // the return type of the associated intrinsic. + def VLBB : BinaryVRX<"vlbb", 0xE707, int_s390_vlbb, v128b, 0>; + + // Load count to block boundary. + let Defs = [CC] in + def LCBB : InstRXE<0xE727, (outs GR32:$R1), + (ins bdxaddr12only:$XBD2, imm32zx4:$M3), + "lcbb\t$R1, $XBD2, $M3", + [(set GR32:$R1, (int_s390_lcbb bdxaddr12only:$XBD2, + imm32zx4:$M3))]>; + + // Load with length. The number of loaded bytes is only known at run time. + def VLL : BinaryVRSb<"vll", 0xE737, int_s390_vll, 0>; + + // Load multiple. + def VLM : LoadMultipleVRSa<"vlm", 0xE736>; + + // Load and replicate + def VLREPB : UnaryVRX<"vlrepb", 0xE705, z_replicate_loadi8, v128b, 1, 0>; + def VLREPH : UnaryVRX<"vlreph", 0xE705, z_replicate_loadi16, v128h, 2, 1>; + def VLREPF : UnaryVRX<"vlrepf", 0xE705, z_replicate_loadi32, v128f, 4, 2>; + def VLREPG : UnaryVRX<"vlrepg", 0xE705, z_replicate_loadi64, v128g, 8, 3>; + def : Pat<(v4f32 (z_replicate_loadf32 bdxaddr12only:$addr)), + (VLREPF bdxaddr12only:$addr)>; + def : Pat<(v2f64 (z_replicate_loadf64 bdxaddr12only:$addr)), + (VLREPG bdxaddr12only:$addr)>; + + // Use VLREP to load subvectors. These patterns use "12pair" because + // LEY and LDY offer full 20-bit displacement fields. It's often better + // to use those instructions rather than force a 20-bit displacement + // into a GPR temporary. + def VL32 : UnaryAliasVRX<load, v32eb, bdxaddr12pair>; + def VL64 : UnaryAliasVRX<load, v64db, bdxaddr12pair>; + + // Load logical element and zero. + def VLLEZB : UnaryVRX<"vllezb", 0xE704, z_vllezi8, v128b, 1, 0>; + def VLLEZH : UnaryVRX<"vllezh", 0xE704, z_vllezi16, v128h, 2, 1>; + def VLLEZF : UnaryVRX<"vllezf", 0xE704, z_vllezi32, v128f, 4, 2>; + def VLLEZG : UnaryVRX<"vllezg", 0xE704, z_vllezi64, v128g, 8, 3>; + def : Pat<(v4f32 (z_vllezf32 bdxaddr12only:$addr)), + (VLLEZF bdxaddr12only:$addr)>; + def : Pat<(v2f64 (z_vllezf64 bdxaddr12only:$addr)), + (VLLEZG bdxaddr12only:$addr)>; + + // Load element. + def VLEB : TernaryVRX<"vleb", 0xE700, z_vlei8, v128b, v128b, 1, imm32zx4>; + def VLEH : TernaryVRX<"vleh", 0xE701, z_vlei16, v128h, v128h, 2, imm32zx3>; + def VLEF : TernaryVRX<"vlef", 0xE703, z_vlei32, v128f, v128f, 4, imm32zx2>; + def VLEG : TernaryVRX<"vleg", 0xE702, z_vlei64, v128g, v128g, 8, imm32zx1>; + def : Pat<(z_vlef32 (v4f32 VR128:$val), bdxaddr12only:$addr, imm32zx2:$index), + (VLEF VR128:$val, bdxaddr12only:$addr, imm32zx2:$index)>; + def : Pat<(z_vlef64 (v2f64 VR128:$val), bdxaddr12only:$addr, imm32zx1:$index), + (VLEG VR128:$val, bdxaddr12only:$addr, imm32zx1:$index)>; + + // Gather element. + def VGEF : TernaryVRV<"vgef", 0xE713, 4, imm32zx2>; + def VGEG : TernaryVRV<"vgeg", 0xE712, 8, imm32zx1>; +} + +// Use replicating loads if we're inserting a single element into an +// undefined vector. 
This avoids a false dependency on the previous +// register contents. +multiclass ReplicatePeephole<Instruction vlrep, ValueType vectype, + SDPatternOperator load, ValueType scalartype> { + def : Pat<(vectype (z_vector_insert + (undef), (scalartype (load bdxaddr12only:$addr)), 0)), + (vlrep bdxaddr12only:$addr)>; + def : Pat<(vectype (scalar_to_vector + (scalartype (load bdxaddr12only:$addr)))), + (vlrep bdxaddr12only:$addr)>; +} +defm : ReplicatePeephole<VLREPB, v16i8, anyextloadi8, i32>; +defm : ReplicatePeephole<VLREPH, v8i16, anyextloadi16, i32>; +defm : ReplicatePeephole<VLREPF, v4i32, load, i32>; +defm : ReplicatePeephole<VLREPG, v2i64, load, i64>; +defm : ReplicatePeephole<VLREPF, v4f32, load, f32>; +defm : ReplicatePeephole<VLREPG, v2f64, load, f64>; + +//===----------------------------------------------------------------------===// +// Stores +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureVector] in { + // Store. + def VST : StoreVRX<"vst", 0xE70E, null_frag, v128any, 16>; + + // Store with length. The number of stored bytes is only known at run time. + def VSTL : StoreLengthVRSb<"vstl", 0xE73F, int_s390_vstl, 0>; + + // Store multiple. + def VSTM : StoreMultipleVRSa<"vstm", 0xE73E>; + + // Store element. + def VSTEB : StoreBinaryVRX<"vsteb", 0xE708, z_vstei8, v128b, 1, imm32zx4>; + def VSTEH : StoreBinaryVRX<"vsteh", 0xE709, z_vstei16, v128h, 2, imm32zx3>; + def VSTEF : StoreBinaryVRX<"vstef", 0xE70B, z_vstei32, v128f, 4, imm32zx2>; + def VSTEG : StoreBinaryVRX<"vsteg", 0xE70A, z_vstei64, v128g, 8, imm32zx1>; + def : Pat<(z_vstef32 (v4f32 VR128:$val), bdxaddr12only:$addr, + imm32zx2:$index), + (VSTEF VR128:$val, bdxaddr12only:$addr, imm32zx2:$index)>; + def : Pat<(z_vstef64 (v2f64 VR128:$val), bdxaddr12only:$addr, + imm32zx1:$index), + (VSTEG VR128:$val, bdxaddr12only:$addr, imm32zx1:$index)>; + + // Use VSTE to store subvectors. These patterns use "12pair" because + // STEY and STDY offer full 20-bit displacement fields. It's often better + // to use those instructions rather than force a 20-bit displacement + // into a GPR temporary. + def VST32 : StoreAliasVRX<store, v32eb, bdxaddr12pair>; + def VST64 : StoreAliasVRX<store, v64db, bdxaddr12pair>; + + // Scatter element. + def VSCEF : StoreBinaryVRV<"vscef", 0xE71B, 4, imm32zx2>; + def VSCEG : StoreBinaryVRV<"vsceg", 0xE71A, 8, imm32zx1>; +} + +//===----------------------------------------------------------------------===// +// Selects and permutes +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureVector] in { + // Merge high. + def VMRHB : BinaryVRRc<"vmrhb", 0xE761, z_merge_high, v128b, v128b, 0>; + def VMRHH : BinaryVRRc<"vmrhh", 0xE761, z_merge_high, v128h, v128h, 1>; + def VMRHF : BinaryVRRc<"vmrhf", 0xE761, z_merge_high, v128f, v128f, 2>; + def VMRHG : BinaryVRRc<"vmrhg", 0xE761, z_merge_high, v128g, v128g, 3>; + def : BinaryRRWithType<VMRHF, VR128, z_merge_high, v4f32>; + def : BinaryRRWithType<VMRHG, VR128, z_merge_high, v2f64>; + + // Merge low. + def VMRLB : BinaryVRRc<"vmrlb", 0xE760, z_merge_low, v128b, v128b, 0>; + def VMRLH : BinaryVRRc<"vmrlh", 0xE760, z_merge_low, v128h, v128h, 1>; + def VMRLF : BinaryVRRc<"vmrlf", 0xE760, z_merge_low, v128f, v128f, 2>; + def VMRLG : BinaryVRRc<"vmrlg", 0xE760, z_merge_low, v128g, v128g, 3>; + def : BinaryRRWithType<VMRLF, VR128, z_merge_low, v4f32>; + def : BinaryRRWithType<VMRLG, VR128, z_merge_low, v2f64>; + + // Permute. 
+ def VPERM : TernaryVRRe<"vperm", 0xE78C, z_permute, v128b, v128b>; + + // Permute doubleword immediate. + def VPDI : TernaryVRRc<"vpdi", 0xE784, z_permute_dwords, v128g, v128g>; + + // Replicate. + def VREPB : BinaryVRIc<"vrepb", 0xE74D, z_splat, v128b, v128b, 0>; + def VREPH : BinaryVRIc<"vreph", 0xE74D, z_splat, v128h, v128h, 1>; + def VREPF : BinaryVRIc<"vrepf", 0xE74D, z_splat, v128f, v128f, 2>; + def VREPG : BinaryVRIc<"vrepg", 0xE74D, z_splat, v128g, v128g, 3>; + def : Pat<(v4f32 (z_splat VR128:$vec, imm32zx16:$index)), + (VREPF VR128:$vec, imm32zx16:$index)>; + def : Pat<(v2f64 (z_splat VR128:$vec, imm32zx16:$index)), + (VREPG VR128:$vec, imm32zx16:$index)>; + + // Select. + def VSEL : TernaryVRRe<"vsel", 0xE78D, null_frag, v128any, v128any>; +} + +//===----------------------------------------------------------------------===// +// Widening and narrowing +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureVector] in { + // Pack + def VPKH : BinaryVRRc<"vpkh", 0xE794, z_pack, v128b, v128h, 1>; + def VPKF : BinaryVRRc<"vpkf", 0xE794, z_pack, v128h, v128f, 2>; + def VPKG : BinaryVRRc<"vpkg", 0xE794, z_pack, v128f, v128g, 3>; + + // Pack saturate. + defm VPKSH : BinaryVRRbSPair<"vpksh", 0xE797, int_s390_vpksh, z_packs_cc, + v128b, v128h, 1>; + defm VPKSF : BinaryVRRbSPair<"vpksf", 0xE797, int_s390_vpksf, z_packs_cc, + v128h, v128f, 2>; + defm VPKSG : BinaryVRRbSPair<"vpksg", 0xE797, int_s390_vpksg, z_packs_cc, + v128f, v128g, 3>; + + // Pack saturate logical. + defm VPKLSH : BinaryVRRbSPair<"vpklsh", 0xE795, int_s390_vpklsh, z_packls_cc, + v128b, v128h, 1>; + defm VPKLSF : BinaryVRRbSPair<"vpklsf", 0xE795, int_s390_vpklsf, z_packls_cc, + v128h, v128f, 2>; + defm VPKLSG : BinaryVRRbSPair<"vpklsg", 0xE795, int_s390_vpklsg, z_packls_cc, + v128f, v128g, 3>; + + // Sign-extend to doubleword. + def VSEGB : UnaryVRRa<"vsegb", 0xE75F, z_vsei8, v128g, v128g, 0>; + def VSEGH : UnaryVRRa<"vsegh", 0xE75F, z_vsei16, v128g, v128g, 1>; + def VSEGF : UnaryVRRa<"vsegf", 0xE75F, z_vsei32, v128g, v128g, 2>; + def : Pat<(z_vsei8_by_parts (v16i8 VR128:$src)), (VSEGB VR128:$src)>; + def : Pat<(z_vsei16_by_parts (v8i16 VR128:$src)), (VSEGH VR128:$src)>; + def : Pat<(z_vsei32_by_parts (v4i32 VR128:$src)), (VSEGF VR128:$src)>; + + // Unpack high. + def VUPHB : UnaryVRRa<"vuphb", 0xE7D7, z_unpack_high, v128h, v128b, 0>; + def VUPHH : UnaryVRRa<"vuphh", 0xE7D7, z_unpack_high, v128f, v128h, 1>; + def VUPHF : UnaryVRRa<"vuphf", 0xE7D7, z_unpack_high, v128g, v128f, 2>; + + // Unpack logical high. + def VUPLHB : UnaryVRRa<"vuplhb", 0xE7D5, z_unpackl_high, v128h, v128b, 0>; + def VUPLHH : UnaryVRRa<"vuplhh", 0xE7D5, z_unpackl_high, v128f, v128h, 1>; + def VUPLHF : UnaryVRRa<"vuplhf", 0xE7D5, z_unpackl_high, v128g, v128f, 2>; + + // Unpack low. + def VUPLB : UnaryVRRa<"vuplb", 0xE7D6, z_unpack_low, v128h, v128b, 0>; + def VUPLHW : UnaryVRRa<"vuplhw", 0xE7D6, z_unpack_low, v128f, v128h, 1>; + def VUPLF : UnaryVRRa<"vuplf", 0xE7D6, z_unpack_low, v128g, v128f, 2>; + + // Unpack logical low. + def VUPLLB : UnaryVRRa<"vupllb", 0xE7D4, z_unpackl_low, v128h, v128b, 0>; + def VUPLLH : UnaryVRRa<"vupllh", 0xE7D4, z_unpackl_low, v128f, v128h, 1>; + def VUPLLF : UnaryVRRa<"vupllf", 0xE7D4, z_unpackl_low, v128g, v128f, 2>; +} + +//===----------------------------------------------------------------------===// +// Instantiating generic operations for specific types. 
+//===----------------------------------------------------------------------===// + +multiclass GenericVectorOps<ValueType type, ValueType inttype> { + let Predicates = [FeatureVector] in { + def : Pat<(type (load bdxaddr12only:$addr)), + (VL bdxaddr12only:$addr)>; + def : Pat<(store (type VR128:$src), bdxaddr12only:$addr), + (VST VR128:$src, bdxaddr12only:$addr)>; + def : Pat<(type (vselect (inttype VR128:$x), VR128:$y, VR128:$z)), + (VSEL VR128:$y, VR128:$z, VR128:$x)>; + def : Pat<(type (vselect (inttype (z_vnot VR128:$x)), VR128:$y, VR128:$z)), + (VSEL VR128:$z, VR128:$y, VR128:$x)>; + } +} + +defm : GenericVectorOps<v16i8, v16i8>; +defm : GenericVectorOps<v8i16, v8i16>; +defm : GenericVectorOps<v4i32, v4i32>; +defm : GenericVectorOps<v2i64, v2i64>; +defm : GenericVectorOps<v4f32, v4i32>; +defm : GenericVectorOps<v2f64, v2i64>; + +//===----------------------------------------------------------------------===// +// Integer arithmetic +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureVector] in { + // Add. + def VAB : BinaryVRRc<"vab", 0xE7F3, add, v128b, v128b, 0>; + def VAH : BinaryVRRc<"vah", 0xE7F3, add, v128h, v128h, 1>; + def VAF : BinaryVRRc<"vaf", 0xE7F3, add, v128f, v128f, 2>; + def VAG : BinaryVRRc<"vag", 0xE7F3, add, v128g, v128g, 3>; + def VAQ : BinaryVRRc<"vaq", 0xE7F3, int_s390_vaq, v128q, v128q, 4>; + + // Add compute carry. + def VACCB : BinaryVRRc<"vaccb", 0xE7F1, int_s390_vaccb, v128b, v128b, 0>; + def VACCH : BinaryVRRc<"vacch", 0xE7F1, int_s390_vacch, v128h, v128h, 1>; + def VACCF : BinaryVRRc<"vaccf", 0xE7F1, int_s390_vaccf, v128f, v128f, 2>; + def VACCG : BinaryVRRc<"vaccg", 0xE7F1, int_s390_vaccg, v128g, v128g, 3>; + def VACCQ : BinaryVRRc<"vaccq", 0xE7F1, int_s390_vaccq, v128q, v128q, 4>; + + // Add with carry. + def VACQ : TernaryVRRd<"vacq", 0xE7BB, int_s390_vacq, v128q, v128q, 4>; + + // Add with carry compute carry. + def VACCCQ : TernaryVRRd<"vacccq", 0xE7B9, int_s390_vacccq, v128q, v128q, 4>; + + // And. + def VN : BinaryVRRc<"vn", 0xE768, null_frag, v128any, v128any>; + + // And with complement. + def VNC : BinaryVRRc<"vnc", 0xE769, null_frag, v128any, v128any>; + + // Average. + def VAVGB : BinaryVRRc<"vavgb", 0xE7F2, int_s390_vavgb, v128b, v128b, 0>; + def VAVGH : BinaryVRRc<"vavgh", 0xE7F2, int_s390_vavgh, v128h, v128h, 1>; + def VAVGF : BinaryVRRc<"vavgf", 0xE7F2, int_s390_vavgf, v128f, v128f, 2>; + def VAVGG : BinaryVRRc<"vavgg", 0xE7F2, int_s390_vavgg, v128g, v128g, 3>; + + // Average logical. + def VAVGLB : BinaryVRRc<"vavglb", 0xE7F0, int_s390_vavglb, v128b, v128b, 0>; + def VAVGLH : BinaryVRRc<"vavglh", 0xE7F0, int_s390_vavglh, v128h, v128h, 1>; + def VAVGLF : BinaryVRRc<"vavglf", 0xE7F0, int_s390_vavglf, v128f, v128f, 2>; + def VAVGLG : BinaryVRRc<"vavglg", 0xE7F0, int_s390_vavglg, v128g, v128g, 3>; + + // Checksum. + def VCKSM : BinaryVRRc<"vcksm", 0xE766, int_s390_vcksm, v128f, v128f>; + + // Count leading zeros. + def VCLZB : UnaryVRRa<"vclzb", 0xE753, ctlz, v128b, v128b, 0>; + def VCLZH : UnaryVRRa<"vclzh", 0xE753, ctlz, v128h, v128h, 1>; + def VCLZF : UnaryVRRa<"vclzf", 0xE753, ctlz, v128f, v128f, 2>; + def VCLZG : UnaryVRRa<"vclzg", 0xE753, ctlz, v128g, v128g, 3>; + + // Count trailing zeros. + def VCTZB : UnaryVRRa<"vctzb", 0xE752, cttz, v128b, v128b, 0>; + def VCTZH : UnaryVRRa<"vctzh", 0xE752, cttz, v128h, v128h, 1>; + def VCTZF : UnaryVRRa<"vctzf", 0xE752, cttz, v128f, v128f, 2>; + def VCTZG : UnaryVRRa<"vctzg", 0xE752, cttz, v128g, v128g, 3>; + + // Exclusive or. 
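Editor's aside: the GenericVectorOps patterns above map vselect onto VSEL, whose third operand is the bit mask and whose first operand supplies the result bits wherever the mask is 1. A one-lane C++ sketch of that bitwise-select semantics (the helper name is invented; the real instruction operates on the whole 128-bit register):

#include <cassert>
#include <cstdint>

// One-lane model of the VSEL operand order used by the patterns above:
// a 1 bit in the mask selects the corresponding bit of the first operand.
static uint64_t vectorSelect(uint64_t OnOne, uint64_t OnZero, uint64_t Mask) {
  return (OnOne & Mask) | (OnZero & ~Mask);
}

int main() {
  // With an all-ones or all-zeros mask per element, which is what the
  // vselect patterns feed it, VSEL picks whole elements.
  assert(vectorSelect(0xAAAA, 0x5555, 0xFFFF) == 0xAAAA);
  assert(vectorSelect(0xAAAA, 0x5555, 0x0000) == 0x5555);
  // The inverted-mask pattern above simply swaps the first two operands.
  assert(vectorSelect(0x5555, 0xAAAA, ~uint64_t(0xFFFF)) == 0xAAAA);
  return 0;
}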
+ def VX : BinaryVRRc<"vx", 0xE76D, null_frag, v128any, v128any>; + + // Galois field multiply sum. + def VGFMB : BinaryVRRc<"vgfmb", 0xE7B4, int_s390_vgfmb, v128h, v128b, 0>; + def VGFMH : BinaryVRRc<"vgfmh", 0xE7B4, int_s390_vgfmh, v128f, v128h, 1>; + def VGFMF : BinaryVRRc<"vgfmf", 0xE7B4, int_s390_vgfmf, v128g, v128f, 2>; + def VGFMG : BinaryVRRc<"vgfmg", 0xE7B4, int_s390_vgfmg, v128q, v128g, 3>; + + // Galois field multiply sum and accumulate. + def VGFMAB : TernaryVRRd<"vgfmab", 0xE7BC, int_s390_vgfmab, v128h, v128b, 0>; + def VGFMAH : TernaryVRRd<"vgfmah", 0xE7BC, int_s390_vgfmah, v128f, v128h, 1>; + def VGFMAF : TernaryVRRd<"vgfmaf", 0xE7BC, int_s390_vgfmaf, v128g, v128f, 2>; + def VGFMAG : TernaryVRRd<"vgfmag", 0xE7BC, int_s390_vgfmag, v128q, v128g, 3>; + + // Load complement. + def VLCB : UnaryVRRa<"vlcb", 0xE7DE, z_vneg, v128b, v128b, 0>; + def VLCH : UnaryVRRa<"vlch", 0xE7DE, z_vneg, v128h, v128h, 1>; + def VLCF : UnaryVRRa<"vlcf", 0xE7DE, z_vneg, v128f, v128f, 2>; + def VLCG : UnaryVRRa<"vlcg", 0xE7DE, z_vneg, v128g, v128g, 3>; + + // Load positive. + def VLPB : UnaryVRRa<"vlpb", 0xE7DF, z_viabs8, v128b, v128b, 0>; + def VLPH : UnaryVRRa<"vlph", 0xE7DF, z_viabs16, v128h, v128h, 1>; + def VLPF : UnaryVRRa<"vlpf", 0xE7DF, z_viabs32, v128f, v128f, 2>; + def VLPG : UnaryVRRa<"vlpg", 0xE7DF, z_viabs64, v128g, v128g, 3>; + + // Maximum. + def VMXB : BinaryVRRc<"vmxb", 0xE7FF, null_frag, v128b, v128b, 0>; + def VMXH : BinaryVRRc<"vmxh", 0xE7FF, null_frag, v128h, v128h, 1>; + def VMXF : BinaryVRRc<"vmxf", 0xE7FF, null_frag, v128f, v128f, 2>; + def VMXG : BinaryVRRc<"vmxg", 0xE7FF, null_frag, v128g, v128g, 3>; + + // Maximum logical. + def VMXLB : BinaryVRRc<"vmxlb", 0xE7FD, null_frag, v128b, v128b, 0>; + def VMXLH : BinaryVRRc<"vmxlh", 0xE7FD, null_frag, v128h, v128h, 1>; + def VMXLF : BinaryVRRc<"vmxlf", 0xE7FD, null_frag, v128f, v128f, 2>; + def VMXLG : BinaryVRRc<"vmxlg", 0xE7FD, null_frag, v128g, v128g, 3>; + + // Minimum. + def VMNB : BinaryVRRc<"vmnb", 0xE7FE, null_frag, v128b, v128b, 0>; + def VMNH : BinaryVRRc<"vmnh", 0xE7FE, null_frag, v128h, v128h, 1>; + def VMNF : BinaryVRRc<"vmnf", 0xE7FE, null_frag, v128f, v128f, 2>; + def VMNG : BinaryVRRc<"vmng", 0xE7FE, null_frag, v128g, v128g, 3>; + + // Minimum logical. + def VMNLB : BinaryVRRc<"vmnlb", 0xE7FC, null_frag, v128b, v128b, 0>; + def VMNLH : BinaryVRRc<"vmnlh", 0xE7FC, null_frag, v128h, v128h, 1>; + def VMNLF : BinaryVRRc<"vmnlf", 0xE7FC, null_frag, v128f, v128f, 2>; + def VMNLG : BinaryVRRc<"vmnlg", 0xE7FC, null_frag, v128g, v128g, 3>; + + // Multiply and add low. + def VMALB : TernaryVRRd<"vmalb", 0xE7AA, z_muladd, v128b, v128b, 0>; + def VMALHW : TernaryVRRd<"vmalhw", 0xE7AA, z_muladd, v128h, v128h, 1>; + def VMALF : TernaryVRRd<"vmalf", 0xE7AA, z_muladd, v128f, v128f, 2>; + + // Multiply and add high. + def VMAHB : TernaryVRRd<"vmahb", 0xE7AB, int_s390_vmahb, v128b, v128b, 0>; + def VMAHH : TernaryVRRd<"vmahh", 0xE7AB, int_s390_vmahh, v128h, v128h, 1>; + def VMAHF : TernaryVRRd<"vmahf", 0xE7AB, int_s390_vmahf, v128f, v128f, 2>; + + // Multiply and add logical high. + def VMALHB : TernaryVRRd<"vmalhb", 0xE7A9, int_s390_vmalhb, v128b, v128b, 0>; + def VMALHH : TernaryVRRd<"vmalhh", 0xE7A9, int_s390_vmalhh, v128h, v128h, 1>; + def VMALHF : TernaryVRRd<"vmalhf", 0xE7A9, int_s390_vmalhf, v128f, v128f, 2>; + + // Multiply and add even. 
+ def VMAEB : TernaryVRRd<"vmaeb", 0xE7AE, int_s390_vmaeb, v128h, v128b, 0>; + def VMAEH : TernaryVRRd<"vmaeh", 0xE7AE, int_s390_vmaeh, v128f, v128h, 1>; + def VMAEF : TernaryVRRd<"vmaef", 0xE7AE, int_s390_vmaef, v128g, v128f, 2>; + + // Multiply and add logical even. + def VMALEB : TernaryVRRd<"vmaleb", 0xE7AC, int_s390_vmaleb, v128h, v128b, 0>; + def VMALEH : TernaryVRRd<"vmaleh", 0xE7AC, int_s390_vmaleh, v128f, v128h, 1>; + def VMALEF : TernaryVRRd<"vmalef", 0xE7AC, int_s390_vmalef, v128g, v128f, 2>; + + // Multiply and add odd. + def VMAOB : TernaryVRRd<"vmaob", 0xE7AF, int_s390_vmaob, v128h, v128b, 0>; + def VMAOH : TernaryVRRd<"vmaoh", 0xE7AF, int_s390_vmaoh, v128f, v128h, 1>; + def VMAOF : TernaryVRRd<"vmaof", 0xE7AF, int_s390_vmaof, v128g, v128f, 2>; + + // Multiply and add logical odd. + def VMALOB : TernaryVRRd<"vmalob", 0xE7AD, int_s390_vmalob, v128h, v128b, 0>; + def VMALOH : TernaryVRRd<"vmaloh", 0xE7AD, int_s390_vmaloh, v128f, v128h, 1>; + def VMALOF : TernaryVRRd<"vmalof", 0xE7AD, int_s390_vmalof, v128g, v128f, 2>; + + // Multiply high. + def VMHB : BinaryVRRc<"vmhb", 0xE7A3, int_s390_vmhb, v128b, v128b, 0>; + def VMHH : BinaryVRRc<"vmhh", 0xE7A3, int_s390_vmhh, v128h, v128h, 1>; + def VMHF : BinaryVRRc<"vmhf", 0xE7A3, int_s390_vmhf, v128f, v128f, 2>; + + // Multiply logical high. + def VMLHB : BinaryVRRc<"vmlhb", 0xE7A1, int_s390_vmlhb, v128b, v128b, 0>; + def VMLHH : BinaryVRRc<"vmlhh", 0xE7A1, int_s390_vmlhh, v128h, v128h, 1>; + def VMLHF : BinaryVRRc<"vmlhf", 0xE7A1, int_s390_vmlhf, v128f, v128f, 2>; + + // Multiply low. + def VMLB : BinaryVRRc<"vmlb", 0xE7A2, mul, v128b, v128b, 0>; + def VMLHW : BinaryVRRc<"vmlhw", 0xE7A2, mul, v128h, v128h, 1>; + def VMLF : BinaryVRRc<"vmlf", 0xE7A2, mul, v128f, v128f, 2>; + + // Multiply even. + def VMEB : BinaryVRRc<"vmeb", 0xE7A6, int_s390_vmeb, v128h, v128b, 0>; + def VMEH : BinaryVRRc<"vmeh", 0xE7A6, int_s390_vmeh, v128f, v128h, 1>; + def VMEF : BinaryVRRc<"vmef", 0xE7A6, int_s390_vmef, v128g, v128f, 2>; + + // Multiply logical even. + def VMLEB : BinaryVRRc<"vmleb", 0xE7A4, int_s390_vmleb, v128h, v128b, 0>; + def VMLEH : BinaryVRRc<"vmleh", 0xE7A4, int_s390_vmleh, v128f, v128h, 1>; + def VMLEF : BinaryVRRc<"vmlef", 0xE7A4, int_s390_vmlef, v128g, v128f, 2>; + + // Multiply odd. + def VMOB : BinaryVRRc<"vmob", 0xE7A7, int_s390_vmob, v128h, v128b, 0>; + def VMOH : BinaryVRRc<"vmoh", 0xE7A7, int_s390_vmoh, v128f, v128h, 1>; + def VMOF : BinaryVRRc<"vmof", 0xE7A7, int_s390_vmof, v128g, v128f, 2>; + + // Multiply logical odd. + def VMLOB : BinaryVRRc<"vmlob", 0xE7A5, int_s390_vmlob, v128h, v128b, 0>; + def VMLOH : BinaryVRRc<"vmloh", 0xE7A5, int_s390_vmloh, v128f, v128h, 1>; + def VMLOF : BinaryVRRc<"vmlof", 0xE7A5, int_s390_vmlof, v128g, v128f, 2>; + + // Nor. + def VNO : BinaryVRRc<"vno", 0xE76B, null_frag, v128any, v128any>; + + // Or. + def VO : BinaryVRRc<"vo", 0xE76A, null_frag, v128any, v128any>; + + // Population count. + def VPOPCT : BinaryVRRa<"vpopct", 0xE750>; + def : Pat<(v16i8 (z_popcnt VR128:$x)), (VPOPCT VR128:$x, 0)>; + + // Element rotate left logical (with vector shift amount). + def VERLLVB : BinaryVRRc<"verllvb", 0xE773, int_s390_verllvb, + v128b, v128b, 0>; + def VERLLVH : BinaryVRRc<"verllvh", 0xE773, int_s390_verllvh, + v128h, v128h, 1>; + def VERLLVF : BinaryVRRc<"verllvf", 0xE773, int_s390_verllvf, + v128f, v128f, 2>; + def VERLLVG : BinaryVRRc<"verllvg", 0xE773, int_s390_verllvg, + v128g, v128g, 3>; + + // Element rotate left logical (with scalar shift amount). 
+ def VERLLB : BinaryVRSa<"verllb", 0xE733, int_s390_verllb, v128b, v128b, 0>; + def VERLLH : BinaryVRSa<"verllh", 0xE733, int_s390_verllh, v128h, v128h, 1>; + def VERLLF : BinaryVRSa<"verllf", 0xE733, int_s390_verllf, v128f, v128f, 2>; + def VERLLG : BinaryVRSa<"verllg", 0xE733, int_s390_verllg, v128g, v128g, 3>; + + // Element rotate and insert under mask. + def VERIMB : QuaternaryVRId<"verimb", 0xE772, int_s390_verimb, v128b, v128b, 0>; + def VERIMH : QuaternaryVRId<"verimh", 0xE772, int_s390_verimh, v128h, v128h, 1>; + def VERIMF : QuaternaryVRId<"verimf", 0xE772, int_s390_verimf, v128f, v128f, 2>; + def VERIMG : QuaternaryVRId<"verimg", 0xE772, int_s390_verimg, v128g, v128g, 3>; + + // Element shift left (with vector shift amount). + def VESLVB : BinaryVRRc<"veslvb", 0xE770, z_vshl, v128b, v128b, 0>; + def VESLVH : BinaryVRRc<"veslvh", 0xE770, z_vshl, v128h, v128h, 1>; + def VESLVF : BinaryVRRc<"veslvf", 0xE770, z_vshl, v128f, v128f, 2>; + def VESLVG : BinaryVRRc<"veslvg", 0xE770, z_vshl, v128g, v128g, 3>; + + // Element shift left (with scalar shift amount). + def VESLB : BinaryVRSa<"veslb", 0xE730, z_vshl_by_scalar, v128b, v128b, 0>; + def VESLH : BinaryVRSa<"veslh", 0xE730, z_vshl_by_scalar, v128h, v128h, 1>; + def VESLF : BinaryVRSa<"veslf", 0xE730, z_vshl_by_scalar, v128f, v128f, 2>; + def VESLG : BinaryVRSa<"veslg", 0xE730, z_vshl_by_scalar, v128g, v128g, 3>; + + // Element shift right arithmetic (with vector shift amount). + def VESRAVB : BinaryVRRc<"vesravb", 0xE77A, z_vsra, v128b, v128b, 0>; + def VESRAVH : BinaryVRRc<"vesravh", 0xE77A, z_vsra, v128h, v128h, 1>; + def VESRAVF : BinaryVRRc<"vesravf", 0xE77A, z_vsra, v128f, v128f, 2>; + def VESRAVG : BinaryVRRc<"vesravg", 0xE77A, z_vsra, v128g, v128g, 3>; + + // Element shift right arithmetic (with scalar shift amount). + def VESRAB : BinaryVRSa<"vesrab", 0xE73A, z_vsra_by_scalar, v128b, v128b, 0>; + def VESRAH : BinaryVRSa<"vesrah", 0xE73A, z_vsra_by_scalar, v128h, v128h, 1>; + def VESRAF : BinaryVRSa<"vesraf", 0xE73A, z_vsra_by_scalar, v128f, v128f, 2>; + def VESRAG : BinaryVRSa<"vesrag", 0xE73A, z_vsra_by_scalar, v128g, v128g, 3>; + + // Element shift right logical (with vector shift amount). + def VESRLVB : BinaryVRRc<"vesrlvb", 0xE778, z_vsrl, v128b, v128b, 0>; + def VESRLVH : BinaryVRRc<"vesrlvh", 0xE778, z_vsrl, v128h, v128h, 1>; + def VESRLVF : BinaryVRRc<"vesrlvf", 0xE778, z_vsrl, v128f, v128f, 2>; + def VESRLVG : BinaryVRRc<"vesrlvg", 0xE778, z_vsrl, v128g, v128g, 3>; + + // Element shift right logical (with scalar shift amount). + def VESRLB : BinaryVRSa<"vesrlb", 0xE738, z_vsrl_by_scalar, v128b, v128b, 0>; + def VESRLH : BinaryVRSa<"vesrlh", 0xE738, z_vsrl_by_scalar, v128h, v128h, 1>; + def VESRLF : BinaryVRSa<"vesrlf", 0xE738, z_vsrl_by_scalar, v128f, v128f, 2>; + def VESRLG : BinaryVRSa<"vesrlg", 0xE738, z_vsrl_by_scalar, v128g, v128g, 3>; + + // Shift left. + def VSL : BinaryVRRc<"vsl", 0xE774, int_s390_vsl, v128b, v128b>; + + // Shift left by byte. + def VSLB : BinaryVRRc<"vslb", 0xE775, int_s390_vslb, v128b, v128b>; + + // Shift left double by byte. + def VSLDB : TernaryVRId<"vsldb", 0xE777, z_shl_double, v128b, v128b, 0>; + def : Pat<(int_s390_vsldb VR128:$x, VR128:$y, imm32zx8:$z), + (VSLDB VR128:$x, VR128:$y, imm32zx8:$z)>; + + // Shift right arithmetic. + def VSRA : BinaryVRRc<"vsra", 0xE77E, int_s390_vsra, v128b, v128b>; + + // Shift right arithmetic by byte. + def VSRAB : BinaryVRRc<"vsrab", 0xE77F, int_s390_vsrab, v128b, v128b>; + + // Shift right logical. 
+ def VSRL : BinaryVRRc<"vsrl", 0xE77C, int_s390_vsrl, v128b, v128b>; + + // Shift right logical by byte. + def VSRLB : BinaryVRRc<"vsrlb", 0xE77D, int_s390_vsrlb, v128b, v128b>; + + // Subtract. + def VSB : BinaryVRRc<"vsb", 0xE7F7, sub, v128b, v128b, 0>; + def VSH : BinaryVRRc<"vsh", 0xE7F7, sub, v128h, v128h, 1>; + def VSF : BinaryVRRc<"vsf", 0xE7F7, sub, v128f, v128f, 2>; + def VSG : BinaryVRRc<"vsg", 0xE7F7, sub, v128g, v128g, 3>; + def VSQ : BinaryVRRc<"vsq", 0xE7F7, int_s390_vsq, v128q, v128q, 4>; + + // Subtract compute borrow indication. + def VSCBIB : BinaryVRRc<"vscbib", 0xE7F5, int_s390_vscbib, v128b, v128b, 0>; + def VSCBIH : BinaryVRRc<"vscbih", 0xE7F5, int_s390_vscbih, v128h, v128h, 1>; + def VSCBIF : BinaryVRRc<"vscbif", 0xE7F5, int_s390_vscbif, v128f, v128f, 2>; + def VSCBIG : BinaryVRRc<"vscbig", 0xE7F5, int_s390_vscbig, v128g, v128g, 3>; + def VSCBIQ : BinaryVRRc<"vscbiq", 0xE7F5, int_s390_vscbiq, v128q, v128q, 4>; + + // Subtract with borrow indication. + def VSBIQ : TernaryVRRd<"vsbiq", 0xE7BF, int_s390_vsbiq, v128q, v128q, 4>; + + // Subtract with borrow compute borrow indication. + def VSBCBIQ : TernaryVRRd<"vsbcbiq", 0xE7BD, int_s390_vsbcbiq, + v128q, v128q, 4>; + + // Sum across doubleword. + def VSUMGH : BinaryVRRc<"vsumgh", 0xE765, z_vsum, v128g, v128h, 1>; + def VSUMGF : BinaryVRRc<"vsumgf", 0xE765, z_vsum, v128g, v128f, 2>; + + // Sum across quadword. + def VSUMQF : BinaryVRRc<"vsumqf", 0xE767, z_vsum, v128q, v128f, 2>; + def VSUMQG : BinaryVRRc<"vsumqg", 0xE767, z_vsum, v128q, v128g, 3>; + + // Sum across word. + def VSUMB : BinaryVRRc<"vsumb", 0xE764, z_vsum, v128f, v128b, 0>; + def VSUMH : BinaryVRRc<"vsumh", 0xE764, z_vsum, v128f, v128h, 1>; +} + +// Instantiate the bitwise ops for type TYPE. +multiclass BitwiseVectorOps<ValueType type> { + let Predicates = [FeatureVector] in { + def : Pat<(type (and VR128:$x, VR128:$y)), (VN VR128:$x, VR128:$y)>; + def : Pat<(type (and VR128:$x, (z_vnot VR128:$y))), + (VNC VR128:$x, VR128:$y)>; + def : Pat<(type (or VR128:$x, VR128:$y)), (VO VR128:$x, VR128:$y)>; + def : Pat<(type (xor VR128:$x, VR128:$y)), (VX VR128:$x, VR128:$y)>; + def : Pat<(type (or (and VR128:$x, VR128:$z), + (and VR128:$y, (z_vnot VR128:$z)))), + (VSEL VR128:$x, VR128:$y, VR128:$z)>; + def : Pat<(type (z_vnot (or VR128:$x, VR128:$y))), + (VNO VR128:$x, VR128:$y)>; + def : Pat<(type (z_vnot VR128:$x)), (VNO VR128:$x, VR128:$x)>; + } +} + +defm : BitwiseVectorOps<v16i8>; +defm : BitwiseVectorOps<v8i16>; +defm : BitwiseVectorOps<v4i32>; +defm : BitwiseVectorOps<v2i64>; + +// Instantiate additional patterns for absolute-related expressions on +// type TYPE. LC is the negate instruction for TYPE and LP is the absolute +// instruction. 
+multiclass IntegerAbsoluteVectorOps<ValueType type, Instruction lc, + Instruction lp, int shift> { + let Predicates = [FeatureVector] in { + def : Pat<(type (vselect (type (z_vicmph_zero VR128:$x)), + (z_vneg VR128:$x), VR128:$x)), + (lc (lp VR128:$x))>; + def : Pat<(type (vselect (type (z_vnot (z_vicmph_zero VR128:$x))), + VR128:$x, (z_vneg VR128:$x))), + (lc (lp VR128:$x))>; + def : Pat<(type (vselect (type (z_vicmpl_zero VR128:$x)), + VR128:$x, (z_vneg VR128:$x))), + (lc (lp VR128:$x))>; + def : Pat<(type (vselect (type (z_vnot (z_vicmpl_zero VR128:$x))), + (z_vneg VR128:$x), VR128:$x)), + (lc (lp VR128:$x))>; + def : Pat<(type (or (and (z_vsra_by_scalar VR128:$x, (i32 shift)), + (z_vneg VR128:$x)), + (and (z_vnot (z_vsra_by_scalar VR128:$x, (i32 shift))), + VR128:$x))), + (lp VR128:$x)>; + def : Pat<(type (or (and (z_vsra_by_scalar VR128:$x, (i32 shift)), + VR128:$x), + (and (z_vnot (z_vsra_by_scalar VR128:$x, (i32 shift))), + (z_vneg VR128:$x)))), + (lc (lp VR128:$x))>; + } +} + +defm : IntegerAbsoluteVectorOps<v16i8, VLCB, VLPB, 7>; +defm : IntegerAbsoluteVectorOps<v8i16, VLCH, VLPH, 15>; +defm : IntegerAbsoluteVectorOps<v4i32, VLCF, VLPF, 31>; +defm : IntegerAbsoluteVectorOps<v2i64, VLCG, VLPG, 63>; + +// Instantiate minimum- and maximum-related patterns for TYPE. CMPH is the +// signed or unsigned "set if greater than" comparison instruction and +// MIN and MAX are the associated minimum and maximum instructions. +multiclass IntegerMinMaxVectorOps<ValueType type, SDPatternOperator cmph, + Instruction min, Instruction max> { + let Predicates = [FeatureVector] in { + def : Pat<(type (vselect (cmph VR128:$x, VR128:$y), VR128:$x, VR128:$y)), + (max VR128:$x, VR128:$y)>; + def : Pat<(type (vselect (cmph VR128:$x, VR128:$y), VR128:$y, VR128:$x)), + (min VR128:$x, VR128:$y)>; + def : Pat<(type (vselect (z_vnot (cmph VR128:$x, VR128:$y)), + VR128:$x, VR128:$y)), + (min VR128:$x, VR128:$y)>; + def : Pat<(type (vselect (z_vnot (cmph VR128:$x, VR128:$y)), + VR128:$y, VR128:$x)), + (max VR128:$x, VR128:$y)>; + } +} + +// Signed min/max. +defm : IntegerMinMaxVectorOps<v16i8, z_vicmph, VMNB, VMXB>; +defm : IntegerMinMaxVectorOps<v8i16, z_vicmph, VMNH, VMXH>; +defm : IntegerMinMaxVectorOps<v4i32, z_vicmph, VMNF, VMXF>; +defm : IntegerMinMaxVectorOps<v2i64, z_vicmph, VMNG, VMXG>; + +// Unsigned min/max. +defm : IntegerMinMaxVectorOps<v16i8, z_vicmphl, VMNLB, VMXLB>; +defm : IntegerMinMaxVectorOps<v8i16, z_vicmphl, VMNLH, VMXLH>; +defm : IntegerMinMaxVectorOps<v4i32, z_vicmphl, VMNLF, VMXLF>; +defm : IntegerMinMaxVectorOps<v2i64, z_vicmphl, VMNLG, VMXLG>; + +//===----------------------------------------------------------------------===// +// Integer comparison +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureVector] in { + // Element compare. + let Defs = [CC] in { + def VECB : CompareVRRa<"vecb", 0xE7DB, null_frag, v128b, 0>; + def VECH : CompareVRRa<"vech", 0xE7DB, null_frag, v128h, 1>; + def VECF : CompareVRRa<"vecf", 0xE7DB, null_frag, v128f, 2>; + def VECG : CompareVRRa<"vecg", 0xE7DB, null_frag, v128g, 3>; + } + + // Element compare logical. + let Defs = [CC] in { + def VECLB : CompareVRRa<"veclb", 0xE7D9, null_frag, v128b, 0>; + def VECLH : CompareVRRa<"veclh", 0xE7D9, null_frag, v128h, 1>; + def VECLF : CompareVRRa<"veclf", 0xE7D9, null_frag, v128f, 2>; + def VECLG : CompareVRRa<"veclg", 0xE7D9, null_frag, v128g, 3>; + } + + // Compare equal. 
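Editor's aside: the last two patterns in IntegerAbsoluteVectorOps above match the classic sign-mask absolute-value idiom against VLP (load positive). Per element, the arithmetic shift by the element width minus one produces 0 or all ones, which then selects between x and -x. A scalar C++ version of the idiom for one 32-bit element (assuming arithmetic right shift of signed values; INT_MIN is excluded, the usual caveat for this trick, and the helper name is invented):

#include <cassert>
#include <cstdint>

// Scalar sketch of the sign-mask absolute-value idiom for one 32-bit element.
static int32_t absViaSignMask(int32_t X) {
  int32_t SignMask = X >> 31;              // 0 for X >= 0, all ones for X < 0
  return (SignMask & -X) | (~SignMask & X);
}

int main() {
  assert(absViaSignMask(42) == 42);
  assert(absViaSignMask(-42) == 42);
  assert(absViaSignMask(0) == 0);
  return 0;
}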
+ defm VCEQB : BinaryVRRbSPair<"vceqb", 0xE7F8, z_vicmpe, z_vicmpes, + v128b, v128b, 0>; + defm VCEQH : BinaryVRRbSPair<"vceqh", 0xE7F8, z_vicmpe, z_vicmpes, + v128h, v128h, 1>; + defm VCEQF : BinaryVRRbSPair<"vceqf", 0xE7F8, z_vicmpe, z_vicmpes, + v128f, v128f, 2>; + defm VCEQG : BinaryVRRbSPair<"vceqg", 0xE7F8, z_vicmpe, z_vicmpes, + v128g, v128g, 3>; + + // Compare high. + defm VCHB : BinaryVRRbSPair<"vchb", 0xE7FB, z_vicmph, z_vicmphs, + v128b, v128b, 0>; + defm VCHH : BinaryVRRbSPair<"vchh", 0xE7FB, z_vicmph, z_vicmphs, + v128h, v128h, 1>; + defm VCHF : BinaryVRRbSPair<"vchf", 0xE7FB, z_vicmph, z_vicmphs, + v128f, v128f, 2>; + defm VCHG : BinaryVRRbSPair<"vchg", 0xE7FB, z_vicmph, z_vicmphs, + v128g, v128g, 3>; + + // Compare high logical. + defm VCHLB : BinaryVRRbSPair<"vchlb", 0xE7F9, z_vicmphl, z_vicmphls, + v128b, v128b, 0>; + defm VCHLH : BinaryVRRbSPair<"vchlh", 0xE7F9, z_vicmphl, z_vicmphls, + v128h, v128h, 1>; + defm VCHLF : BinaryVRRbSPair<"vchlf", 0xE7F9, z_vicmphl, z_vicmphls, + v128f, v128f, 2>; + defm VCHLG : BinaryVRRbSPair<"vchlg", 0xE7F9, z_vicmphl, z_vicmphls, + v128g, v128g, 3>; + + // Test under mask. + let Defs = [CC] in + def VTM : CompareVRRa<"vtm", 0xE7D8, z_vtm, v128b, 0>; +} + +//===----------------------------------------------------------------------===// +// Floating-point arithmetic +//===----------------------------------------------------------------------===// + +// See comments in SystemZInstrFP.td for the suppression flags and +// rounding modes. +multiclass VectorRounding<Instruction insn, TypedReg tr> { + def : FPConversion<insn, frint, tr, tr, 0, 0>; + def : FPConversion<insn, fnearbyint, tr, tr, 4, 0>; + def : FPConversion<insn, ffloor, tr, tr, 4, 7>; + def : FPConversion<insn, fceil, tr, tr, 4, 6>; + def : FPConversion<insn, ftrunc, tr, tr, 4, 5>; + def : FPConversion<insn, frnd, tr, tr, 4, 1>; +} + +let Predicates = [FeatureVector] in { + // Add. + def VFADB : BinaryVRRc<"vfadb", 0xE7E3, fadd, v128db, v128db, 3, 0>; + def WFADB : BinaryVRRc<"wfadb", 0xE7E3, fadd, v64db, v64db, 3, 8>; + + // Convert from fixed 64-bit. + def VCDGB : TernaryVRRa<"vcdgb", 0xE7C3, null_frag, v128db, v128g, 3, 0>; + def WCDGB : TernaryVRRa<"wcdgb", 0xE7C3, null_frag, v64db, v64g, 3, 8>; + def : FPConversion<VCDGB, sint_to_fp, v128db, v128g, 0, 0>; + + // Convert from logical 64-bit. + def VCDLGB : TernaryVRRa<"vcdlgb", 0xE7C1, null_frag, v128db, v128g, 3, 0>; + def WCDLGB : TernaryVRRa<"wcdlgb", 0xE7C1, null_frag, v64db, v64g, 3, 8>; + def : FPConversion<VCDLGB, uint_to_fp, v128db, v128g, 0, 0>; + + // Convert to fixed 64-bit. + def VCGDB : TernaryVRRa<"vcgdb", 0xE7C2, null_frag, v128g, v128db, 3, 0>; + def WCGDB : TernaryVRRa<"wcgdb", 0xE7C2, null_frag, v64g, v64db, 3, 8>; + // Rounding mode should agree with SystemZInstrFP.td. + def : FPConversion<VCGDB, fp_to_sint, v128g, v128db, 0, 5>; + + // Convert to logical 64-bit. + def VCLGDB : TernaryVRRa<"vclgdb", 0xE7C0, null_frag, v128g, v128db, 3, 0>; + def WCLGDB : TernaryVRRa<"wclgdb", 0xE7C0, null_frag, v64g, v64db, 3, 8>; + // Rounding mode should agree with SystemZInstrFP.td. + def : FPConversion<VCLGDB, fp_to_uint, v128g, v128db, 0, 5>; + + // Divide. + def VFDDB : BinaryVRRc<"vfddb", 0xE7E5, fdiv, v128db, v128db, 3, 0>; + def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, fdiv, v64db, v64db, 3, 8>; + + // Load FP integer. 
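An editorial note on the VectorRounding multiclass above: to the best of my understanding, the (suppression, rounding-mode) pairs line up with the usual <cmath> helpers roughly as sketched here (the function and its name are mine):

#include <cmath>

// frint      (0, 0) ~ std::rint       current mode, inexact may be signalled
// fnearbyint (4, 0) ~ std::nearbyint  current mode, inexact suppressed
// ffloor     (4, 7) ~ std::floor      toward -infinity
// fceil      (4, 6) ~ std::ceil       toward +infinity
// ftrunc     (4, 5) ~ std::trunc      toward zero
// frnd       (4, 1) ~ std::round      nearest, ties away from zero
double roundLikeVFIDB(double x, int mode) {
  switch (mode) {
  case 7: return std::floor(x);
  case 6: return std::ceil(x);
  case 5: return std::trunc(x);
  case 1: return std::round(x);
  default: return std::nearbyint(x);
  }
}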
+ def VFIDB : TernaryVRRa<"vfidb", 0xE7C7, int_s390_vfidb, v128db, v128db, 3, 0>; + def WFIDB : TernaryVRRa<"wfidb", 0xE7C7, null_frag, v64db, v64db, 3, 8>; + defm : VectorRounding<VFIDB, v128db>; + defm : VectorRounding<WFIDB, v64db>; + + // Load lengthened. + def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_vextend, v128db, v128eb, 2, 0>; + def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, fextend, v64db, v32eb, 2, 8>; + + // Load rounded, + def VLEDB : TernaryVRRa<"vledb", 0xE7C5, null_frag, v128eb, v128db, 3, 0>; + def WLEDB : TernaryVRRa<"wledb", 0xE7C5, null_frag, v32eb, v64db, 3, 8>; + def : Pat<(v4f32 (z_vround (v2f64 VR128:$src))), (VLEDB VR128:$src, 0, 0)>; + def : FPConversion<WLEDB, fround, v32eb, v64db, 0, 0>; + + // Multiply. + def VFMDB : BinaryVRRc<"vfmdb", 0xE7E7, fmul, v128db, v128db, 3, 0>; + def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, fmul, v64db, v64db, 3, 8>; + + // Multiply and add. + def VFMADB : TernaryVRRe<"vfmadb", 0xE78F, fma, v128db, v128db, 0, 3>; + def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, fma, v64db, v64db, 8, 3>; + + // Multiply and subtract. + def VFMSDB : TernaryVRRe<"vfmsdb", 0xE78E, fms, v128db, v128db, 0, 3>; + def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, fms, v64db, v64db, 8, 3>; + + // Load complement, + def VFLCDB : UnaryVRRa<"vflcdb", 0xE7CC, fneg, v128db, v128db, 3, 0, 0>; + def WFLCDB : UnaryVRRa<"wflcdb", 0xE7CC, fneg, v64db, v64db, 3, 8, 0>; + + // Load negative. + def VFLNDB : UnaryVRRa<"vflndb", 0xE7CC, fnabs, v128db, v128db, 3, 0, 1>; + def WFLNDB : UnaryVRRa<"wflndb", 0xE7CC, fnabs, v64db, v64db, 3, 8, 1>; + + // Load positive. + def VFLPDB : UnaryVRRa<"vflpdb", 0xE7CC, fabs, v128db, v128db, 3, 0, 2>; + def WFLPDB : UnaryVRRa<"wflpdb", 0xE7CC, fabs, v64db, v64db, 3, 8, 2>; + + // Square root. + def VFSQDB : UnaryVRRa<"vfsqdb", 0xE7CE, fsqrt, v128db, v128db, 3, 0>; + def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, fsqrt, v64db, v64db, 3, 8>; + + // Subtract. + def VFSDB : BinaryVRRc<"vfsdb", 0xE7E2, fsub, v128db, v128db, 3, 0>; + def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, fsub, v64db, v64db, 3, 8>; + + // Test data class immediate. + let Defs = [CC] in { + def VFTCIDB : BinaryVRIe<"vftcidb", 0xE74A, z_vftci, v128g, v128db, 3, 0>; + def WFTCIDB : BinaryVRIe<"wftcidb", 0xE74A, null_frag, v64g, v64db, 3, 8>; + } +} + +//===----------------------------------------------------------------------===// +// Floating-point comparison +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureVector] in { + // Compare scalar. + let Defs = [CC] in + def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, z_fcmp, v64db, 3>; + + // Compare and signal scalar. + let Defs = [CC] in + def WFKDB : CompareVRRa<"wfkdb", 0xE7CA, null_frag, v64db, 3>; + + // Compare equal. + defm VFCEDB : BinaryVRRcSPair<"vfcedb", 0xE7E8, z_vfcmpe, z_vfcmpes, + v128g, v128db, 3, 0>; + defm WFCEDB : BinaryVRRcSPair<"wfcedb", 0xE7E8, null_frag, null_frag, + v64g, v64db, 3, 8>; + + // Compare high. + defm VFCHDB : BinaryVRRcSPair<"vfchdb", 0xE7EB, z_vfcmph, z_vfcmphs, + v128g, v128db, 3, 0>; + defm WFCHDB : BinaryVRRcSPair<"wfchdb", 0xE7EB, null_frag, null_frag, + v64g, v64db, 3, 8>; + + // Compare high or equal. 
+  defm VFCHEDB : BinaryVRRcSPair<"vfchedb", 0xE7EA, z_vfcmphe, z_vfcmphes,
+                                 v128g, v128db, 3, 0>;
+  defm WFCHEDB : BinaryVRRcSPair<"wfchedb", 0xE7EA, null_frag, null_frag,
+                                 v64g, v64db, 3, 8>;
+}
+
+//===----------------------------------------------------------------------===//
+// Conversions
+//===----------------------------------------------------------------------===//
+
+def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>;
+def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>;
+def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>;
+def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>;
+def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>;
+
+def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>;
+def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>;
+def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>;
+def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>;
+def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>;
+
+def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>;
+def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>;
+def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>;
+def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>;
+def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>;
+
+def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
+def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
+def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
+def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>;
+def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>;
+
+def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>;
+def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>;
+def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>;
+def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>;
+def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>;
+
+def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>;
+def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>;
+def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>;
+def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>;
+def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
+
+//===----------------------------------------------------------------------===//
+// Replicating scalars
+//===----------------------------------------------------------------------===//
+
+// Define patterns for replicating a scalar GR32 into a vector of type TYPE.
+// INDEX is 8 minus the element size in bytes.
+class VectorReplicateScalar<ValueType type, Instruction insn, bits<16> index>
+  : Pat<(type (z_replicate GR32:$scalar)),
+        (insn (VLVGP32 GR32:$scalar, GR32:$scalar), index)>;
+
+def : VectorReplicateScalar<v16i8, VREPB, 7>;
+def : VectorReplicateScalar<v8i16, VREPH, 3>;
+def : VectorReplicateScalar<v4i32, VREPF, 1>;
+
+// i64 replications are just a single instruction.
+def : Pat<(v2i64 (z_replicate GR64:$scalar)), + (VLVGP GR64:$scalar, GR64:$scalar)>; + +//===----------------------------------------------------------------------===// +// Floating-point insertion and extraction +//===----------------------------------------------------------------------===// + +// Moving 32-bit values between GPRs and FPRs can be done using VLVGF +// and VLGVF. +def LEFR : UnaryAliasVRS<VR32, GR32>; +def LFER : UnaryAliasVRS<GR64, VR32>; +def : Pat<(f32 (bitconvert (i32 GR32:$src))), (LEFR GR32:$src)>; +def : Pat<(i32 (bitconvert (f32 VR32:$src))), + (EXTRACT_SUBREG (LFER VR32:$src), subreg_l32)>; + +// Floating-point values are stored in element 0 of the corresponding +// vector register. Scalar to vector conversion is just a subreg and +// scalar replication can just replicate element 0 of the vector register. +multiclass ScalarToVectorFP<Instruction vrep, ValueType vt, RegisterOperand cls, + SubRegIndex subreg> { + def : Pat<(vt (scalar_to_vector cls:$scalar)), + (INSERT_SUBREG (vt (IMPLICIT_DEF)), cls:$scalar, subreg)>; + def : Pat<(vt (z_replicate cls:$scalar)), + (vrep (INSERT_SUBREG (vt (IMPLICIT_DEF)), cls:$scalar, + subreg), 0)>; +} +defm : ScalarToVectorFP<VREPF, v4f32, FP32, subreg_r32>; +defm : ScalarToVectorFP<VREPG, v2f64, FP64, subreg_r64>; + +// Match v2f64 insertions. The AddedComplexity counters the 3 added by +// TableGen for the base register operand in VLVG-based integer insertions +// and ensures that this version is strictly better. +let AddedComplexity = 4 in { + def : Pat<(z_vector_insert (v2f64 VR128:$vec), FP64:$elt, 0), + (VPDI (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FP64:$elt, + subreg_r64), VR128:$vec, 1)>; + def : Pat<(z_vector_insert (v2f64 VR128:$vec), FP64:$elt, 1), + (VPDI VR128:$vec, (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FP64:$elt, + subreg_r64), 0)>; +} + +// We extract floating-point element X by replicating (for elements other +// than 0) and then taking a high subreg. The AddedComplexity counters the +// 3 added by TableGen for the base register operand in VLGV-based integer +// extractions and ensures that this version is strictly better. 
+let AddedComplexity = 4 in { + def : Pat<(f32 (z_vector_extract (v4f32 VR128:$vec), 0)), + (EXTRACT_SUBREG VR128:$vec, subreg_r32)>; + def : Pat<(f32 (z_vector_extract (v4f32 VR128:$vec), imm32zx2:$index)), + (EXTRACT_SUBREG (VREPF VR128:$vec, imm32zx2:$index), subreg_r32)>; + + def : Pat<(f64 (z_vector_extract (v2f64 VR128:$vec), 0)), + (EXTRACT_SUBREG VR128:$vec, subreg_r64)>; + def : Pat<(f64 (z_vector_extract (v2f64 VR128:$vec), imm32zx1:$index)), + (EXTRACT_SUBREG (VREPG VR128:$vec, imm32zx1:$index), subreg_r64)>; +} + +//===----------------------------------------------------------------------===// +// String instructions +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureVector] in { + defm VFAEB : TernaryVRRbSPair<"vfaeb", 0xE782, int_s390_vfaeb, z_vfae_cc, + v128b, v128b, 0, 0>; + defm VFAEH : TernaryVRRbSPair<"vfaeh", 0xE782, int_s390_vfaeh, z_vfae_cc, + v128h, v128h, 1, 0>; + defm VFAEF : TernaryVRRbSPair<"vfaef", 0xE782, int_s390_vfaef, z_vfae_cc, + v128f, v128f, 2, 0>; + defm VFAEZB : TernaryVRRbSPair<"vfaezb", 0xE782, int_s390_vfaezb, z_vfaez_cc, + v128b, v128b, 0, 2>; + defm VFAEZH : TernaryVRRbSPair<"vfaezh", 0xE782, int_s390_vfaezh, z_vfaez_cc, + v128h, v128h, 1, 2>; + defm VFAEZF : TernaryVRRbSPair<"vfaezf", 0xE782, int_s390_vfaezf, z_vfaez_cc, + v128f, v128f, 2, 2>; + + defm VFEEB : BinaryVRRbSPair<"vfeeb", 0xE780, int_s390_vfeeb, z_vfee_cc, + v128b, v128b, 0, 0, 1>; + defm VFEEH : BinaryVRRbSPair<"vfeeh", 0xE780, int_s390_vfeeh, z_vfee_cc, + v128h, v128h, 1, 0, 1>; + defm VFEEF : BinaryVRRbSPair<"vfeef", 0xE780, int_s390_vfeef, z_vfee_cc, + v128f, v128f, 2, 0, 1>; + defm VFEEZB : BinaryVRRbSPair<"vfeezb", 0xE780, int_s390_vfeezb, z_vfeez_cc, + v128b, v128b, 0, 2, 3>; + defm VFEEZH : BinaryVRRbSPair<"vfeezh", 0xE780, int_s390_vfeezh, z_vfeez_cc, + v128h, v128h, 1, 2, 3>; + defm VFEEZF : BinaryVRRbSPair<"vfeezf", 0xE780, int_s390_vfeezf, z_vfeez_cc, + v128f, v128f, 2, 2, 3>; + + defm VFENEB : BinaryVRRbSPair<"vfeneb", 0xE781, int_s390_vfeneb, z_vfene_cc, + v128b, v128b, 0, 0, 1>; + defm VFENEH : BinaryVRRbSPair<"vfeneh", 0xE781, int_s390_vfeneh, z_vfene_cc, + v128h, v128h, 1, 0, 1>; + defm VFENEF : BinaryVRRbSPair<"vfenef", 0xE781, int_s390_vfenef, z_vfene_cc, + v128f, v128f, 2, 0, 1>; + defm VFENEZB : BinaryVRRbSPair<"vfenezb", 0xE781, int_s390_vfenezb, + z_vfenez_cc, v128b, v128b, 0, 2, 3>; + defm VFENEZH : BinaryVRRbSPair<"vfenezh", 0xE781, int_s390_vfenezh, + z_vfenez_cc, v128h, v128h, 1, 2, 3>; + defm VFENEZF : BinaryVRRbSPair<"vfenezf", 0xE781, int_s390_vfenezf, + z_vfenez_cc, v128f, v128f, 2, 2, 3>; + + defm VISTRB : UnaryVRRaSPair<"vistrb", 0xE75C, int_s390_vistrb, z_vistr_cc, + v128b, v128b, 0>; + defm VISTRH : UnaryVRRaSPair<"vistrh", 0xE75C, int_s390_vistrh, z_vistr_cc, + v128h, v128h, 1>; + defm VISTRF : UnaryVRRaSPair<"vistrf", 0xE75C, int_s390_vistrf, z_vistr_cc, + v128f, v128f, 2>; + + defm VSTRCB : QuaternaryVRRdSPair<"vstrcb", 0xE78A, int_s390_vstrcb, + z_vstrc_cc, v128b, v128b, 0, 0>; + defm VSTRCH : QuaternaryVRRdSPair<"vstrch", 0xE78A, int_s390_vstrch, + z_vstrc_cc, v128h, v128h, 1, 0>; + defm VSTRCF : QuaternaryVRRdSPair<"vstrcf", 0xE78A, int_s390_vstrcf, + z_vstrc_cc, v128f, v128f, 2, 0>; + defm VSTRCZB : QuaternaryVRRdSPair<"vstrczb", 0xE78A, int_s390_vstrczb, + z_vstrcz_cc, v128b, v128b, 0, 2>; + defm VSTRCZH : QuaternaryVRRdSPair<"vstrczh", 0xE78A, int_s390_vstrczh, + z_vstrcz_cc, v128h, v128h, 1, 2>; + defm VSTRCZF : QuaternaryVRRdSPair<"vstrczf", 0xE78A, int_s390_vstrczf, + z_vstrcz_cc, 
v128f, v128f, 2, 2>; +} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZLDCleanup.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZLDCleanup.cpp new file mode 100644 index 0000000..24165be --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZLDCleanup.cpp @@ -0,0 +1,143 @@ +//===-- SystemZLDCleanup.cpp - Clean up local-dynamic TLS accesses --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass combines multiple accesses to local-dynamic TLS variables so that +// the TLS base address for the module is only fetched once per execution path +// through the function. +// +//===----------------------------------------------------------------------===// + +#include "SystemZTargetMachine.h" +#include "SystemZMachineFunctionInfo.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" + +using namespace llvm; + +namespace { + +class SystemZLDCleanup : public MachineFunctionPass { +public: + static char ID; + SystemZLDCleanup(const SystemZTargetMachine &tm) + : MachineFunctionPass(ID), TII(nullptr), MF(nullptr) {} + + const char *getPassName() const override { + return "SystemZ Local Dynamic TLS Access Clean-up"; + } + + bool runOnMachineFunction(MachineFunction &MF) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + +private: + bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg); + MachineInstr *ReplaceTLSCall(MachineInstr *I, unsigned TLSBaseAddrReg); + MachineInstr *SetRegister(MachineInstr *I, unsigned *TLSBaseAddrReg); + + const SystemZInstrInfo *TII; + MachineFunction *MF; +}; + +char SystemZLDCleanup::ID = 0; + +} // end anonymous namespace + +FunctionPass *llvm::createSystemZLDCleanupPass(SystemZTargetMachine &TM) { + return new SystemZLDCleanup(TM); +} + +void SystemZLDCleanup::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<MachineDominatorTree>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool SystemZLDCleanup::runOnMachineFunction(MachineFunction &F) { + TII = static_cast<const SystemZInstrInfo *>(F.getSubtarget().getInstrInfo()); + MF = &F; + + SystemZMachineFunctionInfo* MFI = F.getInfo<SystemZMachineFunctionInfo>(); + if (MFI->getNumLocalDynamicTLSAccesses() < 2) { + // No point folding accesses if there isn't at least two. + return false; + } + + MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>(); + return VisitNode(DT->getRootNode(), 0); +} + +// Visit the dominator subtree rooted at Node in pre-order. +// If TLSBaseAddrReg is non-null, then use that to replace any +// TLS_LDCALL instructions. Otherwise, create the register +// when the first such instruction is seen, and then use it +// as we encounter more instructions. +bool SystemZLDCleanup::VisitNode(MachineDomTreeNode *Node, + unsigned TLSBaseAddrReg) { + MachineBasicBlock *BB = Node->getBlock(); + bool Changed = false; + + // Traverse the current block. 
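To make the effect of the pass concrete, a hypothetical source-level example (mine, not from this patch; it assumes both variables end up with the local-dynamic TLS model, e.g. under -fPIC):

// Each local-dynamic TLS access needs the module's TLS base, obtained on
// SystemZ through a __tls_get_offset call (TLS_LDCALL).  Without this pass,
// both statements below would materialize their own call; with it, accesses
// dominated by the first one copy the base address it already computed.
static thread_local int Counter;
static thread_local int Limit;

bool bumpAndCheck() {
  ++Counter;                  // first access: keeps its TLS_LDCALL
  return Counter < Limit;     // dominated accesses: reuse the cached base
}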
+  for (auto I = BB->begin(), E = BB->end(); I != E; ++I) {
+    switch (I->getOpcode()) {
+      case SystemZ::TLS_LDCALL:
+        if (TLSBaseAddrReg)
+          I = ReplaceTLSCall(I, TLSBaseAddrReg);
+        else
+          I = SetRegister(I, &TLSBaseAddrReg);
+        Changed = true;
+        break;
+      default:
+        break;
+    }
+  }
+
+  // Visit the children of this block in the dominator tree.
+  for (auto I = Node->begin(), E = Node->end(); I != E; ++I)
+    Changed |= VisitNode(*I, TLSBaseAddrReg);
+
+  return Changed;
+}
+
+// Replace the TLS_LDCALL instruction I with a copy from TLSBaseAddrReg,
+// returning the new instruction.
+MachineInstr *SystemZLDCleanup::ReplaceTLSCall(MachineInstr *I,
+                                               unsigned TLSBaseAddrReg) {
+  // Insert a Copy from TLSBaseAddrReg to R2.
+  MachineInstr *Copy = BuildMI(*I->getParent(), I, I->getDebugLoc(),
+                               TII->get(TargetOpcode::COPY), SystemZ::R2D)
+                           .addReg(TLSBaseAddrReg);
+
+  // Erase the TLS_LDCALL instruction.
+  I->eraseFromParent();
+
+  return Copy;
+}
+
+// Create a virtual register in *TLSBaseAddrReg, and populate it by
+// inserting a copy instruction after I. Returns the new instruction.
+MachineInstr *SystemZLDCleanup::SetRegister(MachineInstr *I,
+                                            unsigned *TLSBaseAddrReg) {
+  // Create a virtual register for the TLS base address.
+  MachineRegisterInfo &RegInfo = MF->getRegInfo();
+  *TLSBaseAddrReg = RegInfo.createVirtualRegister(&SystemZ::GR64BitRegClass);
+
+  // Insert a copy from R2 to TLSBaseAddrReg.
+  MachineInstr *Next = I->getNextNode();
+  MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(),
+                               TII->get(TargetOpcode::COPY), *TLSBaseAddrReg)
+                           .addReg(SystemZ::R2D);
+
+  return Copy;
+}
+
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp
new file mode 100644
index 0000000..8dab44e
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp
@@ -0,0 +1,460 @@
+//===-- SystemZLongBranch.cpp - Branch lengthening for SystemZ ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass makes sure that all branches are in range. There are several ways
+// in which this could be done. One aggressive approach is to assume that all
+// branches are in range and successively replace those that turn out not
+// to be in range with a longer form (branch relaxation). A simple
+// implementation is to continually walk through the function relaxing
+// branches until no more changes are needed and a fixed point is reached.
+// However, in the pathological worst case, this implementation is
+// quadratic in the number of blocks; relaxing branch N can make branch N-1
+// go out of range, which in turn can make branch N-2 go out of range,
+// and so on.
+//
+// An alternative approach is to assume that all branches must be
+// converted to their long forms, then reinstate the short forms of
+// branches that, even under this pessimistic assumption, turn out to be
+// in range (branch shortening). This too can be implemented as a function
+// walk that is repeated until a fixed point is reached. In general,
+// the result of shortening is not as good as that of relaxation, and
+// shortening is also quadratic in the worst case; shortening branch N
+// can bring branch N-1 in range of the short form, which in turn can do
+// the same for branch N-2, and so on.
The main advantage of shortening +// is that each walk through the function produces valid code, so it is +// possible to stop at any point after the first walk. The quadraticness +// could therefore be handled with a maximum pass count, although the +// question then becomes: what maximum count should be used? +// +// On SystemZ, long branches are only needed for functions bigger than 64k, +// which are relatively rare to begin with, and the long branch sequences +// are actually relatively cheap. It therefore doesn't seem worth spending +// much compilation time on the problem. Instead, the approach we take is: +// +// (1) Work out the address that each block would have if no branches +// need relaxing. Exit the pass early if all branches are in range +// according to this assumption. +// +// (2) Work out the address that each block would have if all branches +// need relaxing. +// +// (3) Walk through the block calculating the final address of each instruction +// and relaxing those that need to be relaxed. For backward branches, +// this check uses the final address of the target block, as calculated +// earlier in the walk. For forward branches, this check uses the +// address of the target block that was calculated in (2). Both checks +// give a conservatively-correct range. +// +//===----------------------------------------------------------------------===// + +#include "SystemZTargetMachine.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "systemz-long-branch" + +STATISTIC(LongBranches, "Number of long branches."); + +namespace { +// Represents positional information about a basic block. +struct MBBInfo { + // The address that we currently assume the block has. + uint64_t Address; + + // The size of the block in bytes, excluding terminators. + // This value never changes. + uint64_t Size; + + // The minimum alignment of the block, as a log2 value. + // This value never changes. + unsigned Alignment; + + // The number of terminators in this block. This value never changes. + unsigned NumTerminators; + + MBBInfo() + : Address(0), Size(0), Alignment(0), NumTerminators(0) {} +}; + +// Represents the state of a block terminator. +struct TerminatorInfo { + // If this terminator is a relaxable branch, this points to the branch + // instruction, otherwise it is null. + MachineInstr *Branch; + + // The address that we currently assume the terminator has. + uint64_t Address; + + // The current size of the terminator in bytes. + uint64_t Size; + + // If Branch is nonnull, this is the number of the target block, + // otherwise it is unused. + unsigned TargetBlock; + + // If Branch is nonnull, this is the length of the longest relaxed form, + // otherwise it is zero. + unsigned ExtraRelaxSize; + + TerminatorInfo() : Branch(nullptr), Size(0), TargetBlock(0), + ExtraRelaxSize(0) {} +}; + +// Used to keep track of the current position while iterating over the blocks. +struct BlockPosition { + // The address that we assume this position has. + uint64_t Address; + + // The number of low bits in Address that are known to be the same + // as the runtime address. 
+ unsigned KnownBits; + + BlockPosition(unsigned InitialAlignment) + : Address(0), KnownBits(InitialAlignment) {} +}; + +class SystemZLongBranch : public MachineFunctionPass { +public: + static char ID; + SystemZLongBranch(const SystemZTargetMachine &tm) + : MachineFunctionPass(ID), TII(nullptr) {} + + const char *getPassName() const override { + return "SystemZ Long Branch"; + } + + bool runOnMachineFunction(MachineFunction &F) override; + +private: + void skipNonTerminators(BlockPosition &Position, MBBInfo &Block); + void skipTerminator(BlockPosition &Position, TerminatorInfo &Terminator, + bool AssumeRelaxed); + TerminatorInfo describeTerminator(MachineInstr *MI); + uint64_t initMBBInfo(); + bool mustRelaxBranch(const TerminatorInfo &Terminator, uint64_t Address); + bool mustRelaxABranch(); + void setWorstCaseAddresses(); + void splitBranchOnCount(MachineInstr *MI, unsigned AddOpcode); + void splitCompareBranch(MachineInstr *MI, unsigned CompareOpcode); + void relaxBranch(TerminatorInfo &Terminator); + void relaxBranches(); + + const SystemZInstrInfo *TII; + MachineFunction *MF; + SmallVector<MBBInfo, 16> MBBs; + SmallVector<TerminatorInfo, 16> Terminators; +}; + +char SystemZLongBranch::ID = 0; + +const uint64_t MaxBackwardRange = 0x10000; +const uint64_t MaxForwardRange = 0xfffe; +} // end anonymous namespace + +FunctionPass *llvm::createSystemZLongBranchPass(SystemZTargetMachine &TM) { + return new SystemZLongBranch(TM); +} + +// Position describes the state immediately before Block. Update Block +// accordingly and move Position to the end of the block's non-terminator +// instructions. +void SystemZLongBranch::skipNonTerminators(BlockPosition &Position, + MBBInfo &Block) { + if (Block.Alignment > Position.KnownBits) { + // When calculating the address of Block, we need to conservatively + // assume that Block had the worst possible misalignment. + Position.Address += ((uint64_t(1) << Block.Alignment) - + (uint64_t(1) << Position.KnownBits)); + Position.KnownBits = Block.Alignment; + } + + // Align the addresses. + uint64_t AlignMask = (uint64_t(1) << Block.Alignment) - 1; + Position.Address = (Position.Address + AlignMask) & ~AlignMask; + + // Record the block's position. + Block.Address = Position.Address; + + // Move past the non-terminators in the block. + Position.Address += Block.Size; +} + +// Position describes the state immediately before Terminator. +// Update Terminator accordingly and move Position past it. +// Assume that Terminator will be relaxed if AssumeRelaxed. +void SystemZLongBranch::skipTerminator(BlockPosition &Position, + TerminatorInfo &Terminator, + bool AssumeRelaxed) { + Terminator.Address = Position.Address; + Position.Address += Terminator.Size; + if (AssumeRelaxed) + Position.Address += Terminator.ExtraRelaxSize; +} + +// Return a description of terminator instruction MI. +TerminatorInfo SystemZLongBranch::describeTerminator(MachineInstr *MI) { + TerminatorInfo Terminator; + Terminator.Size = TII->getInstSizeInBytes(MI); + if (MI->isConditionalBranch() || MI->isUnconditionalBranch()) { + switch (MI->getOpcode()) { + case SystemZ::J: + // Relaxes to JG, which is 2 bytes longer. + Terminator.ExtraRelaxSize = 2; + break; + case SystemZ::BRC: + // Relaxes to BRCL, which is 2 bytes longer. + Terminator.ExtraRelaxSize = 2; + break; + case SystemZ::BRCT: + case SystemZ::BRCTG: + // Relaxes to A(G)HI and BRCL, which is 6 bytes longer. 
+ Terminator.ExtraRelaxSize = 6; + break; + case SystemZ::CRJ: + case SystemZ::CLRJ: + // Relaxes to a C(L)R/BRCL sequence, which is 2 bytes longer. + Terminator.ExtraRelaxSize = 2; + break; + case SystemZ::CGRJ: + case SystemZ::CLGRJ: + // Relaxes to a C(L)GR/BRCL sequence, which is 4 bytes longer. + Terminator.ExtraRelaxSize = 4; + break; + case SystemZ::CIJ: + case SystemZ::CGIJ: + // Relaxes to a C(G)HI/BRCL sequence, which is 4 bytes longer. + Terminator.ExtraRelaxSize = 4; + break; + case SystemZ::CLIJ: + case SystemZ::CLGIJ: + // Relaxes to a CL(G)FI/BRCL sequence, which is 6 bytes longer. + Terminator.ExtraRelaxSize = 6; + break; + default: + llvm_unreachable("Unrecognized branch instruction"); + } + Terminator.Branch = MI; + Terminator.TargetBlock = + TII->getBranchInfo(MI).Target->getMBB()->getNumber(); + } + return Terminator; +} + +// Fill MBBs and Terminators, setting the addresses on the assumption +// that no branches need relaxation. Return the size of the function under +// this assumption. +uint64_t SystemZLongBranch::initMBBInfo() { + MF->RenumberBlocks(); + unsigned NumBlocks = MF->size(); + + MBBs.clear(); + MBBs.resize(NumBlocks); + + Terminators.clear(); + Terminators.reserve(NumBlocks); + + BlockPosition Position(MF->getAlignment()); + for (unsigned I = 0; I < NumBlocks; ++I) { + MachineBasicBlock *MBB = MF->getBlockNumbered(I); + MBBInfo &Block = MBBs[I]; + + // Record the alignment, for quick access. + Block.Alignment = MBB->getAlignment(); + + // Calculate the size of the fixed part of the block. + MachineBasicBlock::iterator MI = MBB->begin(); + MachineBasicBlock::iterator End = MBB->end(); + while (MI != End && !MI->isTerminator()) { + Block.Size += TII->getInstSizeInBytes(MI); + ++MI; + } + skipNonTerminators(Position, Block); + + // Add the terminators. + while (MI != End) { + if (!MI->isDebugValue()) { + assert(MI->isTerminator() && "Terminator followed by non-terminator"); + Terminators.push_back(describeTerminator(MI)); + skipTerminator(Position, Terminators.back(), false); + ++Block.NumTerminators; + } + ++MI; + } + } + + return Position.Address; +} + +// Return true if, under current assumptions, Terminator would need to be +// relaxed if it were placed at address Address. +bool SystemZLongBranch::mustRelaxBranch(const TerminatorInfo &Terminator, + uint64_t Address) { + if (!Terminator.Branch) + return false; + + const MBBInfo &Target = MBBs[Terminator.TargetBlock]; + if (Address >= Target.Address) { + if (Address - Target.Address <= MaxBackwardRange) + return false; + } else { + if (Target.Address - Address <= MaxForwardRange) + return false; + } + + return true; +} + +// Return true if, under current assumptions, any terminator needs +// to be relaxed. +bool SystemZLongBranch::mustRelaxABranch() { + for (auto &Terminator : Terminators) + if (mustRelaxBranch(Terminator, Terminator.Address)) + return true; + return false; +} + +// Set the address of each block on the assumption that all branches +// must be long. +void SystemZLongBranch::setWorstCaseAddresses() { + SmallVector<TerminatorInfo, 16>::iterator TI = Terminators.begin(); + BlockPosition Position(MF->getAlignment()); + for (auto &Block : MBBs) { + skipNonTerminators(Position, Block); + for (unsigned BTI = 0, BTE = Block.NumTerminators; BTI != BTE; ++BTI) { + skipTerminator(Position, *TI, true); + ++TI; + } + } +} + +// Split BRANCH ON COUNT MI into the addition given by AddOpcode followed +// by a BRCL on the result. 
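A standalone sketch (editorial; the helper name is invented) of the range test used by mustRelaxBranch above: a 16-bit PC-relative field holds a signed halfword count, which gives the asymmetric byte range encoded in MaxBackwardRange and MaxForwardRange.

#include <cstdint>

// Reachable byte offsets for a short branch: -0x10000 backwards and +0xfffe
// forwards (a signed 16-bit halfword count, doubled).
bool shortFormReaches(uint64_t branchAddr, uint64_t targetAddr) {
  return branchAddr >= targetAddr ? branchAddr - targetAddr <= 0x10000
                                  : targetAddr - branchAddr <= 0xfffe;
}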
+void SystemZLongBranch::splitBranchOnCount(MachineInstr *MI,
+                                           unsigned AddOpcode) {
+  MachineBasicBlock *MBB = MI->getParent();
+  DebugLoc DL = MI->getDebugLoc();
+  BuildMI(*MBB, MI, DL, TII->get(AddOpcode))
+      .addOperand(MI->getOperand(0))
+      .addOperand(MI->getOperand(1))
+      .addImm(-1);
+  MachineInstr *BRCL = BuildMI(*MBB, MI, DL, TII->get(SystemZ::BRCL))
+                           .addImm(SystemZ::CCMASK_ICMP)
+                           .addImm(SystemZ::CCMASK_CMP_NE)
+                           .addOperand(MI->getOperand(2));
+  // The implicit use of CC is a killing use.
+  BRCL->addRegisterKilled(SystemZ::CC, &TII->getRegisterInfo());
+  MI->eraseFromParent();
+}
+
+// Split MI into the comparison given by CompareOpcode followed
+// by a BRCL on the result.
+void SystemZLongBranch::splitCompareBranch(MachineInstr *MI,
+                                           unsigned CompareOpcode) {
+  MachineBasicBlock *MBB = MI->getParent();
+  DebugLoc DL = MI->getDebugLoc();
+  BuildMI(*MBB, MI, DL, TII->get(CompareOpcode))
+      .addOperand(MI->getOperand(0))
+      .addOperand(MI->getOperand(1));
+  MachineInstr *BRCL = BuildMI(*MBB, MI, DL, TII->get(SystemZ::BRCL))
+                           .addImm(SystemZ::CCMASK_ICMP)
+                           .addOperand(MI->getOperand(2))
+                           .addOperand(MI->getOperand(3));
+  // The implicit use of CC is a killing use.
+  BRCL->addRegisterKilled(SystemZ::CC, &TII->getRegisterInfo());
+  MI->eraseFromParent();
+}
+
+// Relax the branch described by Terminator.
+void SystemZLongBranch::relaxBranch(TerminatorInfo &Terminator) {
+  MachineInstr *Branch = Terminator.Branch;
+  switch (Branch->getOpcode()) {
+  case SystemZ::J:
+    Branch->setDesc(TII->get(SystemZ::JG));
+    break;
+  case SystemZ::BRC:
+    Branch->setDesc(TII->get(SystemZ::BRCL));
+    break;
+  case SystemZ::BRCT:
+    splitBranchOnCount(Branch, SystemZ::AHI);
+    break;
+  case SystemZ::BRCTG:
+    splitBranchOnCount(Branch, SystemZ::AGHI);
+    break;
+  case SystemZ::CRJ:
+    splitCompareBranch(Branch, SystemZ::CR);
+    break;
+  case SystemZ::CGRJ:
+    splitCompareBranch(Branch, SystemZ::CGR);
+    break;
+  case SystemZ::CIJ:
+    splitCompareBranch(Branch, SystemZ::CHI);
+    break;
+  case SystemZ::CGIJ:
+    splitCompareBranch(Branch, SystemZ::CGHI);
+    break;
+  case SystemZ::CLRJ:
+    splitCompareBranch(Branch, SystemZ::CLR);
+    break;
+  case SystemZ::CLGRJ:
+    splitCompareBranch(Branch, SystemZ::CLGR);
+    break;
+  case SystemZ::CLIJ:
+    splitCompareBranch(Branch, SystemZ::CLFI);
+    break;
+  case SystemZ::CLGIJ:
+    splitCompareBranch(Branch, SystemZ::CLGFI);
+    break;
+  default:
+    llvm_unreachable("Unrecognized branch");
+  }
+
+  Terminator.Size += Terminator.ExtraRelaxSize;
+  Terminator.ExtraRelaxSize = 0;
+  Terminator.Branch = nullptr;
+
+  ++LongBranches;
+}
+
+// Run a shortening pass and relax any branches that need to be relaxed.
+void SystemZLongBranch::relaxBranches() { + SmallVector<TerminatorInfo, 16>::iterator TI = Terminators.begin(); + BlockPosition Position(MF->getAlignment()); + for (auto &Block : MBBs) { + skipNonTerminators(Position, Block); + for (unsigned BTI = 0, BTE = Block.NumTerminators; BTI != BTE; ++BTI) { + assert(Position.Address <= TI->Address && + "Addresses shouldn't go forwards"); + if (mustRelaxBranch(*TI, Position.Address)) + relaxBranch(*TI); + skipTerminator(Position, *TI, false); + ++TI; + } + } +} + +bool SystemZLongBranch::runOnMachineFunction(MachineFunction &F) { + TII = static_cast<const SystemZInstrInfo *>(F.getSubtarget().getInstrInfo()); + MF = &F; + uint64_t Size = initMBBInfo(); + if (Size <= MaxForwardRange || !mustRelaxABranch()) + return false; + + setWorstCaseAddresses(); + relaxBranches(); + return true; +} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.cpp new file mode 100644 index 0000000..2655e48 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.cpp @@ -0,0 +1,103 @@ +//===-- SystemZMCInstLower.cpp - Lower MachineInstr to MCInst -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "SystemZMCInstLower.h" +#include "SystemZAsmPrinter.h" +#include "llvm/IR/Mangler.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCStreamer.h" + +using namespace llvm; + +// Return the VK_* enumeration for MachineOperand target flags Flags. +static MCSymbolRefExpr::VariantKind getVariantKind(unsigned Flags) { + switch (Flags & SystemZII::MO_SYMBOL_MODIFIER) { + case 0: + return MCSymbolRefExpr::VK_None; + case SystemZII::MO_GOT: + return MCSymbolRefExpr::VK_GOT; + case SystemZII::MO_INDNTPOFF: + return MCSymbolRefExpr::VK_INDNTPOFF; + } + llvm_unreachable("Unrecognised MO_ACCESS_MODEL"); +} + +SystemZMCInstLower::SystemZMCInstLower(MCContext &ctx, + SystemZAsmPrinter &asmprinter) + : Ctx(ctx), AsmPrinter(asmprinter) {} + +const MCExpr * +SystemZMCInstLower::getExpr(const MachineOperand &MO, + MCSymbolRefExpr::VariantKind Kind) const { + const MCSymbol *Symbol; + bool HasOffset = true; + switch (MO.getType()) { + case MachineOperand::MO_MachineBasicBlock: + Symbol = MO.getMBB()->getSymbol(); + HasOffset = false; + break; + + case MachineOperand::MO_GlobalAddress: + Symbol = AsmPrinter.getSymbol(MO.getGlobal()); + break; + + case MachineOperand::MO_ExternalSymbol: + Symbol = AsmPrinter.GetExternalSymbolSymbol(MO.getSymbolName()); + break; + + case MachineOperand::MO_JumpTableIndex: + Symbol = AsmPrinter.GetJTISymbol(MO.getIndex()); + HasOffset = false; + break; + + case MachineOperand::MO_ConstantPoolIndex: + Symbol = AsmPrinter.GetCPISymbol(MO.getIndex()); + break; + + case MachineOperand::MO_BlockAddress: + Symbol = AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()); + break; + + default: + llvm_unreachable("unknown operand type"); + } + const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, Kind, Ctx); + if (HasOffset) + if (int64_t Offset = MO.getOffset()) { + const MCExpr *OffsetExpr = MCConstantExpr::create(Offset, Ctx); + Expr = MCBinaryExpr::createAdd(Expr, OffsetExpr, Ctx); + } + return Expr; +} + +MCOperand SystemZMCInstLower::lowerOperand(const MachineOperand &MO) const { + switch (MO.getType()) { + case MachineOperand::MO_Register: 
+ return MCOperand::createReg(MO.getReg()); + + case MachineOperand::MO_Immediate: + return MCOperand::createImm(MO.getImm()); + + default: { + MCSymbolRefExpr::VariantKind Kind = getVariantKind(MO.getTargetFlags()); + return MCOperand::createExpr(getExpr(MO, Kind)); + } + } +} + +void SystemZMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { + OutMI.setOpcode(MI->getOpcode()); + for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) { + const MachineOperand &MO = MI->getOperand(I); + // Ignore all implicit register operands. + if (!MO.isReg() || !MO.isImplicit()) + OutMI.addOperand(lowerOperand(MO)); + } +} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.h b/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.h new file mode 100644 index 0000000..7173cfa --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.h @@ -0,0 +1,44 @@ +//===-- SystemZMCInstLower.h - Lower MachineInstr to MCInst ----*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMCINSTLOWER_H +#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMCINSTLOWER_H + +#include "llvm/MC/MCExpr.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/DataTypes.h" + +namespace llvm { +class MCInst; +class MCOperand; +class MachineInstr; +class MachineOperand; +class Mangler; +class SystemZAsmPrinter; + +class LLVM_LIBRARY_VISIBILITY SystemZMCInstLower { + MCContext &Ctx; + SystemZAsmPrinter &AsmPrinter; + +public: + SystemZMCInstLower(MCContext &ctx, SystemZAsmPrinter &asmPrinter); + + // Lower MachineInstr MI to MCInst OutMI. + void lower(const MachineInstr *MI, MCInst &OutMI) const; + + // Return an MCOperand for MO. + MCOperand lowerOperand(const MachineOperand& MO) const; + + // Return an MCExpr for symbolic operand MO with variant kind Kind. + const MCExpr *getExpr(const MachineOperand &MO, + MCSymbolRefExpr::VariantKind Kind) const; +}; +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp new file mode 100644 index 0000000..1a7c0d7 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp @@ -0,0 +1,17 @@ +//=== SystemZMachineFunctionInfo.cpp - SystemZ machine function info ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "SystemZMachineFunctionInfo.h" + +using namespace llvm; + + +// pin vtable to this file +void SystemZMachineFunctionInfo::anchor() {} + diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h new file mode 100644 index 0000000..f4a517b --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h @@ -0,0 +1,74 @@ +//=== SystemZMachineFunctionInfo.h - SystemZ machine function info -*- C++ -*-// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMACHINEFUNCTIONINFO_H +#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMACHINEFUNCTIONINFO_H + +#include "llvm/CodeGen/MachineFunction.h" + +namespace llvm { + +class SystemZMachineFunctionInfo : public MachineFunctionInfo { + virtual void anchor(); + unsigned LowSavedGPR; + unsigned HighSavedGPR; + unsigned VarArgsFirstGPR; + unsigned VarArgsFirstFPR; + unsigned VarArgsFrameIndex; + unsigned RegSaveFrameIndex; + bool ManipulatesSP; + unsigned NumLocalDynamics; + +public: + explicit SystemZMachineFunctionInfo(MachineFunction &MF) + : LowSavedGPR(0), HighSavedGPR(0), VarArgsFirstGPR(0), VarArgsFirstFPR(0), + VarArgsFrameIndex(0), RegSaveFrameIndex(0), ManipulatesSP(false), + NumLocalDynamics(0) {} + + // Get and set the first call-saved GPR that should be saved and restored + // by this function. This is 0 if no GPRs need to be saved or restored. + unsigned getLowSavedGPR() const { return LowSavedGPR; } + void setLowSavedGPR(unsigned Reg) { LowSavedGPR = Reg; } + + // Get and set the last call-saved GPR that should be saved and restored + // by this function. + unsigned getHighSavedGPR() const { return HighSavedGPR; } + void setHighSavedGPR(unsigned Reg) { HighSavedGPR = Reg; } + + // Get and set the number of fixed (as opposed to variable) arguments + // that are passed in GPRs to this function. + unsigned getVarArgsFirstGPR() const { return VarArgsFirstGPR; } + void setVarArgsFirstGPR(unsigned GPR) { VarArgsFirstGPR = GPR; } + + // Likewise FPRs. + unsigned getVarArgsFirstFPR() const { return VarArgsFirstFPR; } + void setVarArgsFirstFPR(unsigned FPR) { VarArgsFirstFPR = FPR; } + + // Get and set the frame index of the first stack vararg. + unsigned getVarArgsFrameIndex() const { return VarArgsFrameIndex; } + void setVarArgsFrameIndex(unsigned FI) { VarArgsFrameIndex = FI; } + + // Get and set the frame index of the register save area + // (i.e. the incoming stack pointer). + unsigned getRegSaveFrameIndex() const { return RegSaveFrameIndex; } + void setRegSaveFrameIndex(unsigned FI) { RegSaveFrameIndex = FI; } + + // Get and set whether the function directly manipulates the stack pointer, + // e.g. through STACKSAVE or STACKRESTORE. + bool getManipulatesSP() const { return ManipulatesSP; } + void setManipulatesSP(bool MSP) { ManipulatesSP = MSP; } + + // Count number of local-dynamic TLS symbols used. + unsigned getNumLocalDynamicTLSAccesses() const { return NumLocalDynamics; } + void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamics; } +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td b/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td new file mode 100644 index 0000000..9af90d4 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td @@ -0,0 +1,568 @@ +//===-- SystemZOperands.td - SystemZ instruction operands ----*- tblgen-*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Class definitions +//===----------------------------------------------------------------------===// + +class ImmediateAsmOperand<string name> + : AsmOperandClass { + let Name = name; + let RenderMethod = "addImmOperands"; +} +class ImmediateTLSAsmOperand<string name> + : AsmOperandClass { + let Name = name; + let RenderMethod = "addImmTLSOperands"; +} + +// Constructs both a DAG pattern and instruction operand for an immediate +// of type VT. PRED returns true if a node is acceptable and XFORM returns +// the operand value associated with the node. ASMOP is the name of the +// associated asm operand, and also forms the basis of the asm print method. +class Immediate<ValueType vt, code pred, SDNodeXForm xform, string asmop> + : PatLeaf<(vt imm), pred, xform>, Operand<vt> { + let PrintMethod = "print"##asmop##"Operand"; + let DecoderMethod = "decode"##asmop##"Operand"; + let ParserMatchClass = !cast<AsmOperandClass>(asmop); +} + +// Constructs an asm operand for a PC-relative address. SIZE says how +// many bits there are. +class PCRelAsmOperand<string size> : ImmediateAsmOperand<"PCRel"##size> { + let PredicateMethod = "isImm"; + let ParserMethod = "parsePCRel"##size; +} +class PCRelTLSAsmOperand<string size> + : ImmediateTLSAsmOperand<"PCRelTLS"##size> { + let PredicateMethod = "isImmTLS"; + let ParserMethod = "parsePCRelTLS"##size; +} + +// Constructs an operand for a PC-relative address with address type VT. +// ASMOP is the associated asm operand. +class PCRelOperand<ValueType vt, AsmOperandClass asmop> : Operand<vt> { + let PrintMethod = "printPCRelOperand"; + let ParserMatchClass = asmop; +} +class PCRelTLSOperand<ValueType vt, AsmOperandClass asmop> : Operand<vt> { + let PrintMethod = "printPCRelTLSOperand"; + let ParserMatchClass = asmop; +} + +// Constructs both a DAG pattern and instruction operand for a PC-relative +// address with address size VT. SELF is the name of the operand and +// ASMOP is the associated asm operand. +class PCRelAddress<ValueType vt, string self, AsmOperandClass asmop> + : ComplexPattern<vt, 1, "selectPCRelAddress", + [z_pcrel_wrapper, z_pcrel_offset]>, + PCRelOperand<vt, asmop> { + let MIOperandInfo = (ops !cast<Operand>(self)); +} + +// Constructs an AsmOperandClass for addressing mode FORMAT, treating the +// registers as having BITSIZE bits and displacements as having DISPSIZE bits. +// LENGTH is "LenN" for addresses with an N-bit length field, otherwise it +// is "". +class AddressAsmOperand<string format, string bitsize, string dispsize, + string length = ""> + : AsmOperandClass { + let Name = format##bitsize##"Disp"##dispsize##length; + let ParserMethod = "parse"##format##bitsize; + let RenderMethod = "add"##format##"Operands"; +} + +// Constructs an instruction operand for an addressing mode. FORMAT, +// BITSIZE, DISPSIZE and LENGTH are the parameters to an associated +// AddressAsmOperand. OPERANDS is a list of individual operands +// (base register, displacement, etc.). 
+class AddressOperand<string bitsize, string dispsize, string length, + string format, dag operands> + : Operand<!cast<ValueType>("i"##bitsize)> { + let PrintMethod = "print"##format##"Operand"; + let EncoderMethod = "get"##format##dispsize##length##"Encoding"; + let DecoderMethod = + "decode"##format##bitsize##"Disp"##dispsize##length##"Operand"; + let MIOperandInfo = operands; + let ParserMatchClass = + !cast<AddressAsmOperand>(format##bitsize##"Disp"##dispsize##length); +} + +// Constructs both a DAG pattern and instruction operand for an addressing mode. +// FORMAT, BITSIZE, DISPSIZE and LENGTH are the parameters to an associated +// AddressAsmOperand. OPERANDS is a list of NUMOPS individual operands +// (base register, displacement, etc.). SELTYPE is the type of the memory +// operand for selection purposes; sometimes we want different selection +// choices for the same underlying addressing mode. SUFFIX is similarly +// a suffix appended to the displacement for selection purposes; +// e.g. we want to reject small 20-bit displacements if a 12-bit form +// also exists, but we want to accept them otherwise. +class AddressingMode<string seltype, string bitsize, string dispsize, + string suffix, string length, int numops, string format, + dag operands> + : ComplexPattern<!cast<ValueType>("i"##bitsize), numops, + "select"##seltype##dispsize##suffix##length, + [add, sub, or, frameindex, z_adjdynalloc]>, + AddressOperand<bitsize, dispsize, length, format, operands>; + +// An addressing mode with a base and displacement but no index. +class BDMode<string type, string bitsize, string dispsize, string suffix> + : AddressingMode<type, bitsize, dispsize, suffix, "", 2, "BDAddr", + (ops !cast<RegisterOperand>("ADDR"##bitsize), + !cast<Immediate>("disp"##dispsize##"imm"##bitsize))>; + +// An addressing mode with a base, displacement and index. +class BDXMode<string type, string bitsize, string dispsize, string suffix> + : AddressingMode<type, bitsize, dispsize, suffix, "", 3, "BDXAddr", + (ops !cast<RegisterOperand>("ADDR"##bitsize), + !cast<Immediate>("disp"##dispsize##"imm"##bitsize), + !cast<RegisterOperand>("ADDR"##bitsize))>; + +// A BDMode paired with an immediate length operand of LENSIZE bits. +class BDLMode<string type, string bitsize, string dispsize, string suffix, + string lensize> + : AddressingMode<type, bitsize, dispsize, suffix, "Len"##lensize, 3, + "BDLAddr", + (ops !cast<RegisterOperand>("ADDR"##bitsize), + !cast<Immediate>("disp"##dispsize##"imm"##bitsize), + !cast<Immediate>("imm"##bitsize))>; + +// An addressing mode with a base, displacement and a vector index. +class BDVMode<string bitsize, string dispsize> + : AddressOperand<bitsize, dispsize, "", "BDVAddr", + (ops !cast<RegisterOperand>("ADDR"##bitsize), + !cast<Immediate>("disp"##dispsize##"imm"##bitsize), + !cast<RegisterOperand>("VR128"))>; + +//===----------------------------------------------------------------------===// +// Extracting immediate operands from nodes +// These all create MVT::i64 nodes to ensure the value is not sign-extended +// when converted from an SDNode to a MachineOperand later on. +//===----------------------------------------------------------------------===// + +// Bits 0-15 (counting from the lsb). +def LL16 : SDNodeXForm<imm, [{ + uint64_t Value = N->getZExtValue() & 0x000000000000FFFFULL; + return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64); +}]>; + +// Bits 16-31 (counting from the lsb). 
+def LH16 : SDNodeXForm<imm, [{
+  uint64_t Value = (N->getZExtValue() & 0x00000000FFFF0000ULL) >> 16;
+  return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64);
+}]>;
+
+// Bits 32-47 (counting from the lsb).
+def HL16 : SDNodeXForm<imm, [{
+  uint64_t Value = (N->getZExtValue() & 0x0000FFFF00000000ULL) >> 32;
+  return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64);
+}]>;
+
+// Bits 48-63 (counting from the lsb).
+def HH16 : SDNodeXForm<imm, [{
+  uint64_t Value = (N->getZExtValue() & 0xFFFF000000000000ULL) >> 48;
+  return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64);
+}]>;
+
+// Low 32 bits.
+def LF32 : SDNodeXForm<imm, [{
+  uint64_t Value = N->getZExtValue() & 0x00000000FFFFFFFFULL;
+  return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64);
+}]>;
+
+// High 32 bits.
+def HF32 : SDNodeXForm<imm, [{
+  uint64_t Value = N->getZExtValue() >> 32;
+  return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64);
+}]>;
+
+// Truncate an immediate to an 8-bit signed quantity.
+def SIMM8 : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(int8_t(N->getZExtValue()), SDLoc(N),
+                                   MVT::i64);
+}]>;
+
+// Truncate an immediate to an 8-bit unsigned quantity.
+def UIMM8 : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(uint8_t(N->getZExtValue()), SDLoc(N),
+                                   MVT::i64);
+}]>;
+
+// Truncate an immediate to an 8-bit unsigned quantity and mask off low bit.
+def UIMM8EVEN : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(N->getZExtValue() & 0xfe, SDLoc(N),
+                                   MVT::i64);
+}]>;
+
+// Truncate an immediate to a 12-bit unsigned quantity.
+def UIMM12 : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(N->getZExtValue() & 0xfff, SDLoc(N),
+                                   MVT::i64);
+}]>;
+
+// Truncate an immediate to a 16-bit signed quantity.
+def SIMM16 : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(int16_t(N->getZExtValue()), SDLoc(N),
+                                   MVT::i64);
+}]>;
+
+// Truncate an immediate to a 16-bit unsigned quantity.
+def UIMM16 : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(uint16_t(N->getZExtValue()), SDLoc(N),
+                                   MVT::i64);
+}]>;
+
+// Truncate an immediate to a 32-bit signed quantity.
+def SIMM32 : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(int32_t(N->getZExtValue()), SDLoc(N),
+                                   MVT::i64);
+}]>;
+
+// Truncate an immediate to a 32-bit unsigned quantity.
+def UIMM32 : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(uint32_t(N->getZExtValue()), SDLoc(N),
+                                   MVT::i64);
+}]>;
+
+// Negate and then truncate an immediate to a 32-bit unsigned quantity.
+def NEGIMM32 : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(uint32_t(-N->getZExtValue()), SDLoc(N),
+                                   MVT::i64);
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Immediate asm operands.
+//===----------------------------------------------------------------------===// + +def U1Imm : ImmediateAsmOperand<"U1Imm">; +def U2Imm : ImmediateAsmOperand<"U2Imm">; +def U3Imm : ImmediateAsmOperand<"U3Imm">; +def U4Imm : ImmediateAsmOperand<"U4Imm">; +def U6Imm : ImmediateAsmOperand<"U6Imm">; +def S8Imm : ImmediateAsmOperand<"S8Imm">; +def U8Imm : ImmediateAsmOperand<"U8Imm">; +def U12Imm : ImmediateAsmOperand<"U12Imm">; +def S16Imm : ImmediateAsmOperand<"S16Imm">; +def U16Imm : ImmediateAsmOperand<"U16Imm">; +def S32Imm : ImmediateAsmOperand<"S32Imm">; +def U32Imm : ImmediateAsmOperand<"U32Imm">; + +//===----------------------------------------------------------------------===// +// i32 immediates +//===----------------------------------------------------------------------===// + +// Immediates for the lower and upper 16 bits of an i32, with the other +// bits of the i32 being zero. +def imm32ll16 : Immediate<i32, [{ + return SystemZ::isImmLL(N->getZExtValue()); +}], LL16, "U16Imm">; + +def imm32lh16 : Immediate<i32, [{ + return SystemZ::isImmLH(N->getZExtValue()); +}], LH16, "U16Imm">; + +// Immediates for the lower and upper 16 bits of an i32, with the other +// bits of the i32 being one. +def imm32ll16c : Immediate<i32, [{ + return SystemZ::isImmLL(uint32_t(~N->getZExtValue())); +}], LL16, "U16Imm">; + +def imm32lh16c : Immediate<i32, [{ + return SystemZ::isImmLH(uint32_t(~N->getZExtValue())); +}], LH16, "U16Imm">; + +// Short immediates +def imm32zx1 : Immediate<i32, [{ + return isUInt<1>(N->getZExtValue()); +}], NOOP_SDNodeXForm, "U1Imm">; + +def imm32zx2 : Immediate<i32, [{ + return isUInt<2>(N->getZExtValue()); +}], NOOP_SDNodeXForm, "U2Imm">; + +def imm32zx3 : Immediate<i32, [{ + return isUInt<3>(N->getZExtValue()); +}], NOOP_SDNodeXForm, "U3Imm">; + +def imm32zx4 : Immediate<i32, [{ + return isUInt<4>(N->getZExtValue()); +}], NOOP_SDNodeXForm, "U4Imm">; + +// Note: this enforces an even value during code generation only. +// When used from the assembler, any 4-bit value is allowed. +def imm32zx4even : Immediate<i32, [{ + return isUInt<4>(N->getZExtValue()); +}], UIMM8EVEN, "U4Imm">; + +def imm32zx6 : Immediate<i32, [{ + return isUInt<6>(N->getZExtValue()); +}], NOOP_SDNodeXForm, "U6Imm">; + +def imm32sx8 : Immediate<i32, [{ + return isInt<8>(N->getSExtValue()); +}], SIMM8, "S8Imm">; + +def imm32zx8 : Immediate<i32, [{ + return isUInt<8>(N->getZExtValue()); +}], UIMM8, "U8Imm">; + +def imm32zx8trunc : Immediate<i32, [{}], UIMM8, "U8Imm">; + +def imm32zx12 : Immediate<i32, [{ + return isUInt<12>(N->getZExtValue()); +}], UIMM12, "U12Imm">; + +def imm32sx16 : Immediate<i32, [{ + return isInt<16>(N->getSExtValue()); +}], SIMM16, "S16Imm">; + +def imm32zx16 : Immediate<i32, [{ + return isUInt<16>(N->getZExtValue()); +}], UIMM16, "U16Imm">; + +def imm32sx16trunc : Immediate<i32, [{}], SIMM16, "S16Imm">; + +// Full 32-bit immediates. we need both signed and unsigned versions +// because the assembler is picky. E.g. AFI requires signed operands +// while NILF requires unsigned ones. +def simm32 : Immediate<i32, [{}], SIMM32, "S32Imm">; +def uimm32 : Immediate<i32, [{}], UIMM32, "U32Imm">; + +def imm32 : ImmLeaf<i32, [{}]>; + +//===----------------------------------------------------------------------===// +// 64-bit immediates +//===----------------------------------------------------------------------===// + +// Immediates for 16-bit chunks of an i64, with the other bits of the +// i32 being zero. 
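For orientation (an editorial sketch; the helpers are mine and merely mirror what the SystemZ::isImm* predicates and the LL16/LH16/HL16/HH16 transforms test and extract): LL, LH, HL and HH name the four 16-bit chunks of a 64-bit value, from least to most significant.

#include <cstdint>

// Extract the four 16-bit chunks of a 64-bit constant, low to high.
uint16_t chunkLL(uint64_t v) { return uint16_t(v); }
uint16_t chunkLH(uint64_t v) { return uint16_t(v >> 16); }
uint16_t chunkHL(uint64_t v) { return uint16_t(v >> 32); }
uint16_t chunkHH(uint64_t v) { return uint16_t(v >> 48); }

// A value qualifies as an "LL" immediate when every bit outside the low
// 16 bits is zero; the other chunks are analogous.
bool onlyLL(uint64_t v) { return (v & ~uint64_t(0xffff)) == 0; }
bool onlyLH(uint64_t v) { return (v & ~uint64_t(0xffff0000)) == 0; }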
+def imm64ll16 : Immediate<i64, [{ + return SystemZ::isImmLL(N->getZExtValue()); +}], LL16, "U16Imm">; + +def imm64lh16 : Immediate<i64, [{ + return SystemZ::isImmLH(N->getZExtValue()); +}], LH16, "U16Imm">; + +def imm64hl16 : Immediate<i64, [{ + return SystemZ::isImmHL(N->getZExtValue()); +}], HL16, "U16Imm">; + +def imm64hh16 : Immediate<i64, [{ + return SystemZ::isImmHH(N->getZExtValue()); +}], HH16, "U16Imm">; + +// Immediates for 16-bit chunks of an i64, with the other bits of the +// i64 being one. +def imm64ll16c : Immediate<i64, [{ + return SystemZ::isImmLL(uint64_t(~N->getZExtValue())); +}], LL16, "U16Imm">; + +def imm64lh16c : Immediate<i64, [{ + return SystemZ::isImmLH(uint64_t(~N->getZExtValue())); +}], LH16, "U16Imm">; + +def imm64hl16c : Immediate<i64, [{ + return SystemZ::isImmHL(uint64_t(~N->getZExtValue())); +}], HL16, "U16Imm">; + +def imm64hh16c : Immediate<i64, [{ + return SystemZ::isImmHH(uint64_t(~N->getZExtValue())); +}], HH16, "U16Imm">; + +// Immediates for the lower and upper 32 bits of an i64, with the other +// bits of the i64 being zero. +def imm64lf32 : Immediate<i64, [{ + return SystemZ::isImmLF(N->getZExtValue()); +}], LF32, "U32Imm">; + +def imm64hf32 : Immediate<i64, [{ + return SystemZ::isImmHF(N->getZExtValue()); +}], HF32, "U32Imm">; + +// Immediates for the lower and upper 32 bits of an i64, with the other +// bits of the i64 being one. +def imm64lf32c : Immediate<i64, [{ + return SystemZ::isImmLF(uint64_t(~N->getZExtValue())); +}], LF32, "U32Imm">; + +def imm64hf32c : Immediate<i64, [{ + return SystemZ::isImmHF(uint64_t(~N->getZExtValue())); +}], HF32, "U32Imm">; + +// Short immediates. +def imm64sx8 : Immediate<i64, [{ + return isInt<8>(N->getSExtValue()); +}], SIMM8, "S8Imm">; + +def imm64zx8 : Immediate<i64, [{ + return isUInt<8>(N->getSExtValue()); +}], UIMM8, "U8Imm">; + +def imm64sx16 : Immediate<i64, [{ + return isInt<16>(N->getSExtValue()); +}], SIMM16, "S16Imm">; + +def imm64zx16 : Immediate<i64, [{ + return isUInt<16>(N->getZExtValue()); +}], UIMM16, "U16Imm">; + +def imm64sx32 : Immediate<i64, [{ + return isInt<32>(N->getSExtValue()); +}], SIMM32, "S32Imm">; + +def imm64zx32 : Immediate<i64, [{ + return isUInt<32>(N->getZExtValue()); +}], UIMM32, "U32Imm">; + +def imm64zx32n : Immediate<i64, [{ + return isUInt<32>(-N->getSExtValue()); +}], NEGIMM32, "U32Imm">; + +def imm64 : ImmLeaf<i64, [{}]>, Operand<i64>; + +//===----------------------------------------------------------------------===// +// Floating-point immediates +//===----------------------------------------------------------------------===// + +// Floating-point zero. +def fpimm0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(+0.0); }]>; + +// Floating-point negative zero. +def fpimmneg0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(-0.0); }]>; + +//===----------------------------------------------------------------------===// +// Symbolic address operands +//===----------------------------------------------------------------------===// + +// PC-relative asm operands. +def PCRel16 : PCRelAsmOperand<"16">; +def PCRel32 : PCRelAsmOperand<"32">; +def PCRelTLS16 : PCRelTLSAsmOperand<"16">; +def PCRelTLS32 : PCRelTLSAsmOperand<"32">; + +// PC-relative offsets of a basic block. The offset is sign-extended +// and multiplied by 2.
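Put concretely (editorial note): the immediate stored in these PC-relative fields counts halfwords, so an encoded value of n denotes a byte offset of 2*n from the instruction address; an encoded 0x1000 in a 16-bit field, for example, reaches 0x2000 bytes away. The "DBL" in the encoder and decoder method names below presumably reflects this doubling.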
+def brtarget16 : PCRelOperand<OtherVT, PCRel16> { + let EncoderMethod = "getPC16DBLEncoding"; + let DecoderMethod = "decodePC16DBLOperand"; +} +def brtarget32 : PCRelOperand<OtherVT, PCRel32> { + let EncoderMethod = "getPC32DBLEncoding"; + let DecoderMethod = "decodePC32DBLOperand"; +} + +// Variants of brtarget16/32 with an optional additional TLS symbol. +// These are used to annotate calls to __tls_get_offset. +def tlssym : Operand<i64> { } +def brtarget16tls : PCRelTLSOperand<OtherVT, PCRelTLS16> { + let MIOperandInfo = (ops brtarget16:$func, tlssym:$sym); + let EncoderMethod = "getPC16DBLTLSEncoding"; + let DecoderMethod = "decodePC16DBLOperand"; +} +def brtarget32tls : PCRelTLSOperand<OtherVT, PCRelTLS32> { + let MIOperandInfo = (ops brtarget32:$func, tlssym:$sym); + let EncoderMethod = "getPC32DBLTLSEncoding"; + let DecoderMethod = "decodePC32DBLOperand"; +} + +// A PC-relative offset of a global value. The offset is sign-extended +// and multiplied by 2. +def pcrel32 : PCRelAddress<i64, "pcrel32", PCRel32> { + let EncoderMethod = "getPC32DBLEncoding"; + let DecoderMethod = "decodePC32DBLOperand"; +} + +//===----------------------------------------------------------------------===// +// Addressing modes +//===----------------------------------------------------------------------===// + +// 12-bit displacement operands. +def disp12imm32 : Operand<i32>; +def disp12imm64 : Operand<i64>; + +// 20-bit displacement operands. +def disp20imm32 : Operand<i32>; +def disp20imm64 : Operand<i64>; + +def BDAddr32Disp12 : AddressAsmOperand<"BDAddr", "32", "12">; +def BDAddr32Disp20 : AddressAsmOperand<"BDAddr", "32", "20">; +def BDAddr64Disp12 : AddressAsmOperand<"BDAddr", "64", "12">; +def BDAddr64Disp20 : AddressAsmOperand<"BDAddr", "64", "20">; +def BDXAddr64Disp12 : AddressAsmOperand<"BDXAddr", "64", "12">; +def BDXAddr64Disp20 : AddressAsmOperand<"BDXAddr", "64", "20">; +def BDLAddr64Disp12Len8 : AddressAsmOperand<"BDLAddr", "64", "12", "Len8">; +def BDVAddr64Disp12 : AddressAsmOperand<"BDVAddr", "64", "12">; + +// DAG patterns and operands for addressing modes. Each mode has +// the form <type><range><group>[<len>] where: +// +// <type> is one of: +// shift : base + displacement (32-bit) +// bdaddr : base + displacement +// mviaddr : like bdaddr, but reject cases with a natural index +// bdxaddr : base + displacement + index +// laaddr : like bdxaddr, but used for Load Address operations +// dynalloc : base + displacement + index + ADJDYNALLOC +// bdladdr : base + displacement with a length field +// bdvaddr : base + displacement with a vector index +// +// <range> is one of: +// 12 : the displacement is an unsigned 12-bit value +// 20 : the displacement is a signed 20-bit value +// +// <group> is one of: +// pair : used when there is an equivalent instruction with the opposite +// range value (12 or 20) +// only : used when there is no equivalent instruction with the opposite +// range value +// +// <len> is one of: +// +// <empty> : there is no length field +// len8 : the length field is 8 bits, with a range of [1, 0x100]. 
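Reading two of the definitions that follow against this naming scheme (editorial gloss): bdxaddr20pair is a base + displacement + index mode whose displacement is a signed 20-bit value and which has a 12-bit counterpart, while bdladdr12onlylen8 is a base + displacement mode with an 8-bit length field, an unsigned 12-bit displacement, and no 20-bit counterpart.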
+def shift12only : BDMode <"BDAddr", "32", "12", "Only">; +def shift20only : BDMode <"BDAddr", "32", "20", "Only">; +def bdaddr12only : BDMode <"BDAddr", "64", "12", "Only">; +def bdaddr12pair : BDMode <"BDAddr", "64", "12", "Pair">; +def bdaddr20only : BDMode <"BDAddr", "64", "20", "Only">; +def bdaddr20pair : BDMode <"BDAddr", "64", "20", "Pair">; +def mviaddr12pair : BDMode <"MVIAddr", "64", "12", "Pair">; +def mviaddr20pair : BDMode <"MVIAddr", "64", "20", "Pair">; +def bdxaddr12only : BDXMode<"BDXAddr", "64", "12", "Only">; +def bdxaddr12pair : BDXMode<"BDXAddr", "64", "12", "Pair">; +def bdxaddr20only : BDXMode<"BDXAddr", "64", "20", "Only">; +def bdxaddr20only128 : BDXMode<"BDXAddr", "64", "20", "Only128">; +def bdxaddr20pair : BDXMode<"BDXAddr", "64", "20", "Pair">; +def dynalloc12only : BDXMode<"DynAlloc", "64", "12", "Only">; +def laaddr12pair : BDXMode<"LAAddr", "64", "12", "Pair">; +def laaddr20pair : BDXMode<"LAAddr", "64", "20", "Pair">; +def bdladdr12onlylen8 : BDLMode<"BDLAddr", "64", "12", "Only", "8">; +def bdvaddr12only : BDVMode< "64", "12">; + +//===----------------------------------------------------------------------===// +// Miscellaneous +//===----------------------------------------------------------------------===// + +// Access registers. At present we just use them for accessing the thread +// pointer, so we don't expose them as registers to LLVM. +def AccessReg : AsmOperandClass { + let Name = "AccessReg"; + let ParserMethod = "parseAccessReg"; +} +def access_reg : Immediate<i32, [{ return N->getZExtValue() < 16; }], + NOOP_SDNodeXForm, "AccessReg"> { + let ParserMatchClass = AccessReg; +} + +// A 4-bit condition-code mask. +def cond4 : PatLeaf<(i32 imm), [{ return (N->getZExtValue() < 16); }]>, + Operand<i32> { + let PrintMethod = "printCond4Operand"; +} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td b/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td new file mode 100644 index 0000000..3c95a1e --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td @@ -0,0 +1,663 @@ +//===-- SystemZOperators.td - SystemZ-specific operators ------*- tblgen-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Type profiles +//===----------------------------------------------------------------------===// +def SDT_CallSeqStart : SDCallSeqStart<[SDTCisVT<0, i64>]>; +def SDT_CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i64>, + SDTCisVT<1, i64>]>; +def SDT_ZCall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>; +def SDT_ZCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>; +def SDT_ZICmp : SDTypeProfile<0, 3, + [SDTCisSameAs<0, 1>, + SDTCisVT<2, i32>]>; +def SDT_ZBRCCMask : SDTypeProfile<0, 3, + [SDTCisVT<0, i32>, + SDTCisVT<1, i32>, + SDTCisVT<2, OtherVT>]>; +def SDT_ZSelectCCMask : SDTypeProfile<1, 4, + [SDTCisSameAs<0, 1>, + SDTCisSameAs<1, 2>, + SDTCisVT<3, i32>, + SDTCisVT<4, i32>]>; +def SDT_ZWrapPtr : SDTypeProfile<1, 1, + [SDTCisSameAs<0, 1>, + SDTCisPtrTy<0>]>; +def SDT_ZWrapOffset : SDTypeProfile<1, 2, + [SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisPtrTy<0>]>; +def SDT_ZAdjDynAlloc : SDTypeProfile<1, 0, [SDTCisVT<0, i64>]>; +def SDT_ZExtractAccess : SDTypeProfile<1, 1, + [SDTCisVT<0, i32>, + SDTCisVT<1, i32>]>; +def SDT_ZGR128Binary32 : SDTypeProfile<1, 2, + [SDTCisVT<0, untyped>, + SDTCisVT<1, untyped>, + SDTCisVT<2, i32>]>; +def SDT_ZGR128Binary64 : SDTypeProfile<1, 2, + [SDTCisVT<0, untyped>, + SDTCisVT<1, untyped>, + SDTCisVT<2, i64>]>; +def SDT_ZAtomicLoadBinaryW : SDTypeProfile<1, 5, + [SDTCisVT<0, i32>, + SDTCisPtrTy<1>, + SDTCisVT<2, i32>, + SDTCisVT<3, i32>, + SDTCisVT<4, i32>, + SDTCisVT<5, i32>]>; +def SDT_ZAtomicCmpSwapW : SDTypeProfile<1, 6, + [SDTCisVT<0, i32>, + SDTCisPtrTy<1>, + SDTCisVT<2, i32>, + SDTCisVT<3, i32>, + SDTCisVT<4, i32>, + SDTCisVT<5, i32>, + SDTCisVT<6, i32>]>; +def SDT_ZMemMemLength : SDTypeProfile<0, 3, + [SDTCisPtrTy<0>, + SDTCisPtrTy<1>, + SDTCisVT<2, i64>]>; +def SDT_ZMemMemLoop : SDTypeProfile<0, 4, + [SDTCisPtrTy<0>, + SDTCisPtrTy<1>, + SDTCisVT<2, i64>, + SDTCisVT<3, i64>]>; +def SDT_ZString : SDTypeProfile<1, 3, + [SDTCisPtrTy<0>, + SDTCisPtrTy<1>, + SDTCisPtrTy<2>, + SDTCisVT<3, i32>]>; +def SDT_ZI32Intrinsic : SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>; +def SDT_ZPrefetch : SDTypeProfile<0, 2, + [SDTCisVT<0, i32>, + SDTCisPtrTy<1>]>; +def SDT_ZTBegin : SDTypeProfile<0, 2, + [SDTCisPtrTy<0>, + SDTCisVT<1, i32>]>; +def SDT_ZInsertVectorElt : SDTypeProfile<1, 3, + [SDTCisVec<0>, + SDTCisSameAs<0, 1>, + SDTCisVT<3, i32>]>; +def SDT_ZExtractVectorElt : SDTypeProfile<1, 2, + [SDTCisVec<1>, + SDTCisVT<2, i32>]>; +def SDT_ZReplicate : SDTypeProfile<1, 1, + [SDTCisVec<0>]>; +def SDT_ZVecUnaryConv : SDTypeProfile<1, 1, + [SDTCisVec<0>, + SDTCisVec<1>]>; +def SDT_ZVecUnary : SDTypeProfile<1, 1, + [SDTCisVec<0>, + SDTCisSameAs<0, 1>]>; +def SDT_ZVecBinary : SDTypeProfile<1, 2, + [SDTCisVec<0>, + SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>]>; +def SDT_ZVecBinaryInt : SDTypeProfile<1, 2, + [SDTCisVec<0>, + SDTCisSameAs<0, 1>, + SDTCisVT<2, i32>]>; +def SDT_ZVecBinaryConv : SDTypeProfile<1, 2, + [SDTCisVec<0>, + SDTCisVec<1>, + SDTCisSameAs<1, 2>]>; +def SDT_ZVecBinaryConvInt : SDTypeProfile<1, 2, + [SDTCisVec<0>, + SDTCisVec<1>, + SDTCisVT<2, i32>]>; +def SDT_ZRotateMask : SDTypeProfile<1, 2, + [SDTCisVec<0>, + SDTCisVT<1, i32>, + SDTCisVT<2, i32>]>; +def SDT_ZJoinDwords : SDTypeProfile<1, 2, + [SDTCisVT<0, v2i64>, + SDTCisVT<1, i64>, + SDTCisVT<2, i64>]>; +def SDT_ZVecTernary : SDTypeProfile<1, 3, + [SDTCisVec<0>, + SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>]>; +def SDT_ZVecTernaryInt : 
SDTypeProfile<1, 3, + [SDTCisVec<0>, + SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisVT<3, i32>]>; +def SDT_ZVecQuaternaryInt : SDTypeProfile<1, 4, + [SDTCisVec<0>, + SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>, + SDTCisVT<4, i32>]>; + +//===----------------------------------------------------------------------===// +// Node definitions +//===----------------------------------------------------------------------===// + +// These are target-independent nodes, but have target-specific formats. +def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_CallSeqStart, + [SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>; +def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_CallSeqEnd, + [SDNPHasChain, SDNPSideEffect, SDNPOptInGlue, + SDNPOutGlue]>; +def global_offset_table : SDNode<"ISD::GLOBAL_OFFSET_TABLE", SDTPtrLeaf>; + +// Nodes for SystemZISD::*. See SystemZISelLowering.h for more details. +def z_retflag : SDNode<"SystemZISD::RET_FLAG", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +def z_call : SDNode<"SystemZISD::CALL", SDT_ZCall, + [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, + SDNPVariadic]>; +def z_sibcall : SDNode<"SystemZISD::SIBCALL", SDT_ZCall, + [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, + SDNPVariadic]>; +def z_tls_gdcall : SDNode<"SystemZISD::TLS_GDCALL", SDT_ZCall, + [SDNPHasChain, SDNPInGlue, SDNPOutGlue, + SDNPVariadic]>; +def z_tls_ldcall : SDNode<"SystemZISD::TLS_LDCALL", SDT_ZCall, + [SDNPHasChain, SDNPInGlue, SDNPOutGlue, + SDNPVariadic]>; +def z_pcrel_wrapper : SDNode<"SystemZISD::PCREL_WRAPPER", SDT_ZWrapPtr, []>; +def z_pcrel_offset : SDNode<"SystemZISD::PCREL_OFFSET", + SDT_ZWrapOffset, []>; +def z_iabs : SDNode<"SystemZISD::IABS", SDTIntUnaryOp, []>; +def z_icmp : SDNode<"SystemZISD::ICMP", SDT_ZICmp, [SDNPOutGlue]>; +def z_fcmp : SDNode<"SystemZISD::FCMP", SDT_ZCmp, [SDNPOutGlue]>; +def z_tm : SDNode<"SystemZISD::TM", SDT_ZICmp, [SDNPOutGlue]>; +def z_br_ccmask : SDNode<"SystemZISD::BR_CCMASK", SDT_ZBRCCMask, + [SDNPHasChain, SDNPInGlue]>; +def z_select_ccmask : SDNode<"SystemZISD::SELECT_CCMASK", SDT_ZSelectCCMask, + [SDNPInGlue]>; +def z_adjdynalloc : SDNode<"SystemZISD::ADJDYNALLOC", SDT_ZAdjDynAlloc>; +def z_extract_access : SDNode<"SystemZISD::EXTRACT_ACCESS", + SDT_ZExtractAccess>; +def z_popcnt : SDNode<"SystemZISD::POPCNT", SDTIntUnaryOp>; +def z_umul_lohi64 : SDNode<"SystemZISD::UMUL_LOHI64", SDT_ZGR128Binary64>; +def z_sdivrem32 : SDNode<"SystemZISD::SDIVREM32", SDT_ZGR128Binary32>; +def z_sdivrem64 : SDNode<"SystemZISD::SDIVREM64", SDT_ZGR128Binary64>; +def z_udivrem32 : SDNode<"SystemZISD::UDIVREM32", SDT_ZGR128Binary32>; +def z_udivrem64 : SDNode<"SystemZISD::UDIVREM64", SDT_ZGR128Binary64>; + +def z_serialize : SDNode<"SystemZISD::SERIALIZE", SDTNone, + [SDNPHasChain, SDNPMayStore]>; + +// Defined because the index is an i32 rather than a pointer. 
+def z_vector_insert : SDNode<"ISD::INSERT_VECTOR_ELT", + SDT_ZInsertVectorElt>; +def z_vector_extract : SDNode<"ISD::EXTRACT_VECTOR_ELT", + SDT_ZExtractVectorElt>; +def z_byte_mask : SDNode<"SystemZISD::BYTE_MASK", SDT_ZReplicate>; +def z_rotate_mask : SDNode<"SystemZISD::ROTATE_MASK", SDT_ZRotateMask>; +def z_replicate : SDNode<"SystemZISD::REPLICATE", SDT_ZReplicate>; +def z_join_dwords : SDNode<"SystemZISD::JOIN_DWORDS", SDT_ZJoinDwords>; +def z_splat : SDNode<"SystemZISD::SPLAT", SDT_ZVecBinaryInt>; +def z_merge_high : SDNode<"SystemZISD::MERGE_HIGH", SDT_ZVecBinary>; +def z_merge_low : SDNode<"SystemZISD::MERGE_LOW", SDT_ZVecBinary>; +def z_shl_double : SDNode<"SystemZISD::SHL_DOUBLE", SDT_ZVecTernaryInt>; +def z_permute_dwords : SDNode<"SystemZISD::PERMUTE_DWORDS", + SDT_ZVecTernaryInt>; +def z_permute : SDNode<"SystemZISD::PERMUTE", SDT_ZVecTernary>; +def z_pack : SDNode<"SystemZISD::PACK", SDT_ZVecBinaryConv>; +def z_packs_cc : SDNode<"SystemZISD::PACKS_CC", SDT_ZVecBinaryConv, + [SDNPOutGlue]>; +def z_packls_cc : SDNode<"SystemZISD::PACKLS_CC", SDT_ZVecBinaryConv, + [SDNPOutGlue]>; +def z_unpack_high : SDNode<"SystemZISD::UNPACK_HIGH", SDT_ZVecUnaryConv>; +def z_unpackl_high : SDNode<"SystemZISD::UNPACKL_HIGH", SDT_ZVecUnaryConv>; +def z_unpack_low : SDNode<"SystemZISD::UNPACK_LOW", SDT_ZVecUnaryConv>; +def z_unpackl_low : SDNode<"SystemZISD::UNPACKL_LOW", SDT_ZVecUnaryConv>; +def z_vshl_by_scalar : SDNode<"SystemZISD::VSHL_BY_SCALAR", + SDT_ZVecBinaryInt>; +def z_vsrl_by_scalar : SDNode<"SystemZISD::VSRL_BY_SCALAR", + SDT_ZVecBinaryInt>; +def z_vsra_by_scalar : SDNode<"SystemZISD::VSRA_BY_SCALAR", + SDT_ZVecBinaryInt>; +def z_vsum : SDNode<"SystemZISD::VSUM", SDT_ZVecBinaryConv>; +def z_vicmpe : SDNode<"SystemZISD::VICMPE", SDT_ZVecBinary>; +def z_vicmph : SDNode<"SystemZISD::VICMPH", SDT_ZVecBinary>; +def z_vicmphl : SDNode<"SystemZISD::VICMPHL", SDT_ZVecBinary>; +def z_vicmpes : SDNode<"SystemZISD::VICMPES", SDT_ZVecBinary, + [SDNPOutGlue]>; +def z_vicmphs : SDNode<"SystemZISD::VICMPHS", SDT_ZVecBinary, + [SDNPOutGlue]>; +def z_vicmphls : SDNode<"SystemZISD::VICMPHLS", SDT_ZVecBinary, + [SDNPOutGlue]>; +def z_vfcmpe : SDNode<"SystemZISD::VFCMPE", SDT_ZVecBinaryConv>; +def z_vfcmph : SDNode<"SystemZISD::VFCMPH", SDT_ZVecBinaryConv>; +def z_vfcmphe : SDNode<"SystemZISD::VFCMPHE", SDT_ZVecBinaryConv>; +def z_vfcmpes : SDNode<"SystemZISD::VFCMPES", SDT_ZVecBinaryConv, + [SDNPOutGlue]>; +def z_vfcmphs : SDNode<"SystemZISD::VFCMPHS", SDT_ZVecBinaryConv, + [SDNPOutGlue]>; +def z_vfcmphes : SDNode<"SystemZISD::VFCMPHES", SDT_ZVecBinaryConv, + [SDNPOutGlue]>; +def z_vextend : SDNode<"SystemZISD::VEXTEND", SDT_ZVecUnaryConv>; +def z_vround : SDNode<"SystemZISD::VROUND", SDT_ZVecUnaryConv>; +def z_vtm : SDNode<"SystemZISD::VTM", SDT_ZCmp, [SDNPOutGlue]>; +def z_vfae_cc : SDNode<"SystemZISD::VFAE_CC", SDT_ZVecTernaryInt, + [SDNPOutGlue]>; +def z_vfaez_cc : SDNode<"SystemZISD::VFAEZ_CC", SDT_ZVecTernaryInt, + [SDNPOutGlue]>; +def z_vfee_cc : SDNode<"SystemZISD::VFEE_CC", SDT_ZVecBinary, + [SDNPOutGlue]>; +def z_vfeez_cc : SDNode<"SystemZISD::VFEEZ_CC", SDT_ZVecBinary, + [SDNPOutGlue]>; +def z_vfene_cc : SDNode<"SystemZISD::VFENE_CC", SDT_ZVecBinary, + [SDNPOutGlue]>; +def z_vfenez_cc : SDNode<"SystemZISD::VFENEZ_CC", SDT_ZVecBinary, + [SDNPOutGlue]>; +def z_vistr_cc : SDNode<"SystemZISD::VISTR_CC", SDT_ZVecUnary, + [SDNPOutGlue]>; +def z_vstrc_cc : SDNode<"SystemZISD::VSTRC_CC", SDT_ZVecQuaternaryInt, + [SDNPOutGlue]>; +def z_vstrcz_cc : SDNode<"SystemZISD::VSTRCZ_CC", + 
SDT_ZVecQuaternaryInt, [SDNPOutGlue]>; +def z_vftci : SDNode<"SystemZISD::VFTCI", SDT_ZVecBinaryConvInt, + [SDNPOutGlue]>; + +class AtomicWOp<string name, SDTypeProfile profile = SDT_ZAtomicLoadBinaryW> + : SDNode<"SystemZISD::"##name, profile, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; + +def z_atomic_swapw : AtomicWOp<"ATOMIC_SWAPW">; +def z_atomic_loadw_add : AtomicWOp<"ATOMIC_LOADW_ADD">; +def z_atomic_loadw_sub : AtomicWOp<"ATOMIC_LOADW_SUB">; +def z_atomic_loadw_and : AtomicWOp<"ATOMIC_LOADW_AND">; +def z_atomic_loadw_or : AtomicWOp<"ATOMIC_LOADW_OR">; +def z_atomic_loadw_xor : AtomicWOp<"ATOMIC_LOADW_XOR">; +def z_atomic_loadw_nand : AtomicWOp<"ATOMIC_LOADW_NAND">; +def z_atomic_loadw_min : AtomicWOp<"ATOMIC_LOADW_MIN">; +def z_atomic_loadw_max : AtomicWOp<"ATOMIC_LOADW_MAX">; +def z_atomic_loadw_umin : AtomicWOp<"ATOMIC_LOADW_UMIN">; +def z_atomic_loadw_umax : AtomicWOp<"ATOMIC_LOADW_UMAX">; +def z_atomic_cmp_swapw : AtomicWOp<"ATOMIC_CMP_SWAPW", SDT_ZAtomicCmpSwapW>; + +def z_mvc : SDNode<"SystemZISD::MVC", SDT_ZMemMemLength, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; +def z_mvc_loop : SDNode<"SystemZISD::MVC_LOOP", SDT_ZMemMemLoop, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; +def z_nc : SDNode<"SystemZISD::NC", SDT_ZMemMemLength, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; +def z_nc_loop : SDNode<"SystemZISD::NC_LOOP", SDT_ZMemMemLoop, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; +def z_oc : SDNode<"SystemZISD::OC", SDT_ZMemMemLength, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; +def z_oc_loop : SDNode<"SystemZISD::OC_LOOP", SDT_ZMemMemLoop, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; +def z_xc : SDNode<"SystemZISD::XC", SDT_ZMemMemLength, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; +def z_xc_loop : SDNode<"SystemZISD::XC_LOOP", SDT_ZMemMemLoop, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; +def z_clc : SDNode<"SystemZISD::CLC", SDT_ZMemMemLength, + [SDNPHasChain, SDNPOutGlue, SDNPMayLoad]>; +def z_clc_loop : SDNode<"SystemZISD::CLC_LOOP", SDT_ZMemMemLoop, + [SDNPHasChain, SDNPOutGlue, SDNPMayLoad]>; +def z_strcmp : SDNode<"SystemZISD::STRCMP", SDT_ZString, + [SDNPHasChain, SDNPOutGlue, SDNPMayLoad]>; +def z_stpcpy : SDNode<"SystemZISD::STPCPY", SDT_ZString, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; +def z_search_string : SDNode<"SystemZISD::SEARCH_STRING", SDT_ZString, + [SDNPHasChain, SDNPOutGlue, SDNPMayLoad]>; +def z_ipm : SDNode<"SystemZISD::IPM", SDT_ZI32Intrinsic, + [SDNPInGlue]>; +def z_prefetch : SDNode<"SystemZISD::PREFETCH", SDT_ZPrefetch, + [SDNPHasChain, SDNPMayLoad, SDNPMayStore, + SDNPMemOperand]>; + +def z_tbegin : SDNode<"SystemZISD::TBEGIN", SDT_ZTBegin, + [SDNPHasChain, SDNPOutGlue, SDNPMayStore, + SDNPSideEffect]>; +def z_tbegin_nofloat : SDNode<"SystemZISD::TBEGIN_NOFLOAT", SDT_ZTBegin, + [SDNPHasChain, SDNPOutGlue, SDNPMayStore, + SDNPSideEffect]>; +def z_tend : SDNode<"SystemZISD::TEND", SDTNone, + [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>; + +def z_vshl : SDNode<"ISD::SHL", SDT_ZVecBinary>; +def z_vsra : SDNode<"ISD::SRA", SDT_ZVecBinary>; +def z_vsrl : SDNode<"ISD::SRL", SDT_ZVecBinary>; + +//===----------------------------------------------------------------------===// +// Pattern fragments +//===----------------------------------------------------------------------===// + +// Signed and unsigned comparisons. 
+def z_scmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, imm), [{ + unsigned Type = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); + return Type != SystemZICMP::UnsignedOnly; +}]>; +def z_ucmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, imm), [{ + unsigned Type = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); + return Type != SystemZICMP::SignedOnly; +}]>; + +// Register- and memory-based TEST UNDER MASK. +def z_tm_reg : PatFrag<(ops node:$a, node:$b), (z_tm node:$a, node:$b, imm)>; +def z_tm_mem : PatFrag<(ops node:$a, node:$b), (z_tm node:$a, node:$b, 0)>; + +// Register sign-extend operations. Sub-32-bit values are represented as i32s. +def sext8 : PatFrag<(ops node:$src), (sext_inreg node:$src, i8)>; +def sext16 : PatFrag<(ops node:$src), (sext_inreg node:$src, i16)>; +def sext32 : PatFrag<(ops node:$src), (sext (i32 node:$src))>; + +// Match extensions of an i32 to an i64, followed by an in-register sign +// extension from a sub-i32 value. +def sext8dbl : PatFrag<(ops node:$src), (sext8 (anyext node:$src))>; +def sext16dbl : PatFrag<(ops node:$src), (sext16 (anyext node:$src))>; + +// Register zero-extend operations. Sub-32-bit values are represented as i32s. +def zext8 : PatFrag<(ops node:$src), (and node:$src, 0xff)>; +def zext16 : PatFrag<(ops node:$src), (and node:$src, 0xffff)>; +def zext32 : PatFrag<(ops node:$src), (zext (i32 node:$src))>; + +// Match extensions of an i32 to an i64, followed by an AND of the low +// i8 or i16 part. +def zext8dbl : PatFrag<(ops node:$src), (zext8 (anyext node:$src))>; +def zext16dbl : PatFrag<(ops node:$src), (zext16 (anyext node:$src))>; + +// Typed floating-point loads. +def loadf32 : PatFrag<(ops node:$src), (f32 (load node:$src))>; +def loadf64 : PatFrag<(ops node:$src), (f64 (load node:$src))>; + +// Extending loads in which the extension type can be signed. +def asextload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{ + unsigned Type = cast<LoadSDNode>(N)->getExtensionType(); + return Type == ISD::EXTLOAD || Type == ISD::SEXTLOAD; +}]>; +def asextloadi8 : PatFrag<(ops node:$ptr), (asextload node:$ptr), [{ + return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8; +}]>; +def asextloadi16 : PatFrag<(ops node:$ptr), (asextload node:$ptr), [{ + return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16; +}]>; +def asextloadi32 : PatFrag<(ops node:$ptr), (asextload node:$ptr), [{ + return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32; +}]>; + +// Extending loads in which the extension type can be unsigned. +def azextload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{ + unsigned Type = cast<LoadSDNode>(N)->getExtensionType(); + return Type == ISD::EXTLOAD || Type == ISD::ZEXTLOAD; +}]>; +def azextloadi8 : PatFrag<(ops node:$ptr), (azextload node:$ptr), [{ + return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8; +}]>; +def azextloadi16 : PatFrag<(ops node:$ptr), (azextload node:$ptr), [{ + return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16; +}]>; +def azextloadi32 : PatFrag<(ops node:$ptr), (azextload node:$ptr), [{ + return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32; +}]>; + +// Extending loads in which the extension type doesn't matter. 
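As a concrete example of how these load families differ (editorial note): a sign-extending i8 load (SEXTLOAD) is matched by asextloadi8 above and by anyextloadi8 below, but not by azextloadi8, whereas a plain any-extending i8 load (EXTLOAD) is matched by all three.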
+def anyextload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{ + return cast<LoadSDNode>(N)->getExtensionType() != ISD::NON_EXTLOAD; +}]>; +def anyextloadi8 : PatFrag<(ops node:$ptr), (anyextload node:$ptr), [{ + return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8; +}]>; +def anyextloadi16 : PatFrag<(ops node:$ptr), (anyextload node:$ptr), [{ + return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16; +}]>; +def anyextloadi32 : PatFrag<(ops node:$ptr), (anyextload node:$ptr), [{ + return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32; +}]>; + +// Aligned loads. +class AlignedLoad<SDPatternOperator load> + : PatFrag<(ops node:$addr), (load node:$addr), [{ + auto *Load = cast<LoadSDNode>(N); + return Load->getAlignment() >= Load->getMemoryVT().getStoreSize(); +}]>; +def aligned_load : AlignedLoad<load>; +def aligned_asextloadi16 : AlignedLoad<asextloadi16>; +def aligned_asextloadi32 : AlignedLoad<asextloadi32>; +def aligned_azextloadi16 : AlignedLoad<azextloadi16>; +def aligned_azextloadi32 : AlignedLoad<azextloadi32>; + +// Aligned stores. +class AlignedStore<SDPatternOperator store> + : PatFrag<(ops node:$src, node:$addr), (store node:$src, node:$addr), [{ + auto *Store = cast<StoreSDNode>(N); + return Store->getAlignment() >= Store->getMemoryVT().getStoreSize(); +}]>; +def aligned_store : AlignedStore<store>; +def aligned_truncstorei16 : AlignedStore<truncstorei16>; +def aligned_truncstorei32 : AlignedStore<truncstorei32>; + +// Non-volatile loads. Used for instructions that might access the storage +// location multiple times. +class NonvolatileLoad<SDPatternOperator load> + : PatFrag<(ops node:$addr), (load node:$addr), [{ + auto *Load = cast<LoadSDNode>(N); + return !Load->isVolatile(); +}]>; +def nonvolatile_load : NonvolatileLoad<load>; +def nonvolatile_anyextloadi8 : NonvolatileLoad<anyextloadi8>; +def nonvolatile_anyextloadi16 : NonvolatileLoad<anyextloadi16>; +def nonvolatile_anyextloadi32 : NonvolatileLoad<anyextloadi32>; + +// Non-volatile stores. +class NonvolatileStore<SDPatternOperator store> + : PatFrag<(ops node:$src, node:$addr), (store node:$src, node:$addr), [{ + auto *Store = cast<StoreSDNode>(N); + return !Store->isVolatile(); +}]>; +def nonvolatile_store : NonvolatileStore<store>; +def nonvolatile_truncstorei8 : NonvolatileStore<truncstorei8>; +def nonvolatile_truncstorei16 : NonvolatileStore<truncstorei16>; +def nonvolatile_truncstorei32 : NonvolatileStore<truncstorei32>; + +// A store of a load that can be implemented using MVC. +def mvc_store : PatFrag<(ops node:$value, node:$addr), + (unindexedstore node:$value, node:$addr), + [{ return storeLoadCanUseMVC(N); }]>; + +// Binary read-modify-write operations on memory in which the other +// operand is also memory and for which block operations like NC can +// be used. There are two patterns for each operator, depending on +// which operand contains the "other" load. +multiclass block_op<SDPatternOperator operator> { + def "1" : PatFrag<(ops node:$value, node:$addr), + (unindexedstore (operator node:$value, + (unindexedload node:$addr)), + node:$addr), + [{ return storeLoadCanUseBlockBinary(N, 0); }]>; + def "2" : PatFrag<(ops node:$value, node:$addr), + (unindexedstore (operator (unindexedload node:$addr), + node:$value), + node:$addr), + [{ return storeLoadCanUseBlockBinary(N, 1); }]>; +} +defm block_and : block_op<and>; +defm block_or : block_op<or>; +defm block_xor : block_op<xor>; + +// Insertions. 
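The insertion fragments defined next all share the same AND-then-OR shape. As an illustration (editorial, not from the patch, and assuming the inserted operand already fits the cleared field), the scalar computations they describe look like this in plain C++:

#include <cstdint>

// inserti8: replace the low byte of X with Y.
uint64_t insert_i8(uint64_t X, uint8_t Y) {
  return (X & ~0xffULL) | Y;
}

// insertll: replace bits 0-15 of X with Y.
uint64_t insert_ll(uint64_t X, uint16_t Y) {
  return (X & 0xffffffffffff0000ULL) | Y;
}

// insertlf: replace the low 32 bits of X with Y.
uint64_t insert_lf(uint64_t X, uint32_t Y) {
  return (X & 0xffffffff00000000ULL) | Y;
}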
+def inserti8 : PatFrag<(ops node:$src1, node:$src2), + (or (and node:$src1, -256), node:$src2)>; +def insertll : PatFrag<(ops node:$src1, node:$src2), + (or (and node:$src1, 0xffffffffffff0000), node:$src2)>; +def insertlh : PatFrag<(ops node:$src1, node:$src2), + (or (and node:$src1, 0xffffffff0000ffff), node:$src2)>; +def inserthl : PatFrag<(ops node:$src1, node:$src2), + (or (and node:$src1, 0xffff0000ffffffff), node:$src2)>; +def inserthh : PatFrag<(ops node:$src1, node:$src2), + (or (and node:$src1, 0x0000ffffffffffff), node:$src2)>; +def insertlf : PatFrag<(ops node:$src1, node:$src2), + (or (and node:$src1, 0xffffffff00000000), node:$src2)>; +def inserthf : PatFrag<(ops node:$src1, node:$src2), + (or (and node:$src1, 0x00000000ffffffff), node:$src2)>; + +// ORs that can be treated as insertions. +def or_as_inserti8 : PatFrag<(ops node:$src1, node:$src2), + (or node:$src1, node:$src2), [{ + unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits(); + return CurDAG->MaskedValueIsZero(N->getOperand(0), + APInt::getLowBitsSet(BitWidth, 8)); +}]>; + +// ORs that can be treated as reversed insertions. +def or_as_revinserti8 : PatFrag<(ops node:$src1, node:$src2), + (or node:$src1, node:$src2), [{ + unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits(); + return CurDAG->MaskedValueIsZero(N->getOperand(1), + APInt::getLowBitsSet(BitWidth, 8)); +}]>; + +// Negative integer absolute. +def z_inegabs : PatFrag<(ops node:$src), (ineg (z_iabs node:$src))>; + +// Integer absolute, matching the canonical form generated by DAGCombiner. +def z_iabs32 : PatFrag<(ops node:$src), + (xor (add node:$src, (sra node:$src, (i32 31))), + (sra node:$src, (i32 31)))>; +def z_iabs64 : PatFrag<(ops node:$src), + (xor (add node:$src, (sra node:$src, (i32 63))), + (sra node:$src, (i32 63)))>; +def z_inegabs32 : PatFrag<(ops node:$src), (ineg (z_iabs32 node:$src))>; +def z_inegabs64 : PatFrag<(ops node:$src), (ineg (z_iabs64 node:$src))>; + +// Integer multiply-and-add +def z_muladd : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (add (mul node:$src1, node:$src2), node:$src3)>; + +// Fused multiply-subtract, using the natural operand order. +def fms : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (fma node:$src1, node:$src2, (fneg node:$src3))>; + +// Fused multiply-add and multiply-subtract, but with the order of the +// operands matching SystemZ's MA and MS instructions. +def z_fma : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (fma node:$src2, node:$src3, node:$src1)>; +def z_fms : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (fma node:$src2, node:$src3, (fneg node:$src1))>; + +// Floating-point negative absolute. +def fnabs : PatFrag<(ops node:$ptr), (fneg (fabs node:$ptr))>; + +// Create a unary operator that loads from memory and then performs +// the given operation on it. +class loadu<SDPatternOperator operator, SDPatternOperator load = load> + : PatFrag<(ops node:$addr), (operator (load node:$addr))>; + +// Create a store operator that performs the given unary operation +// on the value before storing it. +class storeu<SDPatternOperator operator, SDPatternOperator store = store> + : PatFrag<(ops node:$value, node:$addr), + (store (operator node:$value), node:$addr)>; + +// Vector representation of all-zeros and all-ones. +def z_vzero : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 0))))>; +def z_vones : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 65535))))>; + +// Load a scalar and replicate it in all elements of a vector. 
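A small standalone sketch (editorial, not from the patch) of the dataflow the replicate-load fragments below are meant to match, shown for the i64 case with a two-element vector:

#include <cstdint>

struct V2i64 { uint64_t Lane[2]; };

// One scalar load, then the same value in every element.
V2i64 replicate_load_i64(const uint64_t *Addr) {
  uint64_t S = *Addr;
  return V2i64{{S, S}};
}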
+class z_replicate_load<ValueType scalartype, SDPatternOperator load> + : PatFrag<(ops node:$addr), + (z_replicate (scalartype (load node:$addr)))>; +def z_replicate_loadi8 : z_replicate_load<i32, anyextloadi8>; +def z_replicate_loadi16 : z_replicate_load<i32, anyextloadi16>; +def z_replicate_loadi32 : z_replicate_load<i32, load>; +def z_replicate_loadi64 : z_replicate_load<i64, load>; +def z_replicate_loadf32 : z_replicate_load<f32, load>; +def z_replicate_loadf64 : z_replicate_load<f64, load>; + +// Load a scalar and insert it into a single element of a vector. +class z_vle<ValueType scalartype, SDPatternOperator load> + : PatFrag<(ops node:$vec, node:$addr, node:$index), + (z_vector_insert node:$vec, (scalartype (load node:$addr)), + node:$index)>; +def z_vlei8 : z_vle<i32, anyextloadi8>; +def z_vlei16 : z_vle<i32, anyextloadi16>; +def z_vlei32 : z_vle<i32, load>; +def z_vlei64 : z_vle<i64, load>; +def z_vlef32 : z_vle<f32, load>; +def z_vlef64 : z_vle<f64, load>; + +// Load a scalar and insert it into the low element of the high i64 of a +// zeroed vector. +class z_vllez<ValueType scalartype, SDPatternOperator load, int index> + : PatFrag<(ops node:$addr), + (z_vector_insert (z_vzero), + (scalartype (load node:$addr)), (i32 index))>; +def z_vllezi8 : z_vllez<i32, anyextloadi8, 7>; +def z_vllezi16 : z_vllez<i32, anyextloadi16, 3>; +def z_vllezi32 : z_vllez<i32, load, 1>; +def z_vllezi64 : PatFrag<(ops node:$addr), + (z_join_dwords (i64 (load node:$addr)), (i64 0))>; +// We use high merges to form a v4f32 from four f32s. Propagating zero +// into all elements but index 1 gives this expression. +def z_vllezf32 : PatFrag<(ops node:$addr), + (bitconvert + (z_merge_high + (v2i64 + (z_unpackl_high + (v4i32 + (bitconvert + (v4f32 (scalar_to_vector + (f32 (load node:$addr)))))))), + (v2i64 (z_vzero))))>; +def z_vllezf64 : PatFrag<(ops node:$addr), + (z_merge_high + (scalar_to_vector (f64 (load node:$addr))), + (z_vzero))>; + +// Store one element of a vector. +class z_vste<ValueType scalartype, SDPatternOperator store> + : PatFrag<(ops node:$vec, node:$addr, node:$index), + (store (scalartype (z_vector_extract node:$vec, node:$index)), + node:$addr)>; +def z_vstei8 : z_vste<i32, truncstorei8>; +def z_vstei16 : z_vste<i32, truncstorei16>; +def z_vstei32 : z_vste<i32, store>; +def z_vstei64 : z_vste<i64, store>; +def z_vstef32 : z_vste<f32, store>; +def z_vstef64 : z_vste<f64, store>; + +// Arithmetic negation on vectors. +def z_vneg : PatFrag<(ops node:$x), (sub (z_vzero), node:$x)>; + +// Bitwise negation on vectors. +def z_vnot : PatFrag<(ops node:$x), (xor node:$x, (z_vones))>; + +// Signed "integer greater than zero" on vectors. +def z_vicmph_zero : PatFrag<(ops node:$x), (z_vicmph node:$x, (z_vzero))>; + +// Signed "integer less than zero" on vectors. +def z_vicmpl_zero : PatFrag<(ops node:$x), (z_vicmph (z_vzero), node:$x)>; + +// Integer absolute on vectors. +class z_viabs<int shift> + : PatFrag<(ops node:$src), + (xor (add node:$src, (z_vsra_by_scalar node:$src, (i32 shift))), + (z_vsra_by_scalar node:$src, (i32 shift)))>; +def z_viabs8 : z_viabs<7>; +def z_viabs16 : z_viabs<15>; +def z_viabs32 : z_viabs<31>; +def z_viabs64 : z_viabs<63>; + +// Sign-extend the i64 elements of a vector. +class z_vse<int shift> + : PatFrag<(ops node:$src), + (z_vsra_by_scalar (z_vshl_by_scalar node:$src, shift), shift)>; +def z_vsei8 : z_vse<56>; +def z_vsei16 : z_vse<48>; +def z_vsei32 : z_vse<32>; + +// ...and again with the extensions being done on individual i64 scalars. 
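The z_vse classes above sign-extend the low 8, 16 or 32 bits of each i64 element with a shift-left/arithmetic-shift-right pair; here is a scalar C++ sketch of that trick (editorial, assuming the usual arithmetic right shift on signed values). The *_by_parts fragments that follow achieve the same result by extracting and extending each i64 scalar individually.

#include <cstdint>

// z_vsei8 (shift = 56): sign-extend the low 8 bits of an element in place.
int64_t sext_low8(uint64_t Elt) {
  return (int64_t)(Elt << 56) >> 56;
}

// z_vsei16 (shift = 48): sign-extend the low 16 bits.
int64_t sext_low16(uint64_t Elt) {
  return (int64_t)(Elt << 48) >> 48;
}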
+class z_vse_by_parts<SDPatternOperator operator, int index1, int index2> + : PatFrag<(ops node:$src), + (z_join_dwords + (operator (z_vector_extract node:$src, index1)), + (operator (z_vector_extract node:$src, index2)))>; +def z_vsei8_by_parts : z_vse_by_parts<sext8dbl, 7, 15>; +def z_vsei16_by_parts : z_vse_by_parts<sext16dbl, 3, 7>; +def z_vsei32_by_parts : z_vse_by_parts<sext32, 1, 3>; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZPatterns.td b/contrib/llvm/lib/Target/SystemZ/SystemZPatterns.td new file mode 100644 index 0000000..16a7ed7 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZPatterns.td @@ -0,0 +1,169 @@ +//===-- SystemZPatterns.td - SystemZ-specific pattern rules ---*- tblgen-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// Record that INSN performs a 64-bit version of unary operator OPERATOR +// in which the operand is sign-extended from 32 to 64 bits. +multiclass SXU<SDPatternOperator operator, Instruction insn> { + def : Pat<(operator (sext (i32 GR32:$src))), + (insn GR32:$src)>; + def : Pat<(operator (sext_inreg GR64:$src, i32)), + (insn (EXTRACT_SUBREG GR64:$src, subreg_l32))>; +} + +// Record that INSN performs a 64-bit version of binary operator OPERATOR +// in which the first operand has class CLS and which the second operand +// is sign-extended from a 32-bit register. +multiclass SXB<SDPatternOperator operator, RegisterOperand cls, + Instruction insn> { + def : Pat<(operator cls:$src1, (sext GR32:$src2)), + (insn cls:$src1, GR32:$src2)>; + def : Pat<(operator cls:$src1, (sext_inreg GR64:$src2, i32)), + (insn cls:$src1, (EXTRACT_SUBREG GR64:$src2, subreg_l32))>; +} + +// Like SXB, but for zero extension. +multiclass ZXB<SDPatternOperator operator, RegisterOperand cls, + Instruction insn> { + def : Pat<(operator cls:$src1, (zext GR32:$src2)), + (insn cls:$src1, GR32:$src2)>; + def : Pat<(operator cls:$src1, (and GR64:$src2, 0xffffffff)), + (insn cls:$src1, (EXTRACT_SUBREG GR64:$src2, subreg_l32))>; +} + +// Record that INSN performs a binary read-modify-write operation, +// with LOAD, OPERATOR and STORE being the read, modify and write +// respectively. MODE is the addressing mode and IMM is the type +// of the second operand. +class RMWI<SDPatternOperator load, SDPatternOperator operator, + SDPatternOperator store, AddressingMode mode, + PatFrag imm, Instruction insn> + : Pat<(store (operator (load mode:$addr), imm:$src), mode:$addr), + (insn mode:$addr, (UIMM8 imm:$src))>; + +// Record that INSN performs binary operation OPERATION on a byte +// memory location. IMM is the type of the second operand. +multiclass RMWIByte<SDPatternOperator operator, AddressingMode mode, + Instruction insn> { + def : RMWI<anyextloadi8, operator, truncstorei8, mode, imm32, insn>; + def : RMWI<anyextloadi8, operator, truncstorei8, mode, imm64, insn>; +} + +// Record that INSN performs insertion TYPE into a register of class CLS. +// The inserted operand is loaded using LOAD from an address of mode MODE. 
+multiclass InsertMem<string type, Instruction insn, RegisterOperand cls, + SDPatternOperator load, AddressingMode mode> { + def : Pat<(!cast<SDPatternOperator>("or_as_"##type) + cls:$src1, (load mode:$src2)), + (insn cls:$src1, mode:$src2)>; + def : Pat<(!cast<SDPatternOperator>("or_as_rev"##type) + (load mode:$src2), cls:$src1), + (insn cls:$src1, mode:$src2)>; +} + +// INSN stores the low 32 bits of a GPR to a memory with addressing mode MODE. +// Record that it is equivalent to using OPERATOR to store a GR64. +class StoreGR64<Instruction insn, SDPatternOperator operator, + AddressingMode mode> + : Pat<(operator GR64:$R1, mode:$XBD2), + (insn (EXTRACT_SUBREG GR64:$R1, subreg_l32), mode:$XBD2)>; + +// INSN and INSNY are an RX/RXY pair of instructions that store the low +// 32 bits of a GPR to memory. Record that they are equivalent to using +// OPERATOR to store a GR64. +multiclass StoreGR64Pair<Instruction insn, Instruction insny, + SDPatternOperator operator> { + def : StoreGR64<insn, operator, bdxaddr12pair>; + def : StoreGR64<insny, operator, bdxaddr20pair>; +} + +// INSN stores the low 32 bits of a GPR using PC-relative addressing. +// Record that it is equivalent to using OPERATOR to store a GR64. +class StoreGR64PC<Instruction insn, SDPatternOperator operator> + : Pat<(operator GR64:$R1, pcrel32:$XBD2), + (insn (EXTRACT_SUBREG GR64:$R1, subreg_l32), pcrel32:$XBD2)> { + // We want PC-relative addresses to be tried ahead of BD and BDX addresses. + // However, BDXs have two extra operands and are therefore 6 units more + // complex. + let AddedComplexity = 7; +} + +// INSN and INSNINV conditionally store the low 32 bits of a GPR to memory, +// with INSN storing when the condition is true and INSNINV storing when the +// condition is false. Record that they are equivalent to a LOAD/select/STORE +// sequence for GR64s. +multiclass CondStores64<Instruction insn, Instruction insninv, + SDPatternOperator store, SDPatternOperator load, + AddressingMode mode> { + def : Pat<(store (z_select_ccmask GR64:$new, (load mode:$addr), + imm32zx4:$valid, imm32zx4:$cc), + mode:$addr), + (insn (EXTRACT_SUBREG GR64:$new, subreg_l32), mode:$addr, + imm32zx4:$valid, imm32zx4:$cc)>; + def : Pat<(store (z_select_ccmask (load mode:$addr), GR64:$new, + imm32zx4:$valid, imm32zx4:$cc), + mode:$addr), + (insninv (EXTRACT_SUBREG GR64:$new, subreg_l32), mode:$addr, + imm32zx4:$valid, imm32zx4:$cc)>; +} + +// Try to use MVC instruction INSN for a load of type LOAD followed by a store +// of the same size. VT is the type of the intermediate (legalized) value and +// LENGTH is the number of bytes loaded by LOAD. +multiclass MVCLoadStore<SDPatternOperator load, ValueType vt, Instruction insn, + bits<5> length> { + def : Pat<(mvc_store (vt (load bdaddr12only:$src)), bdaddr12only:$dest), + (insn bdaddr12only:$dest, bdaddr12only:$src, length)>; +} + +// Use NC-like instruction INSN for block_op operation OPERATOR. +// The other operand is a load of type LOAD, which accesses LENGTH bytes. +// VT is the intermediate legalized type in which the binary operation +// is actually done. +multiclass BinaryLoadStore<SDPatternOperator operator, SDPatternOperator load, + ValueType vt, Instruction insn, bits<5> length> { + def : Pat<(operator (vt (load bdaddr12only:$src)), bdaddr12only:$dest), + (insn bdaddr12only:$dest, bdaddr12only:$src, length)>; +} + +// A convenient way of generating all block peepholes for a particular +// LOAD/VT/LENGTH combination. 
+multiclass BlockLoadStore<SDPatternOperator load, ValueType vt, + Instruction mvc, Instruction nc, Instruction oc, + Instruction xc, bits<5> length> { + defm : MVCLoadStore<load, vt, mvc, length>; + defm : BinaryLoadStore<block_and1, load, vt, nc, length>; + defm : BinaryLoadStore<block_and2, load, vt, nc, length>; + defm : BinaryLoadStore<block_or1, load, vt, oc, length>; + defm : BinaryLoadStore<block_or2, load, vt, oc, length>; + defm : BinaryLoadStore<block_xor1, load, vt, xc, length>; + defm : BinaryLoadStore<block_xor2, load, vt, xc, length>; +} + +// Record that INSN is a LOAD AND TEST that can be used to compare +// registers in CLS against zero. The instruction has separate R1 and R2 +// operands, but they must be the same when the instruction is used like this. +multiclass CompareZeroFP<Instruction insn, RegisterOperand cls> { + def : Pat<(z_fcmp cls:$reg, (fpimm0)), (insn cls:$reg, cls:$reg)>; + // The sign of the zero makes no difference. + def : Pat<(z_fcmp cls:$reg, (fpimmneg0)), (insn cls:$reg, cls:$reg)>; +} + +// Use INSN for performing binary operation OPERATION of type VT +// on registers of class CLS. +class BinaryRRWithType<Instruction insn, RegisterOperand cls, + SDPatternOperator operator, ValueType vt> + : Pat<(vt (operator cls:$x, cls:$y)), (insn cls:$x, cls:$y)>; + +// Use INSN to perform conversion operation OPERATOR, with the input being +// TR2 and the output being TR1. SUPPRESS is 4 to suppress inexact conditions +// and 0 to allow them. MODE is the rounding mode to use. +class FPConversion<Instruction insn, SDPatternOperator operator, TypedReg tr1, + TypedReg tr2, bits<3> suppress, bits<4> mode> + : Pat<(tr1.vt (operator (tr2.vt tr2.op:$vec))), + (insn tr2.op:$vec, suppress, mode)>; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZProcessors.td b/contrib/llvm/lib/Target/SystemZ/SystemZProcessors.td new file mode 100644 index 0000000..32fbe5a --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZProcessors.td @@ -0,0 +1,96 @@ +//===-- SystemZ.td - SystemZ processors and features ---------*- tblgen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Processor and feature definitions. 
+// +//===----------------------------------------------------------------------===// + +class SystemZFeature<string extname, string intname, string desc> + : Predicate<"Subtarget->has"##intname##"()">, + AssemblerPredicate<"Feature"##intname, extname>, + SubtargetFeature<extname, "Has"##intname, "true", desc>; + +class SystemZMissingFeature<string intname> + : Predicate<"!Subtarget->has"##intname##"()">; + +def FeatureDistinctOps : SystemZFeature< + "distinct-ops", "DistinctOps", + "Assume that the distinct-operands facility is installed" +>; + +def FeatureLoadStoreOnCond : SystemZFeature< + "load-store-on-cond", "LoadStoreOnCond", + "Assume that the load/store-on-condition facility is installed" +>; + +def FeatureHighWord : SystemZFeature< + "high-word", "HighWord", + "Assume that the high-word facility is installed" +>; + +def FeatureFPExtension : SystemZFeature< + "fp-extension", "FPExtension", + "Assume that the floating-point extension facility is installed" +>; + +def FeaturePopulationCount : SystemZFeature< + "population-count", "PopulationCount", + "Assume that the population-count facility is installed" +>; + +def FeatureFastSerialization : SystemZFeature< + "fast-serialization", "FastSerialization", + "Assume that the fast-serialization facility is installed" +>; + +def FeatureInterlockedAccess1 : SystemZFeature< + "interlocked-access1", "InterlockedAccess1", + "Assume that interlocked-access facility 1 is installed" +>; +def FeatureNoInterlockedAccess1 : SystemZMissingFeature<"InterlockedAccess1">; + +def FeatureMiscellaneousExtensions : SystemZFeature< + "miscellaneous-extensions", "MiscellaneousExtensions", + "Assume that the miscellaneous-extensions facility is installed" +>; + +def FeatureTransactionalExecution : SystemZFeature< + "transactional-execution", "TransactionalExecution", + "Assume that the transactional-execution facility is installed" +>; + +def FeatureProcessorAssist : SystemZFeature< + "processor-assist", "ProcessorAssist", + "Assume that the processor-assist facility is installed" +>; + +def FeatureVector : SystemZFeature< + "vector", "Vector", + "Assume that the vector facility is installed" +>; +def FeatureNoVector : SystemZMissingFeature<"Vector">; + +def : Processor<"generic", NoItineraries, []>; +def : Processor<"z10", NoItineraries, []>; +def : Processor<"z196", NoItineraries, + [FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord, + FeatureFPExtension, FeaturePopulationCount, + FeatureFastSerialization, FeatureInterlockedAccess1]>; +def : Processor<"zEC12", NoItineraries, + [FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord, + FeatureFPExtension, FeaturePopulationCount, + FeatureFastSerialization, FeatureInterlockedAccess1, + FeatureMiscellaneousExtensions, + FeatureTransactionalExecution, FeatureProcessorAssist]>; +def : Processor<"z13", NoItineraries, + [FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord, + FeatureFPExtension, FeaturePopulationCount, + FeatureFastSerialization, FeatureInterlockedAccess1, + FeatureTransactionalExecution, FeatureProcessorAssist, + FeatureVector]>; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp new file mode 100644 index 0000000..6fd24e3 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -0,0 +1,140 @@ +//===-- SystemZRegisterInfo.cpp - SystemZ register information ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of
Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "SystemZInstrInfo.h" +#include "SystemZRegisterInfo.h" +#include "SystemZSubtarget.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetFrameLowering.h" + +using namespace llvm; + +#define GET_REGINFO_TARGET_DESC +#include "SystemZGenRegisterInfo.inc" + +SystemZRegisterInfo::SystemZRegisterInfo() + : SystemZGenRegisterInfo(SystemZ::R14D) {} + +const MCPhysReg * +SystemZRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { + return CSR_SystemZ_SaveList; +} + +const uint32_t * +SystemZRegisterInfo::getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID CC) const { + return CSR_SystemZ_RegMask; +} + +BitVector +SystemZRegisterInfo::getReservedRegs(const MachineFunction &MF) const { + BitVector Reserved(getNumRegs()); + const SystemZFrameLowering *TFI = getFrameLowering(MF); + + if (TFI->hasFP(MF)) { + // R11D is the frame pointer. Reserve all aliases. + Reserved.set(SystemZ::R11D); + Reserved.set(SystemZ::R11L); + Reserved.set(SystemZ::R11H); + Reserved.set(SystemZ::R10Q); + } + + // R15D is the stack pointer. Reserve all aliases. + Reserved.set(SystemZ::R15D); + Reserved.set(SystemZ::R15L); + Reserved.set(SystemZ::R15H); + Reserved.set(SystemZ::R14Q); + return Reserved; +} + +void +SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS) const { + assert(SPAdj == 0 && "Outgoing arguments should be part of the frame"); + + MachineBasicBlock &MBB = *MI->getParent(); + MachineFunction &MF = *MBB.getParent(); + auto *TII = + static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo()); + const SystemZFrameLowering *TFI = getFrameLowering(MF); + DebugLoc DL = MI->getDebugLoc(); + + // Decompose the frame index into a base and offset. + int FrameIndex = MI->getOperand(FIOperandNum).getIndex(); + unsigned BasePtr; + int64_t Offset = (TFI->getFrameIndexReference(MF, FrameIndex, BasePtr) + + MI->getOperand(FIOperandNum + 1).getImm()); + + // Special handling of dbg_value instructions. + if (MI->isDebugValue()) { + MI->getOperand(FIOperandNum).ChangeToRegister(BasePtr, /*isDef*/ false); + MI->getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); + return; + } + + // See if the offset is in range, or if an equivalent instruction that + // accepts the offset exists. + unsigned Opcode = MI->getOpcode(); + unsigned OpcodeForOffset = TII->getOpcodeForOffset(Opcode, Offset); + if (OpcodeForOffset) + MI->getOperand(FIOperandNum).ChangeToRegister(BasePtr, false); + else { + // Create an anchor point that is in range. Start at 0xffff so that + // can use LLILH to load the immediate. + int64_t OldOffset = Offset; + int64_t Mask = 0xffff; + do { + Offset = OldOffset & Mask; + OpcodeForOffset = TII->getOpcodeForOffset(Opcode, Offset); + Mask >>= 1; + assert(Mask && "One offset must be OK"); + } while (!OpcodeForOffset); + + unsigned ScratchReg = + MF.getRegInfo().createVirtualRegister(&SystemZ::ADDR64BitRegClass); + int64_t HighOffset = OldOffset - Offset; + + if (MI->getDesc().TSFlags & SystemZII::HasIndex + && MI->getOperand(FIOperandNum + 2).getReg() == 0) { + // Load the offset into the scratch register and use it as an index. + // The scratch register then dies here. 
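// (Editorial illustration, not part of the patch: if the original offset
// were 0x90000, the loop above would already succeed on its first
// iteration, since Offset = 0x90000 & 0xffff = 0 should be accepted by
// any displacement form; that leaves HighOffset = 0x90000 = 0x0009 << 16,
// the kind of value that, per the comment above, LLILH can load in a
// single instruction.)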
+ TII->loadImmediate(MBB, MI, ScratchReg, HighOffset); + MI->getOperand(FIOperandNum).ChangeToRegister(BasePtr, false); + MI->getOperand(FIOperandNum + 2).ChangeToRegister(ScratchReg, + false, false, true); + } else { + // Load the anchor address into a scratch register. + unsigned LAOpcode = TII->getOpcodeForOffset(SystemZ::LA, HighOffset); + if (LAOpcode) + BuildMI(MBB, MI, DL, TII->get(LAOpcode),ScratchReg) + .addReg(BasePtr).addImm(HighOffset).addReg(0); + else { + // Load the high offset into the scratch register and use it as + // an index. + TII->loadImmediate(MBB, MI, ScratchReg, HighOffset); + BuildMI(MBB, MI, DL, TII->get(SystemZ::AGR),ScratchReg) + .addReg(ScratchReg, RegState::Kill).addReg(BasePtr); + } + + // Use the scratch register as the base. It then dies here. + MI->getOperand(FIOperandNum).ChangeToRegister(ScratchReg, + false, false, true); + } + } + MI->setDesc(TII->get(OpcodeForOffset)); + MI->getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); +} + +unsigned +SystemZRegisterInfo::getFrameRegister(const MachineFunction &MF) const { + const SystemZFrameLowering *TFI = getFrameLowering(MF); + return TFI->hasFP(MF) ? SystemZ::R11D : SystemZ::R15D; +} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h new file mode 100644 index 0000000..a0db5a9 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h @@ -0,0 +1,58 @@ +//===-- SystemZRegisterInfo.h - SystemZ register information ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZREGISTERINFO_H +#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZREGISTERINFO_H + +#include "SystemZ.h" +#include "llvm/Target/TargetRegisterInfo.h" + +#define GET_REGINFO_HEADER +#include "SystemZGenRegisterInfo.inc" + +namespace llvm { + +namespace SystemZ { +// Return the subreg to use for referring to the even and odd registers +// in a GR128 pair. Is32Bit says whether we want a GR32 or GR64. +inline unsigned even128(bool Is32bit) { + return Is32bit ? subreg_hl32 : subreg_h64; +} +inline unsigned odd128(bool Is32bit) { + return Is32bit ? subreg_l32 : subreg_l64; +} +} // end namespace SystemZ + +struct SystemZRegisterInfo : public SystemZGenRegisterInfo { +public: + SystemZRegisterInfo(); + + // Override TargetRegisterInfo.h. 
+ bool requiresRegisterScavenging(const MachineFunction &MF) const override { + return true; + } + bool requiresFrameIndexScavenging(const MachineFunction &MF) const override { + return true; + } + bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override { + return true; + } + const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; + const uint32_t *getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID CC) const override; + BitVector getReservedRegs(const MachineFunction &MF) const override; + void eliminateFrameIndex(MachineBasicBlock::iterator MI, + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS) const override; + unsigned getFrameRegister(const MachineFunction &MF) const override; +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td new file mode 100644 index 0000000..0d8b08b --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td @@ -0,0 +1,286 @@ +//==- SystemZRegisterInfo.td - SystemZ register definitions -*- tablegen -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Class definitions. +//===----------------------------------------------------------------------===// + +class SystemZReg<string n> : Register<n> { + let Namespace = "SystemZ"; +} + +class SystemZRegWithSubregs<string n, list<Register> subregs> + : RegisterWithSubRegs<n, subregs> { + let Namespace = "SystemZ"; +} + +let Namespace = "SystemZ" in { +def subreg_l32 : SubRegIndex<32, 0>; // Also acts as subreg_ll32. +def subreg_h32 : SubRegIndex<32, 32>; // Also acts as subreg_lh32. +def subreg_l64 : SubRegIndex<64, 0>; +def subreg_h64 : SubRegIndex<64, 64>; +def subreg_r32 : SubRegIndex<32, 32>; // Reinterpret a wider reg as 32 bits. +def subreg_r64 : SubRegIndex<64, 64>; // Reinterpret a wider reg as 64 bits. +def subreg_hh32 : ComposedSubRegIndex<subreg_h64, subreg_h32>; +def subreg_hl32 : ComposedSubRegIndex<subreg_h64, subreg_l32>; +def subreg_hr32 : ComposedSubRegIndex<subreg_h64, subreg_r32>; +} + +// Define a register class that contains values of types TYPES and an +// associated operand called NAME. SIZE is the size and alignment +// of the registers and REGLIST is the list of individual registers. +multiclass SystemZRegClass<string name, list<ValueType> types, int size, + dag regList> { + def AsmOperand : AsmOperandClass { + let Name = name; + let ParserMethod = "parse"##name; + let RenderMethod = "addRegOperands"; + } + def Bit : RegisterClass<"SystemZ", types, size, regList> { + let Size = size; + } + def "" : RegisterOperand<!cast<RegisterClass>(name##"Bit")> { + let ParserMatchClass = !cast<AsmOperandClass>(name##"AsmOperand"); + } +} + +//===----------------------------------------------------------------------===// +// General-purpose registers +//===----------------------------------------------------------------------===// + +// Lower 32 bits of one of the 16 64-bit general-purpose registers +class GPR32<bits<16> num, string n> : SystemZReg<n> { + let HWEncoding = num; +} + +// One of the 16 64-bit general-purpose registers. 
+class GPR64<bits<16> num, string n, GPR32 low, GPR32 high> + : SystemZRegWithSubregs<n, [low, high]> { + let HWEncoding = num; + let SubRegIndices = [subreg_l32, subreg_h32]; +} + +// 8 even-odd pairs of GPR64s. +class GPR128<bits<16> num, string n, GPR64 low, GPR64 high> + : SystemZRegWithSubregs<n, [low, high]> { + let HWEncoding = num; + let SubRegIndices = [subreg_l64, subreg_h64]; +} + +// General-purpose registers +foreach I = 0-15 in { + def R#I#L : GPR32<I, "r"#I>; + def R#I#H : GPR32<I, "r"#I>; + def R#I#D : GPR64<I, "r"#I, !cast<GPR32>("R"#I#"L"), !cast<GPR32>("R"#I#"H")>, + DwarfRegNum<[I]>; +} + +foreach I = [0, 2, 4, 6, 8, 10, 12, 14] in { + def R#I#Q : GPR128<I, "r"#I, !cast<GPR64>("R"#!add(I, 1)#"D"), + !cast<GPR64>("R"#I#"D")>; +} + +/// Allocate the callee-saved R6-R13 backwards. That way they can be saved +/// together with R14 and R15 in one prolog instruction. +defm GR32 : SystemZRegClass<"GR32", [i32], 32, + (add (sequence "R%uL", 0, 5), + (sequence "R%uL", 15, 6))>; +defm GRH32 : SystemZRegClass<"GRH32", [i32], 32, + (add (sequence "R%uH", 0, 5), + (sequence "R%uH", 15, 6))>; +defm GR64 : SystemZRegClass<"GR64", [i64], 64, + (add (sequence "R%uD", 0, 5), + (sequence "R%uD", 15, 6))>; + +// Combine the low and high GR32s into a single class. This can only be +// used for virtual registers if the high-word facility is available. +defm GRX32 : SystemZRegClass<"GRX32", [i32], 32, + (add (sequence "R%uL", 0, 5), + (sequence "R%uH", 0, 5), + R15L, R15H, R14L, R14H, R13L, R13H, + R12L, R12H, R11L, R11H, R10L, R10H, + R9L, R9H, R8L, R8H, R7L, R7H, R6L, R6H)>; + +// The architecture doesn't really have any i128 support, so model the +// register pairs as untyped instead. +defm GR128 : SystemZRegClass<"GR128", [untyped], 128, + (add R0Q, R2Q, R4Q, R12Q, R10Q, R8Q, R6Q, R14Q)>; + +// Base and index registers. Everything except R0, which in an address +// context evaluates as 0. +defm ADDR32 : SystemZRegClass<"ADDR32", [i32], 32, (sub GR32Bit, R0L)>; +defm ADDR64 : SystemZRegClass<"ADDR64", [i64], 64, (sub GR64Bit, R0D)>; + +// Not used directly, but needs to exist for ADDR32 and ADDR64 subregs +// of a GR128. +defm ADDR128 : SystemZRegClass<"ADDR128", [untyped], 128, (sub GR128Bit, R0Q)>; + +//===----------------------------------------------------------------------===// +// Floating-point registers +//===----------------------------------------------------------------------===// + +// Maps FPR register numbers to their DWARF encoding. 
+class DwarfMapping<int id> { int Id = id; } + +def F0Dwarf : DwarfMapping<16>; +def F2Dwarf : DwarfMapping<17>; +def F4Dwarf : DwarfMapping<18>; +def F6Dwarf : DwarfMapping<19>; + +def F1Dwarf : DwarfMapping<20>; +def F3Dwarf : DwarfMapping<21>; +def F5Dwarf : DwarfMapping<22>; +def F7Dwarf : DwarfMapping<23>; + +def F8Dwarf : DwarfMapping<24>; +def F10Dwarf : DwarfMapping<25>; +def F12Dwarf : DwarfMapping<26>; +def F14Dwarf : DwarfMapping<27>; + +def F9Dwarf : DwarfMapping<28>; +def F11Dwarf : DwarfMapping<29>; +def F13Dwarf : DwarfMapping<30>; +def F15Dwarf : DwarfMapping<31>; + +def F16Dwarf : DwarfMapping<68>; +def F18Dwarf : DwarfMapping<69>; +def F20Dwarf : DwarfMapping<70>; +def F22Dwarf : DwarfMapping<71>; + +def F17Dwarf : DwarfMapping<72>; +def F19Dwarf : DwarfMapping<73>; +def F21Dwarf : DwarfMapping<74>; +def F23Dwarf : DwarfMapping<75>; + +def F24Dwarf : DwarfMapping<76>; +def F26Dwarf : DwarfMapping<77>; +def F28Dwarf : DwarfMapping<78>; +def F30Dwarf : DwarfMapping<79>; + +def F25Dwarf : DwarfMapping<80>; +def F27Dwarf : DwarfMapping<81>; +def F29Dwarf : DwarfMapping<82>; +def F31Dwarf : DwarfMapping<83>; + +// Upper 32 bits of one of the floating-point registers +class FPR32<bits<16> num, string n> : SystemZReg<n> { + let HWEncoding = num; +} + +// One of the floating-point registers. +class FPR64<bits<16> num, string n, FPR32 high> + : SystemZRegWithSubregs<n, [high]> { + let HWEncoding = num; + let SubRegIndices = [subreg_r32]; +} + +// 8 pairs of FPR64s, with a one-register gap inbetween. +class FPR128<bits<16> num, string n, FPR64 low, FPR64 high> + : SystemZRegWithSubregs<n, [low, high]> { + let HWEncoding = num; + let SubRegIndices = [subreg_l64, subreg_h64]; +} + +// Floating-point registers. Registers 16-31 require the vector facility. +foreach I = 0-15 in { + def F#I#S : FPR32<I, "f"#I>; + def F#I#D : FPR64<I, "f"#I, !cast<FPR32>("F"#I#"S")>, + DwarfRegNum<[!cast<DwarfMapping>("F"#I#"Dwarf").Id]>; +} +foreach I = 16-31 in { + def F#I#S : FPR32<I, "v"#I>; + def F#I#D : FPR64<I, "v"#I, !cast<FPR32>("F"#I#"S")>, + DwarfRegNum<[!cast<DwarfMapping>("F"#I#"Dwarf").Id]>; +} + +foreach I = [0, 1, 4, 5, 8, 9, 12, 13] in { + def F#I#Q : FPR128<I, "f"#I, !cast<FPR64>("F"#!add(I, 2)#"D"), + !cast<FPR64>("F"#I#"D")>; +} + +// There's no store-multiple instruction for FPRs, so we're not fussy +// about the order in which call-saved registers are allocated. +defm FP32 : SystemZRegClass<"FP32", [f32], 32, (sequence "F%uS", 0, 15)>; +defm FP64 : SystemZRegClass<"FP64", [f64], 64, (sequence "F%uD", 0, 15)>; +defm FP128 : SystemZRegClass<"FP128", [f128], 128, + (add F0Q, F1Q, F4Q, F5Q, F8Q, F9Q, F12Q, F13Q)>; + +//===----------------------------------------------------------------------===// +// Vector registers +//===----------------------------------------------------------------------===// + +// A full 128-bit vector register, with an FPR64 as its high part. +class VR128<bits<16> num, string n, FPR64 high> + : SystemZRegWithSubregs<n, [high]> { + let HWEncoding = num; + let SubRegIndices = [subreg_r64]; +} + +// Full vector registers. +foreach I = 0-31 in { + def V#I : VR128<I, "v"#I, !cast<FPR64>("F"#I#"D")>, + DwarfRegNum<[!cast<DwarfMapping>("F"#I#"Dwarf").Id]>; +} + +// Class used to store 32-bit values in the first element of a vector +// register. f32 scalars are used for the WLEDB and WLDEB instructions. 
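+// Note that the allocation orders below try f0-f7 and the vector-only registers before f8-f15; f8-f15 are call-saved in the ELF ABI, so this likely keeps them free unless register pressure forces their use.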
+defm VR32 : SystemZRegClass<"VR32", [f32, v4i8, v2i16], 32, + (add (sequence "F%uS", 0, 7), + (sequence "F%uS", 16, 31), + (sequence "F%uS", 8, 15))>; + +// Class used to store 64-bit values in the upper half of a vector register. +// The vector facility also includes scalar f64 instructions that operate +// on the full vector register set. +defm VR64 : SystemZRegClass<"VR64", [f64, v8i8, v4i16, v2i32, v2f32], 64, + (add (sequence "F%uD", 0, 7), + (sequence "F%uD", 16, 31), + (sequence "F%uD", 8, 15))>; + +// The subset of vector registers that can be used for floating-point +// operations too. +defm VF128 : SystemZRegClass<"VF128", + [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128, + (sequence "V%u", 0, 15)>; + +// All vector registers. +defm VR128 : SystemZRegClass<"VR128", + [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128, + (add (sequence "V%u", 0, 7), + (sequence "V%u", 16, 31), + (sequence "V%u", 8, 15))>; + +// Attaches a ValueType to a register operand, to make the instruction +// definitions easier. +class TypedReg<ValueType vtin, RegisterOperand opin> { + ValueType vt = vtin; + RegisterOperand op = opin; +} + +def v32eb : TypedReg<f32, VR32>; +def v64g : TypedReg<i64, VR64>; +def v64db : TypedReg<f64, VR64>; +def v128b : TypedReg<v16i8, VR128>; +def v128h : TypedReg<v8i16, VR128>; +def v128f : TypedReg<v4i32, VR128>; +def v128g : TypedReg<v2i64, VR128>; +def v128q : TypedReg<v16i8, VR128>; +def v128eb : TypedReg<v4f32, VR128>; +def v128db : TypedReg<v2f64, VR128>; +def v128any : TypedReg<untyped, VR128>; + +//===----------------------------------------------------------------------===// +// Other registers +//===----------------------------------------------------------------------===// + +// The 2-bit condition code field of the PSW. Every register named in an +// inline asm needs a class associated with it. +def CC : SystemZReg<"cc">; +let isAllocatable = 0 in + def CCRegs : RegisterClass<"SystemZ", [i32], 32, (add CC)>; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp new file mode 100644 index 0000000..178aa38 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp @@ -0,0 +1,285 @@ +//===-- SystemZSelectionDAGInfo.cpp - SystemZ SelectionDAG Info -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SystemZSelectionDAGInfo class. +// +//===----------------------------------------------------------------------===// + +#include "SystemZTargetMachine.h" +#include "llvm/CodeGen/SelectionDAG.h" + +using namespace llvm; + +#define DEBUG_TYPE "systemz-selectiondag-info" + +// Decide whether it is best to use a loop or straight-line code for +// a block operation of Size bytes with source address Src and destination +// address Dest. Sequence is the opcode to use for straight-line code +// (such as MVC) and Loop is the opcode to use for loops (such as MVC_LOOP). +// Return the chain for the completed operation. +static SDValue emitMemMem(SelectionDAG &DAG, SDLoc DL, unsigned Sequence, + unsigned Loop, SDValue Chain, SDValue Dst, + SDValue Src, uint64_t Size) { + EVT PtrVT = Src.getValueType(); + // The heuristic we use is to prefer loops for anything that would + // require 7 or more MVCs. 
With these kinds of sizes there isn't + // much to choose between straight-line code and looping code, + // since the time will be dominated by the MVCs themselves. + // However, the loop has 4 or 5 instructions (depending on whether + // the base addresses can be proved equal), so there doesn't seem + // much point using a loop for 5 * 256 bytes or fewer. Anything in + // the range (5 * 256, 6 * 256) will need another instruction after + // the loop, so it doesn't seem worth using a loop then either. + // The next value up, 6 * 256, can be implemented in the same + // number of straight-line MVCs as 6 * 256 - 1. + if (Size > 6 * 256) + return DAG.getNode(Loop, DL, MVT::Other, Chain, Dst, Src, + DAG.getConstant(Size, DL, PtrVT), + DAG.getConstant(Size / 256, DL, PtrVT)); + return DAG.getNode(Sequence, DL, MVT::Other, Chain, Dst, Src, + DAG.getConstant(Size, DL, PtrVT)); +} + +SDValue SystemZSelectionDAGInfo:: +EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Dst, SDValue Src, SDValue Size, unsigned Align, + bool IsVolatile, bool AlwaysInline, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const { + if (IsVolatile) + return SDValue(); + + if (auto *CSize = dyn_cast<ConstantSDNode>(Size)) + return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP, + Chain, Dst, Src, CSize->getZExtValue()); + return SDValue(); +} + +// Handle a memset of 1, 2, 4 or 8 bytes with the operands given by +// Chain, Dst, ByteVal and Size. These cases are expected to use +// MVI, MVHHI, MVHI and MVGHI respectively. +static SDValue memsetStore(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Dst, uint64_t ByteVal, uint64_t Size, + unsigned Align, + MachinePointerInfo DstPtrInfo) { + uint64_t StoreVal = ByteVal; + for (unsigned I = 1; I < Size; ++I) + StoreVal |= ByteVal << (I * 8); + return DAG.getStore(Chain, DL, + DAG.getConstant(StoreVal, DL, + MVT::getIntegerVT(Size * 8)), + Dst, DstPtrInfo, false, false, Align); +} + +SDValue SystemZSelectionDAGInfo:: +EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Dst, SDValue Byte, SDValue Size, + unsigned Align, bool IsVolatile, + MachinePointerInfo DstPtrInfo) const { + EVT PtrVT = Dst.getValueType(); + + if (IsVolatile) + return SDValue(); + + if (auto *CSize = dyn_cast<ConstantSDNode>(Size)) { + uint64_t Bytes = CSize->getZExtValue(); + if (Bytes == 0) + return SDValue(); + if (auto *CByte = dyn_cast<ConstantSDNode>(Byte)) { + // Handle cases that can be done using at most two of + // MVI, MVHI, MVHHI and MVGHI. The latter two can only be + // used if ByteVal is all zeros or all ones; in other casees, + // we can move at most 2 halfwords. + uint64_t ByteVal = CByte->getZExtValue(); + if (ByteVal == 0 || ByteVal == 255 ? + Bytes <= 16 && countPopulation(Bytes) <= 2 : + Bytes <= 4) { + unsigned Size1 = Bytes == 16 ? 8 : 1 << findLastSet(Bytes); + unsigned Size2 = Bytes - Size1; + SDValue Chain1 = memsetStore(DAG, DL, Chain, Dst, ByteVal, Size1, + Align, DstPtrInfo); + if (Size2 == 0) + return Chain1; + Dst = DAG.getNode(ISD::ADD, DL, PtrVT, Dst, + DAG.getConstant(Size1, DL, PtrVT)); + DstPtrInfo = DstPtrInfo.getWithOffset(Size1); + SDValue Chain2 = memsetStore(DAG, DL, Chain, Dst, ByteVal, Size2, + std::min(Align, Size1), DstPtrInfo); + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain1, Chain2); + } + } else { + // Handle one and two bytes using STC. 
+ if (Bytes <= 2) { + SDValue Chain1 = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, + false, false, Align); + if (Bytes == 1) + return Chain1; + SDValue Dst2 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst, + DAG.getConstant(1, DL, PtrVT)); + SDValue Chain2 = DAG.getStore(Chain, DL, Byte, Dst2, + DstPtrInfo.getWithOffset(1), + false, false, 1); + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain1, Chain2); + } + } + assert(Bytes >= 2 && "Should have dealt with 0- and 1-byte cases already"); + + // Handle the special case of a memset of 0, which can use XC. + auto *CByte = dyn_cast<ConstantSDNode>(Byte); + if (CByte && CByte->getZExtValue() == 0) + return emitMemMem(DAG, DL, SystemZISD::XC, SystemZISD::XC_LOOP, + Chain, Dst, Dst, Bytes); + + // Copy the byte to the first location and then use MVC to copy + // it to the rest. + Chain = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, + false, false, Align); + SDValue DstPlus1 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst, + DAG.getConstant(1, DL, PtrVT)); + return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP, + Chain, DstPlus1, Dst, Bytes - 1); + } + return SDValue(); +} + +// Use CLC to compare [Src1, Src1 + Size) with [Src2, Src2 + Size), +// deciding whether to use a loop or straight-line code. +static SDValue emitCLC(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Src1, SDValue Src2, uint64_t Size) { + SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); + EVT PtrVT = Src1.getValueType(); + // A two-CLC sequence is a clear win over a loop, not least because it + // needs only one branch. A three-CLC sequence needs the same number + // of branches as a loop (i.e. 2), but is shorter. That brings us to + // lengths greater than 768 bytes. It seems relatively likely that + // a difference will be found within the first 768 bytes, so we just + // optimize for the smallest number of branch instructions, in order + // to avoid polluting the prediction buffer too much. A loop only ever + // needs 2 branches, whereas a straight-line sequence would need 3 or more. + if (Size > 3 * 256) + return DAG.getNode(SystemZISD::CLC_LOOP, DL, VTs, Chain, Src1, Src2, + DAG.getConstant(Size, DL, PtrVT), + DAG.getConstant(Size / 256, DL, PtrVT)); + return DAG.getNode(SystemZISD::CLC, DL, VTs, Chain, Src1, Src2, + DAG.getConstant(Size, DL, PtrVT)); +} + +// Convert the current CC value into an integer that is 0 if CC == 0, +// less than zero if CC == 1 and greater than zero if CC >= 2. +// The sequence starts with IPM, which puts CC into bits 29 and 28 +// of an integer and clears bits 30 and 31. 
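+// Shifting right by SystemZ::IPM_CC moves CC into the two low bits; the rotate left by 31 (a rotate right by 1) then maps CC == 0 to 0, CC == 1 to 0x80000000 (negative) and CC == 2 to 1 (positive).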
+static SDValue addIPMSequence(SDLoc DL, SDValue Glue, SelectionDAG &DAG) { + SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue); + SDValue SRL = DAG.getNode(ISD::SRL, DL, MVT::i32, IPM, + DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32)); + SDValue ROTL = DAG.getNode(ISD::ROTL, DL, MVT::i32, SRL, + DAG.getConstant(31, DL, MVT::i32)); + return ROTL; +} + +std::pair<SDValue, SDValue> SystemZSelectionDAGInfo:: +EmitTargetCodeForMemcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Src1, SDValue Src2, SDValue Size, + MachinePointerInfo Op1PtrInfo, + MachinePointerInfo Op2PtrInfo) const { + if (auto *CSize = dyn_cast<ConstantSDNode>(Size)) { + uint64_t Bytes = CSize->getZExtValue(); + assert(Bytes > 0 && "Caller should have handled 0-size case"); + Chain = emitCLC(DAG, DL, Chain, Src1, Src2, Bytes); + SDValue Glue = Chain.getValue(1); + return std::make_pair(addIPMSequence(DL, Glue, DAG), Chain); + } + return std::make_pair(SDValue(), SDValue()); +} + +std::pair<SDValue, SDValue> SystemZSelectionDAGInfo:: +EmitTargetCodeForMemchr(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Src, SDValue Char, SDValue Length, + MachinePointerInfo SrcPtrInfo) const { + // Use SRST to find the character. End is its address on success. + EVT PtrVT = Src.getValueType(); + SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other, MVT::Glue); + Length = DAG.getZExtOrTrunc(Length, DL, PtrVT); + Char = DAG.getZExtOrTrunc(Char, DL, MVT::i32); + Char = DAG.getNode(ISD::AND, DL, MVT::i32, Char, + DAG.getConstant(255, DL, MVT::i32)); + SDValue Limit = DAG.getNode(ISD::ADD, DL, PtrVT, Src, Length); + SDValue End = DAG.getNode(SystemZISD::SEARCH_STRING, DL, VTs, Chain, + Limit, Src, Char); + Chain = End.getValue(1); + SDValue Glue = End.getValue(2); + + // Now select between End and null, depending on whether the character + // was found. + SDValue Ops[] = {End, DAG.getConstant(0, DL, PtrVT), + DAG.getConstant(SystemZ::CCMASK_SRST, DL, MVT::i32), + DAG.getConstant(SystemZ::CCMASK_SRST_FOUND, DL, MVT::i32), + Glue}; + VTs = DAG.getVTList(PtrVT, MVT::Glue); + End = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, Ops); + return std::make_pair(End, Chain); +} + +std::pair<SDValue, SDValue> SystemZSelectionDAGInfo:: +EmitTargetCodeForStrcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Dest, SDValue Src, + MachinePointerInfo DestPtrInfo, + MachinePointerInfo SrcPtrInfo, bool isStpcpy) const { + SDVTList VTs = DAG.getVTList(Dest.getValueType(), MVT::Other); + SDValue EndDest = DAG.getNode(SystemZISD::STPCPY, DL, VTs, Chain, Dest, Src, + DAG.getConstant(0, DL, MVT::i32)); + return std::make_pair(isStpcpy ? EndDest : Dest, EndDest.getValue(1)); +} + +std::pair<SDValue, SDValue> SystemZSelectionDAGInfo:: +EmitTargetCodeForStrcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Src1, SDValue Src2, + MachinePointerInfo Op1PtrInfo, + MachinePointerInfo Op2PtrInfo) const { + SDVTList VTs = DAG.getVTList(Src1.getValueType(), MVT::Other, MVT::Glue); + SDValue Unused = DAG.getNode(SystemZISD::STRCMP, DL, VTs, Chain, Src1, Src2, + DAG.getConstant(0, DL, MVT::i32)); + Chain = Unused.getValue(1); + SDValue Glue = Chain.getValue(2); + return std::make_pair(addIPMSequence(DL, Glue, DAG), Chain); +} + +// Search from Src for a null character, stopping once Src reaches Limit. +// Return a pair of values, the first being the number of nonnull characters +// and the second being the out chain. +// +// This can be used for strlen by setting Limit to 0. 
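+// (The strnlen lowering below passes Src + MaxLength as the limit instead, so the search stops after at most MaxLength bytes.)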
+static std::pair<SDValue, SDValue> getBoundedStrlen(SelectionDAG &DAG, SDLoc DL, + SDValue Chain, SDValue Src, + SDValue Limit) { + EVT PtrVT = Src.getValueType(); + SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other, MVT::Glue); + SDValue End = DAG.getNode(SystemZISD::SEARCH_STRING, DL, VTs, Chain, + Limit, Src, DAG.getConstant(0, DL, MVT::i32)); + Chain = End.getValue(1); + SDValue Len = DAG.getNode(ISD::SUB, DL, PtrVT, End, Src); + return std::make_pair(Len, Chain); +} + +std::pair<SDValue, SDValue> SystemZSelectionDAGInfo:: +EmitTargetCodeForStrlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Src, MachinePointerInfo SrcPtrInfo) const { + EVT PtrVT = Src.getValueType(); + return getBoundedStrlen(DAG, DL, Chain, Src, DAG.getConstant(0, DL, PtrVT)); +} + +std::pair<SDValue, SDValue> SystemZSelectionDAGInfo:: +EmitTargetCodeForStrnlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Src, SDValue MaxLength, + MachinePointerInfo SrcPtrInfo) const { + EVT PtrVT = Src.getValueType(); + MaxLength = DAG.getZExtOrTrunc(MaxLength, DL, PtrVT); + SDValue Limit = DAG.getNode(ISD::ADD, DL, PtrVT, Src, MaxLength); + return getBoundedStrlen(DAG, DL, Chain, Src, Limit); +} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h new file mode 100644 index 0000000..246fa3e --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h @@ -0,0 +1,76 @@ +//===-- SystemZSelectionDAGInfo.h - SystemZ SelectionDAG Info ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the SystemZ subclass for TargetSelectionDAGInfo. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZSELECTIONDAGINFO_H +#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZSELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class SystemZTargetMachine; + +class SystemZSelectionDAGInfo : public TargetSelectionDAGInfo { +public: + explicit SystemZSelectionDAGInfo() = default; + + SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, + bool IsVolatile, bool AlwaysInline, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const override; + + SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, + SDValue Chain, SDValue Dst, SDValue Byte, + SDValue Size, unsigned Align, bool IsVolatile, + MachinePointerInfo DstPtrInfo) const override; + + std::pair<SDValue, SDValue> + EmitTargetCodeForMemcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Src1, SDValue Src2, SDValue Size, + MachinePointerInfo Op1PtrInfo, + MachinePointerInfo Op2PtrInfo) const override; + + std::pair<SDValue, SDValue> + EmitTargetCodeForMemchr(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Src, SDValue Char, SDValue Length, + MachinePointerInfo SrcPtrInfo) const override; + + std::pair<SDValue, SDValue> + EmitTargetCodeForStrcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Dest, SDValue Src, + MachinePointerInfo DestPtrInfo, + MachinePointerInfo SrcPtrInfo, + bool isStpcpy) const override; + + std::pair<SDValue, SDValue> + EmitTargetCodeForStrcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Src1, SDValue Src2, + MachinePointerInfo Op1PtrInfo, + MachinePointerInfo Op2PtrInfo) const override; + + std::pair<SDValue, SDValue> + EmitTargetCodeForStrlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Src, + MachinePointerInfo SrcPtrInfo) const override; + + std::pair<SDValue, SDValue> + EmitTargetCodeForStrnlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Src, SDValue MaxLength, + MachinePointerInfo SrcPtrInfo) const override; +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp new file mode 100644 index 0000000..846edd5 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp @@ -0,0 +1,277 @@ +//===-- SystemZShortenInst.cpp - Instruction-shortening pass --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass tries to replace instructions with shorter forms. For example, +// IILF can be replaced with LLILL or LLILH if the constant fits and if the +// other 32 bits of the GR64 destination are not live. 
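+// Vector-style scalar operations such as WFADB or WFSQDB are likewise rewritten to the shorter non-vector opcodes (ADBR, SQDBR, ...) when their registers are in the range 0-15 and, where the new opcode clobbers CC, when CC is dead.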
+// +//===----------------------------------------------------------------------===// + +#include "SystemZTargetMachine.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/Target/TargetRegisterInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "systemz-shorten-inst" + +namespace { +class SystemZShortenInst : public MachineFunctionPass { +public: + static char ID; + SystemZShortenInst(const SystemZTargetMachine &tm); + + const char *getPassName() const override { + return "SystemZ Instruction Shortening"; + } + + bool processBlock(MachineBasicBlock &MBB); + bool runOnMachineFunction(MachineFunction &F) override; + +private: + bool shortenIIF(MachineInstr &MI, unsigned LLIxL, unsigned LLIxH); + bool shortenOn0(MachineInstr &MI, unsigned Opcode); + bool shortenOn01(MachineInstr &MI, unsigned Opcode); + bool shortenOn001(MachineInstr &MI, unsigned Opcode); + bool shortenOn001AddCC(MachineInstr &MI, unsigned Opcode); + bool shortenFPConv(MachineInstr &MI, unsigned Opcode); + + const SystemZInstrInfo *TII; + const TargetRegisterInfo *TRI; + LivePhysRegs LiveRegs; +}; + +char SystemZShortenInst::ID = 0; +} // end anonymous namespace + +FunctionPass *llvm::createSystemZShortenInstPass(SystemZTargetMachine &TM) { + return new SystemZShortenInst(TM); +} + +SystemZShortenInst::SystemZShortenInst(const SystemZTargetMachine &tm) + : MachineFunctionPass(ID), TII(nullptr) {} + +// Tie operands if MI has become a two-address instruction. +static void tieOpsIfNeeded(MachineInstr &MI) { + if (MI.getDesc().getOperandConstraint(0, MCOI::TIED_TO) && + !MI.getOperand(0).isTied()) + MI.tieOperands(0, 1); +} + +// MI loads one word of a GPR using an IIxF instruction and LLIxL and LLIxH +// are the halfword immediate loads for the same word. Try to use one of them +// instead of IIxF. +bool SystemZShortenInst::shortenIIF(MachineInstr &MI, + unsigned LLIxL, unsigned LLIxH) { + unsigned Reg = MI.getOperand(0).getReg(); + // The new opcode will clear the other half of the GR64 reg, so + // cancel if that is live. + unsigned thisSubRegIdx = (SystemZ::GRH32BitRegClass.contains(Reg) ? + SystemZ::subreg_h32 : SystemZ::subreg_l32); + unsigned otherSubRegIdx = (thisSubRegIdx == SystemZ::subreg_l32 ? + SystemZ::subreg_h32 : SystemZ::subreg_l32); + unsigned GR64BitReg = TRI->getMatchingSuperReg(Reg, thisSubRegIdx, + &SystemZ::GR64BitRegClass); + unsigned OtherReg = TRI->getSubReg(GR64BitReg, otherSubRegIdx); + if (LiveRegs.contains(OtherReg)) + return false; + + uint64_t Imm = MI.getOperand(1).getImm(); + if (SystemZ::isImmLL(Imm)) { + MI.setDesc(TII->get(LLIxL)); + MI.getOperand(0).setReg(SystemZMC::getRegAsGR64(Reg)); + return true; + } + if (SystemZ::isImmLH(Imm)) { + MI.setDesc(TII->get(LLIxH)); + MI.getOperand(0).setReg(SystemZMC::getRegAsGR64(Reg)); + MI.getOperand(1).setImm(Imm >> 16); + return true; + } + return false; +} + +// Change MI's opcode to Opcode if register operand 0 has a 4-bit encoding. +bool SystemZShortenInst::shortenOn0(MachineInstr &MI, unsigned Opcode) { + if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) < 16) { + MI.setDesc(TII->get(Opcode)); + return true; + } + return false; +} + +// Change MI's opcode to Opcode if register operands 0 and 1 have a +// 4-bit encoding. 
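+// For example, a WFSQDB on %f0-%f15 can become SQDBR; %v16-%v31 have no 4-bit encoding and must keep the vector form.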
+bool SystemZShortenInst::shortenOn01(MachineInstr &MI, unsigned Opcode) { + if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) < 16 && + SystemZMC::getFirstReg(MI.getOperand(1).getReg()) < 16) { + MI.setDesc(TII->get(Opcode)); + return true; + } + return false; +} + +// Change MI's opcode to Opcode if register operands 0, 1 and 2 have a +// 4-bit encoding and if operands 0 and 1 are tied. Also ties op 0 +// with op 1, if MI becomes 2-address. +bool SystemZShortenInst::shortenOn001(MachineInstr &MI, unsigned Opcode) { + if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) < 16 && + MI.getOperand(1).getReg() == MI.getOperand(0).getReg() && + SystemZMC::getFirstReg(MI.getOperand(2).getReg()) < 16) { + MI.setDesc(TII->get(Opcode)); + tieOpsIfNeeded(MI); + return true; + } + return false; +} + +// Calls shortenOn001 if CC is not live. A CC def operand is added in +// case of success. +bool SystemZShortenInst::shortenOn001AddCC(MachineInstr &MI, + unsigned Opcode) { + if (!LiveRegs.contains(SystemZ::CC) && shortenOn001(MI, Opcode)) { + MachineInstrBuilder(*MI.getParent()->getParent(), &MI) + .addReg(SystemZ::CC, RegState::ImplicitDefine); + return true; + } + return false; +} + +// MI is a vector-style conversion instruction with the operand order: +// destination, source, exact-suppress, rounding-mode. If both registers +// have a 4-bit encoding then change it to Opcode, which has operand order: +// destination, rounding-mode, source, exact-suppress. +bool SystemZShortenInst::shortenFPConv(MachineInstr &MI, unsigned Opcode) { + if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) < 16 && + SystemZMC::getFirstReg(MI.getOperand(1).getReg()) < 16) { + MachineOperand Dest(MI.getOperand(0)); + MachineOperand Src(MI.getOperand(1)); + MachineOperand Suppress(MI.getOperand(2)); + MachineOperand Mode(MI.getOperand(3)); + MI.RemoveOperand(3); + MI.RemoveOperand(2); + MI.RemoveOperand(1); + MI.RemoveOperand(0); + MI.setDesc(TII->get(Opcode)); + MachineInstrBuilder(*MI.getParent()->getParent(), &MI) + .addOperand(Dest) + .addOperand(Mode) + .addOperand(Src) + .addOperand(Suppress); + return true; + } + return false; +} + +// Process all instructions in MBB. Return true if something changed. +bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) { + bool Changed = false; + + // Set up the set of live registers at the end of MBB (live out). + LiveRegs.clear(); + LiveRegs.addLiveOuts(&MBB); + + // Iterate backwards through the block looking for instructions to change.
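+ // Walking backwards keeps LiveRegs in step (see stepBackward below), so the helpers above see which of CC and the other GR64 half are live after each instruction.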
+ for (auto MBBI = MBB.rbegin(), MBBE = MBB.rend(); MBBI != MBBE; ++MBBI) { + MachineInstr &MI = *MBBI; + switch (MI.getOpcode()) { + case SystemZ::IILF: + Changed |= shortenIIF(MI, SystemZ::LLILL, SystemZ::LLILH); + break; + + case SystemZ::IIHF: + Changed |= shortenIIF(MI, SystemZ::LLIHL, SystemZ::LLIHH); + break; + + case SystemZ::WFADB: + Changed |= shortenOn001AddCC(MI, SystemZ::ADBR); + break; + + case SystemZ::WFDDB: + Changed |= shortenOn001(MI, SystemZ::DDBR); + break; + + case SystemZ::WFIDB: + Changed |= shortenFPConv(MI, SystemZ::FIDBRA); + break; + + case SystemZ::WLDEB: + Changed |= shortenOn01(MI, SystemZ::LDEBR); + break; + + case SystemZ::WLEDB: + Changed |= shortenFPConv(MI, SystemZ::LEDBRA); + break; + + case SystemZ::WFMDB: + Changed |= shortenOn001(MI, SystemZ::MDBR); + break; + + case SystemZ::WFLCDB: + Changed |= shortenOn01(MI, SystemZ::LCDFR); + break; + + case SystemZ::WFLNDB: + Changed |= shortenOn01(MI, SystemZ::LNDFR); + break; + + case SystemZ::WFLPDB: + Changed |= shortenOn01(MI, SystemZ::LPDFR); + break; + + case SystemZ::WFSQDB: + Changed |= shortenOn01(MI, SystemZ::SQDBR); + break; + + case SystemZ::WFSDB: + Changed |= shortenOn001AddCC(MI, SystemZ::SDBR); + break; + + case SystemZ::WFCDB: + Changed |= shortenOn01(MI, SystemZ::CDBR); + break; + + case SystemZ::VL32: + // For z13 we prefer LDE over LE to avoid partial register dependencies. + Changed |= shortenOn0(MI, SystemZ::LDE32); + break; + + case SystemZ::VST32: + Changed |= shortenOn0(MI, SystemZ::STE); + break; + + case SystemZ::VL64: + Changed |= shortenOn0(MI, SystemZ::LD); + break; + + case SystemZ::VST64: + Changed |= shortenOn0(MI, SystemZ::STD); + break; + } + + LiveRegs.stepBackward(MI); + } + + return Changed; +} + +bool SystemZShortenInst::runOnMachineFunction(MachineFunction &F) { + const SystemZSubtarget &ST = F.getSubtarget<SystemZSubtarget>(); + TII = ST.getInstrInfo(); + TRI = ST.getRegisterInfo(); + LiveRegs.init(TRI); + + bool Changed = false; + for (auto &MBB : F) + Changed |= processBlock(MBB); + + return Changed; +} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp new file mode 100644 index 0000000..0b49fcd --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp @@ -0,0 +1,72 @@ +//===-- SystemZSubtarget.cpp - SystemZ subtarget information --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "SystemZSubtarget.h" +#include "MCTargetDesc/SystemZMCTargetDesc.h" +#include "llvm/IR/GlobalValue.h" + +using namespace llvm; + +#define DEBUG_TYPE "systemz-subtarget" + +#define GET_SUBTARGETINFO_TARGET_DESC +#define GET_SUBTARGETINFO_CTOR +#include "SystemZGenSubtargetInfo.inc" + +// Pin the vtable to this file. +void SystemZSubtarget::anchor() {} + +SystemZSubtarget & +SystemZSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { + std::string CPUName = CPU; + if (CPUName.empty()) + CPUName = "generic"; + // Parse features string. 
+ ParseSubtargetFeatures(CPUName, FS); + return *this; +} + +SystemZSubtarget::SystemZSubtarget(const Triple &TT, const std::string &CPU, + const std::string &FS, + const TargetMachine &TM) + : SystemZGenSubtargetInfo(TT, CPU, FS), HasDistinctOps(false), + HasLoadStoreOnCond(false), HasHighWord(false), HasFPExtension(false), + HasPopulationCount(false), HasFastSerialization(false), + HasInterlockedAccess1(false), HasMiscellaneousExtensions(false), + HasTransactionalExecution(false), HasProcessorAssist(false), + HasVector(false), TargetTriple(TT), + InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this), + TSInfo(), FrameLowering() {} + +// Return true if GV binds locally under reloc model RM. +static bool bindsLocally(const GlobalValue *GV, Reloc::Model RM) { + // For non-PIC, all symbols bind locally. + if (RM == Reloc::Static) + return true; + + return GV->hasLocalLinkage() || !GV->hasDefaultVisibility(); +} + +bool SystemZSubtarget::isPC32DBLSymbol(const GlobalValue *GV, + Reloc::Model RM, + CodeModel::Model CM) const { + // PC32DBL accesses require the low bit to be clear. Note that a zero + // value selects the default alignment and is therefore OK. + if (GV->getAlignment() == 1) + return false; + + // For the small model, all locally-binding symbols are in range. + if (CM == CodeModel::Small) + return bindsLocally(GV, RM); + + // For Medium and above, assume that the symbol is not within the 4GB range. + // Taking the address of locally-defined text would be OK, but that + // case isn't easy to detect. + return false; +} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h new file mode 100644 index 0000000..f7eaf01c --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h @@ -0,0 +1,126 @@ +//===-- SystemZSubtarget.h - SystemZ subtarget information -----*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the SystemZ specific subclass of TargetSubtargetInfo. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZSUBTARGET_H +#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZSUBTARGET_H + +#include "SystemZFrameLowering.h" +#include "SystemZISelLowering.h" +#include "SystemZInstrInfo.h" +#include "SystemZRegisterInfo.h" +#include "SystemZSelectionDAGInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <string> + +#define GET_SUBTARGETINFO_HEADER +#include "SystemZGenSubtargetInfo.inc" + +namespace llvm { +class GlobalValue; +class StringRef; + +class SystemZSubtarget : public SystemZGenSubtargetInfo { + virtual void anchor(); +protected: + bool HasDistinctOps; + bool HasLoadStoreOnCond; + bool HasHighWord; + bool HasFPExtension; + bool HasPopulationCount; + bool HasFastSerialization; + bool HasInterlockedAccess1; + bool HasMiscellaneousExtensions; + bool HasTransactionalExecution; + bool HasProcessorAssist; + bool HasVector; + +private: + Triple TargetTriple; + SystemZInstrInfo InstrInfo; + SystemZTargetLowering TLInfo; + SystemZSelectionDAGInfo TSInfo; + SystemZFrameLowering FrameLowering; + + SystemZSubtarget &initializeSubtargetDependencies(StringRef CPU, + StringRef FS); +public: + SystemZSubtarget(const Triple &TT, const std::string &CPU, + const std::string &FS, const TargetMachine &TM); + + const TargetFrameLowering *getFrameLowering() const override { + return &FrameLowering; + } + const SystemZInstrInfo *getInstrInfo() const override { return &InstrInfo; } + const SystemZRegisterInfo *getRegisterInfo() const override { + return &InstrInfo.getRegisterInfo(); + } + const SystemZTargetLowering *getTargetLowering() const override { + return &TLInfo; + } + const TargetSelectionDAGInfo *getSelectionDAGInfo() const override { + return &TSInfo; + } + + // This is important for reducing register pressure in vector code. + bool useAA() const override { return true; } + + // Automatically generated by tblgen. + void ParseSubtargetFeatures(StringRef CPU, StringRef FS); + + // Return true if the target has the distinct-operands facility. + bool hasDistinctOps() const { return HasDistinctOps; } + + // Return true if the target has the load/store-on-condition facility. + bool hasLoadStoreOnCond() const { return HasLoadStoreOnCond; } + + // Return true if the target has the high-word facility. + bool hasHighWord() const { return HasHighWord; } + + // Return true if the target has the floating-point extension facility. + bool hasFPExtension() const { return HasFPExtension; } + + // Return true if the target has the population-count facility. + bool hasPopulationCount() const { return HasPopulationCount; } + + // Return true if the target has the fast-serialization facility. + bool hasFastSerialization() const { return HasFastSerialization; } + + // Return true if the target has interlocked-access facility 1. + bool hasInterlockedAccess1() const { return HasInterlockedAccess1; } + + // Return true if the target has the miscellaneous-extensions facility. + bool hasMiscellaneousExtensions() const { + return HasMiscellaneousExtensions; + } + + // Return true if the target has the transactional-execution facility. + bool hasTransactionalExecution() const { return HasTransactionalExecution; } + + // Return true if the target has the processor-assist facility. + bool hasProcessorAssist() const { return HasProcessorAssist; } + + // Return true if the target has the vector facility. 
+ bool hasVector() const { return HasVector; } + + // Return true if GV can be accessed using LARL for reloc model RM + // and code model CM. + bool isPC32DBLSymbol(const GlobalValue *GV, Reloc::Model RM, + CodeModel::Model CM) const; + + bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); } +}; +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp new file mode 100644 index 0000000..f305e85 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -0,0 +1,187 @@ +//===-- SystemZTargetMachine.cpp - Define TargetMachine for SystemZ -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "SystemZTargetMachine.h" +#include "SystemZTargetTransformInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" + +using namespace llvm; + +extern cl::opt<bool> MISchedPostRA; +extern "C" void LLVMInitializeSystemZTarget() { + // Register the target. + RegisterTargetMachine<SystemZTargetMachine> X(TheSystemZTarget); +} + +// Determine whether we use the vector ABI. +static bool UsesVectorABI(StringRef CPU, StringRef FS) { + // We use the vector ABI whenever the vector facility is avaiable. + // This is the case by default if CPU is z13 or later, and can be + // overridden via "[+-]vector" feature string elements. + bool VectorABI = true; + if (CPU.empty() || CPU == "generic" || + CPU == "z10" || CPU == "z196" || CPU == "zEC12") + VectorABI = false; + + SmallVector<StringRef, 3> Features; + FS.split(Features, ',', -1, false /* KeepEmpty */); + for (auto &Feature : Features) { + if (Feature == "vector" || Feature == "+vector") + VectorABI = true; + if (Feature == "-vector") + VectorABI = false; + } + + return VectorABI; +} + +static std::string computeDataLayout(const Triple &TT, StringRef CPU, + StringRef FS) { + bool VectorABI = UsesVectorABI(CPU, FS); + std::string Ret = ""; + + // Big endian. + Ret += "E"; + + // Data mangling. + Ret += DataLayout::getManglingComponent(TT); + + // Make sure that global data has at least 16 bits of alignment by + // default, so that we can refer to it using LARL. We don't have any + // special requirements for stack variables though. + Ret += "-i1:8:16-i8:8:16"; + + // 64-bit integers are naturally aligned. + Ret += "-i64:64"; + + // 128-bit floats are aligned only to 64 bits. + Ret += "-f128:64"; + + // When using the vector ABI, 128-bit vectors are also aligned to 64 bits. + if (VectorABI) + Ret += "-v128:64"; + + // We prefer 16 bits of aligned for all globals; see above. + Ret += "-a:8:16"; + + // Integer registers are 32 or 64 bits. + Ret += "-n32:64"; + + return Ret; +} + +SystemZTargetMachine::SystemZTargetMachine(const Target &T, const Triple &TT, + StringRef CPU, StringRef FS, + const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : LLVMTargetMachine(T, computeDataLayout(TT, CPU, FS), TT, CPU, FS, Options, + RM, CM, OL), + TLOF(make_unique<TargetLoweringObjectFileELF>()), + Subtarget(TT, CPU, FS, *this) { + initAsmInfo(); +} + +SystemZTargetMachine::~SystemZTargetMachine() {} + +namespace { +/// SystemZ Code Generator Pass Configuration Options. 
+class SystemZPassConfig : public TargetPassConfig { +public: + SystemZPassConfig(SystemZTargetMachine *TM, PassManagerBase &PM) + : TargetPassConfig(TM, PM) {} + + SystemZTargetMachine &getSystemZTargetMachine() const { + return getTM<SystemZTargetMachine>(); + } + + void addIRPasses() override; + bool addInstSelector() override; + void addPreSched2() override; + void addPreEmitPass() override; +}; +} // end anonymous namespace + +void SystemZPassConfig::addIRPasses() { + TargetPassConfig::addIRPasses(); +} + +bool SystemZPassConfig::addInstSelector() { + addPass(createSystemZISelDag(getSystemZTargetMachine(), getOptLevel())); + + if (getOptLevel() != CodeGenOpt::None) + addPass(createSystemZLDCleanupPass(getSystemZTargetMachine())); + + return false; +} + +void SystemZPassConfig::addPreSched2() { + if (getOptLevel() != CodeGenOpt::None && + getSystemZTargetMachine().getSubtargetImpl()->hasLoadStoreOnCond()) + addPass(&IfConverterID); +} + +void SystemZPassConfig::addPreEmitPass() { + + // Do instruction shortening before compare elimination because some + // vector instructions will be shortened into opcodes that compare + // elimination recognizes. + if (getOptLevel() != CodeGenOpt::None) + addPass(createSystemZShortenInstPass(getSystemZTargetMachine()), false); + + // We eliminate comparisons here rather than earlier because some + // transformations can change the set of available CC values and we + // generally want those transformations to have priority. This is + // especially true in the commonest case where the result of the comparison + // is used by a single in-range branch instruction, since we will then + // be able to fuse the compare and the branch instead. + // + // For example, two-address NILF can sometimes be converted into + // three-address RISBLG. NILF produces a CC value that indicates whether + // the low word is zero, but RISBLG does not modify CC at all. On the + // other hand, 64-bit ANDs like NILL can sometimes be converted to RISBG. + // The CC value produced by NILL isn't useful for our purposes, but the + // value produced by RISBG can be used for any comparison with zero + // (not just equality). So there are some transformations that lose + // CC values (while still being worthwhile) and others that happen to make + // the CC result more useful than it was originally. + // + // Another reason is that we only want to use BRANCH ON COUNT in cases + // where we know that the count register is not going to be spilled. + // + // Doing it so late makes it more likely that a register will be reused + // between the comparison and the branch, but it isn't clear whether + // preventing that would be a win or not. + if (getOptLevel() != CodeGenOpt::None) + addPass(createSystemZElimComparePass(getSystemZTargetMachine()), false); + addPass(createSystemZLongBranchPass(getSystemZTargetMachine())); + + // Do final scheduling after all other optimizations, to get an + // optimal input for the decoder (branch relaxation must happen + // after block placement). 
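+ // MISchedPostRA selects the post-RA machine scheduler; otherwise the older post-RA list scheduler is used.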
+ if (getOptLevel() != CodeGenOpt::None) { + if (MISchedPostRA) + addPass(&PostMachineSchedulerID); + else + addPass(&PostRASchedulerID); + } +} + +TargetPassConfig *SystemZTargetMachine::createPassConfig(PassManagerBase &PM) { + return new SystemZPassConfig(this, PM); +} + +TargetIRAnalysis SystemZTargetMachine::getTargetIRAnalysis() { + return TargetIRAnalysis([this](const Function &F) { + return TargetTransformInfo(SystemZTTIImpl(this, F)); + }); +} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h new file mode 100644 index 0000000..1a8f1f7 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h @@ -0,0 +1,53 @@ +//==- SystemZTargetMachine.h - Define TargetMachine for SystemZ ---*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the SystemZ specific subclass of TargetMachine. +// +//===----------------------------------------------------------------------===// + + +#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETMACHINE_H +#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETMACHINE_H + +#include "SystemZSubtarget.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + +class TargetFrameLowering; + +class SystemZTargetMachine : public LLVMTargetMachine { + std::unique_ptr<TargetLoweringObjectFile> TLOF; + SystemZSubtarget Subtarget; + +public: + SystemZTargetMachine(const Target &T, const Triple &TT, StringRef CPU, + StringRef FS, const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); + ~SystemZTargetMachine() override; + + const SystemZSubtarget *getSubtargetImpl() const { return &Subtarget; } + const SystemZSubtarget *getSubtargetImpl(const Function &) const override { + return &Subtarget; + } + // Override LLVMTargetMachine + TargetPassConfig *createPassConfig(PassManagerBase &PM) override; + TargetIRAnalysis getTargetIRAnalysis() override; + TargetLoweringObjectFile *getObjFileLowering() const override { + return TLOF.get(); + } + + bool targetSchedulesPostRAScheduling() const override { return true; }; + +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp new file mode 100644 index 0000000..5ff5b21 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -0,0 +1,258 @@ +//===-- SystemZTargetTransformInfo.cpp - SystemZ-specific TTI -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a TargetTransformInfo analysis pass specific to the +// SystemZ target machine. It uses the target's detailed information to provide +// more precise answers to certain TTI queries, while letting the target +// independent and default TTI implementations handle the rest. 
+// +//===----------------------------------------------------------------------===// + +#include "SystemZTargetTransformInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/BasicTTIImpl.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/CostTable.h" +#include "llvm/Target/TargetLowering.h" +using namespace llvm; + +#define DEBUG_TYPE "systemztti" + +//===----------------------------------------------------------------------===// +// +// SystemZ cost model. +// +//===----------------------------------------------------------------------===// + +int SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { + assert(Ty->isIntegerTy()); + + unsigned BitSize = Ty->getPrimitiveSizeInBits(); + // There is no cost model for constants with a bit size of 0. Return TCC_Free + // here, so that constant hoisting will ignore this constant. + if (BitSize == 0) + return TTI::TCC_Free; + // No cost model for operations on integers larger than 64 bit implemented yet. + if (BitSize > 64) + return TTI::TCC_Free; + + if (Imm == 0) + return TTI::TCC_Free; + + if (Imm.getBitWidth() <= 64) { + // Constants loaded via lgfi. + if (isInt<32>(Imm.getSExtValue())) + return TTI::TCC_Basic; + // Constants loaded via llilf. + if (isUInt<32>(Imm.getZExtValue())) + return TTI::TCC_Basic; + // Constants loaded via llihf: + if ((Imm.getZExtValue() & 0xffffffff) == 0) + return TTI::TCC_Basic; + + return 2 * TTI::TCC_Basic; + } + + return 4 * TTI::TCC_Basic; +} + +int SystemZTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, + const APInt &Imm, Type *Ty) { + assert(Ty->isIntegerTy()); + + unsigned BitSize = Ty->getPrimitiveSizeInBits(); + // There is no cost model for constants with a bit size of 0. Return TCC_Free + // here, so that constant hoisting will ignore this constant. + if (BitSize == 0) + return TTI::TCC_Free; + // No cost model for operations on integers larger than 64 bit implemented yet. + if (BitSize > 64) + return TTI::TCC_Free; + + switch (Opcode) { + default: + return TTI::TCC_Free; + case Instruction::GetElementPtr: + // Always hoist the base address of a GetElementPtr. This prevents the + // creation of new constants for every base constant that gets constant + // folded with the offset. + if (Idx == 0) + return 2 * TTI::TCC_Basic; + return TTI::TCC_Free; + case Instruction::Store: + if (Idx == 0 && Imm.getBitWidth() <= 64) { + // Any 8-bit immediate store can by implemented via mvi. + if (BitSize == 8) + return TTI::TCC_Free; + // 16-bit immediate values can be stored via mvhhi/mvhi/mvghi. + if (isInt<16>(Imm.getSExtValue())) + return TTI::TCC_Free; + } + break; + case Instruction::ICmp: + if (Idx == 1 && Imm.getBitWidth() <= 64) { + // Comparisons against signed 32-bit immediates implemented via cgfi. + if (isInt<32>(Imm.getSExtValue())) + return TTI::TCC_Free; + // Comparisons against unsigned 32-bit immediates implemented via clgfi. + if (isUInt<32>(Imm.getZExtValue())) + return TTI::TCC_Free; + } + break; + case Instruction::Add: + case Instruction::Sub: + if (Idx == 1 && Imm.getBitWidth() <= 64) { + // We use algfi/slgfi to add/subtract 32-bit unsigned immediates. + if (isUInt<32>(Imm.getZExtValue())) + return TTI::TCC_Free; + // Or their negation, by swapping addition vs. subtraction. + if (isUInt<32>(-Imm.getSExtValue())) + return TTI::TCC_Free; + } + break; + case Instruction::Mul: + if (Idx == 1 && Imm.getBitWidth() <= 64) { + // We use msgfi to multiply by 32-bit signed immediates. 
+ if (isInt<32>(Imm.getSExtValue())) + return TTI::TCC_Free; + } + break; + case Instruction::Or: + case Instruction::Xor: + if (Idx == 1 && Imm.getBitWidth() <= 64) { + // Masks supported by oilf/xilf. + if (isUInt<32>(Imm.getZExtValue())) + return TTI::TCC_Free; + // Masks supported by oihf/xihf. + if ((Imm.getZExtValue() & 0xffffffff) == 0) + return TTI::TCC_Free; + } + break; + case Instruction::And: + if (Idx == 1 && Imm.getBitWidth() <= 64) { + // Any 32-bit AND operation can by implemented via nilf. + if (BitSize <= 32) + return TTI::TCC_Free; + // 64-bit masks supported by nilf. + if (isUInt<32>(~Imm.getZExtValue())) + return TTI::TCC_Free; + // 64-bit masks supported by nilh. + if ((Imm.getZExtValue() & 0xffffffff) == 0xffffffff) + return TTI::TCC_Free; + // Some 64-bit AND operations can be implemented via risbg. + const SystemZInstrInfo *TII = ST->getInstrInfo(); + unsigned Start, End; + if (TII->isRxSBGMask(Imm.getZExtValue(), BitSize, Start, End)) + return TTI::TCC_Free; + } + break; + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + // Always return TCC_Free for the shift value of a shift instruction. + if (Idx == 1) + return TTI::TCC_Free; + break; + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::IntToPtr: + case Instruction::PtrToInt: + case Instruction::BitCast: + case Instruction::PHI: + case Instruction::Call: + case Instruction::Select: + case Instruction::Ret: + case Instruction::Load: + break; + } + + return SystemZTTIImpl::getIntImmCost(Imm, Ty); +} + +int SystemZTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx, + const APInt &Imm, Type *Ty) { + assert(Ty->isIntegerTy()); + + unsigned BitSize = Ty->getPrimitiveSizeInBits(); + // There is no cost model for constants with a bit size of 0. Return TCC_Free + // here, so that constant hoisting will ignore this constant. + if (BitSize == 0) + return TTI::TCC_Free; + // No cost model for operations on integers larger than 64 bit implemented yet. + if (BitSize > 64) + return TTI::TCC_Free; + + switch (IID) { + default: + return TTI::TCC_Free; + case Intrinsic::sadd_with_overflow: + case Intrinsic::uadd_with_overflow: + case Intrinsic::ssub_with_overflow: + case Intrinsic::usub_with_overflow: + // These get expanded to include a normal addition/subtraction. + if (Idx == 1 && Imm.getBitWidth() <= 64) { + if (isUInt<32>(Imm.getZExtValue())) + return TTI::TCC_Free; + if (isUInt<32>(-Imm.getSExtValue())) + return TTI::TCC_Free; + } + break; + case Intrinsic::smul_with_overflow: + case Intrinsic::umul_with_overflow: + // These get expanded to include a normal multiplication. 
+ if (Idx == 1 && Imm.getBitWidth() <= 64) { + if (isInt<32>(Imm.getSExtValue())) + return TTI::TCC_Free; + } + break; + case Intrinsic::experimental_stackmap: + if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) + return TTI::TCC_Free; + break; + case Intrinsic::experimental_patchpoint_void: + case Intrinsic::experimental_patchpoint_i64: + if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) + return TTI::TCC_Free; + break; + } + return SystemZTTIImpl::getIntImmCost(Imm, Ty); +} + +TargetTransformInfo::PopcntSupportKind +SystemZTTIImpl::getPopcntSupport(unsigned TyWidth) { + assert(isPowerOf2_32(TyWidth) && "Type width must be power of 2"); + if (ST->hasPopulationCount() && TyWidth <= 64) + return TTI::PSK_FastHardware; + return TTI::PSK_Software; +} + +unsigned SystemZTTIImpl::getNumberOfRegisters(bool Vector) { + if (!Vector) + // Discount the stack pointer. Also leave out %r0, since it can't + // be used in an address. + return 14; + if (ST->hasVector()) + return 32; + return 0; +} + +unsigned SystemZTTIImpl::getRegisterBitWidth(bool Vector) { + if (!Vector) + return 64; + if (ST->hasVector()) + return 128; + return 0; +} + diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h new file mode 100644 index 0000000..9ae736d --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h @@ -0,0 +1,66 @@ +//===-- SystemZTargetTransformInfo.h - SystemZ-specific TTI ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETTRANSFORMINFO_H +#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETTRANSFORMINFO_H + +#include "SystemZTargetMachine.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/BasicTTIImpl.h" + +namespace llvm { + +class SystemZTTIImpl : public BasicTTIImplBase<SystemZTTIImpl> { + typedef BasicTTIImplBase<SystemZTTIImpl> BaseT; + typedef TargetTransformInfo TTI; + friend BaseT; + + const SystemZSubtarget *ST; + const SystemZTargetLowering *TLI; + + const SystemZSubtarget *getST() const { return ST; } + const SystemZTargetLowering *getTLI() const { return TLI; } + +public: + explicit SystemZTTIImpl(const SystemZTargetMachine *TM, const Function &F) + : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), + TLI(ST->getTargetLowering()) {} + + // Provide value semantics. MSVC requires that we spell all of these out. 
+ SystemZTTIImpl(const SystemZTTIImpl &Arg) + : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {} + SystemZTTIImpl(SystemZTTIImpl &&Arg) + : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)), + TLI(std::move(Arg.TLI)) {} + + /// \name Scalar TTI Implementations + /// @{ + + int getIntImmCost(const APInt &Imm, Type *Ty); + + int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty); + int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, + Type *Ty); + + TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth); + + /// @} + + /// \name Vector TTI Implementations + /// @{ + + unsigned getNumberOfRegisters(bool Vector); + unsigned getRegisterBitWidth(bool Vector); + + /// @} +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp b/contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp new file mode 100644 index 0000000..8f9aa28 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp @@ -0,0 +1,20 @@ +//===-- SystemZTargetInfo.cpp - SystemZ target implementation -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "SystemZ.h" +#include "llvm/Support/TargetRegistry.h" + +using namespace llvm; + +Target llvm::TheSystemZTarget; + +extern "C" void LLVMInitializeSystemZTargetInfo() { + RegisterTarget<Triple::systemz, /*HasJIT=*/true> + X(TheSystemZTarget, "systemz", "SystemZ"); +} |