diff options
Diffstat (limited to 'contrib/llvm/lib/Target/SystemZ')
46 files changed, 8153 insertions, 1815 deletions
diff --git a/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp index c7725a1..763f40c 100644 --- a/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp +++ b/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp @@ -8,6 +8,8 @@ //===----------------------------------------------------------------------===// #include "MCTargetDesc/SystemZMCTargetDesc.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" @@ -28,21 +30,29 @@ static bool inRange(const MCExpr *Expr, int64_t MinValue, int64_t MaxValue) { } namespace { +enum RegisterKind { + GR32Reg, + GRH32Reg, + GR64Reg, + GR128Reg, + ADDR32Reg, + ADDR64Reg, + FP32Reg, + FP64Reg, + FP128Reg +}; + +enum MemoryKind { + BDMem, + BDXMem, + BDLMem +}; + class SystemZOperand : public MCParsedAsmOperand { public: - enum RegisterKind { - GR32Reg, - GR64Reg, - GR128Reg, - ADDR32Reg, - ADDR64Reg, - FP32Reg, - FP64Reg, - FP128Reg - }; - private: enum OperandKind { + KindInvalid, KindToken, KindReg, KindAccessReg, @@ -59,7 +69,15 @@ private: unsigned Length; }; - // LLVM register Num, which has kind Kind. + // LLVM register Num, which has kind Kind. In some ways it might be + // easier for this class to have a register bank (general, floating-point + // or access) and a raw register number (0-15). This would postpone the + // interpretation of the operand to the add*() methods and avoid the need + // for context-dependent parsing. However, we do things the current way + // because of the virtual getReg() method, which needs to distinguish + // between (say) %r0 used as a single register and %r0 used as a pair. + // Context-dependent parsing can also give us slightly better error + // messages when invalid pairs like %r1 are used. 
struct RegOp { RegisterKind Kind; unsigned Num; @@ -67,12 +85,15 @@ private: // Base + Disp + Index, where Base and Index are LLVM registers or 0. // RegKind says what type the registers have (ADDR32Reg or ADDR64Reg). + // Length is the operand length for D(L,B)-style operands, otherwise + // it is null. struct MemOp { unsigned Base : 8; unsigned Index : 8; unsigned RegKind : 8; unsigned Unused : 8; const MCExpr *Disp; + const MCExpr *Length; }; union { @@ -99,6 +120,9 @@ private: public: // Create particular kinds of operand. + static SystemZOperand *createInvalid(SMLoc StartLoc, SMLoc EndLoc) { + return new SystemZOperand(KindInvalid, StartLoc, EndLoc); + } static SystemZOperand *createToken(StringRef Str, SMLoc Loc) { SystemZOperand *Op = new SystemZOperand(KindToken, Loc, Loc); Op->Token.Data = Str.data(); @@ -126,12 +150,14 @@ public: } static SystemZOperand *createMem(RegisterKind RegKind, unsigned Base, const MCExpr *Disp, unsigned Index, - SMLoc StartLoc, SMLoc EndLoc) { + const MCExpr *Length, SMLoc StartLoc, + SMLoc EndLoc) { SystemZOperand *Op = new SystemZOperand(KindMem, StartLoc, EndLoc); Op->Mem.RegKind = RegKind; Op->Mem.Base = Base; Op->Mem.Index = Index; Op->Mem.Disp = Disp; + Op->Mem.Length = Length; return Op; } @@ -178,16 +204,20 @@ public: virtual bool isMem() const LLVM_OVERRIDE { return Kind == KindMem; } - bool isMem(RegisterKind RegKind, bool HasIndex) const { + bool isMem(RegisterKind RegKind, MemoryKind MemKind) const { return (Kind == KindMem && Mem.RegKind == RegKind && - (HasIndex || !Mem.Index)); + (MemKind == BDXMem || !Mem.Index) && + (MemKind == BDLMem) == (Mem.Length != 0)); + } + bool isMemDisp12(RegisterKind RegKind, MemoryKind MemKind) const { + return isMem(RegKind, MemKind) && inRange(Mem.Disp, 0, 0xfff); } - bool isMemDisp12(RegisterKind RegKind, bool HasIndex) const { - return isMem(RegKind, HasIndex) && inRange(Mem.Disp, 0, 0xfff); + bool isMemDisp20(RegisterKind RegKind, MemoryKind MemKind) const { + return 
isMem(RegKind, MemKind) && inRange(Mem.Disp, -524288, 524287); } - bool isMemDisp20(RegisterKind RegKind, bool HasIndex) const { - return isMem(RegKind, HasIndex) && inRange(Mem.Disp, -524288, 524287); + bool isMemDisp12Len8(RegisterKind RegKind) const { + return isMemDisp12(RegKind, BDLMem) && inRange(Mem.Length, 1, 0x100); } // Override MCParsedAsmOperand. @@ -223,9 +253,18 @@ public: addExpr(Inst, Mem.Disp); Inst.addOperand(MCOperand::CreateReg(Mem.Index)); } + void addBDLAddrOperands(MCInst &Inst, unsigned N) const { + assert(N == 3 && "Invalid number of operands"); + assert(Kind == KindMem && "Invalid operand type"); + Inst.addOperand(MCOperand::CreateReg(Mem.Base)); + addExpr(Inst, Mem.Disp); + addExpr(Inst, Mem.Length); + } // Used by the TableGen code to check for particular operand types. bool isGR32() const { return isReg(GR32Reg); } + bool isGRH32() const { return isReg(GRH32Reg); } + bool isGRX32() const { return false; } bool isGR64() const { return isReg(GR64Reg); } bool isGR128() const { return isReg(GR128Reg); } bool isADDR32() const { return isReg(ADDR32Reg); } @@ -234,12 +273,13 @@ public: bool isFP32() const { return isReg(FP32Reg); } bool isFP64() const { return isReg(FP64Reg); } bool isFP128() const { return isReg(FP128Reg); } - bool isBDAddr32Disp12() const { return isMemDisp12(ADDR32Reg, false); } - bool isBDAddr32Disp20() const { return isMemDisp20(ADDR32Reg, false); } - bool isBDAddr64Disp12() const { return isMemDisp12(ADDR64Reg, false); } - bool isBDAddr64Disp20() const { return isMemDisp20(ADDR64Reg, false); } - bool isBDXAddr64Disp12() const { return isMemDisp12(ADDR64Reg, true); } - bool isBDXAddr64Disp20() const { return isMemDisp20(ADDR64Reg, true); } + bool isBDAddr32Disp12() const { return isMemDisp12(ADDR32Reg, BDMem); } + bool isBDAddr32Disp20() const { return isMemDisp20(ADDR32Reg, BDMem); } + bool isBDAddr64Disp12() const { return isMemDisp12(ADDR64Reg, BDMem); } + bool isBDAddr64Disp20() const { return isMemDisp20(ADDR64Reg, 
BDMem); } + bool isBDXAddr64Disp12() const { return isMemDisp12(ADDR64Reg, BDXMem); } + bool isBDXAddr64Disp20() const { return isMemDisp20(ADDR64Reg, BDXMem); } + bool isBDLAddr64Disp12Len8() const { return isMemDisp12Len8(ADDR64Reg); } bool isU4Imm() const { return isImm(0, 15); } bool isU6Imm() const { return isImm(0, 63); } bool isU8Imm() const { return isImm(0, 255); } @@ -250,46 +290,6 @@ public: bool isS32Imm() const { return isImm(-(1LL << 31), (1LL << 31) - 1); } }; -// Maps of asm register numbers to LLVM register numbers, with 0 indicating -// an invalid register. We don't use register class directly because that -// specifies the allocation order. -static const unsigned GR32Regs[] = { - SystemZ::R0W, SystemZ::R1W, SystemZ::R2W, SystemZ::R3W, - SystemZ::R4W, SystemZ::R5W, SystemZ::R6W, SystemZ::R7W, - SystemZ::R8W, SystemZ::R9W, SystemZ::R10W, SystemZ::R11W, - SystemZ::R12W, SystemZ::R13W, SystemZ::R14W, SystemZ::R15W -}; -static const unsigned GR64Regs[] = { - SystemZ::R0D, SystemZ::R1D, SystemZ::R2D, SystemZ::R3D, - SystemZ::R4D, SystemZ::R5D, SystemZ::R6D, SystemZ::R7D, - SystemZ::R8D, SystemZ::R9D, SystemZ::R10D, SystemZ::R11D, - SystemZ::R12D, SystemZ::R13D, SystemZ::R14D, SystemZ::R15D -}; -static const unsigned GR128Regs[] = { - SystemZ::R0Q, 0, SystemZ::R2Q, 0, - SystemZ::R4Q, 0, SystemZ::R6Q, 0, - SystemZ::R8Q, 0, SystemZ::R10Q, 0, - SystemZ::R12Q, 0, SystemZ::R14Q, 0 -}; -static const unsigned FP32Regs[] = { - SystemZ::F0S, SystemZ::F1S, SystemZ::F2S, SystemZ::F3S, - SystemZ::F4S, SystemZ::F5S, SystemZ::F6S, SystemZ::F7S, - SystemZ::F8S, SystemZ::F9S, SystemZ::F10S, SystemZ::F11S, - SystemZ::F12S, SystemZ::F13S, SystemZ::F14S, SystemZ::F15S -}; -static const unsigned FP64Regs[] = { - SystemZ::F0D, SystemZ::F1D, SystemZ::F2D, SystemZ::F3D, - SystemZ::F4D, SystemZ::F5D, SystemZ::F6D, SystemZ::F7D, - SystemZ::F8D, SystemZ::F9D, SystemZ::F10D, SystemZ::F11D, - SystemZ::F12D, SystemZ::F13D, SystemZ::F14D, SystemZ::F15D -}; -static const unsigned 
FP128Regs[] = { - SystemZ::F0Q, SystemZ::F1Q, 0, 0, - SystemZ::F4Q, SystemZ::F5Q, 0, 0, - SystemZ::F8Q, SystemZ::F9Q, 0, 0, - SystemZ::F12Q, SystemZ::F13Q, 0, 0 -}; - class SystemZAsmParser : public MCTargetAsmParser { #define GET_ASSEMBLER_HEADER #include "SystemZGenAsmMatcher.inc" @@ -297,35 +297,42 @@ class SystemZAsmParser : public MCTargetAsmParser { private: MCSubtargetInfo &STI; MCAsmParser &Parser; + enum RegisterGroup { + RegGR, + RegFP, + RegAccess + }; struct Register { - char Prefix; - unsigned Number; + RegisterGroup Group; + unsigned Num; SMLoc StartLoc, EndLoc; }; bool parseRegister(Register &Reg); - OperandMatchResultTy - parseRegister(Register &Reg, char Prefix, const unsigned *Regs, - bool IsAddress = false); + bool parseRegister(Register &Reg, RegisterGroup Group, const unsigned *Regs, + bool IsAddress = false); OperandMatchResultTy parseRegister(SmallVectorImpl<MCParsedAsmOperand*> &Operands, - char Prefix, const unsigned *Regs, - SystemZOperand::RegisterKind Kind, - bool IsAddress = false); + RegisterGroup Group, const unsigned *Regs, RegisterKind Kind); + + bool parseAddress(unsigned &Base, const MCExpr *&Disp, + unsigned &Index, const MCExpr *&Length, + const unsigned *Regs, RegisterKind RegKind); OperandMatchResultTy parseAddress(SmallVectorImpl<MCParsedAsmOperand*> &Operands, - const unsigned *Regs, SystemZOperand::RegisterKind RegKind, - bool HasIndex); + const unsigned *Regs, RegisterKind RegKind, + MemoryKind MemKind); bool parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, StringRef Mnemonic); public: - SystemZAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser) - : MCTargetAsmParser(), STI(sti), Parser(parser) { + SystemZAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser, + const MCInstrInfo &MII) + : MCTargetAsmParser(), STI(sti), Parser(parser) { MCAsmParserExtension::Initialize(Parser); // Initialize the set of available features. 
@@ -349,25 +356,31 @@ public: // Used by the TableGen code to parse particular operand types. OperandMatchResultTy parseGR32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - return parseRegister(Operands, 'r', GR32Regs, SystemZOperand::GR32Reg); + return parseRegister(Operands, RegGR, SystemZMC::GR32Regs, GR32Reg); + } + OperandMatchResultTy + parseGRH32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + return parseRegister(Operands, RegGR, SystemZMC::GRH32Regs, GRH32Reg); + } + OperandMatchResultTy + parseGRX32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + llvm_unreachable("GRX32 should only be used for pseudo instructions"); } OperandMatchResultTy parseGR64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - return parseRegister(Operands, 'r', GR64Regs, SystemZOperand::GR64Reg); + return parseRegister(Operands, RegGR, SystemZMC::GR64Regs, GR64Reg); } OperandMatchResultTy parseGR128(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - return parseRegister(Operands, 'r', GR128Regs, SystemZOperand::GR128Reg); + return parseRegister(Operands, RegGR, SystemZMC::GR128Regs, GR128Reg); } OperandMatchResultTy parseADDR32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - return parseRegister(Operands, 'r', GR32Regs, SystemZOperand::ADDR32Reg, - true); + return parseRegister(Operands, RegGR, SystemZMC::GR32Regs, ADDR32Reg); } OperandMatchResultTy parseADDR64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - return parseRegister(Operands, 'r', GR64Regs, SystemZOperand::ADDR64Reg, - true); + return parseRegister(Operands, RegGR, SystemZMC::GR64Regs, ADDR64Reg); } OperandMatchResultTy parseADDR128(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { @@ -375,30 +388,45 @@ public: } OperandMatchResultTy parseFP32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - return parseRegister(Operands, 'f', FP32Regs, SystemZOperand::FP32Reg); + return parseRegister(Operands, RegFP, SystemZMC::FP32Regs, FP32Reg); } OperandMatchResultTy 
parseFP64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - return parseRegister(Operands, 'f', FP64Regs, SystemZOperand::FP64Reg); + return parseRegister(Operands, RegFP, SystemZMC::FP64Regs, FP64Reg); } OperandMatchResultTy parseFP128(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - return parseRegister(Operands, 'f', FP128Regs, SystemZOperand::FP128Reg); + return parseRegister(Operands, RegFP, SystemZMC::FP128Regs, FP128Reg); } OperandMatchResultTy parseBDAddr32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - return parseAddress(Operands, GR32Regs, SystemZOperand::ADDR32Reg, false); + return parseAddress(Operands, SystemZMC::GR32Regs, ADDR32Reg, BDMem); } OperandMatchResultTy parseBDAddr64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - return parseAddress(Operands, GR64Regs, SystemZOperand::ADDR64Reg, false); + return parseAddress(Operands, SystemZMC::GR64Regs, ADDR64Reg, BDMem); } OperandMatchResultTy parseBDXAddr64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - return parseAddress(Operands, GR64Regs, SystemZOperand::ADDR64Reg, true); + return parseAddress(Operands, SystemZMC::GR64Regs, ADDR64Reg, BDXMem); + } + OperandMatchResultTy + parseBDLAddr64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + return parseAddress(Operands, SystemZMC::GR64Regs, ADDR64Reg, BDLMem); } OperandMatchResultTy parseAccessReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands); + OperandMatchResultTy + parsePCRel(SmallVectorImpl<MCParsedAsmOperand*> &Operands, + int64_t MinVal, int64_t MaxVal); + OperandMatchResultTy + parsePCRel16(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + return parsePCRel(Operands, -(1LL << 16), (1LL << 16) - 1); + } + OperandMatchResultTy + parsePCRel32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + return parsePCRel(Operands, -(1LL << 32), (1LL << 32) - 1); + } }; } @@ -417,122 +445,160 @@ bool SystemZAsmParser::parseRegister(Register &Reg) { // Eat the % prefix. 
if (Parser.getTok().isNot(AsmToken::Percent)) - return true; + return Error(Parser.getTok().getLoc(), "register expected"); Parser.Lex(); // Expect a register name. if (Parser.getTok().isNot(AsmToken::Identifier)) - return true; + return Error(Reg.StartLoc, "invalid register"); - // Check the prefix. + // Check that there's a prefix. StringRef Name = Parser.getTok().getString(); if (Name.size() < 2) - return true; - Reg.Prefix = Name[0]; + return Error(Reg.StartLoc, "invalid register"); + char Prefix = Name[0]; // Treat the rest of the register name as a register number. - if (Name.substr(1).getAsInteger(10, Reg.Number)) - return true; + if (Name.substr(1).getAsInteger(10, Reg.Num)) + return Error(Reg.StartLoc, "invalid register"); + + // Look for valid combinations of prefix and number. + if (Prefix == 'r' && Reg.Num < 16) + Reg.Group = RegGR; + else if (Prefix == 'f' && Reg.Num < 16) + Reg.Group = RegFP; + else if (Prefix == 'a' && Reg.Num < 16) + Reg.Group = RegAccess; + else + return Error(Reg.StartLoc, "invalid register"); Reg.EndLoc = Parser.getTok().getLoc(); Parser.Lex(); return false; } -// Parse a register with prefix Prefix and convert it to LLVM numbering. -// Regs maps asm register numbers to LLVM register numbers, with zero -// entries indicating an invalid register. IsAddress says whether the -// register appears in an address context. -SystemZAsmParser::OperandMatchResultTy -SystemZAsmParser::parseRegister(Register &Reg, char Prefix, - const unsigned *Regs, bool IsAddress) { +// Parse a register of group Group. If Regs is nonnull, use it to map +// the raw register number to LLVM numbering, with zero entries indicating +// an invalid register. IsAddress says whether the register appears in an +// address context. 
+bool SystemZAsmParser::parseRegister(Register &Reg, RegisterGroup Group, + const unsigned *Regs, bool IsAddress) { if (parseRegister(Reg)) - return MatchOperand_NoMatch; - if (Reg.Prefix != Prefix || Reg.Number > 15 || Regs[Reg.Number] == 0) { - Error(Reg.StartLoc, "invalid register"); - return MatchOperand_ParseFail; - } - if (Reg.Number == 0 && IsAddress) { - Error(Reg.StartLoc, "%r0 used in an address"); - return MatchOperand_ParseFail; - } - Reg.Number = Regs[Reg.Number]; - return MatchOperand_Success; + return true; + if (Reg.Group != Group) + return Error(Reg.StartLoc, "invalid operand for instruction"); + if (Regs && Regs[Reg.Num] == 0) + return Error(Reg.StartLoc, "invalid register pair"); + if (Reg.Num == 0 && IsAddress) + return Error(Reg.StartLoc, "%r0 used in an address"); + if (Regs) + Reg.Num = Regs[Reg.Num]; + return false; } -// Parse a register and add it to Operands. Prefix is 'r' for GPRs, -// 'f' for FPRs, etc. Regs maps asm register numbers to LLVM register numbers, -// with zero entries indicating an invalid register. Kind is the type of -// register represented by Regs and IsAddress says whether the register is -// being parsed in an address context, meaning that %r0 evaluates as 0. +// Parse a register and add it to Operands. The other arguments are as above. 
SystemZAsmParser::OperandMatchResultTy SystemZAsmParser::parseRegister(SmallVectorImpl<MCParsedAsmOperand*> &Operands, - char Prefix, const unsigned *Regs, - SystemZOperand::RegisterKind Kind, - bool IsAddress) { + RegisterGroup Group, const unsigned *Regs, + RegisterKind Kind) { + if (Parser.getTok().isNot(AsmToken::Percent)) + return MatchOperand_NoMatch; + Register Reg; - OperandMatchResultTy Result = parseRegister(Reg, Prefix, Regs, IsAddress); - if (Result == MatchOperand_Success) - Operands.push_back(SystemZOperand::createReg(Kind, Reg.Number, - Reg.StartLoc, Reg.EndLoc)); - return Result; -} + bool IsAddress = (Kind == ADDR32Reg || Kind == ADDR64Reg); + if (parseRegister(Reg, Group, Regs, IsAddress)) + return MatchOperand_ParseFail; -// Parse a memory operand and add it to Operands. Regs maps asm register -// numbers to LLVM address registers and RegKind says what kind of address -// register we're using (ADDR32Reg or ADDR64Reg). HasIndex says whether -// the address allows index registers. -SystemZAsmParser::OperandMatchResultTy -SystemZAsmParser::parseAddress(SmallVectorImpl<MCParsedAsmOperand*> &Operands, - const unsigned *Regs, - SystemZOperand::RegisterKind RegKind, - bool HasIndex) { - SMLoc StartLoc = Parser.getTok().getLoc(); + Operands.push_back(SystemZOperand::createReg(Kind, Reg.Num, + Reg.StartLoc, Reg.EndLoc)); + return MatchOperand_Success; +} +// Parse a memory operand into Base, Disp, Index and Length. +// Regs maps asm register numbers to LLVM register numbers and RegKind +// says what kind of address register we're using (ADDR32Reg or ADDR64Reg). +bool SystemZAsmParser::parseAddress(unsigned &Base, const MCExpr *&Disp, + unsigned &Index, const MCExpr *&Length, + const unsigned *Regs, + RegisterKind RegKind) { // Parse the displacement, which must always be present. - const MCExpr *Disp; if (getParser().parseExpression(Disp)) - return MatchOperand_NoMatch; + return true; // Parse the optional base and index. 
- unsigned Index = 0; - unsigned Base = 0; + Index = 0; + Base = 0; + Length = 0; if (getLexer().is(AsmToken::LParen)) { Parser.Lex(); - // Parse the first register. - Register Reg; - OperandMatchResultTy Result = parseRegister(Reg, 'r', GR64Regs, true); - if (Result != MatchOperand_Success) - return Result; + if (getLexer().is(AsmToken::Percent)) { + // Parse the first register and decide whether it's a base or an index. + Register Reg; + if (parseRegister(Reg, RegGR, Regs, RegKind)) + return true; + if (getLexer().is(AsmToken::Comma)) + Index = Reg.Num; + else + Base = Reg.Num; + } else { + // Parse the length. + if (getParser().parseExpression(Length)) + return true; + } - // Check whether there's a second register. If so, the one that we - // just parsed was the index. + // Check whether there's a second register. It's the base if so. if (getLexer().is(AsmToken::Comma)) { Parser.Lex(); - - if (!HasIndex) { - Error(Reg.StartLoc, "invalid use of indexed addressing"); - return MatchOperand_ParseFail; - } - - Index = Reg.Number; - Result = parseRegister(Reg, 'r', GR64Regs, true); - if (Result != MatchOperand_Success) - return Result; + Register Reg; + if (parseRegister(Reg, RegGR, Regs, RegKind)) + return true; + Base = Reg.Num; } - Base = Reg.Number; // Consume the closing bracket. if (getLexer().isNot(AsmToken::RParen)) - return MatchOperand_NoMatch; + return Error(Parser.getTok().getLoc(), "unexpected token in address"); Parser.Lex(); } + return false; +} + +// Parse a memory operand and add it to Operands. The other arguments +// are as above. 
+SystemZAsmParser::OperandMatchResultTy +SystemZAsmParser::parseAddress(SmallVectorImpl<MCParsedAsmOperand*> &Operands, + const unsigned *Regs, RegisterKind RegKind, + MemoryKind MemKind) { + SMLoc StartLoc = Parser.getTok().getLoc(); + unsigned Base, Index; + const MCExpr *Disp; + const MCExpr *Length; + if (parseAddress(Base, Disp, Index, Length, Regs, RegKind)) + return MatchOperand_ParseFail; + + if (Index && MemKind != BDXMem) + { + Error(StartLoc, "invalid use of indexed addressing"); + return MatchOperand_ParseFail; + } + + if (Length && MemKind != BDLMem) + { + Error(StartLoc, "invalid use of length addressing"); + return MatchOperand_ParseFail; + } + + if (!Length && MemKind == BDLMem) + { + Error(StartLoc, "missing length in address"); + return MatchOperand_ParseFail; + } SMLoc EndLoc = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); Operands.push_back(SystemZOperand::createMem(RegKind, Base, Disp, Index, - StartLoc, EndLoc)); + Length, StartLoc, EndLoc)); return MatchOperand_Success; } @@ -544,13 +610,14 @@ bool SystemZAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { Register Reg; if (parseRegister(Reg)) - return Error(Reg.StartLoc, "register expected"); - if (Reg.Prefix == 'r' && Reg.Number < 16) - RegNo = GR64Regs[Reg.Number]; - else if (Reg.Prefix == 'f' && Reg.Number < 16) - RegNo = FP64Regs[Reg.Number]; + return true; + if (Reg.Group == RegGR) + RegNo = SystemZMC::GR64Regs[Reg.Num]; + else if (Reg.Group == RegFP) + RegNo = SystemZMC::FP64Regs[Reg.Num]; else - return Error(Reg.StartLoc, "invalid register"); + // FIXME: Access registers aren't modelled as LLVM registers yet. + return Error(Reg.StartLoc, "invalid operand for instruction"); StartLoc = Reg.StartLoc; EndLoc = Reg.EndLoc; return false; @@ -604,15 +671,33 @@ parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, if (ResTy == MatchOperand_ParseFail) return true; - // The only other type of operand is an immediate. 
- const MCExpr *Expr; + // Check for a register. All real register operands should have used + // a context-dependent parse routine, which gives the required register + // class. The code is here to mop up other cases, like those where + // the instruction isn't recognized. + if (Parser.getTok().is(AsmToken::Percent)) { + Register Reg; + if (parseRegister(Reg)) + return true; + Operands.push_back(SystemZOperand::createInvalid(Reg.StartLoc, Reg.EndLoc)); + return false; + } + + // The only other type of operand is an immediate or address. As above, + // real address operands should have used a context-dependent parse routine, + // so we treat any plain expression as an immediate. SMLoc StartLoc = Parser.getTok().getLoc(); - if (getParser().parseExpression(Expr)) + unsigned Base, Index; + const MCExpr *Expr, *Length; + if (parseAddress(Base, Expr, Index, Length, SystemZMC::GR64Regs, ADDR64Reg)) return true; SMLoc EndLoc = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); - Operands.push_back(SystemZOperand::createImm(Expr, StartLoc, EndLoc)); + if (Base || Index || Length) + Operands.push_back(SystemZOperand::createInvalid(StartLoc, EndLoc)); + else + Operands.push_back(SystemZOperand::createImm(Expr, StartLoc, EndLoc)); return false; } @@ -671,15 +756,47 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, SystemZAsmParser::OperandMatchResultTy SystemZAsmParser:: parseAccessReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - Register Reg; - if (parseRegister(Reg)) + if (Parser.getTok().isNot(AsmToken::Percent)) return MatchOperand_NoMatch; - if (Reg.Prefix != 'a' || Reg.Number > 15) { - Error(Reg.StartLoc, "invalid register"); + + Register Reg; + if (parseRegister(Reg, RegAccess, 0)) return MatchOperand_ParseFail; + + Operands.push_back(SystemZOperand::createAccessReg(Reg.Num, + Reg.StartLoc, + Reg.EndLoc)); + return MatchOperand_Success; +} + +SystemZAsmParser::OperandMatchResultTy SystemZAsmParser:: 
+parsePCRel(SmallVectorImpl<MCParsedAsmOperand*> &Operands, + int64_t MinVal, int64_t MaxVal) { + MCContext &Ctx = getContext(); + MCStreamer &Out = getStreamer(); + const MCExpr *Expr; + SMLoc StartLoc = Parser.getTok().getLoc(); + if (getParser().parseExpression(Expr)) + return MatchOperand_NoMatch; + + // For consistency with the GNU assembler, treat immediates as offsets + // from ".". + if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) { + int64_t Value = CE->getValue(); + if ((Value & 1) || Value < MinVal || Value > MaxVal) { + Error(StartLoc, "offset out of range"); + return MatchOperand_ParseFail; + } + MCSymbol *Sym = Ctx.CreateTempSymbol(); + Out.EmitLabel(Sym); + const MCExpr *Base = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, + Ctx); + Expr = Value == 0 ? Base : MCBinaryExpr::CreateAdd(Base, Expr, Ctx); } - Operands.push_back(SystemZOperand::createAccessReg(Reg.Number, - Reg.StartLoc, Reg.EndLoc)); + + SMLoc EndLoc = + SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + Operands.push_back(SystemZOperand::createImm(Expr, StartLoc, EndLoc)); return MatchOperand_Success; } diff --git a/contrib/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/contrib/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp new file mode 100644 index 0000000..fc3c38d --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp @@ -0,0 +1,323 @@ +//===-- SystemZDisassembler.cpp - Disassembler for SystemZ ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "SystemZ.h" +#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCFixedLenDisassembler.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/MemoryObject.h" +#include "llvm/Support/TargetRegistry.h" + +using namespace llvm; + +typedef MCDisassembler::DecodeStatus DecodeStatus; + +namespace { +class SystemZDisassembler : public MCDisassembler { +public: + SystemZDisassembler(const MCSubtargetInfo &STI) + : MCDisassembler(STI) {} + virtual ~SystemZDisassembler() {} + + // Override MCDisassembler. + virtual DecodeStatus getInstruction(MCInst &instr, + uint64_t &size, + const MemoryObject ®ion, + uint64_t address, + raw_ostream &vStream, + raw_ostream &cStream) const LLVM_OVERRIDE; +}; +} // end anonymous namespace + +static MCDisassembler *createSystemZDisassembler(const Target &T, + const MCSubtargetInfo &STI) { + return new SystemZDisassembler(STI); +} + +extern "C" void LLVMInitializeSystemZDisassembler() { + // Register the disassembler. 
+ TargetRegistry::RegisterMCDisassembler(TheSystemZTarget, + createSystemZDisassembler); +} + +static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo, + const unsigned *Regs) { + assert(RegNo < 16 && "Invalid register"); + RegNo = Regs[RegNo]; + if (RegNo == 0) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateReg(RegNo)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeGR32BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::GR32Regs); +} + +static DecodeStatus DecodeGRH32BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::GRH32Regs); +} + +static DecodeStatus DecodeGR64BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::GR64Regs); +} + +static DecodeStatus DecodeGR128BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::GR128Regs); +} + +static DecodeStatus DecodeADDR64BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::GR64Regs); +} + +static DecodeStatus DecodeFP32BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::FP32Regs); +} + +static DecodeStatus DecodeFP64BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::FP64Regs); +} + +static DecodeStatus DecodeFP128BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::FP128Regs); +} + +template<unsigned N> +static DecodeStatus 
decodeUImmOperand(MCInst &Inst, uint64_t Imm) { + assert(isUInt<N>(Imm) && "Invalid immediate"); + Inst.addOperand(MCOperand::CreateImm(Imm)); + return MCDisassembler::Success; +} + +template<unsigned N> +static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm) { + assert(isUInt<N>(Imm) && "Invalid immediate"); + Inst.addOperand(MCOperand::CreateImm(SignExtend64<N>(Imm))); + return MCDisassembler::Success; +} + +static DecodeStatus decodeAccessRegOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, + const void *Decoder) { + return decodeUImmOperand<4>(Inst, Imm); +} + +static DecodeStatus decodeU4ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, const void *Decoder) { + return decodeUImmOperand<4>(Inst, Imm); +} + +static DecodeStatus decodeU6ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, const void *Decoder) { + return decodeUImmOperand<6>(Inst, Imm); +} + +static DecodeStatus decodeU8ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, const void *Decoder) { + return decodeUImmOperand<8>(Inst, Imm); +} + +static DecodeStatus decodeU16ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, const void *Decoder) { + return decodeUImmOperand<16>(Inst, Imm); +} + +static DecodeStatus decodeU32ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, const void *Decoder) { + return decodeUImmOperand<32>(Inst, Imm); +} + +static DecodeStatus decodeS8ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, const void *Decoder) { + return decodeSImmOperand<8>(Inst, Imm); +} + +static DecodeStatus decodeS16ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, const void *Decoder) { + return decodeSImmOperand<16>(Inst, Imm); +} + +static DecodeStatus decodeS32ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, const void *Decoder) { + return decodeSImmOperand<32>(Inst, Imm); +} + +template<unsigned N> +static DecodeStatus decodePCDBLOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address) { + assert(isUInt<N>(Imm) && 
"Invalid PC-relative offset"); + Inst.addOperand(MCOperand::CreateImm(SignExtend64<N>(Imm) * 2 + Address)); + return MCDisassembler::Success; +} + +static DecodeStatus decodePC16DBLOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, + const void *Decoder) { + return decodePCDBLOperand<16>(Inst, Imm, Address); +} + +static DecodeStatus decodePC32DBLOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, + const void *Decoder) { + return decodePCDBLOperand<32>(Inst, Imm, Address); +} + +static DecodeStatus decodeBDAddr12Operand(MCInst &Inst, uint64_t Field, + const unsigned *Regs) { + uint64_t Base = Field >> 12; + uint64_t Disp = Field & 0xfff; + assert(Base < 16 && "Invalid BDAddr12"); + Inst.addOperand(MCOperand::CreateReg(Base == 0 ? 0 : Regs[Base])); + Inst.addOperand(MCOperand::CreateImm(Disp)); + return MCDisassembler::Success; +} + +static DecodeStatus decodeBDAddr20Operand(MCInst &Inst, uint64_t Field, + const unsigned *Regs) { + uint64_t Base = Field >> 20; + uint64_t Disp = ((Field << 12) & 0xff000) | ((Field >> 8) & 0xfff); + assert(Base < 16 && "Invalid BDAddr20"); + Inst.addOperand(MCOperand::CreateReg(Base == 0 ? 0 : Regs[Base])); + Inst.addOperand(MCOperand::CreateImm(SignExtend64<20>(Disp))); + return MCDisassembler::Success; +} + +static DecodeStatus decodeBDXAddr12Operand(MCInst &Inst, uint64_t Field, + const unsigned *Regs) { + uint64_t Index = Field >> 16; + uint64_t Base = (Field >> 12) & 0xf; + uint64_t Disp = Field & 0xfff; + assert(Index < 16 && "Invalid BDXAddr12"); + Inst.addOperand(MCOperand::CreateReg(Base == 0 ? 0 : Regs[Base])); + Inst.addOperand(MCOperand::CreateImm(Disp)); + Inst.addOperand(MCOperand::CreateReg(Index == 0 ? 
0 : Regs[Index])); + return MCDisassembler::Success; +} + +static DecodeStatus decodeBDXAddr20Operand(MCInst &Inst, uint64_t Field, + const unsigned *Regs) { + uint64_t Index = Field >> 24; + uint64_t Base = (Field >> 20) & 0xf; + uint64_t Disp = ((Field & 0xfff00) >> 8) | ((Field & 0xff) << 12); + assert(Index < 16 && "Invalid BDXAddr20"); + Inst.addOperand(MCOperand::CreateReg(Base == 0 ? 0 : Regs[Base])); + Inst.addOperand(MCOperand::CreateImm(SignExtend64<20>(Disp))); + Inst.addOperand(MCOperand::CreateReg(Index == 0 ? 0 : Regs[Index])); + return MCDisassembler::Success; +} + +static DecodeStatus decodeBDLAddr12Len8Operand(MCInst &Inst, uint64_t Field, + const unsigned *Regs) { + uint64_t Length = Field >> 16; + uint64_t Base = (Field >> 12) & 0xf; + uint64_t Disp = Field & 0xfff; + assert(Length < 256 && "Invalid BDLAddr12Len8"); + Inst.addOperand(MCOperand::CreateReg(Base == 0 ? 0 : Regs[Base])); + Inst.addOperand(MCOperand::CreateImm(Disp)); + Inst.addOperand(MCOperand::CreateImm(Length + 1)); + return MCDisassembler::Success; +} + +static DecodeStatus decodeBDAddr32Disp12Operand(MCInst &Inst, uint64_t Field, + uint64_t Address, + const void *Decoder) { + return decodeBDAddr12Operand(Inst, Field, SystemZMC::GR32Regs); +} + +static DecodeStatus decodeBDAddr32Disp20Operand(MCInst &Inst, uint64_t Field, + uint64_t Address, + const void *Decoder) { + return decodeBDAddr20Operand(Inst, Field, SystemZMC::GR32Regs); +} + +static DecodeStatus decodeBDAddr64Disp12Operand(MCInst &Inst, uint64_t Field, + uint64_t Address, + const void *Decoder) { + return decodeBDAddr12Operand(Inst, Field, SystemZMC::GR64Regs); +} + +static DecodeStatus decodeBDAddr64Disp20Operand(MCInst &Inst, uint64_t Field, + uint64_t Address, + const void *Decoder) { + return decodeBDAddr20Operand(Inst, Field, SystemZMC::GR64Regs); +} + +static DecodeStatus decodeBDXAddr64Disp12Operand(MCInst &Inst, uint64_t Field, + uint64_t Address, + const void *Decoder) { + return decodeBDXAddr12Operand(Inst, 
Field, SystemZMC::GR64Regs); +} + +static DecodeStatus decodeBDXAddr64Disp20Operand(MCInst &Inst, uint64_t Field, + uint64_t Address, + const void *Decoder) { + return decodeBDXAddr20Operand(Inst, Field, SystemZMC::GR64Regs); +} + +static DecodeStatus decodeBDLAddr64Disp12Len8Operand(MCInst &Inst, + uint64_t Field, + uint64_t Address, + const void *Decoder) { + return decodeBDLAddr12Len8Operand(Inst, Field, SystemZMC::GR64Regs); +} + +#include "SystemZGenDisassemblerTables.inc" + +DecodeStatus SystemZDisassembler::getInstruction(MCInst &MI, uint64_t &Size, + const MemoryObject &Region, + uint64_t Address, + raw_ostream &os, + raw_ostream &cs) const { + // Get the first two bytes of the instruction. + uint8_t Bytes[6]; + Size = 0; + if (Region.readBytes(Address, 2, Bytes) == -1) + return MCDisassembler::Fail; + + // The top 2 bits of the first byte specify the size. + const uint8_t *Table; + if (Bytes[0] < 0x40) { + Size = 2; + Table = DecoderTable16; + } else if (Bytes[0] < 0xc0) { + Size = 4; + Table = DecoderTable32; + } else { + Size = 6; + Table = DecoderTable48; + } + + // Read any remaining bytes. + if (Size > 2 && Region.readBytes(Address + 2, Size - 2, Bytes + 2) == -1) + return MCDisassembler::Fail; + + // Construct the instruction. 
+ uint64_t Inst = 0; + for (uint64_t I = 0; I < Size; ++I) + Inst = (Inst << 8) | Bytes[I]; + + return decodeInstruction(Table, MI, Inst, Address, this, STI); +} diff --git a/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp b/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp index d73cf49..e1e64d3 100644 --- a/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp +++ b/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp @@ -114,10 +114,14 @@ void SystemZInstPrinter::printAccessRegOperand(const MCInst *MI, int OpNum, O << "%a" << (unsigned int)Value; } -void SystemZInstPrinter::printCallOperand(const MCInst *MI, int OpNum, - raw_ostream &O) { - printOperand(MI, OpNum, O); - O << "@PLT"; +void SystemZInstPrinter::printPCRelOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + const MCOperand &MO = MI->getOperand(OpNum); + if (MO.isImm()) { + O << "0x"; + O.write_hex(MO.getImm()); + } else + O << *MO.getExpr(); } void SystemZInstPrinter::printOperand(const MCInst *MI, int OpNum, @@ -138,6 +142,17 @@ void SystemZInstPrinter::printBDXAddrOperand(const MCInst *MI, int OpNum, MI->getOperand(OpNum + 2).getReg(), O); } +void SystemZInstPrinter::printBDLAddrOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + unsigned Base = MI->getOperand(OpNum).getReg(); + uint64_t Disp = MI->getOperand(OpNum + 1).getImm(); + uint64_t Length = MI->getOperand(OpNum + 2).getImm(); + O << Disp << '(' << Length; + if (Base) + O << ",%" << getRegisterName(Base); + O << ')'; +} + void SystemZInstPrinter::printCond4Operand(const MCInst *MI, int OpNum, raw_ostream &O) { static const char *const CondNames[] = { diff --git a/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h b/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h index b82e79d..734ecf0 100644 --- a/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h +++ b/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h @@ 
-48,6 +48,7 @@ private: void printOperand(const MCInst *MI, int OpNum, raw_ostream &O); void printBDAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O); void printBDXAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printBDLAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O); void printU4ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); void printU6ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); void printS8ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); @@ -56,7 +57,7 @@ private: void printU16ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); void printS32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); void printU32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); - void printCallOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printPCRelOperand(const MCInst *MI, int OpNum, raw_ostream &O); void printAccessRegOperand(const MCInst *MI, int OpNum, raw_ostream &O); // Print the mnemonic for a condition-code mask ("ne", "lh", etc.) diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp index e901c6c..26a8fae 100644 --- a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp +++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp @@ -35,16 +35,6 @@ static uint64_t extractBitsForFixup(MCFixupKind Kind, uint64_t Value) { llvm_unreachable("Unknown fixup kind!"); } -// If Opcode can be relaxed, return the relaxed form, otherwise return 0. 
-static unsigned getRelaxedOpcode(unsigned Opcode) { - switch (Opcode) { - case SystemZ::BRC: return SystemZ::BRCL; - case SystemZ::J: return SystemZ::JG; - case SystemZ::BRAS: return SystemZ::BRASL; - } - return 0; -} - namespace { class SystemZMCAsmBackend : public MCAsmBackend { uint8_t OSABI; @@ -60,14 +50,20 @@ public: LLVM_OVERRIDE; virtual void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value) const LLVM_OVERRIDE; - virtual bool mayNeedRelaxation(const MCInst &Inst) const LLVM_OVERRIDE; + virtual bool mayNeedRelaxation(const MCInst &Inst) const LLVM_OVERRIDE { + return false; + } virtual bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, const MCRelaxableFragment *Fragment, const MCAsmLayout &Layout) const - LLVM_OVERRIDE; + LLVM_OVERRIDE { + return false; + } virtual void relaxInstruction(const MCInst &Inst, - MCInst &Res) const LLVM_OVERRIDE; + MCInst &Res) const LLVM_OVERRIDE { + llvm_unreachable("SystemZ does do not have assembler relaxation"); + } virtual bool writeNopData(uint64_t Count, MCObjectWriter *OW) const LLVM_OVERRIDE; virtual MCObjectWriter *createObjectWriter(raw_ostream &OS) const @@ -115,28 +111,6 @@ void SystemZMCAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, } } -bool SystemZMCAsmBackend::mayNeedRelaxation(const MCInst &Inst) const { - return getRelaxedOpcode(Inst.getOpcode()) != 0; -} - -bool -SystemZMCAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, - uint64_t Value, - const MCRelaxableFragment *Fragment, - const MCAsmLayout &Layout) const { - // At the moment we just need to relax 16-bit fields to wider fields. 
- Value = extractBitsForFixup(Fixup.getKind(), Value); - return (int16_t)Value != (int64_t)Value; -} - -void SystemZMCAsmBackend::relaxInstruction(const MCInst &Inst, - MCInst &Res) const { - unsigned Opcode = getRelaxedOpcode(Inst.getOpcode()); - assert(Opcode && "Unexpected insn to relax"); - Res = Inst; - Res.setOpcode(Opcode); -} - bool SystemZMCAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { for (uint64_t I = 0; I != Count; ++I) @@ -144,8 +118,9 @@ bool SystemZMCAsmBackend::writeNopData(uint64_t Count, return true; } -MCAsmBackend *llvm::createSystemZMCAsmBackend(const Target &T, StringRef TT, - StringRef CPU) { +MCAsmBackend *llvm::createSystemZMCAsmBackend(const Target &T, + const MCRegisterInfo &MRI, + StringRef TT, StringRef CPU) { uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS()); return new SystemZMCAsmBackend(OSABI); } diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp index c96a0d4..965c41e 100644 --- a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp +++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp @@ -13,16 +13,14 @@ using namespace llvm; -SystemZMCAsmInfo::SystemZMCAsmInfo(const Target &T, StringRef TT) { +SystemZMCAsmInfo::SystemZMCAsmInfo(StringRef TT) { PointerSize = 8; CalleeSaveStackSlotSize = 8; IsLittleEndian = false; CommentString = "#"; - PCSymbol = "."; GlobalPrefix = ""; PrivateGlobalPrefix = ".L"; - WeakRefDirective = "\t.weak\t"; ZeroDirective = "\t.space\t"; Data64bitsDirective = "\t.quad\t"; UsesELFSectionDirectiveForBSS = true; diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h index bac1bca..b9ac92a 100644 --- a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h +++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h @@ -10,16 +10,15 @@ 
#ifndef SystemZTARGETASMINFO_H #define SystemZTARGETASMINFO_H -#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCAsmInfoELF.h" #include "llvm/Support/Compiler.h" namespace llvm { -class Target; class StringRef; -class SystemZMCAsmInfo : public MCAsmInfo { +class SystemZMCAsmInfo : public MCAsmInfoELF { public: - explicit SystemZMCAsmInfo(const Target &T, StringRef TT); + explicit SystemZMCAsmInfo(StringRef TT); // Override MCAsmInfo; virtual const MCSection *getNonexecutableStackSection(MCContext &Ctx) const diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp index ea2250f..f07ea7b 100644 --- a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp @@ -45,33 +45,40 @@ private: // Called by the TableGen code to get the binary encoding of operand // MO in MI. Fixups is the list of fixups against MI. - unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO, + uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO, SmallVectorImpl<MCFixup> &Fixups) const; + // Called by the TableGen code to get the binary encoding of an address. + // The index or length, if any, is encoded first, followed by the base, + // followed by the displacement. In a 20-bit displacement, + // the low 12 bits are encoded before the high 8 bits. 
+ uint64_t getBDAddr12Encoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups) const; + uint64_t getBDAddr20Encoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups) const; + uint64_t getBDXAddr12Encoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups) const; + uint64_t getBDXAddr20Encoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups) const; + uint64_t getBDLAddr12Len8Encoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups) const; + // Operand OpNum of MI needs a PC-relative fixup of kind Kind at // Offset bytes from the start of MI. Add the fixup to Fixups // and return the in-place addend, which since we're a RELA target // is always 0. - unsigned getPCRelEncoding(const MCInst &MI, unsigned int OpNum, + uint64_t getPCRelEncoding(const MCInst &MI, unsigned OpNum, SmallVectorImpl<MCFixup> &Fixups, unsigned Kind, int64_t Offset) const; - unsigned getPC16DBLEncoding(const MCInst &MI, unsigned int OpNum, + uint64_t getPC16DBLEncoding(const MCInst &MI, unsigned OpNum, SmallVectorImpl<MCFixup> &Fixups) const { return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PC16DBL, 2); } - unsigned getPC32DBLEncoding(const MCInst &MI, unsigned int OpNum, + uint64_t getPC32DBLEncoding(const MCInst &MI, unsigned OpNum, SmallVectorImpl<MCFixup> &Fixups) const { return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PC32DBL, 2); } - unsigned getPLT16DBLEncoding(const MCInst &MI, unsigned int OpNum, - SmallVectorImpl<MCFixup> &Fixups) const { - return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PLT16DBL, 2); - } - unsigned getPLT32DBLEncoding(const MCInst &MI, unsigned int OpNum, - SmallVectorImpl<MCFixup> &Fixups) const { - return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PLT32DBL, 2); - } }; } @@ -95,34 +102,83 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, } } -unsigned SystemZMCCodeEmitter:: +uint64_t SystemZMCCodeEmitter:: 
getMachineOpValue(const MCInst &MI, const MCOperand &MO, SmallVectorImpl<MCFixup> &Fixups) const { if (MO.isReg()) - return Ctx.getRegisterInfo().getEncodingValue(MO.getReg()); + return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg()); if (MO.isImm()) - return static_cast<unsigned>(MO.getImm()); + return static_cast<uint64_t>(MO.getImm()); llvm_unreachable("Unexpected operand type!"); } -unsigned -SystemZMCCodeEmitter::getPCRelEncoding(const MCInst &MI, unsigned int OpNum, +uint64_t SystemZMCCodeEmitter:: +getBDAddr12Encoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups) const { + uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups); + uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups); + assert(isUInt<4>(Base) && isUInt<12>(Disp)); + return (Base << 12) | Disp; +} + +uint64_t SystemZMCCodeEmitter:: +getBDAddr20Encoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups) const { + uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups); + uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups); + assert(isUInt<4>(Base) && isInt<20>(Disp)); + return (Base << 20) | ((Disp & 0xfff) << 8) | ((Disp & 0xff000) >> 12); +} + +uint64_t SystemZMCCodeEmitter:: +getBDXAddr12Encoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups) const { + uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups); + uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups); + uint64_t Index = getMachineOpValue(MI, MI.getOperand(OpNum + 2), Fixups); + assert(isUInt<4>(Base) && isUInt<12>(Disp) && isUInt<4>(Index)); + return (Index << 16) | (Base << 12) | Disp; +} + +uint64_t SystemZMCCodeEmitter:: +getBDXAddr20Encoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups) const { + uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups); + uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups); + 
uint64_t Index = getMachineOpValue(MI, MI.getOperand(OpNum + 2), Fixups); + assert(isUInt<4>(Base) && isInt<20>(Disp) && isUInt<4>(Index)); + return (Index << 24) | (Base << 20) | ((Disp & 0xfff) << 8) + | ((Disp & 0xff000) >> 12); +} + +uint64_t SystemZMCCodeEmitter:: +getBDLAddr12Len8Encoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups) const { + uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups); + uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups); + uint64_t Len = getMachineOpValue(MI, MI.getOperand(OpNum + 2), Fixups) - 1; + assert(isUInt<4>(Base) && isUInt<12>(Disp) && isUInt<8>(Len)); + return (Len << 16) | (Base << 12) | Disp; +} + +uint64_t +SystemZMCCodeEmitter::getPCRelEncoding(const MCInst &MI, unsigned OpNum, SmallVectorImpl<MCFixup> &Fixups, unsigned Kind, int64_t Offset) const { const MCOperand &MO = MI.getOperand(OpNum); - // For compatibility with the GNU assembler, treat constant operands as - // unadjusted PC-relative offsets. + const MCExpr *Expr; if (MO.isImm()) - return MO.getImm() / 2; - - const MCExpr *Expr = MO.getExpr(); - if (Offset) { - // The operand value is relative to the start of MI, but the fixup - // is relative to the operand field itself, which is Offset bytes - // into MI. Add Offset to the relocation value to cancel out - // this difference. - const MCExpr *OffsetExpr = MCConstantExpr::Create(Offset, Ctx); - Expr = MCBinaryExpr::CreateAdd(Expr, OffsetExpr, Ctx); + Expr = MCConstantExpr::Create(MO.getImm() + Offset, Ctx); + else { + Expr = MO.getExpr(); + if (Offset) { + // The operand value is relative to the start of MI, but the fixup + // is relative to the operand field itself, which is Offset bytes + // into MI. Add Offset to the relocation value to cancel out + // this difference. 
+ const MCExpr *OffsetExpr = MCConstantExpr::Create(Offset, Ctx); + Expr = MCBinaryExpr::CreateAdd(Expr, OffsetExpr, Ctx); + } } Fixups.push_back(MCFixup::Create(Offset, Expr, (MCFixupKind)Kind)); return 0; diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp index 49a7f47..9e1296b 100644 --- a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp @@ -27,11 +27,80 @@ using namespace llvm; -static MCAsmInfo *createSystemZMCAsmInfo(const Target &T, StringRef TT) { - MCAsmInfo *MAI = new SystemZMCAsmInfo(T, TT); - MachineLocation FPDst(MachineLocation::VirtualFP); - MachineLocation FPSrc(SystemZ::R15D, -SystemZMC::CFAOffsetFromInitialSP); - MAI->addInitialFrameState(0, FPDst, FPSrc); +const unsigned SystemZMC::GR32Regs[16] = { + SystemZ::R0L, SystemZ::R1L, SystemZ::R2L, SystemZ::R3L, + SystemZ::R4L, SystemZ::R5L, SystemZ::R6L, SystemZ::R7L, + SystemZ::R8L, SystemZ::R9L, SystemZ::R10L, SystemZ::R11L, + SystemZ::R12L, SystemZ::R13L, SystemZ::R14L, SystemZ::R15L +}; + +const unsigned SystemZMC::GRH32Regs[16] = { + SystemZ::R0H, SystemZ::R1H, SystemZ::R2H, SystemZ::R3H, + SystemZ::R4H, SystemZ::R5H, SystemZ::R6H, SystemZ::R7H, + SystemZ::R8H, SystemZ::R9H, SystemZ::R10H, SystemZ::R11H, + SystemZ::R12H, SystemZ::R13H, SystemZ::R14H, SystemZ::R15H +}; + +const unsigned SystemZMC::GR64Regs[16] = { + SystemZ::R0D, SystemZ::R1D, SystemZ::R2D, SystemZ::R3D, + SystemZ::R4D, SystemZ::R5D, SystemZ::R6D, SystemZ::R7D, + SystemZ::R8D, SystemZ::R9D, SystemZ::R10D, SystemZ::R11D, + SystemZ::R12D, SystemZ::R13D, SystemZ::R14D, SystemZ::R15D +}; + +const unsigned SystemZMC::GR128Regs[16] = { + SystemZ::R0Q, 0, SystemZ::R2Q, 0, + SystemZ::R4Q, 0, SystemZ::R6Q, 0, + SystemZ::R8Q, 0, SystemZ::R10Q, 0, + SystemZ::R12Q, 0, SystemZ::R14Q, 0 +}; + +const unsigned SystemZMC::FP32Regs[16] = { + SystemZ::F0S, 
SystemZ::F1S, SystemZ::F2S, SystemZ::F3S, + SystemZ::F4S, SystemZ::F5S, SystemZ::F6S, SystemZ::F7S, + SystemZ::F8S, SystemZ::F9S, SystemZ::F10S, SystemZ::F11S, + SystemZ::F12S, SystemZ::F13S, SystemZ::F14S, SystemZ::F15S +}; + +const unsigned SystemZMC::FP64Regs[16] = { + SystemZ::F0D, SystemZ::F1D, SystemZ::F2D, SystemZ::F3D, + SystemZ::F4D, SystemZ::F5D, SystemZ::F6D, SystemZ::F7D, + SystemZ::F8D, SystemZ::F9D, SystemZ::F10D, SystemZ::F11D, + SystemZ::F12D, SystemZ::F13D, SystemZ::F14D, SystemZ::F15D +}; + +const unsigned SystemZMC::FP128Regs[16] = { + SystemZ::F0Q, SystemZ::F1Q, 0, 0, + SystemZ::F4Q, SystemZ::F5Q, 0, 0, + SystemZ::F8Q, SystemZ::F9Q, 0, 0, + SystemZ::F12Q, SystemZ::F13Q, 0, 0 +}; + +unsigned SystemZMC::getFirstReg(unsigned Reg) { + static unsigned Map[SystemZ::NUM_TARGET_REGS]; + static bool Initialized = false; + if (!Initialized) { + for (unsigned I = 0; I < 16; ++I) { + Map[GR32Regs[I]] = I; + Map[GRH32Regs[I]] = I; + Map[GR64Regs[I]] = I; + Map[GR128Regs[I]] = I; + Map[FP32Regs[I]] = I; + Map[FP64Regs[I]] = I; + Map[FP128Regs[I]] = I; + } + } + assert(Reg < SystemZ::NUM_TARGET_REGS); + return Map[Reg]; +} + +static MCAsmInfo *createSystemZMCAsmInfo(const MCRegisterInfo &MRI, + StringRef TT) { + MCAsmInfo *MAI = new SystemZMCAsmInfo(TT); + MCCFIInstruction Inst = + MCCFIInstruction::createDefCfa(0, MRI.getDwarfRegNum(SystemZ::R15D, true), + SystemZMC::CFAOffsetFromInitialSP); + MAI->addInitialFrameState(Inst); return MAI; } @@ -118,7 +187,7 @@ static MCStreamer *createSystemZMCObjectStreamer(const Target &T, StringRef TT, MCCodeEmitter *Emitter, bool RelaxAll, bool NoExecStack) { - return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack); + return createELFStreamer(Ctx, 0, MAB, OS, Emitter, RelaxAll, NoExecStack); } extern "C" void LLVMInitializeSystemZTargetMC() { diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h index 
229912f..97e325b 100644 --- a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h +++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h @@ -34,6 +34,39 @@ namespace SystemZMC { // The offset of the DWARF CFA from the incoming stack pointer. const int64_t CFAOffsetFromInitialSP = CallFrameSize; + + // Maps of asm register numbers to LLVM register numbers, with 0 indicating + // an invalid register. In principle we could use 32-bit and 64-bit register + // classes directly, provided that we relegated the GPR allocation order + // in SystemZRegisterInfo.td to an AltOrder and left the default order + // as %r0-%r15. It seems better to provide the same interface for + // all classes though. + extern const unsigned GR32Regs[16]; + extern const unsigned GRH32Regs[16]; + extern const unsigned GR64Regs[16]; + extern const unsigned GR128Regs[16]; + extern const unsigned FP32Regs[16]; + extern const unsigned FP64Regs[16]; + extern const unsigned FP128Regs[16]; + + // Return the 0-based number of the first architectural register that + // contains the given LLVM register. E.g. R1D -> 1. + unsigned getFirstReg(unsigned Reg); + + // Return the given register as a GR64. + inline unsigned getRegAsGR64(unsigned Reg) { + return GR64Regs[getFirstReg(Reg)]; + } + + // Return the given register as a low GR32. + inline unsigned getRegAsGR32(unsigned Reg) { + return GR32Regs[getFirstReg(Reg)]; + } + + // Return the given register as a high GR32. 
+ inline unsigned getRegAsGRH32(unsigned Reg) { + return GRH32Regs[getFirstReg(Reg)]; + } } MCCodeEmitter *createSystemZMCCodeEmitter(const MCInstrInfo &MCII, @@ -41,8 +74,9 @@ MCCodeEmitter *createSystemZMCCodeEmitter(const MCInstrInfo &MCII, const MCSubtargetInfo &STI, MCContext &Ctx); -MCAsmBackend *createSystemZMCAsmBackend(const Target &T, StringRef TT, - StringRef CPU); +MCAsmBackend *createSystemZMCAsmBackend(const Target &T, + const MCRegisterInfo &MRI, + StringRef TT, StringRef CPU); MCObjectWriter *createSystemZObjectWriter(raw_ostream &OS, uint8_t OSABI); } // end namespace llvm diff --git a/contrib/llvm/lib/Target/SystemZ/README.txt b/contrib/llvm/lib/Target/SystemZ/README.txt index d1f56a4..afa6cf0 100644 --- a/contrib/llvm/lib/Target/SystemZ/README.txt +++ b/contrib/llvm/lib/Target/SystemZ/README.txt @@ -29,7 +29,7 @@ to load 103. This seems to be a general target-independent problem. -- -The tuning of the choice between Load Address (LA) and addition in +The tuning of the choice between LOAD ADDRESS (LA) and addition in SystemZISelDAGToDAG.cpp is suspect. It should be tweaked based on performance measurements. @@ -39,22 +39,35 @@ There is no scheduling support. -- -We don't use the Branch on Count or Branch on Index families of instruction. +We don't use the BRANCH ON INDEX instructions. -- -We don't use the condition code results of anything except comparisons. +We might want to use BRANCH ON CONDITION for conditional indirect calls +and conditional returns. -Implementing this may need something more finely grained than the z_cmp -and z_ucmp that we have now. It might (or might not) also be useful to -have a mask of "don't care" values in conditional branches. For example, -integer comparisons never set CC to 3, so the bottom bit of the CC mask -isn't particularly relevant. JNLH and JE are equally good for testing -equality after an integer comparison, etc. +-- + +We don't use the TEST DATA CLASS instructions. 
+ +-- + +We could use the generic floating-point forms of LOAD COMPLEMENT, +LOAD NEGATIVE and LOAD POSITIVE in cases where we don't need the +condition codes. For example, we could use LCDFR instead of LCDBR. -- -We don't optimize string and block memory operations. +We only use MVC, XC and CLC for constant-length block operations. +We could extend them to variable-length operations too, +using EXECUTE RELATIVE LONG. + +MVCIN, MVCLE and CLCLE may be worthwhile too. + +-- + +We don't use CUSE or the TRANSLATE family of instructions for string +operations. The TRANSLATE ones are probably more difficult to exploit. -- @@ -63,9 +76,21 @@ conventions require f128s to be returned by invisible reference. -- -DAGCombiner can detect integer absolute, but there's not yet an associated -ISD opcode. We could add one and implement it using Load Positive. -Negated absolutes could use Load Negative. +ADD LOGICAL WITH SIGNED IMMEDIATE could be useful when we need to +produce a carry. SUBTRACT LOGICAL IMMEDIATE could be useful when we +need to produce a borrow. (Note that there are no memory forms of +ADD LOGICAL WITH CARRY and SUBTRACT LOGICAL WITH BORROW, so the high +part of 128-bit memory operations would probably need to be done +via a register.) + +-- + +We don't use the halfword forms of LOAD REVERSED and STORE REVERSED +(LRVH and STRVH). + +-- + +We don't use ICM or STCM. -- @@ -142,5 +167,15 @@ See CodeGen/SystemZ/alloca-01.ll for an example. -- Atomic loads and stores use the default compare-and-swap based implementation. -This is probably much too conservative in practice, and the overhead is -especially bad for 8- and 16-bit accesses. +This is much too conservative in practice, since the architecture guarantees +that 1-, 2-, 4- and 8-byte loads and stores to aligned addresses are +inherently atomic. + +-- + +If needed, we can support 16-byte atomics using LPQ, STPQ and CSDG. + +-- + +We might want to model all access registers and use them to spill +32-bit values. 
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZ.h b/contrib/llvm/lib/Target/SystemZ/SystemZ.h index b811cbe..dcebbad 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZ.h +++ b/contrib/llvm/lib/Target/SystemZ/SystemZ.h @@ -30,16 +30,51 @@ namespace llvm { const unsigned CCMASK_3 = 1 << 0; const unsigned CCMASK_ANY = CCMASK_0 | CCMASK_1 | CCMASK_2 | CCMASK_3; - // Condition-code mask assignments for floating-point comparisons. + // Condition-code mask assignments for integer and floating-point + // comparisons. const unsigned CCMASK_CMP_EQ = CCMASK_0; const unsigned CCMASK_CMP_LT = CCMASK_1; const unsigned CCMASK_CMP_GT = CCMASK_2; - const unsigned CCMASK_CMP_UO = CCMASK_3; const unsigned CCMASK_CMP_NE = CCMASK_CMP_LT | CCMASK_CMP_GT; const unsigned CCMASK_CMP_LE = CCMASK_CMP_EQ | CCMASK_CMP_LT; const unsigned CCMASK_CMP_GE = CCMASK_CMP_EQ | CCMASK_CMP_GT; + + // Condition-code mask assignments for floating-point comparisons only. + const unsigned CCMASK_CMP_UO = CCMASK_3; const unsigned CCMASK_CMP_O = CCMASK_ANY ^ CCMASK_CMP_UO; + // All condition-code values produced by comparisons. + const unsigned CCMASK_ICMP = CCMASK_0 | CCMASK_1 | CCMASK_2; + const unsigned CCMASK_FCMP = CCMASK_0 | CCMASK_1 | CCMASK_2 | CCMASK_3; + + // Condition-code mask assignments for CS. + const unsigned CCMASK_CS_EQ = CCMASK_0; + const unsigned CCMASK_CS_NE = CCMASK_1; + const unsigned CCMASK_CS = CCMASK_0 | CCMASK_1; + + // Condition-code mask assignments for a completed SRST loop. + const unsigned CCMASK_SRST_FOUND = CCMASK_1; + const unsigned CCMASK_SRST_NOTFOUND = CCMASK_2; + const unsigned CCMASK_SRST = CCMASK_1 | CCMASK_2; + + // Condition-code mask assignments for TEST UNDER MASK. 
+ const unsigned CCMASK_TM_ALL_0 = CCMASK_0; + const unsigned CCMASK_TM_MIXED_MSB_0 = CCMASK_1; + const unsigned CCMASK_TM_MIXED_MSB_1 = CCMASK_2; + const unsigned CCMASK_TM_ALL_1 = CCMASK_3; + const unsigned CCMASK_TM_SOME_0 = CCMASK_TM_ALL_1 ^ CCMASK_ANY; + const unsigned CCMASK_TM_SOME_1 = CCMASK_TM_ALL_0 ^ CCMASK_ANY; + const unsigned CCMASK_TM_MSB_0 = CCMASK_0 | CCMASK_1; + const unsigned CCMASK_TM_MSB_1 = CCMASK_2 | CCMASK_3; + const unsigned CCMASK_TM = CCMASK_ANY; + + // The position of the low CC bit in an IPM result. + const unsigned IPM_CC = 28; + + // Mask assignments for PFD. + const unsigned PFD_READ = 1; + const unsigned PFD_WRITE = 2; + // Return true if Val fits an LLILL operand. static inline bool isImmLL(uint64_t Val) { return (Val & ~0x000000000000ffffULL) == 0; @@ -73,5 +108,8 @@ namespace llvm { FunctionPass *createSystemZISelDag(SystemZTargetMachine &TM, CodeGenOpt::Level OptLevel); + FunctionPass *createSystemZElimComparePass(SystemZTargetMachine &TM); + FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM); + FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM); } // end namespace llvm; #endif diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZ.td b/contrib/llvm/lib/Target/SystemZ/SystemZ.td index e03c32f..abf5c8e 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZ.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZ.td @@ -14,13 +14,10 @@ include "llvm/Target/Target.td" //===----------------------------------------------------------------------===// -// SystemZ supported processors +// SystemZ supported processors and features //===----------------------------------------------------------------------===// -class Proc<string Name, list<SubtargetFeature> Features> - : Processor<Name, NoItineraries, Features>; - -def : Proc<"z10", []>; +include "SystemZProcessors.td" //===----------------------------------------------------------------------===// // Register file description diff --git 
a/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp index 1e15ab1..75cbda4 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp @@ -19,16 +19,142 @@ #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInstBuilder.h" #include "llvm/MC/MCStreamer.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/Mangler.h" using namespace llvm; +// Return an RI instruction like MI with opcode Opcode, but with the +// GR64 register operands turned into GR32s. +static MCInst lowerRILow(const MachineInstr *MI, unsigned Opcode) { + if (MI->isCompare()) + return MCInstBuilder(Opcode) + .addReg(SystemZMC::getRegAsGR32(MI->getOperand(0).getReg())) + .addImm(MI->getOperand(1).getImm()); + else + return MCInstBuilder(Opcode) + .addReg(SystemZMC::getRegAsGR32(MI->getOperand(0).getReg())) + .addReg(SystemZMC::getRegAsGR32(MI->getOperand(1).getReg())) + .addImm(MI->getOperand(2).getImm()); +} + +// Return an RI instruction like MI with opcode Opcode, but with the +// GR64 register operands turned into GRH32s. +static MCInst lowerRIHigh(const MachineInstr *MI, unsigned Opcode) { + if (MI->isCompare()) + return MCInstBuilder(Opcode) + .addReg(SystemZMC::getRegAsGRH32(MI->getOperand(0).getReg())) + .addImm(MI->getOperand(1).getImm()); + else + return MCInstBuilder(Opcode) + .addReg(SystemZMC::getRegAsGRH32(MI->getOperand(0).getReg())) + .addReg(SystemZMC::getRegAsGRH32(MI->getOperand(1).getReg())) + .addImm(MI->getOperand(2).getImm()); +} + +// Return an RI instruction like MI with opcode Opcode, but with the +// R2 register turned into a GR64. 
+static MCInst lowerRIEfLow(const MachineInstr *MI, unsigned Opcode) { + return MCInstBuilder(Opcode) + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()) + .addReg(SystemZMC::getRegAsGR64(MI->getOperand(2).getReg())) + .addImm(MI->getOperand(3).getImm()) + .addImm(MI->getOperand(4).getImm()) + .addImm(MI->getOperand(5).getImm()); +} + void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) { - SystemZMCInstLower Lower(Mang, MF->getContext(), *this); + SystemZMCInstLower Lower(MF->getContext(), *this); MCInst LoweredMI; - Lower.lower(MI, LoweredMI); + switch (MI->getOpcode()) { + case SystemZ::Return: + LoweredMI = MCInstBuilder(SystemZ::BR).addReg(SystemZ::R14D); + break; + + case SystemZ::CallBRASL: + LoweredMI = MCInstBuilder(SystemZ::BRASL) + .addReg(SystemZ::R14D) + .addExpr(Lower.getExpr(MI->getOperand(0), MCSymbolRefExpr::VK_PLT)); + break; + + case SystemZ::CallBASR: + LoweredMI = MCInstBuilder(SystemZ::BASR) + .addReg(SystemZ::R14D) + .addReg(MI->getOperand(0).getReg()); + break; + + case SystemZ::CallJG: + LoweredMI = MCInstBuilder(SystemZ::JG) + .addExpr(Lower.getExpr(MI->getOperand(0), MCSymbolRefExpr::VK_PLT)); + break; + + case SystemZ::CallBR: + LoweredMI = MCInstBuilder(SystemZ::BR).addReg(SystemZ::R1D); + break; + + case SystemZ::IILF64: + LoweredMI = MCInstBuilder(SystemZ::IILF) + .addReg(SystemZMC::getRegAsGR32(MI->getOperand(0).getReg())) + .addImm(MI->getOperand(2).getImm()); + break; + + case SystemZ::IIHF64: + LoweredMI = MCInstBuilder(SystemZ::IIHF) + .addReg(SystemZMC::getRegAsGRH32(MI->getOperand(0).getReg())) + .addImm(MI->getOperand(2).getImm()); + break; + + case SystemZ::RISBHH: + case SystemZ::RISBHL: + LoweredMI = lowerRIEfLow(MI, SystemZ::RISBHG); + break; + + case SystemZ::RISBLH: + case SystemZ::RISBLL: + LoweredMI = lowerRIEfLow(MI, SystemZ::RISBLG); + break; + +#define LOWER_LOW(NAME) \ + case SystemZ::NAME##64: LoweredMI = lowerRILow(MI, SystemZ::NAME); break + + LOWER_LOW(IILL); + 
LOWER_LOW(IILH); + LOWER_LOW(TMLL); + LOWER_LOW(TMLH); + LOWER_LOW(NILL); + LOWER_LOW(NILH); + LOWER_LOW(NILF); + LOWER_LOW(OILL); + LOWER_LOW(OILH); + LOWER_LOW(OILF); + LOWER_LOW(XILF); + +#undef LOWER_LOW + +#define LOWER_HIGH(NAME) \ + case SystemZ::NAME##64: LoweredMI = lowerRIHigh(MI, SystemZ::NAME); break + + LOWER_HIGH(IIHL); + LOWER_HIGH(IIHH); + LOWER_HIGH(TMHL); + LOWER_HIGH(TMHH); + LOWER_HIGH(NIHL); + LOWER_HIGH(NIHH); + LOWER_HIGH(NIHF); + LOWER_HIGH(OIHL); + LOWER_HIGH(OIHH); + LOWER_HIGH(OIHF); + LOWER_HIGH(XIHF); + +#undef LOWER_HIGH + + default: + Lower.lower(MI, LoweredMI); + break; + } OutStreamer.EmitInstruction(LoweredMI); } @@ -48,7 +174,7 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { static_cast<SystemZConstantPoolValue*>(MCPV); const MCExpr *Expr = - MCSymbolRefExpr::Create(Mang->getSymbol(ZCPV->getGlobalValue()), + MCSymbolRefExpr::Create(getSymbol(ZCPV->getGlobalValue()), getModifierVariantKind(ZCPV->getModifier()), OutContext); uint64_t Size = TM.getDataLayout()->getTypeAllocSize(ZCPV->getType()); @@ -66,7 +192,7 @@ bool SystemZAsmPrinter::PrintAsmOperand(const MachineInstr *MI, return true; OS << -int64_t(MI->getOperand(OpNo).getImm()); } else { - SystemZMCInstLower Lower(Mang, MF->getContext(), *this); + SystemZMCInstLower Lower(MF->getContext(), *this); MCOperand MO(Lower.lowerOperand(MI->getOperand(OpNo))); SystemZInstPrinter::printOperand(MO, OS); } @@ -100,7 +226,7 @@ void SystemZAsmPrinter::EmitEndOfAsmFile(Module &M) { for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { OutStreamer.EmitLabel(Stubs[i].first); OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(), - TD->getPointerSize(0), 0); + TD->getPointerSize(0)); } Stubs.clear(); } diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.td b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.td index c2d727f..c4f641e 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.td @@ 
-23,7 +23,7 @@ def RetCC_SystemZ : CallingConv<[ // call-clobbered argument registers available for code that doesn't // care about the ABI. (R6 is an argument register too, but is // call-saved and therefore not suitable for return values.) - CCIfType<[i32], CCAssignToReg<[R2W, R3W, R4W, R5W]>>, + CCIfType<[i32], CCAssignToReg<[R2L, R3L, R4L, R5L]>>, CCIfType<[i64], CCAssignToReg<[R2D, R3D, R4D, R5D]>>, // ABI-complaint code returns float and double in F0. Make the @@ -53,7 +53,7 @@ def CC_SystemZ : CallingConv<[ // The first 5 integer arguments are passed in R2-R6. Note that R6 // is call-saved. - CCIfType<[i32], CCAssignToReg<[R2W, R3W, R4W, R5W, R6W]>>, + CCIfType<[i32], CCAssignToReg<[R2L, R3L, R4L, R5L, R6L]>>, CCIfType<[i64], CCAssignToReg<[R2D, R3D, R4D, R5D, R6D]>>, // The first 4 float and double arguments are passed in even registers F0-F6. diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp index e9c4f6d..6c70811 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp @@ -39,7 +39,7 @@ unsigned SystemZConstantPoolValue::getRelocationInfo() const { int SystemZConstantPoolValue:: getExistingMachineCPValue(MachineConstantPool *CP, unsigned Alignment) { unsigned AlignMask = Alignment - 1; - const std::vector<MachineConstantPoolEntry> Constants = CP->getConstants(); + const std::vector<MachineConstantPoolEntry> &Constants = CP->getConstants(); for (unsigned I = 0, E = Constants.size(); I != E; ++I) { if (Constants[I].isMachineConstantPoolEntry() && (Constants[I].getAlignment() & AlignMask) == 0) { diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp new file mode 100644 index 0000000..b8a77db --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp @@ -0,0 +1,471 @@ +//===-- SystemZElimCompare.cpp - 
Eliminate comparison instructions --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass: +// (1) tries to remove compares if CC already contains the required information +// (2) fuses compares and branches into COMPARE AND BRANCH instructions +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "systemz-elim-compare" + +#include "SystemZTargetMachine.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" + +using namespace llvm; + +STATISTIC(BranchOnCounts, "Number of branch-on-count instructions"); +STATISTIC(EliminatedComparisons, "Number of eliminated comparisons"); +STATISTIC(FusedComparisons, "Number of fused compare-and-branch instructions"); + +namespace { + // Represents the references to a particular register in one or more + // instructions. + struct Reference { + Reference() + : Def(false), Use(false), IndirectDef(false), IndirectUse(false) {} + + Reference &operator|=(const Reference &Other) { + Def |= Other.Def; + IndirectDef |= Other.IndirectDef; + Use |= Other.Use; + IndirectUse |= Other.IndirectUse; + return *this; + } + + operator bool() const { return Def || Use; } + + // True if the register is defined or used in some form, either directly or + // via a sub- or super-register. + bool Def; + bool Use; + + // True if the register is defined or used indirectly, by a sub- or + // super-register. 
+ bool IndirectDef; + bool IndirectUse; + }; + + class SystemZElimCompare : public MachineFunctionPass { + public: + static char ID; + SystemZElimCompare(const SystemZTargetMachine &tm) + : MachineFunctionPass(ID), TII(0), TRI(0) {} + + virtual const char *getPassName() const { + return "SystemZ Comparison Elimination"; + } + + bool processBlock(MachineBasicBlock *MBB); + bool runOnMachineFunction(MachineFunction &F); + + private: + Reference getRegReferences(MachineInstr *MI, unsigned Reg); + bool convertToBRCT(MachineInstr *MI, MachineInstr *Compare, + SmallVectorImpl<MachineInstr *> &CCUsers); + bool convertToLoadAndTest(MachineInstr *MI); + bool adjustCCMasksForInstr(MachineInstr *MI, MachineInstr *Compare, + SmallVectorImpl<MachineInstr *> &CCUsers); + bool optimizeCompareZero(MachineInstr *Compare, + SmallVectorImpl<MachineInstr *> &CCUsers); + bool fuseCompareAndBranch(MachineInstr *Compare, + SmallVectorImpl<MachineInstr *> &CCUsers); + + const SystemZInstrInfo *TII; + const TargetRegisterInfo *TRI; + }; + + char SystemZElimCompare::ID = 0; +} // end of anonymous namespace + +FunctionPass *llvm::createSystemZElimComparePass(SystemZTargetMachine &TM) { + return new SystemZElimCompare(TM); +} + +// Return true if CC is live out of MBB. +static bool isCCLiveOut(MachineBasicBlock *MBB) { + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) + if ((*SI)->isLiveIn(SystemZ::CC)) + return true; + return false; +} + +// Return true if any CC result of MI would reflect the value of subreg +// SubReg of Reg. 
+static bool resultTests(MachineInstr *MI, unsigned Reg, unsigned SubReg) { + if (MI->getNumOperands() > 0 && + MI->getOperand(0).isReg() && + MI->getOperand(0).isDef() && + MI->getOperand(0).getReg() == Reg && + MI->getOperand(0).getSubReg() == SubReg) + return true; + + switch (MI->getOpcode()) { + case SystemZ::LR: + case SystemZ::LGR: + case SystemZ::LGFR: + case SystemZ::LTR: + case SystemZ::LTGR: + case SystemZ::LTGFR: + case SystemZ::LER: + case SystemZ::LDR: + case SystemZ::LXR: + case SystemZ::LTEBR: + case SystemZ::LTDBR: + case SystemZ::LTXBR: + if (MI->getOperand(1).getReg() == Reg && + MI->getOperand(1).getSubReg() == SubReg) + return true; + } + + return false; +} + +// Describe the references to Reg in MI, including sub- and super-registers. +Reference SystemZElimCompare::getRegReferences(MachineInstr *MI, unsigned Reg) { + Reference Ref; + for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) { + const MachineOperand &MO = MI->getOperand(I); + if (MO.isReg()) { + if (unsigned MOReg = MO.getReg()) { + if (MOReg == Reg || TRI->regsOverlap(MOReg, Reg)) { + if (MO.isUse()) { + Ref.Use = true; + Ref.IndirectUse |= (MOReg != Reg); + } + if (MO.isDef()) { + Ref.Def = true; + Ref.IndirectDef |= (MOReg != Reg); + } + } + } + } + } + return Ref; +} + +// Compare compares the result of MI against zero. If MI is an addition +// of -1 and if CCUsers is a single branch on nonzero, eliminate the addition +// and convert the branch to a BRCT(G). Return true on success. +bool +SystemZElimCompare::convertToBRCT(MachineInstr *MI, MachineInstr *Compare, + SmallVectorImpl<MachineInstr *> &CCUsers) { + // Check whether we have an addition of -1. + unsigned Opcode = MI->getOpcode(); + unsigned BRCT; + if (Opcode == SystemZ::AHI) + BRCT = SystemZ::BRCT; + else if (Opcode == SystemZ::AGHI) + BRCT = SystemZ::BRCTG; + else + return false; + if (MI->getOperand(2).getImm() != -1) + return false; + + // Check whether we have a single JLH. 
+ if (CCUsers.size() != 1) + return false; + MachineInstr *Branch = CCUsers[0]; + if (Branch->getOpcode() != SystemZ::BRC || + Branch->getOperand(0).getImm() != SystemZ::CCMASK_ICMP || + Branch->getOperand(1).getImm() != SystemZ::CCMASK_CMP_NE) + return false; + + // We already know that there are no references to the register between + // MI and Compare. Make sure that there are also no references between + // Compare and Branch. + unsigned SrcReg = Compare->getOperand(0).getReg(); + MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch; + for (++MBBI; MBBI != MBBE; ++MBBI) + if (getRegReferences(MBBI, SrcReg)) + return false; + + // The transformation is OK. Rebuild Branch as a BRCT(G). + MachineOperand Target(Branch->getOperand(2)); + Branch->RemoveOperand(2); + Branch->RemoveOperand(1); + Branch->RemoveOperand(0); + Branch->setDesc(TII->get(BRCT)); + MachineInstrBuilder(*Branch->getParent()->getParent(), Branch) + .addOperand(MI->getOperand(0)) + .addOperand(MI->getOperand(1)) + .addOperand(Target) + .addReg(SystemZ::CC, RegState::ImplicitDefine); + MI->removeFromParent(); + return true; +} + +// If MI is a load instruction, try to convert it into a LOAD AND TEST. +// Return true on success. +bool SystemZElimCompare::convertToLoadAndTest(MachineInstr *MI) { + unsigned Opcode = TII->getLoadAndTest(MI->getOpcode()); + if (!Opcode) + return false; + + MI->setDesc(TII->get(Opcode)); + MachineInstrBuilder(*MI->getParent()->getParent(), MI) + .addReg(SystemZ::CC, RegState::ImplicitDefine); + return true; +} + +// The CC users in CCUsers are testing the result of a comparison of some +// value X against zero and we know that any CC value produced by MI +// would also reflect the value of X. Try to adjust CCUsers so that +// they test the result of MI directly, returning true on success. +// Leave everything unchanged on failure. 
+bool SystemZElimCompare:: +adjustCCMasksForInstr(MachineInstr *MI, MachineInstr *Compare, + SmallVectorImpl<MachineInstr *> &CCUsers) { + int Opcode = MI->getOpcode(); + const MCInstrDesc &Desc = TII->get(Opcode); + unsigned MIFlags = Desc.TSFlags; + + // See which compare-style condition codes are available. + unsigned ReusableCCMask = SystemZII::getCompareZeroCCMask(MIFlags); + + // For unsigned comparisons with zero, only equality makes sense. + unsigned CompareFlags = Compare->getDesc().TSFlags; + if (CompareFlags & SystemZII::IsLogical) + ReusableCCMask &= SystemZ::CCMASK_CMP_EQ; + + if (ReusableCCMask == 0) + return false; + + unsigned CCValues = SystemZII::getCCValues(MIFlags); + assert((ReusableCCMask & ~CCValues) == 0 && "Invalid CCValues"); + + // Now check whether these flags are enough for all users. + SmallVector<MachineOperand *, 4> AlterMasks; + for (unsigned int I = 0, E = CCUsers.size(); I != E; ++I) { + MachineInstr *MI = CCUsers[I]; + + // Fail if this isn't a use of CC that we understand. + unsigned Flags = MI->getDesc().TSFlags; + unsigned FirstOpNum; + if (Flags & SystemZII::CCMaskFirst) + FirstOpNum = 0; + else if (Flags & SystemZII::CCMaskLast) + FirstOpNum = MI->getNumExplicitOperands() - 2; + else + return false; + + // Check whether the instruction predicate treats all CC values + // outside of ReusableCCMask in the same way. In that case it + // doesn't matter what those CC values mean. + unsigned CCValid = MI->getOperand(FirstOpNum).getImm(); + unsigned CCMask = MI->getOperand(FirstOpNum + 1).getImm(); + unsigned OutValid = ~ReusableCCMask & CCValid; + unsigned OutMask = ~ReusableCCMask & CCMask; + if (OutMask != 0 && OutMask != OutValid) + return false; + + AlterMasks.push_back(&MI->getOperand(FirstOpNum)); + AlterMasks.push_back(&MI->getOperand(FirstOpNum + 1)); + } + + // All users are OK. Adjust the masks for MI. 
+ for (unsigned I = 0, E = AlterMasks.size(); I != E; I += 2) { + AlterMasks[I]->setImm(CCValues); + unsigned CCMask = AlterMasks[I + 1]->getImm(); + if (CCMask & ~ReusableCCMask) + AlterMasks[I + 1]->setImm((CCMask & ReusableCCMask) | + (CCValues & ~ReusableCCMask)); + } + + // CC is now live after MI. + int CCDef = MI->findRegisterDefOperandIdx(SystemZ::CC, false, true, TRI); + assert(CCDef >= 0 && "Couldn't find CC set"); + MI->getOperand(CCDef).setIsDead(false); + + // Clear any intervening kills of CC. + MachineBasicBlock::iterator MBBI = MI, MBBE = Compare; + for (++MBBI; MBBI != MBBE; ++MBBI) + MBBI->clearRegisterKills(SystemZ::CC, TRI); + + return true; +} + +// Return true if Compare is a comparison against zero. +static bool isCompareZero(MachineInstr *Compare) { + switch (Compare->getOpcode()) { + case SystemZ::LTEBRCompare: + case SystemZ::LTDBRCompare: + case SystemZ::LTXBRCompare: + return true; + + default: + return (Compare->getNumExplicitOperands() == 2 && + Compare->getOperand(1).isImm() && + Compare->getOperand(1).getImm() == 0); + } +} + +// Try to optimize cases where comparison instruction Compare is testing +// a value against zero. Return true on success and if Compare should be +// deleted as dead. CCUsers is the list of instructions that use the CC +// value produced by Compare. +bool SystemZElimCompare:: +optimizeCompareZero(MachineInstr *Compare, + SmallVectorImpl<MachineInstr *> &CCUsers) { + if (!isCompareZero(Compare)) + return false; + + // Search back for CC results that are based on the first operand. 
+ unsigned SrcReg = Compare->getOperand(0).getReg(); + unsigned SrcSubReg = Compare->getOperand(0).getSubReg(); + MachineBasicBlock *MBB = Compare->getParent(); + MachineBasicBlock::iterator MBBI = Compare, MBBE = MBB->begin(); + Reference CCRefs; + Reference SrcRefs; + while (MBBI != MBBE) { + --MBBI; + MachineInstr *MI = MBBI; + if (resultTests(MI, SrcReg, SrcSubReg)) { + // Try to remove both MI and Compare by converting a branch to BRCT(G). + // We don't care in this case whether CC is modified between MI and + // Compare. + if (!CCRefs.Use && !SrcRefs && convertToBRCT(MI, Compare, CCUsers)) { + BranchOnCounts += 1; + return true; + } + // Try to eliminate Compare by reusing a CC result from MI. + if ((!CCRefs && convertToLoadAndTest(MI)) || + (!CCRefs.Def && adjustCCMasksForInstr(MI, Compare, CCUsers))) { + EliminatedComparisons += 1; + return true; + } + } + SrcRefs |= getRegReferences(MI, SrcReg); + if (SrcRefs.Def) + return false; + CCRefs |= getRegReferences(MI, SystemZ::CC); + if (CCRefs.Use && CCRefs.Def) + return false; + } + return false; +} + +// Try to fuse comparison instruction Compare into a later branch. +// Return true on success and if Compare is therefore redundant. +bool SystemZElimCompare:: +fuseCompareAndBranch(MachineInstr *Compare, + SmallVectorImpl<MachineInstr *> &CCUsers) { + // See whether we have a comparison that can be fused. + unsigned FusedOpcode = TII->getCompareAndBranch(Compare->getOpcode(), + Compare); + if (!FusedOpcode) + return false; + + // See whether we have a single branch with which to fuse. + if (CCUsers.size() != 1) + return false; + MachineInstr *Branch = CCUsers[0]; + if (Branch->getOpcode() != SystemZ::BRC) + return false; + + // Make sure that the operands are available at the branch. + unsigned SrcReg = Compare->getOperand(0).getReg(); + unsigned SrcReg2 = (Compare->getOperand(1).isReg() ? 
+ Compare->getOperand(1).getReg() : 0); + MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch; + for (++MBBI; MBBI != MBBE; ++MBBI) + if (MBBI->modifiesRegister(SrcReg, TRI) || + (SrcReg2 && MBBI->modifiesRegister(SrcReg2, TRI))) + return false; + + // Read the branch mask and target. + MachineOperand CCMask(MBBI->getOperand(1)); + MachineOperand Target(MBBI->getOperand(2)); + assert((CCMask.getImm() & ~SystemZ::CCMASK_ICMP) == 0 && + "Invalid condition-code mask for integer comparison"); + + // Clear out all current operands. + int CCUse = MBBI->findRegisterUseOperandIdx(SystemZ::CC, false, TRI); + assert(CCUse >= 0 && "BRC must use CC"); + Branch->RemoveOperand(CCUse); + Branch->RemoveOperand(2); + Branch->RemoveOperand(1); + Branch->RemoveOperand(0); + + // Rebuild Branch as a fused compare and branch. + Branch->setDesc(TII->get(FusedOpcode)); + MachineInstrBuilder(*Branch->getParent()->getParent(), Branch) + .addOperand(Compare->getOperand(0)) + .addOperand(Compare->getOperand(1)) + .addOperand(CCMask) + .addOperand(Target) + .addReg(SystemZ::CC, RegState::ImplicitDefine); + + // Clear any intervening kills of SrcReg and SrcReg2. + MBBI = Compare; + for (++MBBI; MBBI != MBBE; ++MBBI) { + MBBI->clearRegisterKills(SrcReg, TRI); + if (SrcReg2) + MBBI->clearRegisterKills(SrcReg2, TRI); + } + FusedComparisons += 1; + return true; +} + +// Process all comparison instructions in MBB. Return true if something +// changed. +bool SystemZElimCompare::processBlock(MachineBasicBlock *MBB) { + bool Changed = false; + + // Walk backwards through the block looking for comparisons, recording + // all CC users as we go. The subroutines can delete Compare and + // instructions before it. 
+ bool CompleteCCUsers = !isCCLiveOut(MBB); + SmallVector<MachineInstr *, 4> CCUsers; + MachineBasicBlock::iterator MBBI = MBB->end(); + while (MBBI != MBB->begin()) { + MachineInstr *MI = --MBBI; + if (CompleteCCUsers && + MI->isCompare() && + (optimizeCompareZero(MI, CCUsers) || + fuseCompareAndBranch(MI, CCUsers))) { + ++MBBI; + MI->removeFromParent(); + Changed = true; + CCUsers.clear(); + CompleteCCUsers = true; + continue; + } + + Reference CCRefs(getRegReferences(MI, SystemZ::CC)); + if (CCRefs.Def) { + CCUsers.clear(); + CompleteCCUsers = !CCRefs.IndirectDef; + } + if (CompleteCCUsers && CCRefs.Use) + CCUsers.push_back(MI); + } + return Changed; +} + +bool SystemZElimCompare::runOnMachineFunction(MachineFunction &F) { + TII = static_cast<const SystemZInstrInfo *>(F.getTarget().getInstrInfo()); + TRI = &TII->getRegisterInfo(); + + bool Changed = false; + for (MachineFunction::iterator MFI = F.begin(), MFE = F.end(); + MFI != MFE; ++MFI) + Changed |= processBlock(MFI); + + return Changed; +} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp index fda33de..acfb491 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -14,19 +14,15 @@ #include "SystemZTargetMachine.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/IR/Function.h" using namespace llvm; -SystemZFrameLowering::SystemZFrameLowering(const SystemZTargetMachine &tm, - const SystemZSubtarget &sti) - : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8, - -SystemZMC::CallFrameSize), - TM(tm), - STI(sti) { +namespace { // The ABI-defined register save slots, relative to the incoming stack // pointer. 
- static const unsigned SpillOffsetTable[][2] = { + static const TargetFrameLowering::SpillSlot SpillOffsetTable[] = { { SystemZ::R2D, 0x10 }, { SystemZ::R3D, 0x18 }, { SystemZ::R4D, 0x20 }, @@ -46,11 +42,23 @@ SystemZFrameLowering::SystemZFrameLowering(const SystemZTargetMachine &tm, { SystemZ::F4D, 0x90 }, { SystemZ::F6D, 0x98 } }; +} +SystemZFrameLowering::SystemZFrameLowering(const SystemZTargetMachine &tm, + const SystemZSubtarget &sti) + : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8, + -SystemZMC::CallFrameSize, 8), + TM(tm), STI(sti) { // Create a mapping from register number to save slot offset. RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS); for (unsigned I = 0, E = array_lengthof(SpillOffsetTable); I != E; ++I) - RegSpillOffsets[SpillOffsetTable[I][0]] = SpillOffsetTable[I][1]; + RegSpillOffsets[SpillOffsetTable[I].Reg] = SpillOffsetTable[I].Offset; +} + +const TargetFrameLowering::SpillSlot * +SystemZFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const { + NumEntries = array_lengthof(SpillOffsetTable); + return SpillOffsetTable; } void SystemZFrameLowering:: @@ -103,7 +111,7 @@ static void addSavedGPR(MachineBasicBlock &MBB, MachineInstrBuilder &MIB, const SystemZTargetMachine &TM, unsigned GPR64, bool IsImplicit) { const SystemZRegisterInfo *RI = TM.getRegisterInfo(); - unsigned GPR32 = RI->getSubReg(GPR64, SystemZ::subreg_32bit); + unsigned GPR32 = RI->getSubReg(GPR64, SystemZ::subreg_l32); bool IsLive = MBB.isLiveIn(GPR64) || MBB.isLiveIn(GPR32); if (!IsLive || !IsImplicit) { MIB.addReg(GPR64, getImplRegState(IsImplicit) | getKillRegState(!IsLive)); @@ -127,14 +135,12 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); // Scan the call-saved GPRs and find the bounds of the register spill area. 
- unsigned SavedGPRFrameSize = 0; unsigned LowGPR = 0; unsigned HighGPR = SystemZ::R15D; unsigned StartOffset = -1U; for (unsigned I = 0, E = CSI.size(); I != E; ++I) { unsigned Reg = CSI[I].getReg(); if (SystemZ::GR64BitRegClass.contains(Reg)) { - SavedGPRFrameSize += 8; unsigned Offset = RegSpillOffsets[Reg]; assert(Offset && "Unexpected GPR save"); if (StartOffset > Offset) { @@ -144,9 +150,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, } } - // Save information about the range and location of the call-saved - // registers, for use by the epilogue inserter. - ZFI->setSavedGPRFrameSize(SavedGPRFrameSize); + // Save the range of call-saved registers, for use by the epilogue inserter. ZFI->setLowSavedGPR(LowGPR); ZFI->setHighSavedGPR(HighGPR); @@ -260,6 +264,22 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, return true; } +void SystemZFrameLowering:: +processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS) const { + MachineFrameInfo *MFFrame = MF.getFrameInfo(); + uint64_t MaxReach = (MFFrame->estimateStackSize(MF) + + SystemZMC::CallFrameSize * 2); + if (!isUInt<12>(MaxReach)) { + // We may need register scavenging slots if some parts of the frame + // are outside the reach of an unsigned 12-bit displacement. + // Create 2 for the case where both addresses in an MVC are + // out of range. + RS->addScavengingFrameIndex(MFFrame->CreateStackObject(8, 8, false)); + RS->addScavengingFrameIndex(MFFrame->CreateStackObject(8, 8, false)); + } +} + // Emit instructions before MBBI (in MBB) to add NumBytes to Reg. static void emitIncrement(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, @@ -283,7 +303,7 @@ static void emitIncrement(MachineBasicBlock &MBB, } MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII->get(Opcode), Reg) .addReg(Reg).addImm(ThisVal); - // The PSW implicit def is dead. + // The CC implicit def is dead. 
MI->getOperand(3).setIsDead(); NumBytes -= ThisVal; } @@ -297,7 +317,7 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF) const { SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>(); MachineBasicBlock::iterator MBBI = MBB.begin(); MachineModuleInfo &MMI = MF.getMMI(); - std::vector<MachineMove> &Moves = MMI.getFrameMoves(); + const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); const std::vector<CalleeSavedInfo> &CSI = MFFrame->getCalleeSavedInfo(); bool HasFP = hasFP(MF); DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); @@ -321,9 +341,8 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF) const { unsigned Reg = I->getReg(); if (SystemZ::GR64BitRegClass.contains(Reg)) { int64_t Offset = SPOffsetFromCFA + RegSpillOffsets[Reg]; - MachineLocation StackSlot(MachineLocation::VirtualFP, Offset); - MachineLocation RegValue(Reg); - Moves.push_back(MachineMove(GPRSaveLabel, StackSlot, RegValue)); + MMI.addFrameInst(MCCFIInstruction::createOffset( + GPRSaveLabel, MRI->getDwarfRegNum(Reg, true), Offset)); } } } @@ -338,9 +357,8 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF) const { MCSymbol *AdjustSPLabel = MMI.getContext().CreateTempSymbol(); BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::PROLOG_LABEL)) .addSym(AdjustSPLabel); - MachineLocation FPDest(MachineLocation::VirtualFP); - MachineLocation FPSrc(MachineLocation::VirtualFP, SPOffsetFromCFA + Delta); - Moves.push_back(MachineMove(AdjustSPLabel, FPDest, FPSrc)); + MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset( + AdjustSPLabel, SPOffsetFromCFA + Delta)); SPOffsetFromCFA += Delta; } @@ -353,9 +371,9 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF) const { MCSymbol *SetFPLabel = MMI.getContext().CreateTempSymbol(); BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::PROLOG_LABEL)) .addSym(SetFPLabel); - MachineLocation HardFP(SystemZ::R11D); - MachineLocation VirtualFP(MachineLocation::VirtualFP); - 
Moves.push_back(MachineMove(SetFPLabel, HardFP, VirtualFP)); + unsigned HardFP = MRI->getDwarfRegNum(SystemZ::R11D, true); + MMI.addFrameInst( + MCCFIInstruction::createDefCfaRegister(SetFPLabel, HardFP)); // Mark the FramePtr as live at the beginning of every block except // the entry block. (We'll have marked R11 as live on entry when @@ -381,12 +399,10 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF) const { // Add CFI for the this save. if (!FPRSaveLabel) FPRSaveLabel = MMI.getContext().CreateTempSymbol(); - unsigned Reg = I->getReg(); + unsigned Reg = MRI->getDwarfRegNum(I->getReg(), true); int64_t Offset = getFrameIndexOffset(MF, I->getFrameIdx()); - MachineLocation Slot(MachineLocation::VirtualFP, - SPOffsetFromCFA + Offset); - MachineLocation RegValue(Reg); - Moves.push_back(MachineMove(FPRSaveLabel, Slot, RegValue)); + MMI.addFrameInst(MCCFIInstruction::createOffset( + FPRSaveLabel, Reg, SPOffsetFromCFA + Offset)); } } // Complete the CFI for the FPR saves, modelling them as taking effect @@ -404,8 +420,7 @@ void SystemZFrameLowering::emitEpilogue(MachineFunction &MF, SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>(); // Skip the return instruction. - assert(MBBI->getOpcode() == SystemZ::RET && - "Can only insert epilogue into returning blocks"); + assert(MBBI->isReturn() && "Can only insert epilogue into returning blocks"); uint64_t StackSize = getAllocatedStackSize(MF); if (ZFI->getLowSavedGPR()) { @@ -453,11 +468,6 @@ int SystemZFrameLowering::getFrameIndexOffset(const MachineFunction &MF, // offset is therefore negative. int64_t Offset = (MFFrame->getObjectOffset(FI) + MFFrame->getOffsetAdjustment()); - if (FI >= 0) - // Non-fixed objects are allocated below the incoming stack pointer. - // Account for the space at the top of the frame that we choose not - // to allocate. - Offset += getUnallocatedTopBytes(MF); // Make the offset relative to the incoming stack pointer. 
Offset -= getOffsetOfLocalArea(); @@ -469,23 +479,12 @@ int SystemZFrameLowering::getFrameIndexOffset(const MachineFunction &MF, } uint64_t SystemZFrameLowering:: -getUnallocatedTopBytes(const MachineFunction &MF) const { - return MF.getInfo<SystemZMachineFunctionInfo>()->getSavedGPRFrameSize(); -} - -uint64_t SystemZFrameLowering:: getAllocatedStackSize(const MachineFunction &MF) const { const MachineFrameInfo *MFFrame = MF.getFrameInfo(); // Start with the size of the local variables and spill slots. uint64_t StackSize = MFFrame->getStackSize(); - // Remove any bytes that we choose not to allocate. - StackSize -= getUnallocatedTopBytes(MF); - - // Include space for an emergency spill slot, if one might be needed. - StackSize += getEmergencySpillSlotSize(MF); - // We need to allocate the ABI-defined 160-byte base area whenever // we allocate stack space for our own use and whenever we call another // function. @@ -495,19 +494,6 @@ getAllocatedStackSize(const MachineFunction &MF) const { return StackSize; } -unsigned SystemZFrameLowering:: -getEmergencySpillSlotSize(const MachineFunction &MF) const { - const MachineFrameInfo *MFFrame = MF.getFrameInfo(); - uint64_t MaxReach = MFFrame->getStackSize() + SystemZMC::CallFrameSize * 2; - return isUInt<12>(MaxReach) ? 
0 : 8; -} - -unsigned SystemZFrameLowering:: -getEmergencySpillSlotOffset(const MachineFunction &MF) const { - assert(getEmergencySpillSlotSize(MF) && "No emergency spill slot"); - return SystemZMC::CallFrameSize; -} - bool SystemZFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { // The ABI requires us to allocate 160 bytes of stack space for the callee, diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h index 5ca049c..9b0a1d5 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h +++ b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h @@ -29,7 +29,10 @@ public: SystemZFrameLowering(const SystemZTargetMachine &tm, const SystemZSubtarget &sti); - // Override FrameLowering. + // Override TargetFrameLowering. + virtual bool isFPCloseToIncomingSP() const LLVM_OVERRIDE { return false; } + virtual const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries) const + LLVM_OVERRIDE; virtual void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const LLVM_OVERRIDE; @@ -45,6 +48,8 @@ public: const std::vector<CalleeSavedInfo> &CSI, const TargetRegisterInfo *TRI) const LLVM_OVERRIDE; + virtual void processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS) const; virtual void emitPrologue(MachineFunction &MF) const LLVM_OVERRIDE; virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const LLVM_OVERRIDE; @@ -59,29 +64,9 @@ public: MachineBasicBlock::iterator MI) const LLVM_OVERRIDE; - // The target-independent code automatically allocates save slots for - // call-saved GPRs. However, we don't need those slots for SystemZ, - // because the ABI sets aside GPR save slots in the caller-allocated part - // of the frame. Since the target-independent code puts this unneeded - // area at the top of the callee-allocated part of frame, we choose not - // to allocate it and adjust the offsets accordingly. 
Return the - // size of this unallocated area. - // FIXME: seems a bit hackish. - uint64_t getUnallocatedTopBytes(const MachineFunction &MF) const; - // Return the number of bytes in the callee-allocated part of the frame. uint64_t getAllocatedStackSize(const MachineFunction &MF) const; - // Return the number of frame bytes that should be reserved for - // an emergency spill slot, for use by the register scaveneger. - // Return 0 if register scaveging won't be needed. - unsigned getEmergencySpillSlotSize(const MachineFunction &MF) const; - - // Return the offset from the frame pointer of the emergency spill slot, - // which always fits within a 12-bit unsigned displacement field. - // Only valid if getEmergencySpillSlotSize(MF) returns nonzero. - unsigned getEmergencySpillSlotOffset(const MachineFunction &MF) const; - // Return the byte offset from the incoming stack pointer of Reg's // ABI-defined save slot. Return 0 if no slot is defined for Reg. unsigned getRegSpillOffset(unsigned Reg) const { diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index 442f0c4..f4a2773 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "SystemZTargetMachine.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -91,44 +92,90 @@ struct SystemZAddressingMode { } }; +// Return a mask with Count low bits set. +static uint64_t allOnes(unsigned int Count) { + return Count == 0 ? 0 : (uint64_t(1) << (Count - 1) << 1) - 1; +} + +// Represents operands 2 to 5 of the ROTATE AND ... SELECTED BITS operation +// given by Opcode. The operands are: Input (R2), Start (I3), End (I4) and +// Rotate (I5). 
The combined operand value is effectively: +// +// (or (rotl Input, Rotate), ~Mask) +// +// for RNSBG and: +// +// (and (rotl Input, Rotate), Mask) +// +// otherwise. The output value has BitSize bits, although Input may be +// narrower (in which case the upper bits are don't care). +struct RxSBGOperands { + RxSBGOperands(unsigned Op, SDValue N) + : Opcode(Op), BitSize(N.getValueType().getSizeInBits()), + Mask(allOnes(BitSize)), Input(N), Start(64 - BitSize), End(63), + Rotate(0) {} + + unsigned Opcode; + unsigned BitSize; + uint64_t Mask; + SDValue Input; + unsigned Start; + unsigned End; + unsigned Rotate; +}; + class SystemZDAGToDAGISel : public SelectionDAGISel { const SystemZTargetLowering &Lowering; const SystemZSubtarget &Subtarget; // Used by SystemZOperands.td to create integer constants. - inline SDValue getImm(const SDNode *Node, uint64_t Imm) { + inline SDValue getImm(const SDNode *Node, uint64_t Imm) const { return CurDAG->getTargetConstant(Imm, Node->getValueType(0)); } + const SystemZTargetMachine &getTargetMachine() const { + return static_cast<const SystemZTargetMachine &>(TM); + } + + const SystemZInstrInfo *getInstrInfo() const { + return getTargetMachine().getInstrInfo(); + } + // Try to fold more of the base or index of AM into AM, where IsBase // selects between the base and index. - bool expandAddress(SystemZAddressingMode &AM, bool IsBase); + bool expandAddress(SystemZAddressingMode &AM, bool IsBase) const; // Try to describe N in AM, returning true on success. - bool selectAddress(SDValue N, SystemZAddressingMode &AM); + bool selectAddress(SDValue N, SystemZAddressingMode &AM) const; // Extract individual target operands from matched address AM. 
void getAddressOperands(const SystemZAddressingMode &AM, EVT VT, - SDValue &Base, SDValue &Disp); + SDValue &Base, SDValue &Disp) const; void getAddressOperands(const SystemZAddressingMode &AM, EVT VT, - SDValue &Base, SDValue &Disp, SDValue &Index); + SDValue &Base, SDValue &Disp, SDValue &Index) const; // Try to match Addr as a FormBD address with displacement type DR. // Return true on success, storing the base and displacement in // Base and Disp respectively. bool selectBDAddr(SystemZAddressingMode::DispRange DR, SDValue Addr, - SDValue &Base, SDValue &Disp); + SDValue &Base, SDValue &Disp) const; + + // Try to match Addr as a FormBDX address with displacement type DR. + // Return true on success and if the result had no index. Store the + // base and displacement in Base and Disp respectively. + bool selectMVIAddr(SystemZAddressingMode::DispRange DR, SDValue Addr, + SDValue &Base, SDValue &Disp) const; // Try to match Addr as a FormBDX* address of form Form with // displacement type DR. Return true on success, storing the base, // displacement and index in Base, Disp and Index respectively. bool selectBDXAddr(SystemZAddressingMode::AddrForm Form, SystemZAddressingMode::DispRange DR, SDValue Addr, - SDValue &Base, SDValue &Disp, SDValue &Index); + SDValue &Base, SDValue &Disp, SDValue &Index) const; // PC-relative address matching routines used by SystemZOperands.td. - bool selectPCRelAddress(SDValue Addr, SDValue &Target) { - if (Addr.getOpcode() == SystemZISD::PCREL_WRAPPER) { + bool selectPCRelAddress(SDValue Addr, SDValue &Target) const { + if (SystemZISD::isPCREL(Addr.getOpcode())) { Target = Addr.getOperand(0); return true; } @@ -136,69 +183,104 @@ class SystemZDAGToDAGISel : public SelectionDAGISel { } // BD matching routines used by SystemZOperands.td. 
- bool selectBDAddr12Only(SDValue Addr, SDValue &Base, SDValue &Disp) { + bool selectBDAddr12Only(SDValue Addr, SDValue &Base, SDValue &Disp) const { return selectBDAddr(SystemZAddressingMode::Disp12Only, Addr, Base, Disp); } - bool selectBDAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp) { + bool selectBDAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp) const { return selectBDAddr(SystemZAddressingMode::Disp12Pair, Addr, Base, Disp); } - bool selectBDAddr20Only(SDValue Addr, SDValue &Base, SDValue &Disp) { + bool selectBDAddr20Only(SDValue Addr, SDValue &Base, SDValue &Disp) const { return selectBDAddr(SystemZAddressingMode::Disp20Only, Addr, Base, Disp); } - bool selectBDAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp) { + bool selectBDAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp) const { return selectBDAddr(SystemZAddressingMode::Disp20Pair, Addr, Base, Disp); } + // MVI matching routines used by SystemZOperands.td. + bool selectMVIAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp) const { + return selectMVIAddr(SystemZAddressingMode::Disp12Pair, Addr, Base, Disp); + } + bool selectMVIAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp) const { + return selectMVIAddr(SystemZAddressingMode::Disp20Pair, Addr, Base, Disp); + } + // BDX matching routines used by SystemZOperands.td. 
bool selectBDXAddr12Only(SDValue Addr, SDValue &Base, SDValue &Disp, - SDValue &Index) { + SDValue &Index) const { return selectBDXAddr(SystemZAddressingMode::FormBDXNormal, SystemZAddressingMode::Disp12Only, Addr, Base, Disp, Index); } bool selectBDXAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp, - SDValue &Index) { + SDValue &Index) const { return selectBDXAddr(SystemZAddressingMode::FormBDXNormal, SystemZAddressingMode::Disp12Pair, Addr, Base, Disp, Index); } bool selectDynAlloc12Only(SDValue Addr, SDValue &Base, SDValue &Disp, - SDValue &Index) { + SDValue &Index) const { return selectBDXAddr(SystemZAddressingMode::FormBDXDynAlloc, SystemZAddressingMode::Disp12Only, Addr, Base, Disp, Index); } bool selectBDXAddr20Only(SDValue Addr, SDValue &Base, SDValue &Disp, - SDValue &Index) { + SDValue &Index) const { return selectBDXAddr(SystemZAddressingMode::FormBDXNormal, SystemZAddressingMode::Disp20Only, Addr, Base, Disp, Index); } bool selectBDXAddr20Only128(SDValue Addr, SDValue &Base, SDValue &Disp, - SDValue &Index) { + SDValue &Index) const { return selectBDXAddr(SystemZAddressingMode::FormBDXNormal, SystemZAddressingMode::Disp20Only128, Addr, Base, Disp, Index); } bool selectBDXAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp, - SDValue &Index) { + SDValue &Index) const { return selectBDXAddr(SystemZAddressingMode::FormBDXNormal, SystemZAddressingMode::Disp20Pair, Addr, Base, Disp, Index); } bool selectLAAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp, - SDValue &Index) { + SDValue &Index) const { return selectBDXAddr(SystemZAddressingMode::FormBDXLA, SystemZAddressingMode::Disp12Pair, Addr, Base, Disp, Index); } bool selectLAAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp, - SDValue &Index) { + SDValue &Index) const { return selectBDXAddr(SystemZAddressingMode::FormBDXLA, SystemZAddressingMode::Disp20Pair, Addr, Base, Disp, Index); } + // Check whether (or Op (and X InsertMask)) is effectively an insertion + // of X into bits InsertMask 
of some Y != Op. Return true if so and + // set Op to that Y. + bool detectOrAndInsertion(SDValue &Op, uint64_t InsertMask) const; + + // Try to update RxSBG so that only the bits of RxSBG.Input in Mask are used. + // Return true on success. + bool refineRxSBGMask(RxSBGOperands &RxSBG, uint64_t Mask) const; + + // Try to fold some of RxSBG.Input into other fields of RxSBG. + // Return true on success. + bool expandRxSBG(RxSBGOperands &RxSBG) const; + + // Return an undefined value of type VT. + SDValue getUNDEF(SDLoc DL, EVT VT) const; + + // Convert N to VT, if it isn't already. + SDValue convertTo(SDLoc DL, EVT VT, SDValue N) const; + + // Try to implement AND or shift node N using RISBG with the zero flag set. + // Return the selected node on success, otherwise return null. + SDNode *tryRISBGZero(SDNode *N); + + // Try to use RISBG or Opcode to implement OR or XOR node N. + // Return the selected node on success, otherwise return null. + SDNode *tryRxSBG(SDNode *N, unsigned Opcode); + // If Op0 is null, then Node is a constant that can be loaded using: // // (Opcode UpperVal LowerVal) @@ -209,6 +291,26 @@ class SystemZDAGToDAGISel : public SelectionDAGISel { SDNode *splitLargeImmediate(unsigned Opcode, SDNode *Node, SDValue Op0, uint64_t UpperVal, uint64_t LowerVal); + // Return true if Load and Store are loads and stores of the same size + // and are guaranteed not to overlap. Such operations can be implemented + // using block (SS-format) instructions. + // + // Partial overlap would lead to incorrect code, since the block operations + // are logically bytewise, even though they have a fast path for the + // non-overlapping case. We also need to avoid full overlap (i.e. two + // addresses that might be equal at run time) because although that case + // would be handled correctly, it might be implemented by millicode. + bool canUseBlockOperation(StoreSDNode *Store, LoadSDNode *Load) const; + + // N is a (store (load Y), X) pattern. 
Return true if it can use an MVC + // from Y to X. + bool storeLoadCanUseMVC(SDNode *N) const; + + // N is a (store (op (load A[0]), (load A[1])), X) pattern. Return true + // if A[1 - I] == X and if N can use a block operation like NC from A[I] + // to X. + bool storeLoadCanUseBlockBinary(SDNode *N, unsigned I) const; + public: SystemZDAGToDAGISel(SystemZTargetMachine &TM, CodeGenOpt::Level OptLevel) : SelectionDAGISel(TM, OptLevel), @@ -294,9 +396,9 @@ static bool expandIndex(SystemZAddressingMode &AM, SDValue Base, // The base or index of AM is equivalent to Op0 + Op1, where IsBase selects // between the base and index. Try to fold Op1 into AM's displacement. static bool expandDisp(SystemZAddressingMode &AM, bool IsBase, - SDValue Op0, ConstantSDNode *Op1) { + SDValue Op0, uint64_t Op1) { // First try adjusting the displacement. - int64_t TestDisp = AM.Disp + Op1->getSExtValue(); + int64_t TestDisp = AM.Disp + Op1; if (selectDisp(AM.DR, TestDisp)) { changeComponent(AM, IsBase, Op0); AM.Disp = TestDisp; @@ -309,7 +411,7 @@ static bool expandDisp(SystemZAddressingMode &AM, bool IsBase, } bool SystemZDAGToDAGISel::expandAddress(SystemZAddressingMode &AM, - bool IsBase) { + bool IsBase) const { SDValue N = IsBase ? 
AM.Base : AM.Index; unsigned Opcode = N.getOpcode(); if (Opcode == ISD::TRUNCATE) { @@ -329,13 +431,23 @@ bool SystemZDAGToDAGISel::expandAddress(SystemZAddressingMode &AM, return expandAdjDynAlloc(AM, IsBase, Op0); if (Op0Code == ISD::Constant) - return expandDisp(AM, IsBase, Op1, cast<ConstantSDNode>(Op0)); + return expandDisp(AM, IsBase, Op1, + cast<ConstantSDNode>(Op0)->getSExtValue()); if (Op1Code == ISD::Constant) - return expandDisp(AM, IsBase, Op0, cast<ConstantSDNode>(Op1)); + return expandDisp(AM, IsBase, Op0, + cast<ConstantSDNode>(Op1)->getSExtValue()); if (IsBase && expandIndex(AM, Op0, Op1)) return true; } + if (Opcode == SystemZISD::PCREL_OFFSET) { + SDValue Full = N.getOperand(0); + SDValue Base = N.getOperand(1); + SDValue Anchor = Base.getOperand(0); + uint64_t Offset = (cast<GlobalAddressSDNode>(Full)->getOffset() - + cast<GlobalAddressSDNode>(Anchor)->getOffset()); + return expandDisp(AM, IsBase, Base, Offset); + } return false; } @@ -414,14 +526,15 @@ static bool shouldUseLA(SDNode *Base, int64_t Disp, SDNode *Index) { // Return true if Addr is suitable for AM, updating AM if so. bool SystemZDAGToDAGISel::selectAddress(SDValue Addr, - SystemZAddressingMode &AM) { + SystemZAddressingMode &AM) const { // Start out assuming that the address will need to be loaded separately, // then try to extend it as much as we can. AM.Base = Addr; // First try treating the address as a constant. if (Addr.getOpcode() == ISD::Constant && - expandDisp(AM, true, SDValue(), cast<ConstantSDNode>(Addr))) + expandDisp(AM, true, SDValue(), + cast<ConstantSDNode>(Addr)->getSExtValue())) ; else // Otherwise try expanding each component. @@ -461,7 +574,7 @@ static void insertDAGNode(SelectionDAG *DAG, SDNode *Pos, SDValue N) { void SystemZDAGToDAGISel::getAddressOperands(const SystemZAddressingMode &AM, EVT VT, SDValue &Base, - SDValue &Disp) { + SDValue &Disp) const { Base = AM.Base; if (!Base.getNode()) // Register 0 means "no base". This is mostly useful for shifts. 
@@ -474,7 +587,7 @@ void SystemZDAGToDAGISel::getAddressOperands(const SystemZAddressingMode &AM, // Truncate values from i64 to i32, for shifts. assert(VT == MVT::i32 && Base.getValueType() == MVT::i64 && "Unexpected truncation"); - DebugLoc DL = Base.getDebugLoc(); + SDLoc DL(Base); SDValue Trunc = CurDAG->getNode(ISD::TRUNCATE, DL, VT, Base); insertDAGNode(CurDAG, Base.getNode(), Trunc); Base = Trunc; @@ -486,7 +599,8 @@ void SystemZDAGToDAGISel::getAddressOperands(const SystemZAddressingMode &AM, void SystemZDAGToDAGISel::getAddressOperands(const SystemZAddressingMode &AM, EVT VT, SDValue &Base, - SDValue &Disp, SDValue &Index) { + SDValue &Disp, + SDValue &Index) const { getAddressOperands(AM, VT, Base, Disp); Index = AM.Index; @@ -497,7 +611,7 @@ void SystemZDAGToDAGISel::getAddressOperands(const SystemZAddressingMode &AM, bool SystemZDAGToDAGISel::selectBDAddr(SystemZAddressingMode::DispRange DR, SDValue Addr, SDValue &Base, - SDValue &Disp) { + SDValue &Disp) const { SystemZAddressingMode AM(SystemZAddressingMode::FormBD, DR); if (!selectAddress(Addr, AM)) return false; @@ -506,10 +620,21 @@ bool SystemZDAGToDAGISel::selectBDAddr(SystemZAddressingMode::DispRange DR, return true; } +bool SystemZDAGToDAGISel::selectMVIAddr(SystemZAddressingMode::DispRange DR, + SDValue Addr, SDValue &Base, + SDValue &Disp) const { + SystemZAddressingMode AM(SystemZAddressingMode::FormBDXNormal, DR); + if (!selectAddress(Addr, AM) || AM.Index.getNode()) + return false; + + getAddressOperands(AM, Addr.getValueType(), Base, Disp); + return true; +} + bool SystemZDAGToDAGISel::selectBDXAddr(SystemZAddressingMode::AddrForm Form, SystemZAddressingMode::DispRange DR, SDValue Addr, SDValue &Base, - SDValue &Disp, SDValue &Index) { + SDValue &Disp, SDValue &Index) const { SystemZAddressingMode AM(Form, DR); if (!selectAddress(Addr, AM)) return false; @@ -518,11 +643,317 @@ bool SystemZDAGToDAGISel::selectBDXAddr(SystemZAddressingMode::AddrForm Form, return true; } +bool 
SystemZDAGToDAGISel::detectOrAndInsertion(SDValue &Op, + uint64_t InsertMask) const { + // We're only interested in cases where the insertion is into some operand + // of Op, rather than into Op itself. The only useful case is an AND. + if (Op.getOpcode() != ISD::AND) + return false; + + // We need a constant mask. + ConstantSDNode *MaskNode = + dyn_cast<ConstantSDNode>(Op.getOperand(1).getNode()); + if (!MaskNode) + return false; + + // It's not an insertion of Op.getOperand(0) if the two masks overlap. + uint64_t AndMask = MaskNode->getZExtValue(); + if (InsertMask & AndMask) + return false; + + // It's only an insertion if all bits are covered or are known to be zero. + // The inner check covers all cases but is more expensive. + uint64_t Used = allOnes(Op.getValueType().getSizeInBits()); + if (Used != (AndMask | InsertMask)) { + APInt KnownZero, KnownOne; + CurDAG->ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne); + if (Used != (AndMask | InsertMask | KnownZero.getZExtValue())) + return false; + } + + Op = Op.getOperand(0); + return true; +} + +bool SystemZDAGToDAGISel::refineRxSBGMask(RxSBGOperands &RxSBG, + uint64_t Mask) const { + const SystemZInstrInfo *TII = getInstrInfo(); + if (RxSBG.Rotate != 0) + Mask = (Mask << RxSBG.Rotate) | (Mask >> (64 - RxSBG.Rotate)); + Mask &= RxSBG.Mask; + if (TII->isRxSBGMask(Mask, RxSBG.BitSize, RxSBG.Start, RxSBG.End)) { + RxSBG.Mask = Mask; + return true; + } + return false; +} + +// Return true if any bits of (RxSBG.Input & Mask) are significant. +static bool maskMatters(RxSBGOperands &RxSBG, uint64_t Mask) { + // Rotate the mask in the same way as RxSBG.Input is rotated. 
+ if (RxSBG.Rotate != 0) + Mask = ((Mask << RxSBG.Rotate) | (Mask >> (64 - RxSBG.Rotate))); + return (Mask & RxSBG.Mask) != 0; +} + +bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) const { + SDValue N = RxSBG.Input; + unsigned Opcode = N.getOpcode(); + switch (Opcode) { + case ISD::AND: { + if (RxSBG.Opcode == SystemZ::RNSBG) + return false; + + ConstantSDNode *MaskNode = + dyn_cast<ConstantSDNode>(N.getOperand(1).getNode()); + if (!MaskNode) + return false; + + SDValue Input = N.getOperand(0); + uint64_t Mask = MaskNode->getZExtValue(); + if (!refineRxSBGMask(RxSBG, Mask)) { + // If some bits of Input are already known zeros, those bits will have + // been removed from the mask. See if adding them back in makes the + // mask suitable. + APInt KnownZero, KnownOne; + CurDAG->ComputeMaskedBits(Input, KnownZero, KnownOne); + Mask |= KnownZero.getZExtValue(); + if (!refineRxSBGMask(RxSBG, Mask)) + return false; + } + RxSBG.Input = Input; + return true; + } + + case ISD::OR: { + if (RxSBG.Opcode != SystemZ::RNSBG) + return false; + + ConstantSDNode *MaskNode = + dyn_cast<ConstantSDNode>(N.getOperand(1).getNode()); + if (!MaskNode) + return false; + + SDValue Input = N.getOperand(0); + uint64_t Mask = ~MaskNode->getZExtValue(); + if (!refineRxSBGMask(RxSBG, Mask)) { + // If some bits of Input are already known ones, those bits will have + // been removed from the mask. See if adding them back in makes the + // mask suitable. + APInt KnownZero, KnownOne; + CurDAG->ComputeMaskedBits(Input, KnownZero, KnownOne); + Mask &= ~KnownOne.getZExtValue(); + if (!refineRxSBGMask(RxSBG, Mask)) + return false; + } + RxSBG.Input = Input; + return true; + } + + case ISD::ROTL: { + // Any 64-bit rotate left can be merged into the RxSBG. 
+ if (RxSBG.BitSize != 64 || N.getValueType() != MVT::i64) + return false; + ConstantSDNode *CountNode + = dyn_cast<ConstantSDNode>(N.getOperand(1).getNode()); + if (!CountNode) + return false; + + RxSBG.Rotate = (RxSBG.Rotate + CountNode->getZExtValue()) & 63; + RxSBG.Input = N.getOperand(0); + return true; + } + + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: { + // Check that the extension bits are don't-care (i.e. are masked out + // by the final mask). + unsigned InnerBitSize = N.getOperand(0).getValueType().getSizeInBits(); + if (maskMatters(RxSBG, allOnes(RxSBG.BitSize) - allOnes(InnerBitSize))) + return false; + + RxSBG.Input = N.getOperand(0); + return true; + } + + case ISD::SHL: { + ConstantSDNode *CountNode = + dyn_cast<ConstantSDNode>(N.getOperand(1).getNode()); + if (!CountNode) + return false; + + uint64_t Count = CountNode->getZExtValue(); + unsigned BitSize = N.getValueType().getSizeInBits(); + if (Count < 1 || Count >= BitSize) + return false; + + if (RxSBG.Opcode == SystemZ::RNSBG) { + // Treat (shl X, count) as (rotl X, size-count) as long as the bottom + // count bits from RxSBG.Input are ignored. + if (maskMatters(RxSBG, allOnes(Count))) + return false; + } else { + // Treat (shl X, count) as (and (rotl X, count), ~0<<count). + if (!refineRxSBGMask(RxSBG, allOnes(BitSize - Count) << Count)) + return false; + } + + RxSBG.Rotate = (RxSBG.Rotate + Count) & 63; + RxSBG.Input = N.getOperand(0); + return true; + } + + case ISD::SRL: + case ISD::SRA: { + ConstantSDNode *CountNode = + dyn_cast<ConstantSDNode>(N.getOperand(1).getNode()); + if (!CountNode) + return false; + + uint64_t Count = CountNode->getZExtValue(); + unsigned BitSize = N.getValueType().getSizeInBits(); + if (Count < 1 || Count >= BitSize) + return false; + + if (RxSBG.Opcode == SystemZ::RNSBG || Opcode == ISD::SRA) { + // Treat (srl|sra X, count) as (rotl X, size-count) as long as the top + // count bits from RxSBG.Input are ignored. 
+ if (maskMatters(RxSBG, allOnes(Count) << (BitSize - Count))) + return false; + } else { + // Treat (srl X, count), mask) as (and (rotl X, size-count), ~0>>count), + // which is similar to SLL above. + if (!refineRxSBGMask(RxSBG, allOnes(BitSize - Count))) + return false; + } + + RxSBG.Rotate = (RxSBG.Rotate - Count) & 63; + RxSBG.Input = N.getOperand(0); + return true; + } + default: + return false; + } +} + +SDValue SystemZDAGToDAGISel::getUNDEF(SDLoc DL, EVT VT) const { + SDNode *N = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT); + return SDValue(N, 0); +} + +SDValue SystemZDAGToDAGISel::convertTo(SDLoc DL, EVT VT, SDValue N) const { + if (N.getValueType() == MVT::i32 && VT == MVT::i64) + return CurDAG->getTargetInsertSubreg(SystemZ::subreg_l32, + DL, VT, getUNDEF(DL, MVT::i64), N); + if (N.getValueType() == MVT::i64 && VT == MVT::i32) + return CurDAG->getTargetExtractSubreg(SystemZ::subreg_l32, DL, VT, N); + assert(N.getValueType() == VT && "Unexpected value types"); + return N; +} + +SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) { + EVT VT = N->getValueType(0); + RxSBGOperands RISBG(SystemZ::RISBG, SDValue(N, 0)); + unsigned Count = 0; + while (expandRxSBG(RISBG)) + if (RISBG.Input.getOpcode() != ISD::ANY_EXTEND) + Count += 1; + if (Count == 0) + return 0; + if (Count == 1) { + // Prefer to use normal shift instructions over RISBG, since they can handle + // all cases and are sometimes shorter. + if (N->getOpcode() != ISD::AND) + return 0; + + // Prefer register extensions like LLC over RISBG. Also prefer to start + // out with normal ANDs if one instruction would be enough. We can convert + // these ANDs into an RISBG later if a three-address instruction is useful. + if (VT == MVT::i32 || + RISBG.Mask == 0xff || + RISBG.Mask == 0xffff || + SystemZ::isImmLF(~RISBG.Mask) || + SystemZ::isImmHF(~RISBG.Mask)) { + // Force the new mask into the DAG, since it may include known-one bits. 
+ ConstantSDNode *MaskN = cast<ConstantSDNode>(N->getOperand(1).getNode()); + if (MaskN->getZExtValue() != RISBG.Mask) { + SDValue NewMask = CurDAG->getConstant(RISBG.Mask, VT); + N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), NewMask); + return SelectCode(N); + } + return 0; + } + } + + unsigned Opcode = SystemZ::RISBG; + EVT OpcodeVT = MVT::i64; + if (VT == MVT::i32 && Subtarget.hasHighWord()) { + Opcode = SystemZ::RISBMux; + OpcodeVT = MVT::i32; + RISBG.Start &= 31; + RISBG.End &= 31; + } + SDValue Ops[5] = { + getUNDEF(SDLoc(N), OpcodeVT), + convertTo(SDLoc(N), OpcodeVT, RISBG.Input), + CurDAG->getTargetConstant(RISBG.Start, MVT::i32), + CurDAG->getTargetConstant(RISBG.End | 128, MVT::i32), + CurDAG->getTargetConstant(RISBG.Rotate, MVT::i32) + }; + N = CurDAG->getMachineNode(Opcode, SDLoc(N), OpcodeVT, Ops); + return convertTo(SDLoc(N), VT, SDValue(N, 0)).getNode(); +} + +SDNode *SystemZDAGToDAGISel::tryRxSBG(SDNode *N, unsigned Opcode) { + // Try treating each operand of N as the second operand of the RxSBG + // and see which goes deepest. + RxSBGOperands RxSBG[] = { + RxSBGOperands(Opcode, N->getOperand(0)), + RxSBGOperands(Opcode, N->getOperand(1)) + }; + unsigned Count[] = { 0, 0 }; + for (unsigned I = 0; I < 2; ++I) + while (expandRxSBG(RxSBG[I])) + if (RxSBG[I].Input.getOpcode() != ISD::ANY_EXTEND) + Count[I] += 1; + + // Do nothing if neither operand is suitable. + if (Count[0] == 0 && Count[1] == 0) + return 0; + + // Pick the deepest second operand. + unsigned I = Count[0] > Count[1] ? 0 : 1; + SDValue Op0 = N->getOperand(I ^ 1); + + // Prefer IC for character insertions from memory. + if (Opcode == SystemZ::ROSBG && (RxSBG[I].Mask & 0xff) == 0) + if (LoadSDNode *Load = dyn_cast<LoadSDNode>(Op0.getNode())) + if (Load->getMemoryVT() == MVT::i8) + return 0; + + // See whether we can avoid an AND in the first operand by converting + // ROSBG to RISBG. 
+ if (Opcode == SystemZ::ROSBG && detectOrAndInsertion(Op0, RxSBG[I].Mask)) + Opcode = SystemZ::RISBG; + + EVT VT = N->getValueType(0); + SDValue Ops[5] = { + convertTo(SDLoc(N), MVT::i64, Op0), + convertTo(SDLoc(N), MVT::i64, RxSBG[I].Input), + CurDAG->getTargetConstant(RxSBG[I].Start, MVT::i32), + CurDAG->getTargetConstant(RxSBG[I].End, MVT::i32), + CurDAG->getTargetConstant(RxSBG[I].Rotate, MVT::i32) + }; + N = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i64, Ops); + return convertTo(SDLoc(N), VT, SDValue(N, 0)).getNode(); +} + SDNode *SystemZDAGToDAGISel::splitLargeImmediate(unsigned Opcode, SDNode *Node, SDValue Op0, uint64_t UpperVal, uint64_t LowerVal) { EVT VT = Node->getValueType(0); - DebugLoc DL = Node->getDebugLoc(); + SDLoc DL(Node); SDValue Upper = CurDAG->getConstant(UpperVal, VT); if (Op0.getNode()) Upper = CurDAG->getNode(Opcode, DL, VT, Op0, Upper); @@ -533,6 +964,64 @@ SDNode *SystemZDAGToDAGISel::splitLargeImmediate(unsigned Opcode, SDNode *Node, return Or.getNode(); } +bool SystemZDAGToDAGISel::canUseBlockOperation(StoreSDNode *Store, + LoadSDNode *Load) const { + // Check that the two memory operands have the same size. + if (Load->getMemoryVT() != Store->getMemoryVT()) + return false; + + // Volatility stops an access from being decomposed. + if (Load->isVolatile() || Store->isVolatile()) + return false; + + // There's no chance of overlap if the load is invariant. + if (Load->isInvariant()) + return true; + + // Otherwise we need to check whether there's an alias. + const Value *V1 = Load->getSrcValue(); + const Value *V2 = Store->getSrcValue(); + if (!V1 || !V2) + return false; + + // Reject equality. 
+ uint64_t Size = Load->getMemoryVT().getStoreSize(); + int64_t End1 = Load->getSrcValueOffset() + Size; + int64_t End2 = Store->getSrcValueOffset() + Size; + if (V1 == V2 && End1 == End2) + return false; + + return !AA->alias(AliasAnalysis::Location(V1, End1, Load->getTBAAInfo()), + AliasAnalysis::Location(V2, End2, Store->getTBAAInfo())); +} + +bool SystemZDAGToDAGISel::storeLoadCanUseMVC(SDNode *N) const { + StoreSDNode *Store = cast<StoreSDNode>(N); + LoadSDNode *Load = cast<LoadSDNode>(Store->getValue()); + + // Prefer not to use MVC if either address can use ... RELATIVE LONG + // instructions. + uint64_t Size = Load->getMemoryVT().getStoreSize(); + if (Size > 1 && Size <= 8) { + // Prefer LHRL, LRL and LGRL. + if (SystemZISD::isPCREL(Load->getBasePtr().getOpcode())) + return false; + // Prefer STHRL, STRL and STGRL. + if (SystemZISD::isPCREL(Store->getBasePtr().getOpcode())) + return false; + } + + return canUseBlockOperation(Store, Load); +} + +bool SystemZDAGToDAGISel::storeLoadCanUseBlockBinary(SDNode *N, + unsigned I) const { + StoreSDNode *StoreA = cast<StoreSDNode>(N); + LoadSDNode *LoadA = cast<LoadSDNode>(StoreA->getValue().getOperand(1 - I)); + LoadSDNode *LoadB = cast<LoadSDNode>(StoreA->getValue().getOperand(I)); + return !LoadA->isVolatile() && canUseBlockOperation(StoreA, LoadB); +} + SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { // Dump information about the Node being selected DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n"); @@ -545,12 +1034,21 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { } unsigned Opcode = Node->getOpcode(); + SDNode *ResNode = 0; switch (Opcode) { case ISD::OR: + if (Node->getOperand(1).getOpcode() != ISD::Constant) + ResNode = tryRxSBG(Node, SystemZ::ROSBG); + goto or_xor; + case ISD::XOR: + if (Node->getOperand(1).getOpcode() != ISD::Constant) + ResNode = tryRxSBG(Node, SystemZ::RXSBG); + // Fall through. 
+ or_xor: // If this is a 64-bit operation in which both 32-bit halves are nonzero, // split the operation into two. - if (Node->getValueType(0) == MVT::i64) + if (!ResNode && Node->getValueType(0) == MVT::i64) if (ConstantSDNode *Op1 = dyn_cast<ConstantSDNode>(Node->getOperand(1))) { uint64_t Val = Op1->getZExtValue(); if (!SystemZ::isImmLF(Val) && !SystemZ::isImmHF(Val)) @@ -559,6 +1057,17 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { } break; + case ISD::AND: + if (Node->getOperand(1).getOpcode() != ISD::Constant) + ResNode = tryRxSBG(Node, SystemZ::RNSBG); + // Fall through. + case ISD::ROTL: + case ISD::SHL: + case ISD::SRL: + if (!ResNode) + ResNode = tryRISBGZero(Node); + break; + case ISD::Constant: // If this is a 64-bit constant that is out of the range of LLILF, // LLIHF and LGFI, split it into two 32-bit pieces. @@ -583,10 +1092,32 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { } } break; + + case SystemZISD::SELECT_CCMASK: { + SDValue Op0 = Node->getOperand(0); + SDValue Op1 = Node->getOperand(1); + // Prefer to put any load first, so that it can be matched as a + // conditional load. + if (Op1.getOpcode() == ISD::LOAD && Op0.getOpcode() != ISD::LOAD) { + SDValue CCValid = Node->getOperand(2); + SDValue CCMask = Node->getOperand(3); + uint64_t ConstCCValid = + cast<ConstantSDNode>(CCValid.getNode())->getZExtValue(); + uint64_t ConstCCMask = + cast<ConstantSDNode>(CCMask.getNode())->getZExtValue(); + // Invert the condition. 
+ CCMask = CurDAG->getConstant(ConstCCValid ^ ConstCCMask, + CCMask.getValueType()); + SDValue Op4 = Node->getOperand(4); + Node = CurDAG->UpdateNodeOperands(Node, Op1, Op0, CCValid, CCMask, Op4); + } + break; + } } // Select the default instruction - SDNode *ResNode = SelectCode(Node); + if (!ResNode) + ResNode = SelectCode(Node); DEBUG(errs() << "=> "; if (ResNode == NULL || ResNode == Node) diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index eb21b31..f6e1853 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -23,8 +23,23 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include <cctype> + using namespace llvm; +namespace { +// Represents a sequence for extracting a 0/1 value from an IPM result: +// (((X ^ XORValue) + AddValue) >> Bit) +struct IPMConversion { + IPMConversion(unsigned xorValue, int64_t addValue, unsigned bit) + : XORValue(xorValue), AddValue(addValue), Bit(bit) {} + + int64_t XORValue; + int64_t AddValue; + unsigned Bit; +}; +} + // Classify VT as either 32 or 64 bit. static bool is32Bit(EVT VT) { switch (VT.getSimpleVT().SimpleTy) { @@ -51,7 +66,10 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) MVT PtrVT = getPointerTy(); // Set up the register classes. 
- addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass); + if (Subtarget.hasHighWord()) + addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass); + else + addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass); addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass); addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass); addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass); @@ -67,7 +85,7 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) // TODO: It may be better to default to latency-oriented scheduling, however // LLVM's current latency-oriented scheduler can't handle physreg definitions - // such as SystemZ has with PSW, so set this to the register-pressure + // such as SystemZ has with CC, so set this to the register-pressure // scheduler, because it can. setSchedulingPreference(Sched::RegPressure); @@ -83,8 +101,8 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) ++I) { MVT VT = MVT::SimpleValueType(I); if (isTypeLegal(VT)) { - // Expand SETCC(X, Y, COND) into SELECT_CC(X, Y, 1, 0, COND). - setOperationAction(ISD::SETCC, VT, Expand); + // Lower SET_CC into an IPM-based sequence. + setOperationAction(ISD::SETCC, VT, Custom); // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE). setOperationAction(ISD::SELECT, VT, Expand); @@ -128,9 +146,11 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand); setOperationAction(ISD::ROTR, VT, Expand); - // Use *MUL_LOHI where possible and a wider multiplication otherwise. + // Use *MUL_LOHI where possible instead of MULH*. setOperationAction(ISD::MULHS, VT, Expand); setOperationAction(ISD::MULHU, VT, Expand); + setOperationAction(ISD::SMUL_LOHI, VT, Custom); + setOperationAction(ISD::UMUL_LOHI, VT, Custom); // We have instructions for signed but not unsigned FP conversion. 
setOperationAction(ISD::FP_TO_UINT, VT, Expand); @@ -165,14 +185,6 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) // Give LowerOperation the chance to replace 64-bit ORs with subregs. setOperationAction(ISD::OR, MVT::i64, Custom); - // The architecture has 32-bit SMUL_LOHI and UMUL_LOHI (MR and MLR), - // but they aren't really worth using. There is no 64-bit SMUL_LOHI, - // but there is a 64-bit UMUL_LOHI: MLGR. - setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); - setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); - setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); - setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom); - // FIXME: Can we support these natively? setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand); setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand); @@ -200,10 +212,8 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) setOperationAction(ISD::STACKSAVE, MVT::Other, Custom); setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom); - // Expand these using getExceptionSelectorRegister() and - // getExceptionPointerRegister(). - setOperationAction(ISD::EXCEPTIONADDR, PtrVT, Expand); - setOperationAction(ISD::EHSELECTION, PtrVT, Expand); + // Handle prefetches with PFD or PFDRL. + setOperationAction(ISD::PREFETCH, MVT::Other, Custom); // Handle floating-point types. for (unsigned I = MVT::FIRST_FP_VALUETYPE; @@ -214,6 +224,15 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) // We can use FI for FRINT. setOperationAction(ISD::FRINT, VT, Legal); + // We can use the extended form of FI for other rounding operations. + if (Subtarget.hasFPExtension()) { + setOperationAction(ISD::FNEARBYINT, VT, Legal); + setOperationAction(ISD::FFLOOR, VT, Legal); + setOperationAction(ISD::FCEIL, VT, Legal); + setOperationAction(ISD::FTRUNC, VT, Legal); + setOperationAction(ISD::FROUND, VT, Legal); + } + // No special instructions for these. 
setOperationAction(ISD::FSIN, VT, Expand); setOperationAction(ISD::FCOS, VT, Expand); @@ -246,6 +265,43 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) setOperationAction(ISD::VASTART, MVT::Other, Custom); setOperationAction(ISD::VACOPY, MVT::Other, Custom); setOperationAction(ISD::VAEND, MVT::Other, Expand); + + // We want to use MVC in preference to even a single load/store pair. + MaxStoresPerMemcpy = 0; + MaxStoresPerMemcpyOptSize = 0; + + // The main memset sequence is a byte store followed by an MVC. + // Two STC or MV..I stores win over that, but the kind of fused stores + // generated by target-independent code don't when the byte value is + // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better + // than "STC;MVC". Handle the choice in target-specific code instead. + MaxStoresPerMemset = 0; + MaxStoresPerMemsetOptSize = 0; +} + +EVT SystemZTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { + if (!VT.isVector()) + return MVT::i32; + return VT.changeVectorElementTypeToInteger(); +} + +bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { + VT = VT.getScalarType(); + + if (!VT.isSimple()) + return false; + + switch (VT.getSimpleVT().SimpleTy) { + case MVT::f32: + case MVT::f64: + return true; + case MVT::f128: + return false; + default: + break; + } + + return false; } bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { @@ -253,6 +309,47 @@ bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { return Imm.isZero() || Imm.isNegZero(); } +bool SystemZTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, + bool *Fast) const { + // Unaligned accesses should never be slower than the expanded version. + // We check specifically for aligned accesses in the few cases where + // they are required. 
+ if (Fast) + *Fast = true; + return true; +} + +bool SystemZTargetLowering::isLegalAddressingMode(const AddrMode &AM, + Type *Ty) const { + // Punt on globals for now, although they can be used in limited + // RELATIVE LONG cases. + if (AM.BaseGV) + return false; + + // Require a 20-bit signed offset. + if (!isInt<20>(AM.BaseOffs)) + return false; + + // Indexing is OK but no scale factor can be applied. + return AM.Scale == 0 || AM.Scale == 1; +} + +bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const { + if (!FromType->isIntegerTy() || !ToType->isIntegerTy()) + return false; + unsigned FromBits = FromType->getPrimitiveSizeInBits(); + unsigned ToBits = ToType->getPrimitiveSizeInBits(); + return FromBits > ToBits; +} + +bool SystemZTargetLowering::isTruncateFree(EVT FromVT, EVT ToVT) const { + if (!FromVT.isInteger() || !ToVT.isInteger()) + return false; + unsigned FromBits = FromVT.getSizeInBits(); + unsigned ToBits = ToVT.getSizeInBits(); + return FromBits > ToBits; +} + //===----------------------------------------------------------------------===// // Inline asm support //===----------------------------------------------------------------------===// @@ -264,6 +361,7 @@ SystemZTargetLowering::getConstraintType(const std::string &Constraint) const { case 'a': // Address register case 'd': // Data register (equivalent to 'r') case 'f': // Floating-point register + case 'h': // High-part register case 'r': // General-purpose register return C_RegisterClass; @@ -306,6 +404,7 @@ getSingleConstraintMatchWeight(AsmOperandInfo &info, case 'a': // Address register case 'd': // Data register (equivalent to 'r') + case 'h': // High-part register case 'r': // General-purpose register if (CallOperandVal->getType()->isIntegerTy()) weight = CW_Register; @@ -349,8 +448,24 @@ getSingleConstraintMatchWeight(AsmOperandInfo &info, return weight; } +// Parse a "{tNNN}" register constraint for which the register type "t" +// has already been verified. 
MC is the class associated with "t" and +// Map maps 0-based register numbers to LLVM register numbers. +static std::pair<unsigned, const TargetRegisterClass *> +parseRegisterNumber(const std::string &Constraint, + const TargetRegisterClass *RC, const unsigned *Map) { + assert(*(Constraint.end()-1) == '}' && "Missing '}'"); + if (isdigit(Constraint[2])) { + std::string Suffix(Constraint.data() + 2, Constraint.size() - 2); + unsigned Index = atoi(Suffix.c_str()); + if (Index < 16 && Map[Index]) + return std::make_pair(Map[Index], RC); + } + return std::make_pair(0u, static_cast<TargetRegisterClass*>(0)); +} + std::pair<unsigned, const TargetRegisterClass *> SystemZTargetLowering:: -getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const { +getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const { if (Constraint.size() == 1) { // GCC Constraint Letters switch (Constraint[0]) { @@ -370,6 +485,9 @@ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const { return std::make_pair(0U, &SystemZ::ADDR128BitRegClass); return std::make_pair(0U, &SystemZ::ADDR32BitRegClass); + case 'h': // High-part register (an LLVM extension) + return std::make_pair(0U, &SystemZ::GRH32BitRegClass); + case 'f': // Floating-point register if (VT == MVT::f64) return std::make_pair(0U, &SystemZ::FP64BitRegClass); @@ -378,6 +496,32 @@ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const { return std::make_pair(0U, &SystemZ::FP32BitRegClass); } } + if (Constraint[0] == '{') { + // We need to override the default register parsing for GPRs and FPRs + // because the interpretation depends on VT. The internal names of + // the registers are also different from the external names + // (F0D and F0S instead of F0, etc.). 
+ if (Constraint[1] == 'r') { + if (VT == MVT::i32) + return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass, + SystemZMC::GR32Regs); + if (VT == MVT::i128) + return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass, + SystemZMC::GR128Regs); + return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass, + SystemZMC::GR64Regs); + } + if (Constraint[1] == 'f') { + if (VT == MVT::f32) + return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass, + SystemZMC::FP32Regs); + if (VT == MVT::f128) + return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass, + SystemZMC::FP128Regs); + return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass, + SystemZMC::FP64Regs); + } + } return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); } @@ -433,10 +577,21 @@ LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, #include "SystemZGenCallingConv.inc" +bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType, + Type *ToType) const { + return isTruncateFree(FromType, ToType); +} + +bool SystemZTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { + if (!CI->isTailCall()) + return false; + return true; +} + // Value is a value that has been passed to us in the location described by VA // (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining // any loads onto Chain. -static SDValue convertLocVTToValVT(SelectionDAG &DAG, DebugLoc DL, +static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDLoc DL, CCValAssign &VA, SDValue Chain, SDValue Value) { // If the argument has been promoted from a smaller type, insert an @@ -461,7 +616,7 @@ static SDValue convertLocVTToValVT(SelectionDAG &DAG, DebugLoc DL, // Value is a value of type VA.getValVT() that we need to copy into // the location described by VA. Return a copy of Value converted to // VA.getValVT(). The caller is responsible for handling indirect values. 
-static SDValue convertValVTToLocVT(SelectionDAG &DAG, DebugLoc DL, +static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDLoc DL, CCValAssign &VA, SDValue Value) { switch (VA.getLocInfo()) { case CCValAssign::SExt: @@ -480,7 +635,7 @@ static SDValue convertValVTToLocVT(SelectionDAG &DAG, DebugLoc DL, SDValue SystemZTargetLowering:: LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc DL, SelectionDAG &DAG, + SDLoc DL, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -595,35 +750,56 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, return Chain; } +static bool canUseSiblingCall(CCState ArgCCInfo, + SmallVectorImpl<CCValAssign> &ArgLocs) { + // Punt if there are any indirect or stack arguments, or if the call + // needs the call-saved argument register R6. + for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { + CCValAssign &VA = ArgLocs[I]; + if (VA.getLocInfo() == CCValAssign::Indirect) + return false; + if (!VA.isRegLoc()) + return false; + unsigned Reg = VA.getLocReg(); + if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D) + return false; + } + return true; +} + SDValue SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const { SelectionDAG &DAG = CLI.DAG; - DebugLoc &DL = CLI.DL; - SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs; - SmallVector<SDValue, 32> &OutVals = CLI.OutVals; - SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins; + SDLoc &DL = CLI.DL; + SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; + SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; + SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; SDValue Chain = CLI.Chain; SDValue Callee = CLI.Callee; - bool &isTailCall = CLI.IsTailCall; + bool &IsTailCall = CLI.IsTailCall; CallingConv::ID CallConv = CLI.CallConv; bool IsVarArg = 
CLI.IsVarArg; MachineFunction &MF = DAG.getMachineFunction(); EVT PtrVT = getPointerTy(); - // SystemZ target does not yet support tail call optimization. - isTailCall = false; - // Analyze the operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ArgLocs; CCState ArgCCInfo(CallConv, IsVarArg, MF, TM, ArgLocs, *DAG.getContext()); ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ); + // We don't support GuaranteedTailCallOpt, only automatically-detected + // sibling calls. + if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs)) + IsTailCall = false; + // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = ArgCCInfo.getNextStackOffset(); // Mark the start of the call. - Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, PtrVT, true)); + if (!IsTailCall) + Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, PtrVT, true), + DL); // Copy argument values to their designated locations. SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass; @@ -672,22 +848,27 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOpChains[0], MemOpChains.size()); - // Build a sequence of copy-to-reg nodes, chained and glued together. - SDValue Glue; - for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) { - Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first, - RegsToPass[I].second, Glue); - Glue = Chain.getValue(1); - } - // Accept direct calls by converting symbolic call addresses to the - // associated Target* opcodes. + // associated Target* opcodes. Force %r1 to be used for indirect + // tail calls. 
+ SDValue Glue; if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT); Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee); } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) { Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT); Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee); + } else if (IsTailCall) { + Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue); + Glue = Chain.getValue(1); + Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType()); + } + + // Build a sequence of copy-to-reg nodes, chained and glued together. + for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) { + Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first, + RegsToPass[I].second, Glue); + Glue = Chain.getValue(1); } // The first call operand is the chain and the second is the target address. @@ -707,6 +888,8 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, // Emit the call. SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + if (IsTailCall) + return DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, &Ops[0], Ops.size()); Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, &Ops[0], Ops.size()); Glue = Chain.getValue(1); @@ -714,7 +897,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, Chain = DAG.getCALLSEQ_END(Chain, DAG.getConstant(NumBytes, PtrVT, true), DAG.getConstant(0, PtrVT, true), - Glue); + Glue, DL); Glue = Chain.getValue(1); // Assign locations to each value returned by this call. @@ -745,7 +928,7 @@ SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, - DebugLoc DL, SelectionDAG &DAG) const { + SDLoc DL, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); // Assign locations to each returned value. 
@@ -815,6 +998,96 @@ static unsigned CCMaskForCondCode(ISD::CondCode CC) { #undef CONV } +// Return a sequence for getting a 1 from an IPM result when CC has a +// value in CCMask and a 0 when CC has a value in CCValid & ~CCMask. +// The handling of CC values outside CCValid doesn't matter. +static IPMConversion getIPMConversion(unsigned CCValid, unsigned CCMask) { + // Deal with cases where the result can be taken directly from a bit + // of the IPM result. + if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_3))) + return IPMConversion(0, 0, SystemZ::IPM_CC); + if (CCMask == (CCValid & (SystemZ::CCMASK_2 | SystemZ::CCMASK_3))) + return IPMConversion(0, 0, SystemZ::IPM_CC + 1); + + // Deal with cases where we can add a value to force the sign bit + // to contain the right value. Putting the bit in 31 means we can + // use SRL rather than RISBG(L), and also makes it easier to get a + // 0/-1 value, so it has priority over the other tests below. + // + // These sequences rely on the fact that the upper two bits of the + // IPM result are zero. + uint64_t TopBit = uint64_t(1) << 31; + if (CCMask == (CCValid & SystemZ::CCMASK_0)) + return IPMConversion(0, -(1 << SystemZ::IPM_CC), 31); + if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_1))) + return IPMConversion(0, -(2 << SystemZ::IPM_CC), 31); + if (CCMask == (CCValid & (SystemZ::CCMASK_0 + | SystemZ::CCMASK_1 + | SystemZ::CCMASK_2))) + return IPMConversion(0, -(3 << SystemZ::IPM_CC), 31); + if (CCMask == (CCValid & SystemZ::CCMASK_3)) + return IPMConversion(0, TopBit - (3 << SystemZ::IPM_CC), 31); + if (CCMask == (CCValid & (SystemZ::CCMASK_1 + | SystemZ::CCMASK_2 + | SystemZ::CCMASK_3))) + return IPMConversion(0, TopBit - (1 << SystemZ::IPM_CC), 31); + + // Next try inverting the value and testing a bit. 0/1 could be + // handled this way too, but we dealt with that case above. 
+ if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_2))) + return IPMConversion(-1, 0, SystemZ::IPM_CC); + + // Handle cases where adding a value forces a non-sign bit to contain + // the right value. + if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_2))) + return IPMConversion(0, 1 << SystemZ::IPM_CC, SystemZ::IPM_CC + 1); + if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_3))) + return IPMConversion(0, -(1 << SystemZ::IPM_CC), SystemZ::IPM_CC + 1); + + // The remaing cases are 1, 2, 0/1/3 and 0/2/3. All these are + // can be done by inverting the low CC bit and applying one of the + // sign-based extractions above. + if (CCMask == (CCValid & SystemZ::CCMASK_1)) + return IPMConversion(1 << SystemZ::IPM_CC, -(1 << SystemZ::IPM_CC), 31); + if (CCMask == (CCValid & SystemZ::CCMASK_2)) + return IPMConversion(1 << SystemZ::IPM_CC, + TopBit - (3 << SystemZ::IPM_CC), 31); + if (CCMask == (CCValid & (SystemZ::CCMASK_0 + | SystemZ::CCMASK_1 + | SystemZ::CCMASK_3))) + return IPMConversion(1 << SystemZ::IPM_CC, -(3 << SystemZ::IPM_CC), 31); + if (CCMask == (CCValid & (SystemZ::CCMASK_0 + | SystemZ::CCMASK_2 + | SystemZ::CCMASK_3))) + return IPMConversion(1 << SystemZ::IPM_CC, + TopBit - (1 << SystemZ::IPM_CC), 31); + + llvm_unreachable("Unexpected CC combination"); +} + +// If a comparison described by IsUnsigned, CCMask, CmpOp0 and CmpOp1 +// can be converted to a comparison against zero, adjust the operands +// as necessary. 
+static void adjustZeroCmp(SelectionDAG &DAG, bool &IsUnsigned, + SDValue &CmpOp0, SDValue &CmpOp1, + unsigned &CCMask) { + if (IsUnsigned) + return; + + ConstantSDNode *ConstOp1 = dyn_cast<ConstantSDNode>(CmpOp1.getNode()); + if (!ConstOp1) + return; + + int64_t Value = ConstOp1->getSExtValue(); + if ((Value == -1 && CCMask == SystemZ::CCMASK_CMP_GT) || + (Value == -1 && CCMask == SystemZ::CCMASK_CMP_LE) || + (Value == 1 && CCMask == SystemZ::CCMASK_CMP_LT) || + (Value == 1 && CCMask == SystemZ::CCMASK_CMP_GE)) { + CCMask ^= SystemZ::CCMASK_CMP_EQ; + CmpOp1 = DAG.getConstant(0, CmpOp1.getValueType()); + } +} + // If a comparison described by IsUnsigned, CCMask, CmpOp0 and CmpOp1 // is suitable for CLI(Y), CHHSI or CLHHSI, adjust the operands as necessary. static void adjustSubwordCmp(SelectionDAG &DAG, bool &IsUnsigned, @@ -840,7 +1113,7 @@ static void adjustSubwordCmp(SelectionDAG &DAG, bool &IsUnsigned, uint64_t Mask = (1 << NumBits) - 1; if (Load->getExtensionType() == ISD::SEXTLOAD) { int64_t SignedValue = Constant->getSExtValue(); - if (uint64_t(SignedValue) + (1 << (NumBits - 1)) > Mask) + if (uint64_t(SignedValue) + (1ULL << (NumBits - 1)) > Mask) return; // Unsigned comparison between two sign-extended values is equivalent // to unsigned comparison between two zero-extended values. @@ -859,7 +1132,7 @@ static void adjustSubwordCmp(SelectionDAG &DAG, bool &IsUnsigned, if (Value == 0 && CCMask == SystemZ::CCMASK_CMP_LT) // Test whether the high bit of the byte is set. Value = 127, CCMask = SystemZ::CCMASK_CMP_GT, IsUnsigned = true; - else if (SignedValue == -1 && CCMask == SystemZ::CCMASK_CMP_GT) + else if (Value == 0 && CCMask == SystemZ::CCMASK_CMP_GE) // Test whether the high bit of the byte is clear. Value = 128, CCMask = SystemZ::CCMASK_CMP_LT, IsUnsigned = true; else @@ -879,7 +1152,7 @@ static void adjustSubwordCmp(SelectionDAG &DAG, bool &IsUnsigned, ISD::LoadExtType ExtType = IsUnsigned ? 
ISD::ZEXTLOAD : ISD::SEXTLOAD; if (CmpOp0.getValueType() != MVT::i32 || Load->getExtensionType() != ExtType) - CmpOp0 = DAG.getExtLoad(ExtType, Load->getDebugLoc(), MVT::i32, + CmpOp0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(), Load->getBasePtr(), Load->getPointerInfo(), Load->getMemoryVT(), Load->isVolatile(), Load->isNonTemporal(), @@ -891,67 +1164,309 @@ static void adjustSubwordCmp(SelectionDAG &DAG, bool &IsUnsigned, CmpOp1 = DAG.getConstant(Value, MVT::i32); } -// Return true if a comparison described by CCMask, CmpOp0 and CmpOp1 -// is an equality comparison that is better implemented using unsigned -// rather than signed comparison instructions. -static bool preferUnsignedComparison(SelectionDAG &DAG, SDValue CmpOp0, - SDValue CmpOp1, unsigned CCMask) { - // The test must be for equality or inequality. - if (CCMask != SystemZ::CCMASK_CMP_EQ && CCMask != SystemZ::CCMASK_CMP_NE) +// Return true if Op is either an unextended load, or a load suitable +// for integer register-memory comparisons of type ICmpType. +static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) { + LoadSDNode *Load = dyn_cast<LoadSDNode>(Op.getNode()); + if (Load) { + // There are no instructions to compare a register with a memory byte. + if (Load->getMemoryVT() == MVT::i8) + return false; + // Otherwise decide on extension type. + switch (Load->getExtensionType()) { + case ISD::NON_EXTLOAD: + return true; + case ISD::SEXTLOAD: + return ICmpType != SystemZICMP::UnsignedOnly; + case ISD::ZEXTLOAD: + return ICmpType != SystemZICMP::SignedOnly; + default: + break; + } + } + return false; +} + +// Return true if it is better to swap comparison operands Op0 and Op1. +// ICmpType is the type of an integer comparison. +static bool shouldSwapCmpOperands(SDValue Op0, SDValue Op1, + unsigned ICmpType) { + // Leave f128 comparisons alone, since they have no memory forms. 
+ if (Op0.getValueType() == MVT::f128) return false; - if (CmpOp1.getOpcode() == ISD::Constant) { - uint64_t Value = cast<ConstantSDNode>(CmpOp1)->getSExtValue(); + // Always keep a floating-point constant second, since comparisons with + // zero can use LOAD TEST and comparisons with other constants make a + // natural memory operand. + if (isa<ConstantFPSDNode>(Op1)) + return false; - // If we're comparing with memory, prefer unsigned comparisons for - // values that are in the unsigned 16-bit range but not the signed - // 16-bit range. We want to use CLFHSI and CLGHSI. - if (CmpOp0.hasOneUse() && - ISD::isNormalLoad(CmpOp0.getNode()) && - (Value >= 32768 && Value < 65536)) - return true; + // Never swap comparisons with zero since there are many ways to optimize + // those later. + ConstantSDNode *COp1 = dyn_cast<ConstantSDNode>(Op1); + if (COp1 && COp1->getZExtValue() == 0) + return false; - // Use unsigned comparisons for values that are in the CLGFI range - // but not in the CGFI range. - if (CmpOp0.getValueType() == MVT::i64 && (Value >> 31) == 1) + // Look for cases where Cmp0 is a single-use load and Cmp1 isn't. + // In that case we generally prefer the memory to be second. + if ((isNaturalMemoryOperand(Op0, ICmpType) && Op0.hasOneUse()) && + !(isNaturalMemoryOperand(Op1, ICmpType) && Op1.hasOneUse())) { + // The only exceptions are when the second operand is a constant and + // we can use things like CHHSI. + if (!COp1) return true; + // The unsigned memory-immediate instructions can handle 16-bit + // unsigned integers. + if (ICmpType != SystemZICMP::SignedOnly && + isUInt<16>(COp1->getZExtValue())) + return false; + // The signed memory-immediate instructions can handle 16-bit + // signed integers. + if (ICmpType != SystemZICMP::UnsignedOnly && + isInt<16>(COp1->getSExtValue())) + return false; + return true; + } + return false; +} + +// Return true if shift operation N has an in-range constant shift value. +// Store it in ShiftVal if so. 
+static bool isSimpleShift(SDValue N, unsigned &ShiftVal) { + ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1)); + if (!Shift) + return false; + uint64_t Amount = Shift->getZExtValue(); + if (Amount >= N.getValueType().getSizeInBits()) return false; + + ShiftVal = Amount; + return true; +} + +// Check whether an AND with Mask is suitable for a TEST UNDER MASK +// instruction and whether the CC value is descriptive enough to handle +// a comparison of type Opcode between the AND result and CmpVal. +// CCMask says which comparison result is being tested and BitSize is +// the number of bits in the operands. If TEST UNDER MASK can be used, +// return the corresponding CC mask, otherwise return 0. +static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask, + uint64_t Mask, uint64_t CmpVal, + unsigned ICmpType) { + assert(Mask != 0 && "ANDs with zero should have been removed by now"); + + // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL. + if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) && + !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask)) + return 0; + + // Work out the masks for the lowest and highest bits. + unsigned HighShift = 63 - countLeadingZeros(Mask); + uint64_t High = uint64_t(1) << HighShift; + uint64_t Low = uint64_t(1) << countTrailingZeros(Mask); + + // Signed ordered comparisons are effectively unsigned if the sign + // bit is dropped. + bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly); + + // Check for equality comparisons with 0, or the equivalent. 
+ if (CmpVal == 0) { + if (CCMask == SystemZ::CCMASK_CMP_EQ) + return SystemZ::CCMASK_TM_ALL_0; + if (CCMask == SystemZ::CCMASK_CMP_NE) + return SystemZ::CCMASK_TM_SOME_1; + } + if (EffectivelyUnsigned && CmpVal <= Low) { + if (CCMask == SystemZ::CCMASK_CMP_LT) + return SystemZ::CCMASK_TM_ALL_0; + if (CCMask == SystemZ::CCMASK_CMP_GE) + return SystemZ::CCMASK_TM_SOME_1; + } + if (EffectivelyUnsigned && CmpVal < Low) { + if (CCMask == SystemZ::CCMASK_CMP_LE) + return SystemZ::CCMASK_TM_ALL_0; + if (CCMask == SystemZ::CCMASK_CMP_GT) + return SystemZ::CCMASK_TM_SOME_1; } - // Prefer CL for zero-extended loads. - if (CmpOp1.getOpcode() == ISD::ZERO_EXTEND || - ISD::isZEXTLoad(CmpOp1.getNode())) - return true; + // Check for equality comparisons with the mask, or the equivalent. + if (CmpVal == Mask) { + if (CCMask == SystemZ::CCMASK_CMP_EQ) + return SystemZ::CCMASK_TM_ALL_1; + if (CCMask == SystemZ::CCMASK_CMP_NE) + return SystemZ::CCMASK_TM_SOME_0; + } + if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) { + if (CCMask == SystemZ::CCMASK_CMP_GT) + return SystemZ::CCMASK_TM_ALL_1; + if (CCMask == SystemZ::CCMASK_CMP_LE) + return SystemZ::CCMASK_TM_SOME_0; + } + if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) { + if (CCMask == SystemZ::CCMASK_CMP_GE) + return SystemZ::CCMASK_TM_ALL_1; + if (CCMask == SystemZ::CCMASK_CMP_LT) + return SystemZ::CCMASK_TM_SOME_0; + } - // ...and for "in-register" zero extensions. - if (CmpOp1.getOpcode() == ISD::AND && CmpOp1.getValueType() == MVT::i64) { - SDValue Mask = CmpOp1.getOperand(1); - if (Mask.getOpcode() == ISD::Constant && - cast<ConstantSDNode>(Mask)->getZExtValue() == 0xffffffff) - return true; + // Check for ordered comparisons with the top bit. 
+ if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) { + if (CCMask == SystemZ::CCMASK_CMP_LE) + return SystemZ::CCMASK_TM_MSB_0; + if (CCMask == SystemZ::CCMASK_CMP_GT) + return SystemZ::CCMASK_TM_MSB_1; + } + if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) { + if (CCMask == SystemZ::CCMASK_CMP_LT) + return SystemZ::CCMASK_TM_MSB_0; + if (CCMask == SystemZ::CCMASK_CMP_GE) + return SystemZ::CCMASK_TM_MSB_1; } - return false; + // If there are just two bits, we can do equality checks for Low and High + // as well. + if (Mask == Low + High) { + if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low) + return SystemZ::CCMASK_TM_MIXED_MSB_0; + if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low) + return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY; + if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High) + return SystemZ::CCMASK_TM_MIXED_MSB_1; + if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High) + return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY; + } + + // Looks like we've exhausted our options. + return 0; +} + +// See whether the comparison (Opcode CmpOp0, CmpOp1, ICmpType) can be +// implemented as a TEST UNDER MASK instruction when the condition being +// tested is as described by CCValid and CCMask. Update the arguments +// with the TM version if so. +static void adjustForTestUnderMask(SelectionDAG &DAG, unsigned &Opcode, + SDValue &CmpOp0, SDValue &CmpOp1, + unsigned &CCValid, unsigned &CCMask, + unsigned &ICmpType) { + // Check that we have a comparison with a constant. + ConstantSDNode *ConstCmpOp1 = dyn_cast<ConstantSDNode>(CmpOp1); + if (!ConstCmpOp1) + return; + uint64_t CmpVal = ConstCmpOp1->getZExtValue(); + + // Check whether the nonconstant input is an AND with a constant mask. 
+ if (CmpOp0.getOpcode() != ISD::AND) + return; + SDValue AndOp0 = CmpOp0.getOperand(0); + SDValue AndOp1 = CmpOp0.getOperand(1); + ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(AndOp1.getNode()); + if (!Mask) + return; + uint64_t MaskVal = Mask->getZExtValue(); + + // Check whether the combination of mask, comparison value and comparison + // type are suitable. + unsigned BitSize = CmpOp0.getValueType().getSizeInBits(); + unsigned NewCCMask, ShiftVal; + if (ICmpType != SystemZICMP::SignedOnly && + AndOp0.getOpcode() == ISD::SHL && + isSimpleShift(AndOp0, ShiftVal) && + (NewCCMask = getTestUnderMaskCond(BitSize, CCMask, MaskVal >> ShiftVal, + CmpVal >> ShiftVal, + SystemZICMP::Any))) { + AndOp0 = AndOp0.getOperand(0); + AndOp1 = DAG.getConstant(MaskVal >> ShiftVal, AndOp0.getValueType()); + } else if (ICmpType != SystemZICMP::SignedOnly && + AndOp0.getOpcode() == ISD::SRL && + isSimpleShift(AndOp0, ShiftVal) && + (NewCCMask = getTestUnderMaskCond(BitSize, CCMask, + MaskVal << ShiftVal, + CmpVal << ShiftVal, + SystemZICMP::UnsignedOnly))) { + AndOp0 = AndOp0.getOperand(0); + AndOp1 = DAG.getConstant(MaskVal << ShiftVal, AndOp0.getValueType()); + } else { + NewCCMask = getTestUnderMaskCond(BitSize, CCMask, MaskVal, CmpVal, + ICmpType); + if (!NewCCMask) + return; + } + + // Go ahead and make the change. + Opcode = SystemZISD::TM; + CmpOp0 = AndOp0; + CmpOp1 = AndOp1; + ICmpType = (bool(NewCCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) != + bool(NewCCMask & SystemZ::CCMASK_TM_MIXED_MSB_1)); + CCValid = SystemZ::CCMASK_TM; + CCMask = NewCCMask; } -// Return a target node that compares CmpOp0 and CmpOp1. Set CCMask to the -// 4-bit condition-code mask for CC. -static SDValue emitCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, - ISD::CondCode CC, unsigned &CCMask) { +// Return a target node that compares CmpOp0 with CmpOp1 and stores a +// 2-bit result in CC. 
Set CCValid to the CCMASK_* of all possible +// 2-bit results and CCMask to the subset of those results that are +// associated with Cond. +static SDValue emitCmp(const SystemZTargetMachine &TM, SelectionDAG &DAG, + SDLoc DL, SDValue CmpOp0, SDValue CmpOp1, + ISD::CondCode Cond, unsigned &CCValid, + unsigned &CCMask) { bool IsUnsigned = false; - CCMask = CCMaskForCondCode(CC); - if (!CmpOp0.getValueType().isFloatingPoint()) { + CCMask = CCMaskForCondCode(Cond); + unsigned Opcode, ICmpType = 0; + if (CmpOp0.getValueType().isFloatingPoint()) { + CCValid = SystemZ::CCMASK_FCMP; + Opcode = SystemZISD::FCMP; + } else { IsUnsigned = CCMask & SystemZ::CCMASK_CMP_UO; - CCMask &= ~SystemZ::CCMASK_CMP_UO; + CCValid = SystemZ::CCMASK_ICMP; + CCMask &= CCValid; + adjustZeroCmp(DAG, IsUnsigned, CmpOp0, CmpOp1, CCMask); adjustSubwordCmp(DAG, IsUnsigned, CmpOp0, CmpOp1, CCMask); - if (preferUnsignedComparison(DAG, CmpOp0, CmpOp1, CCMask)) - IsUnsigned = true; + Opcode = SystemZISD::ICMP; + // Choose the type of comparison. Equality and inequality tests can + // use either signed or unsigned comparisons. The choice also doesn't + // matter if both sign bits are known to be clear. In those cases we + // want to give the main isel code the freedom to choose whichever + // form fits best. + if (CCMask == SystemZ::CCMASK_CMP_EQ || + CCMask == SystemZ::CCMASK_CMP_NE || + (DAG.SignBitIsZero(CmpOp0) && DAG.SignBitIsZero(CmpOp1))) + ICmpType = SystemZICMP::Any; + else if (IsUnsigned) + ICmpType = SystemZICMP::UnsignedOnly; + else + ICmpType = SystemZICMP::SignedOnly; } - DebugLoc DL = CmpOp0.getDebugLoc(); - return DAG.getNode((IsUnsigned ? SystemZISD::UCMP : SystemZISD::CMP), - DL, MVT::Glue, CmpOp0, CmpOp1); + if (shouldSwapCmpOperands(CmpOp0, CmpOp1, ICmpType)) { + std::swap(CmpOp0, CmpOp1); + CCMask = ((CCMask & SystemZ::CCMASK_CMP_EQ) | + (CCMask & SystemZ::CCMASK_CMP_GT ? SystemZ::CCMASK_CMP_LT : 0) | + (CCMask & SystemZ::CCMASK_CMP_LT ? 
SystemZ::CCMASK_CMP_GT : 0) | + (CCMask & SystemZ::CCMASK_CMP_UO)); + } + + adjustForTestUnderMask(DAG, Opcode, CmpOp0, CmpOp1, CCValid, CCMask, + ICmpType); + if (Opcode == SystemZISD::ICMP || Opcode == SystemZISD::TM) + return DAG.getNode(Opcode, DL, MVT::Glue, CmpOp0, CmpOp1, + DAG.getConstant(ICmpType, MVT::i32)); + return DAG.getNode(Opcode, DL, MVT::Glue, CmpOp0, CmpOp1); +} + +// Implement a 32-bit *MUL_LOHI operation by extending both operands to +// 64 bits. Extend is the extension type to use. Store the high part +// in Hi and the low part in Lo. +static void lowerMUL_LOHI32(SelectionDAG &DAG, SDLoc DL, + unsigned Extend, SDValue Op0, SDValue Op1, + SDValue &Hi, SDValue &Lo) { + Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0); + Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1); + SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1); + Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul, DAG.getConstant(32, MVT::i64)); + Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi); + Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul); } // Lower a binary operation that produces two VT results, one in each @@ -959,7 +1474,7 @@ static SDValue emitCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, // Extend extends Op0 to a GR128, and Opcode performs the GR128 operation // on the extended Op0 and (unextended) Op1. Store the even register result // in Even and the odd register result in Odd. 
-static void lowerGR128Binary(SelectionDAG &DAG, DebugLoc DL, EVT VT, +static void lowerGR128Binary(SelectionDAG &DAG, SDLoc DL, EVT VT, unsigned Extend, unsigned Opcode, SDValue Op0, SDValue Op1, SDValue &Even, SDValue &Odd) { @@ -967,14 +1482,38 @@ static void lowerGR128Binary(SelectionDAG &DAG, DebugLoc DL, EVT VT, SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, SDValue(In128, 0), Op1); bool Is32Bit = is32Bit(VT); - SDValue SubReg0 = DAG.getTargetConstant(SystemZ::even128(Is32Bit), VT); - SDValue SubReg1 = DAG.getTargetConstant(SystemZ::odd128(Is32Bit), VT); - SDNode *Reg0 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, - VT, Result, SubReg0); - SDNode *Reg1 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, - VT, Result, SubReg1); - Even = SDValue(Reg0, 0); - Odd = SDValue(Reg1, 0); + Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result); + Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result); +} + +SDValue SystemZTargetLowering::lowerSETCC(SDValue Op, + SelectionDAG &DAG) const { + SDValue CmpOp0 = Op.getOperand(0); + SDValue CmpOp1 = Op.getOperand(1); + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); + SDLoc DL(Op); + + unsigned CCValid, CCMask; + SDValue Glue = emitCmp(TM, DAG, DL, CmpOp0, CmpOp1, CC, CCValid, CCMask); + + IPMConversion Conversion = getIPMConversion(CCValid, CCMask); + SDValue Result = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue); + + if (Conversion.XORValue) + Result = DAG.getNode(ISD::XOR, DL, MVT::i32, Result, + DAG.getConstant(Conversion.XORValue, MVT::i32)); + + if (Conversion.AddValue) + Result = DAG.getNode(ISD::ADD, DL, MVT::i32, Result, + DAG.getConstant(Conversion.AddValue, MVT::i32)); + + // The SHR/AND sequence should get optimized to an RISBG. 
+ Result = DAG.getNode(ISD::SRL, DL, MVT::i32, Result, + DAG.getConstant(Conversion.Bit, MVT::i32)); + if (Conversion.Bit != 31) + Result = DAG.getNode(ISD::AND, DL, MVT::i32, Result, + DAG.getConstant(1, MVT::i32)); + return Result; } SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const { @@ -983,12 +1522,13 @@ SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue CmpOp0 = Op.getOperand(2); SDValue CmpOp1 = Op.getOperand(3); SDValue Dest = Op.getOperand(4); - DebugLoc DL = Op.getDebugLoc(); + SDLoc DL(Op); - unsigned CCMask; - SDValue Flags = emitCmp(DAG, CmpOp0, CmpOp1, CC, CCMask); + unsigned CCValid, CCMask; + SDValue Flags = emitCmp(TM, DAG, DL, CmpOp0, CmpOp1, CC, CCValid, CCMask); return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(), - Chain, DAG.getConstant(CCMask, MVT::i32), Dest, Flags); + Chain, DAG.getConstant(CCValid, MVT::i32), + DAG.getConstant(CCMask, MVT::i32), Dest, Flags); } SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op, @@ -998,14 +1538,15 @@ SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op, SDValue TrueOp = Op.getOperand(2); SDValue FalseOp = Op.getOperand(3); ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); - DebugLoc DL = Op.getDebugLoc(); + SDLoc DL(Op); - unsigned CCMask; - SDValue Flags = emitCmp(DAG, CmpOp0, CmpOp1, CC, CCMask); + unsigned CCValid, CCMask; + SDValue Flags = emitCmp(TM, DAG, DL, CmpOp0, CmpOp1, CC, CCValid, CCMask); - SmallVector<SDValue, 4> Ops; + SmallVector<SDValue, 5> Ops; Ops.push_back(TrueOp); Ops.push_back(FalseOp); + Ops.push_back(DAG.getConstant(CCValid, MVT::i32)); Ops.push_back(DAG.getConstant(CCMask, MVT::i32)); Ops.push_back(Flags); @@ -1015,7 +1556,7 @@ SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op, SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node, SelectionDAG &DAG) const { - DebugLoc DL = Node->getDebugLoc(); + SDLoc DL(Node); const GlobalValue *GV = 
Node->getGlobal(); int64_t Offset = Node->getOffset(); EVT PtrVT = getPointerTy(); @@ -1024,18 +1565,18 @@ SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node, SDValue Result; if (Subtarget.isPC32DBLSymbol(GV, RM, CM)) { - // Make sure that the offset is aligned to a halfword. If it isn't, - // create an "anchor" at the previous 12-bit boundary. - // FIXME check whether there is a better way of handling this. - if (Offset & 1) { - Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, - Offset & ~uint64_t(0xfff)); - Offset &= 0xfff; - } else { - Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Offset); + // Assign anchors at 1<<12 byte boundaries. + uint64_t Anchor = Offset & ~uint64_t(0xfff); + Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor); + Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); + + // The offset can be folded into the address if it is aligned to a halfword. + Offset -= Anchor; + if (Offset != 0 && (Offset & 1) == 0) { + SDValue Full = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset); + Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result); Offset = 0; } - Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); } else { Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT); Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); @@ -1054,7 +1595,7 @@ SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node, SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, SelectionDAG &DAG) const { - DebugLoc DL = Node->getDebugLoc(); + SDLoc DL(Node); const GlobalValue *GV = Node->getGlobal(); EVT PtrVT = getPointerTy(); TLSModel::Model model = TM.getTLSModel(GV); @@ -1093,7 +1634,7 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node, SelectionDAG &DAG) const { - DebugLoc DL = 
Node->getDebugLoc(); + SDLoc DL(Node); const BlockAddress *BA = Node->getBlockAddress(); int64_t Offset = Node->getOffset(); EVT PtrVT = getPointerTy(); @@ -1105,7 +1646,7 @@ SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node, SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT, SelectionDAG &DAG) const { - DebugLoc DL = JT->getDebugLoc(); + SDLoc DL(JT); EVT PtrVT = getPointerTy(); SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); @@ -1115,7 +1656,7 @@ SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT, SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP, SelectionDAG &DAG) const { - DebugLoc DL = CP->getDebugLoc(); + SDLoc DL(CP); EVT PtrVT = getPointerTy(); SDValue Result; @@ -1132,29 +1673,38 @@ SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP, SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op, SelectionDAG &DAG) const { - DebugLoc DL = Op.getDebugLoc(); + SDLoc DL(Op); SDValue In = Op.getOperand(0); EVT InVT = In.getValueType(); EVT ResVT = Op.getValueType(); - SDValue SubReg32 = DAG.getTargetConstant(SystemZ::subreg_32bit, MVT::i64); - SDValue Shift32 = DAG.getConstant(32, MVT::i64); if (InVT == MVT::i32 && ResVT == MVT::f32) { - SDValue In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In); - SDValue Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, In64, Shift32); - SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Shift); - SDNode *Out = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, - MVT::f32, Out64, SubReg32); - return SDValue(Out, 0); + SDValue In64; + if (Subtarget.hasHighWord()) { + SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, + MVT::i64); + In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL, + MVT::i64, SDValue(U64, 0), In); + } else { + In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In); + In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64, + DAG.getConstant(32, MVT::i64)); + } + SDValue Out64 = 
DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64); + return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, + DL, MVT::f32, Out64); } if (InVT == MVT::f32 && ResVT == MVT::i32) { SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64); - SDNode *In64 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, - MVT::f64, SDValue(U64, 0), In, SubReg32); - SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, SDValue(In64, 0)); - SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64, Shift32); - SDValue Out = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift); - return Out; + SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL, + MVT::f64, SDValue(U64, 0), In); + SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64); + if (Subtarget.hasHighWord()) + return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL, + MVT::i32, Out64); + SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64, + DAG.getConstant(32, MVT::i64)); + return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift); } llvm_unreachable("Unexpected bitcast combination"); } @@ -1169,7 +1719,7 @@ SDValue SystemZTargetLowering::lowerVASTART(SDValue Op, SDValue Chain = Op.getOperand(0); SDValue Addr = Op.getOperand(1); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); - DebugLoc DL = Op.getDebugLoc(); + SDLoc DL(Op); // The initial values of each field. 
const unsigned NumFields = 4; @@ -1203,7 +1753,7 @@ SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op, SDValue SrcPtr = Op.getOperand(2); const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue(); const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); - DebugLoc DL = Op.getDebugLoc(); + SDLoc DL(Op); return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32), /*Align*/8, /*isVolatile*/false, /*AlwaysInline*/false, @@ -1214,7 +1764,7 @@ SDValue SystemZTargetLowering:: lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); SDValue Size = Op.getOperand(1); - DebugLoc DL = Op.getDebugLoc(); + SDLoc DL(Op); unsigned SPReg = getStackPointerRegisterToSaveRestore(); @@ -1237,18 +1787,64 @@ lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { return DAG.getMergeValues(Ops, 2, DL); } -SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op, +SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); - DebugLoc DL = Op.getDebugLoc(); - assert(!is32Bit(VT) && "Only support 64-bit UMUL_LOHI"); + SDLoc DL(Op); + SDValue Ops[2]; + if (is32Bit(VT)) + // Just do a normal 64-bit multiplication and extract the results. + // We define this so that it can be used for constant division. 
+ lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0), + Op.getOperand(1), Ops[1], Ops[0]); + else { + // Do a full 128-bit multiplication based on UMUL_LOHI64: + // + // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64) + // + // but using the fact that the upper halves are either all zeros + // or all ones: + // + // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64) + // + // and grouping the right terms together since they are quicker than the + // multiplication: + // + // (ll * rl) - (((lh & rl) + (ll & rh)) << 64) + SDValue C63 = DAG.getConstant(63, MVT::i64); + SDValue LL = Op.getOperand(0); + SDValue RL = Op.getOperand(1); + SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63); + SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63); + // UMUL_LOHI64 returns the low result in the odd register and the high + // result in the even register. SMUL_LOHI is defined to return the + // low half first, so the results are in reverse order. + lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64, + LL, RL, Ops[1], Ops[0]); + SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH); + SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL); + SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL); + Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum); + } + return DAG.getMergeValues(Ops, 2, DL); +} - // UMUL_LOHI64 returns the low result in the odd register and the high - // result in the even register. UMUL_LOHI is defined to return the - // low half first, so the results are in reverse order. +SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op, + SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + SDLoc DL(Op); SDValue Ops[2]; - lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64, - Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); + if (is32Bit(VT)) + // Just do a normal 64-bit multiplication and extract the results. 
+ // We define this so that it can be used for constant division. + lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0), + Op.getOperand(1), Ops[1], Ops[0]); + else + // UMUL_LOHI64 returns the low result in the odd register and the high + // result in the even register. UMUL_LOHI is defined to return the + // low half first, so the results are in reverse order. + lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64, + Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); return DAG.getMergeValues(Ops, 2, DL); } @@ -1257,19 +1853,24 @@ SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op, SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); EVT VT = Op.getValueType(); - DebugLoc DL = Op.getDebugLoc(); + SDLoc DL(Op); + unsigned Opcode; // We use DSGF for 32-bit division. if (is32Bit(VT)) { Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0); - Op1 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op1); - } + Opcode = SystemZISD::SDIVREM32; + } else if (DAG.ComputeNumSignBits(Op1) > 32) { + Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1); + Opcode = SystemZISD::SDIVREM32; + } else + Opcode = SystemZISD::SDIVREM64; // DSG(F) takes a 64-bit dividend, so the even register in the GR128 // input is "don't care". The instruction returns the remainder in // the even register and the quotient in the odd register. SDValue Ops[2]; - lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::SDIVREM64, + lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, Opcode, Op0, Op1, Ops[1], Ops[0]); return DAG.getMergeValues(Ops, 2, DL); } @@ -1277,7 +1878,7 @@ SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op, SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); - DebugLoc DL = Op.getDebugLoc(); + SDLoc DL(Op); // DL(G) uses a double-width dividend, so we need to clear the even // register in the GR128 input. 
The instruction returns the remainder @@ -1332,22 +1933,20 @@ SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const { // high 32 bits and just masks out low bits. We can skip it if so. if (HighOp.getOpcode() == ISD::AND && HighOp.getOperand(1).getOpcode() == ISD::Constant) { - ConstantSDNode *MaskNode = cast<ConstantSDNode>(HighOp.getOperand(1)); - uint64_t Mask = MaskNode->getZExtValue() | Masks[High]; - if ((Mask >> 32) == 0xffffffff) - HighOp = HighOp.getOperand(0); + SDValue HighOp0 = HighOp.getOperand(0); + uint64_t Mask = cast<ConstantSDNode>(HighOp.getOperand(1))->getZExtValue(); + if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff)))) + HighOp = HighOp0; } // Take advantage of the fact that all GR32 operations only change the // low 32 bits by truncating Low to an i32 and inserting it directly // using a subreg. The interesting cases are those where the truncation // can be folded. - DebugLoc DL = Op.getDebugLoc(); + SDLoc DL(Op); SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp); - SDValue SubReg32 = DAG.getTargetConstant(SystemZ::subreg_32bit, MVT::i64); - SDNode *Result = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, - MVT::i64, HighOp, Low32, SubReg32); - return SDValue(Result, 0); + return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL, + MVT::i64, HighOp, Low32); } // Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the first @@ -1368,7 +1967,7 @@ SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op, SDValue Addr = Node->getBasePtr(); SDValue Src2 = Node->getVal(); MachineMemOperand *MMO = Node->getMemOperand(); - DebugLoc DL = Node->getDebugLoc(); + SDLoc DL(Node); EVT PtrVT = Addr.getValueType(); // Convert atomic subtracts of constants into additions. 
@@ -1442,7 +2041,7 @@ SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op, SDValue CmpVal = Node->getOperand(2); SDValue SwapVal = Node->getOperand(3); MachineMemOperand *MMO = Node->getMemOperand(); - DebugLoc DL = Node->getDebugLoc(); + SDLoc DL(Node); EVT PtrVT = Addr.getValueType(); // Get the address of the containing word. @@ -1474,7 +2073,7 @@ SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true); - return DAG.getCopyFromReg(Op.getOperand(0), Op.getDebugLoc(), + return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op), SystemZ::R15D, Op.getValueType()); } @@ -1482,10 +2081,30 @@ SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true); - return DAG.getCopyToReg(Op.getOperand(0), Op.getDebugLoc(), + return DAG.getCopyToReg(Op.getOperand(0), SDLoc(Op), SystemZ::R15D, Op.getOperand(1)); } +SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op, + SelectionDAG &DAG) const { + bool IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue(); + if (!IsData) + // Just preserve the chain. + return Op.getOperand(0); + + bool IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue(); + unsigned Code = IsWrite ? 
SystemZ::PFD_WRITE : SystemZ::PFD_READ; + MemIntrinsicSDNode *Node = cast<MemIntrinsicSDNode>(Op.getNode()); + SDValue Ops[] = { + Op.getOperand(0), + DAG.getConstant(Code, MVT::i32), + Op.getOperand(1) + }; + return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, SDLoc(Op), + Node->getVTList(), Ops, array_lengthof(Ops), + Node->getMemoryVT(), Node->getMemOperand()); +} + SDValue SystemZTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { @@ -1493,6 +2112,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op, return lowerBR_CC(Op, DAG); case ISD::SELECT_CC: return lowerSELECT_CC(Op, DAG); + case ISD::SETCC: + return lowerSETCC(Op, DAG); case ISD::GlobalAddress: return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG); case ISD::GlobalTLSAddress: @@ -1511,6 +2132,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op, return lowerVACOPY(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return lowerDYNAMIC_STACKALLOC(Op, DAG); + case ISD::SMUL_LOHI: + return lowerSMUL_LOHI(Op, DAG); case ISD::UMUL_LOHI: return lowerUMUL_LOHI(Op, DAG); case ISD::SDIVREM: @@ -1547,6 +2170,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op, return lowerSTACKSAVE(Op, DAG); case ISD::STACKRESTORE: return lowerSTACKRESTORE(Op, DAG); + case ISD::PREFETCH: + return lowerPREFETCH(Op, DAG); default: llvm_unreachable("Unexpected node to lower"); } @@ -1557,9 +2182,12 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { switch (Opcode) { OPCODE(RET_FLAG); OPCODE(CALL); + OPCODE(SIBCALL); OPCODE(PCREL_WRAPPER); - OPCODE(CMP); - OPCODE(UCMP); + OPCODE(PCREL_OFFSET); + OPCODE(ICMP); + OPCODE(FCMP); + OPCODE(TM); OPCODE(BR_CCMASK); OPCODE(SELECT_CCMASK); OPCODE(ADJDYNALLOC); @@ -1568,6 +2196,20 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(SDIVREM64); OPCODE(UDIVREM32); OPCODE(UDIVREM64); + OPCODE(MVC); + OPCODE(MVC_LOOP); + OPCODE(NC); + OPCODE(NC_LOOP); + OPCODE(OC); 
+ OPCODE(OC_LOOP); + OPCODE(XC); + OPCODE(XC_LOOP); + OPCODE(CLC); + OPCODE(CLC_LOOP); + OPCODE(STRCMP); + OPCODE(STPCPY); + OPCODE(SEARCH_STRING); + OPCODE(IPM); OPCODE(ATOMIC_SWAPW); OPCODE(ATOMIC_LOADW_ADD); OPCODE(ATOMIC_LOADW_SUB); @@ -1580,6 +2222,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(ATOMIC_LOADW_UMIN); OPCODE(ATOMIC_LOADW_UMAX); OPCODE(ATOMIC_CMP_SWAPW); + OPCODE(PREFETCH); } return NULL; #undef OPCODE @@ -1609,6 +2252,31 @@ static MachineBasicBlock *splitBlockAfter(MachineInstr *MI, return NewMBB; } +// Split MBB before MI and return the new block (the one that contains MI). +static MachineBasicBlock *splitBlockBefore(MachineInstr *MI, + MachineBasicBlock *MBB) { + MachineBasicBlock *NewMBB = emitBlockAfter(MBB); + NewMBB->splice(NewMBB->begin(), MBB, MI, MBB->end()); + NewMBB->transferSuccessorsAndUpdatePHIs(MBB); + return NewMBB; +} + +// Force base value Base into a register before MI. Return the register. +static unsigned forceReg(MachineInstr *MI, MachineOperand &Base, + const SystemZInstrInfo *TII) { + if (Base.isReg()) + return Base.getReg(); + + MachineBasicBlock *MBB = MI->getParent(); + MachineFunction &MF = *MBB->getParent(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + + unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); + BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(SystemZ::LA), Reg) + .addOperand(Base).addImm(0).addReg(0); + return Reg; +} + // Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI. 
MachineBasicBlock * SystemZTargetLowering::emitSelect(MachineInstr *MI, @@ -1618,21 +2286,20 @@ SystemZTargetLowering::emitSelect(MachineInstr *MI, unsigned DestReg = MI->getOperand(0).getReg(); unsigned TrueReg = MI->getOperand(1).getReg(); unsigned FalseReg = MI->getOperand(2).getReg(); - unsigned CCMask = MI->getOperand(3).getImm(); + unsigned CCValid = MI->getOperand(3).getImm(); + unsigned CCMask = MI->getOperand(4).getImm(); DebugLoc DL = MI->getDebugLoc(); MachineBasicBlock *StartMBB = MBB; - MachineBasicBlock *JoinMBB = splitBlockAfter(MI, MBB); + MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB); MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB); // StartMBB: - // ... - // TrueVal = ... - // cmpTY ccX, r1, r2 - // jCC JoinMBB + // BRC CCMask, JoinMBB // # fallthrough to FalseMBB MBB = StartMBB; - BuildMI(MBB, DL, TII->get(SystemZ::BRCL)).addImm(CCMask).addMBB(JoinMBB); + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB); MBB->addSuccessor(JoinMBB); MBB->addSuccessor(FalseMBB); @@ -1645,7 +2312,7 @@ SystemZTargetLowering::emitSelect(MachineInstr *MI, // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ] // ... MBB = JoinMBB; - BuildMI(*MBB, MBB->begin(), DL, TII->get(SystemZ::PHI), DestReg) + BuildMI(*MBB, MI, DL, TII->get(SystemZ::PHI), DestReg) .addReg(TrueReg).addMBB(StartMBB) .addReg(FalseReg).addMBB(FalseMBB); @@ -1653,6 +2320,69 @@ SystemZTargetLowering::emitSelect(MachineInstr *MI, return JoinMBB; } +// Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI. +// StoreOpcode is the store to use and Invert says whether the store should +// happen when the condition is false rather than true. If a STORE ON +// CONDITION is available, STOCOpcode is its opcode, otherwise it is 0. 
+MachineBasicBlock * +SystemZTargetLowering::emitCondStore(MachineInstr *MI, + MachineBasicBlock *MBB, + unsigned StoreOpcode, unsigned STOCOpcode, + bool Invert) const { + const SystemZInstrInfo *TII = TM.getInstrInfo(); + + unsigned SrcReg = MI->getOperand(0).getReg(); + MachineOperand Base = MI->getOperand(1); + int64_t Disp = MI->getOperand(2).getImm(); + unsigned IndexReg = MI->getOperand(3).getReg(); + unsigned CCValid = MI->getOperand(4).getImm(); + unsigned CCMask = MI->getOperand(5).getImm(); + DebugLoc DL = MI->getDebugLoc(); + + StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp); + + // Use STOCOpcode if possible. We could use different store patterns in + // order to avoid matching the index register, but the performance trade-offs + // might be more complicated in that case. + if (STOCOpcode && !IndexReg && TM.getSubtargetImpl()->hasLoadStoreOnCond()) { + if (Invert) + CCMask ^= CCValid; + BuildMI(*MBB, MI, DL, TII->get(STOCOpcode)) + .addReg(SrcReg).addOperand(Base).addImm(Disp) + .addImm(CCValid).addImm(CCMask); + MI->eraseFromParent(); + return MBB; + } + + // Get the condition needed to branch around the store. + if (!Invert) + CCMask ^= CCValid; + + MachineBasicBlock *StartMBB = MBB; + MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB); + MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB); + + // StartMBB: + // BRC CCMask, JoinMBB + // # fallthrough to FalseMBB + MBB = StartMBB; + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB); + MBB->addSuccessor(JoinMBB); + MBB->addSuccessor(FalseMBB); + + // FalseMBB: + // store %SrcReg, %Disp(%Index,%Base) + // # fallthrough to JoinMBB + MBB = FalseMBB; + BuildMI(MBB, DL, TII->get(StoreOpcode)) + .addReg(SrcReg).addOperand(Base).addImm(Disp).addReg(IndexReg); + MBB->addSuccessor(JoinMBB); + + MI->eraseFromParent(); + return JoinMBB; +} + // Implement EmitInstrWithCustomInserter for pseudo ATOMIC_LOAD{,W}_* // or ATOMIC_SWAP{,W} instruction MI. 
BinOpcode is the instruction that // performs the binary operation elided by "*", or 0 for ATOMIC_SWAP{,W}. @@ -1669,7 +2399,6 @@ SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI, const SystemZInstrInfo *TII = TM.getInstrInfo(); MachineFunction &MF = *MBB->getParent(); MachineRegisterInfo &MRI = MF.getRegInfo(); - unsigned MaskNE = CCMaskForCondCode(ISD::SETNE); bool IsSubWord = (BitSize < 32); // Extract the operands. Base can be a register or a frame index. @@ -1706,7 +2435,7 @@ SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI, // Insert a basic block for the main loop. MachineBasicBlock *StartMBB = MBB; - MachineBasicBlock *DoneMBB = splitBlockAfter(MI, MBB); + MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB); MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB); // StartMBB: @@ -1740,11 +2469,11 @@ SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI, .addReg(RotatedOldVal).addOperand(Src2); if (BitSize < 32) // XILF with the upper BitSize bits set. - BuildMI(MBB, DL, TII->get(SystemZ::XILF32), RotatedNewVal) + BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal) .addReg(Tmp).addImm(uint32_t(~0 << (32 - BitSize))); else if (BitSize == 32) // XILF with every bit set. 
- BuildMI(MBB, DL, TII->get(SystemZ::XILF32), RotatedNewVal) + BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal) .addReg(Tmp).addImm(~uint32_t(0)); else { // Use LCGR and add -1 to the result, which is more compact than @@ -1769,7 +2498,8 @@ SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI, .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0); BuildMI(MBB, DL, TII->get(CSOpcode), Dest) .addReg(OldVal).addReg(NewVal).addOperand(Base).addImm(Disp); - BuildMI(MBB, DL, TII->get(SystemZ::BRCL)).addImm(MaskNE).addMBB(LoopMBB); + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB); MBB->addSuccessor(LoopMBB); MBB->addSuccessor(DoneMBB); @@ -1792,7 +2522,6 @@ SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI, const SystemZInstrInfo *TII = TM.getInstrInfo(); MachineFunction &MF = *MBB->getParent(); MachineRegisterInfo &MRI = MF.getRegInfo(); - unsigned MaskNE = CCMaskForCondCode(ISD::SETNE); bool IsSubWord = (BitSize < 32); // Extract the operands. Base can be a register or a frame index. @@ -1828,7 +2557,7 @@ SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI, // Insert 3 basic blocks for the loop. 
MachineBasicBlock *StartMBB = MBB; - MachineBasicBlock *DoneMBB = splitBlockAfter(MI, MBB); + MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB); MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB); MachineBasicBlock *UseAltMBB = emitBlockAfter(LoopMBB); MachineBasicBlock *UpdateMBB = emitBlockAfter(UseAltMBB); @@ -1846,7 +2575,7 @@ SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI, // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ] // %RotatedOldVal = RLL %OldVal, 0(%BitShift) // CompareOpcode %RotatedOldVal, %Src2 - // BRCL KeepOldMask, UpdateMBB + // BRC KeepOldMask, UpdateMBB MBB = LoopMBB; BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal) .addReg(OrigVal).addMBB(StartMBB) @@ -1856,8 +2585,8 @@ SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI, .addReg(OldVal).addReg(BitShift).addImm(0); BuildMI(MBB, DL, TII->get(CompareOpcode)) .addReg(RotatedOldVal).addReg(Src2); - BuildMI(MBB, DL, TII->get(SystemZ::BRCL)) - .addImm(KeepOldMask).addMBB(UpdateMBB); + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB); MBB->addSuccessor(UpdateMBB); MBB->addSuccessor(UseAltMBB); @@ -1887,7 +2616,8 @@ SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI, .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0); BuildMI(MBB, DL, TII->get(CSOpcode), Dest) .addReg(OldVal).addReg(NewVal).addOperand(Base).addImm(Disp); - BuildMI(MBB, DL, TII->get(SystemZ::BRCL)).addImm(MaskNE).addMBB(LoopMBB); + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB); MBB->addSuccessor(LoopMBB); MBB->addSuccessor(DoneMBB); @@ -1903,7 +2633,6 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI, const SystemZInstrInfo *TII = TM.getInstrInfo(); MachineFunction &MF = *MBB->getParent(); MachineRegisterInfo &MRI = MF.getRegInfo(); - unsigned MaskNE = CCMaskForCondCode(ISD::SETNE); // Extract the operands. 
Base can be a register or a frame index. unsigned Dest = MI->getOperand(0).getReg(); @@ -1935,7 +2664,7 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI, // Insert 2 basic blocks for the loop. MachineBasicBlock *StartMBB = MBB; - MachineBasicBlock *DoneMBB = splitBlockAfter(MI, MBB); + MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB); MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB); MachineBasicBlock *SetMBB = emitBlockAfter(LoopMBB); @@ -1978,7 +2707,9 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI, .addReg(CmpVal).addReg(Dest).addImm(32).addImm(63 - BitSize).addImm(0); BuildMI(MBB, DL, TII->get(SystemZ::CR)) .addReg(Dest).addReg(RetryCmpVal); - BuildMI(MBB, DL, TII->get(SystemZ::BRCL)).addImm(MaskNE).addMBB(DoneMBB); + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_ICMP) + .addImm(SystemZ::CCMASK_CMP_NE).addMBB(DoneMBB); MBB->addSuccessor(DoneMBB); MBB->addSuccessor(SetMBB); @@ -1998,7 +2729,8 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI, .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize); BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal) .addReg(OldVal).addReg(StoreVal).addOperand(Base).addImm(Disp); - BuildMI(MBB, DL, TII->get(SystemZ::BRCL)).addImm(MaskNE).addMBB(LoopMBB); + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB); MBB->addSuccessor(LoopMBB); MBB->addSuccessor(DoneMBB); @@ -2008,8 +2740,8 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI, // Emit an extension from a GR32 or GR64 to a GR128. ClearEven is true // if the high register of the GR128 value must be cleared or false if -// it's "don't care". SubReg is subreg_odd32 when extending a GR32 -// and subreg_odd when extending a GR64. +// it's "don't care". SubReg is subreg_l32 when extending a GR32 +// and subreg_l64 when extending a GR64. 
MachineBasicBlock * SystemZTargetLowering::emitExt128(MachineInstr *MI, MachineBasicBlock *MBB, @@ -2031,7 +2763,7 @@ SystemZTargetLowering::emitExt128(MachineInstr *MI, BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64) .addImm(0); BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128) - .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_high); + .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_h64); In128 = NewIn128; } BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest) @@ -2041,9 +2773,238 @@ SystemZTargetLowering::emitExt128(MachineInstr *MI, return MBB; } +MachineBasicBlock * +SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI, + MachineBasicBlock *MBB, + unsigned Opcode) const { + const SystemZInstrInfo *TII = TM.getInstrInfo(); + MachineFunction &MF = *MBB->getParent(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + DebugLoc DL = MI->getDebugLoc(); + + MachineOperand DestBase = earlyUseOperand(MI->getOperand(0)); + uint64_t DestDisp = MI->getOperand(1).getImm(); + MachineOperand SrcBase = earlyUseOperand(MI->getOperand(2)); + uint64_t SrcDisp = MI->getOperand(3).getImm(); + uint64_t Length = MI->getOperand(4).getImm(); + + // When generating more than one CLC, all but the last will need to + // branch to the end when a difference is found. + MachineBasicBlock *EndMBB = (Length > 256 && Opcode == SystemZ::CLC ? + splitBlockAfter(MI, MBB) : 0); + + // Check for the loop form, in which operand 5 is the trip count. + if (MI->getNumExplicitOperands() > 5) { + bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase); + + uint64_t StartCountReg = MI->getOperand(5).getReg(); + uint64_t StartSrcReg = forceReg(MI, SrcBase, TII); + uint64_t StartDestReg = (HaveSingleBase ? StartSrcReg : + forceReg(MI, DestBase, TII)); + + const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass; + uint64_t ThisSrcReg = MRI.createVirtualRegister(RC); + uint64_t ThisDestReg = (HaveSingleBase ? 
ThisSrcReg : + MRI.createVirtualRegister(RC)); + uint64_t NextSrcReg = MRI.createVirtualRegister(RC); + uint64_t NextDestReg = (HaveSingleBase ? NextSrcReg : + MRI.createVirtualRegister(RC)); + + RC = &SystemZ::GR64BitRegClass; + uint64_t ThisCountReg = MRI.createVirtualRegister(RC); + uint64_t NextCountReg = MRI.createVirtualRegister(RC); + + MachineBasicBlock *StartMBB = MBB; + MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB); + MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB); + MachineBasicBlock *NextMBB = (EndMBB ? emitBlockAfter(LoopMBB) : LoopMBB); + + // StartMBB: + // # fall through to LoopMMB + MBB->addSuccessor(LoopMBB); + + // LoopMBB: + // %ThisDestReg = phi [ %StartDestReg, StartMBB ], + // [ %NextDestReg, NextMBB ] + // %ThisSrcReg = phi [ %StartSrcReg, StartMBB ], + // [ %NextSrcReg, NextMBB ] + // %ThisCountReg = phi [ %StartCountReg, StartMBB ], + // [ %NextCountReg, NextMBB ] + // ( PFD 2, 768+DestDisp(%ThisDestReg) ) + // Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg) + // ( JLH EndMBB ) + // + // The prefetch is used only for MVC. The JLH is used only for CLC. 
+ MBB = LoopMBB; + + BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg) + .addReg(StartDestReg).addMBB(StartMBB) + .addReg(NextDestReg).addMBB(NextMBB); + if (!HaveSingleBase) + BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg) + .addReg(StartSrcReg).addMBB(StartMBB) + .addReg(NextSrcReg).addMBB(NextMBB); + BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg) + .addReg(StartCountReg).addMBB(StartMBB) + .addReg(NextCountReg).addMBB(NextMBB); + if (Opcode == SystemZ::MVC) + BuildMI(MBB, DL, TII->get(SystemZ::PFD)) + .addImm(SystemZ::PFD_WRITE) + .addReg(ThisDestReg).addImm(DestDisp + 768).addReg(0); + BuildMI(MBB, DL, TII->get(Opcode)) + .addReg(ThisDestReg).addImm(DestDisp).addImm(256) + .addReg(ThisSrcReg).addImm(SrcDisp); + if (EndMBB) { + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE) + .addMBB(EndMBB); + MBB->addSuccessor(EndMBB); + MBB->addSuccessor(NextMBB); + } + + // NextMBB: + // %NextDestReg = LA 256(%ThisDestReg) + // %NextSrcReg = LA 256(%ThisSrcReg) + // %NextCountReg = AGHI %ThisCountReg, -1 + // CGHI %NextCountReg, 0 + // JLH LoopMBB + // # fall through to DoneMMB + // + // The AGHI, CGHI and JLH should be converted to BRCTG by later passes. 
+ MBB = NextMBB; + + BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg) + .addReg(ThisDestReg).addImm(256).addReg(0); + if (!HaveSingleBase) + BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg) + .addReg(ThisSrcReg).addImm(256).addReg(0); + BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg) + .addReg(ThisCountReg).addImm(-1); + BuildMI(MBB, DL, TII->get(SystemZ::CGHI)) + .addReg(NextCountReg).addImm(0); + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE) + .addMBB(LoopMBB); + MBB->addSuccessor(LoopMBB); + MBB->addSuccessor(DoneMBB); + + DestBase = MachineOperand::CreateReg(NextDestReg, false); + SrcBase = MachineOperand::CreateReg(NextSrcReg, false); + Length &= 255; + MBB = DoneMBB; + } + // Handle any remaining bytes with straight-line code. + while (Length > 0) { + uint64_t ThisLength = std::min(Length, uint64_t(256)); + // The previous iteration might have created out-of-range displacements. + // Apply them using LAY if so. + if (!isUInt<12>(DestDisp)) { + unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); + BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(SystemZ::LAY), Reg) + .addOperand(DestBase).addImm(DestDisp).addReg(0); + DestBase = MachineOperand::CreateReg(Reg, false); + DestDisp = 0; + } + if (!isUInt<12>(SrcDisp)) { + unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); + BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(SystemZ::LAY), Reg) + .addOperand(SrcBase).addImm(SrcDisp).addReg(0); + SrcBase = MachineOperand::CreateReg(Reg, false); + SrcDisp = 0; + } + BuildMI(*MBB, MI, DL, TII->get(Opcode)) + .addOperand(DestBase).addImm(DestDisp).addImm(ThisLength) + .addOperand(SrcBase).addImm(SrcDisp); + DestDisp += ThisLength; + SrcDisp += ThisLength; + Length -= ThisLength; + // If there's another CLC to go, branch to the end if a difference + // was found. 
+ if (EndMBB && Length > 0) { + MachineBasicBlock *NextMBB = splitBlockBefore(MI, MBB); + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE) + .addMBB(EndMBB); + MBB->addSuccessor(EndMBB); + MBB->addSuccessor(NextMBB); + MBB = NextMBB; + } + } + if (EndMBB) { + MBB->addSuccessor(EndMBB); + MBB = EndMBB; + MBB->addLiveIn(SystemZ::CC); + } + + MI->eraseFromParent(); + return MBB; +} + +// Decompose string pseudo-instruction MI into a loop that continually performs +// Opcode until CC != 3. +MachineBasicBlock * +SystemZTargetLowering::emitStringWrapper(MachineInstr *MI, + MachineBasicBlock *MBB, + unsigned Opcode) const { + const SystemZInstrInfo *TII = TM.getInstrInfo(); + MachineFunction &MF = *MBB->getParent(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + DebugLoc DL = MI->getDebugLoc(); + + uint64_t End1Reg = MI->getOperand(0).getReg(); + uint64_t Start1Reg = MI->getOperand(1).getReg(); + uint64_t Start2Reg = MI->getOperand(2).getReg(); + uint64_t CharReg = MI->getOperand(3).getReg(); + + const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass; + uint64_t This1Reg = MRI.createVirtualRegister(RC); + uint64_t This2Reg = MRI.createVirtualRegister(RC); + uint64_t End2Reg = MRI.createVirtualRegister(RC); + + MachineBasicBlock *StartMBB = MBB; + MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB); + MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB); + + // StartMBB: + // # fall through to LoopMMB + MBB->addSuccessor(LoopMBB); + + // LoopMBB: + // %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ] + // %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ] + // R0L = %CharReg + // %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L + // JO LoopMBB + // # fall through to DoneMMB + // + // The load of R0L can be hoisted by post-RA LICM. 
+ MBB = LoopMBB; + + BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg) + .addReg(Start1Reg).addMBB(StartMBB) + .addReg(End1Reg).addMBB(LoopMBB); + BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg) + .addReg(Start2Reg).addMBB(StartMBB) + .addReg(End2Reg).addMBB(LoopMBB); + BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0L).addReg(CharReg); + BuildMI(MBB, DL, TII->get(Opcode)) + .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define) + .addReg(This1Reg).addReg(This2Reg); + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_ANY).addImm(SystemZ::CCMASK_3).addMBB(LoopMBB); + MBB->addSuccessor(LoopMBB); + MBB->addSuccessor(DoneMBB); + + DoneMBB->addLiveIn(SystemZ::CC); + + MI->eraseFromParent(); + return DoneMBB; +} + MachineBasicBlock *SystemZTargetLowering:: EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const { switch (MI->getOpcode()) { + case SystemZ::Select32Mux: case SystemZ::Select32: case SystemZ::SelectF32: case SystemZ::Select64: @@ -2051,12 +3012,45 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const { case SystemZ::SelectF128: return emitSelect(MI, MBB); + case SystemZ::CondStore8Mux: + return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false); + case SystemZ::CondStore8MuxInv: + return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true); + case SystemZ::CondStore16Mux: + return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false); + case SystemZ::CondStore16MuxInv: + return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true); + case SystemZ::CondStore8: + return emitCondStore(MI, MBB, SystemZ::STC, 0, false); + case SystemZ::CondStore8Inv: + return emitCondStore(MI, MBB, SystemZ::STC, 0, true); + case SystemZ::CondStore16: + return emitCondStore(MI, MBB, SystemZ::STH, 0, false); + case SystemZ::CondStore16Inv: + return emitCondStore(MI, MBB, SystemZ::STH, 0, true); + case SystemZ::CondStore32: + return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, false); + case 
SystemZ::CondStore32Inv: + return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, true); + case SystemZ::CondStore64: + return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, false); + case SystemZ::CondStore64Inv: + return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, true); + case SystemZ::CondStoreF32: + return emitCondStore(MI, MBB, SystemZ::STE, 0, false); + case SystemZ::CondStoreF32Inv: + return emitCondStore(MI, MBB, SystemZ::STE, 0, true); + case SystemZ::CondStoreF64: + return emitCondStore(MI, MBB, SystemZ::STD, 0, false); + case SystemZ::CondStoreF64Inv: + return emitCondStore(MI, MBB, SystemZ::STD, 0, true); + case SystemZ::AEXT128_64: - return emitExt128(MI, MBB, false, SystemZ::subreg_low); + return emitExt128(MI, MBB, false, SystemZ::subreg_l64); case SystemZ::ZEXT128_32: - return emitExt128(MI, MBB, true, SystemZ::subreg_low32); + return emitExt128(MI, MBB, true, SystemZ::subreg_l32); case SystemZ::ZEXT128_64: - return emitExt128(MI, MBB, true, SystemZ::subreg_low); + return emitExt128(MI, MBB, true, SystemZ::subreg_l64); case SystemZ::ATOMIC_SWAPW: return emitAtomicLoadBinary(MI, MBB, 0, 0); @@ -2092,98 +3086,98 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const { case SystemZ::ATOMIC_LOADW_NR: return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0); case SystemZ::ATOMIC_LOADW_NILH: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH32, 0); + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 0); case SystemZ::ATOMIC_LOAD_NR: return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32); - case SystemZ::ATOMIC_LOAD_NILL32: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL32, 32); - case SystemZ::ATOMIC_LOAD_NILH32: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH32, 32); - case SystemZ::ATOMIC_LOAD_NILF32: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF32, 32); - case SystemZ::ATOMIC_LOAD_NGR: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64); case SystemZ::ATOMIC_LOAD_NILL: - return 
emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 64); + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 32); case SystemZ::ATOMIC_LOAD_NILH: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 64); - case SystemZ::ATOMIC_LOAD_NIHL: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL, 64); - case SystemZ::ATOMIC_LOAD_NIHH: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH, 64); + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 32); case SystemZ::ATOMIC_LOAD_NILF: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 64); - case SystemZ::ATOMIC_LOAD_NIHF: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF, 64); + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 32); + case SystemZ::ATOMIC_LOAD_NGR: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64); + case SystemZ::ATOMIC_LOAD_NILL64: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL64, 64); + case SystemZ::ATOMIC_LOAD_NILH64: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH64, 64); + case SystemZ::ATOMIC_LOAD_NIHL64: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL64, 64); + case SystemZ::ATOMIC_LOAD_NIHH64: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH64, 64); + case SystemZ::ATOMIC_LOAD_NILF64: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF64, 64); + case SystemZ::ATOMIC_LOAD_NIHF64: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF64, 64); case SystemZ::ATOMIC_LOADW_OR: return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 0); case SystemZ::ATOMIC_LOADW_OILH: - return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH32, 0); + return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 0); case SystemZ::ATOMIC_LOAD_OR: return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 32); - case SystemZ::ATOMIC_LOAD_OILL32: - return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL32, 32); - case SystemZ::ATOMIC_LOAD_OILH32: - return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH32, 32); - case SystemZ::ATOMIC_LOAD_OILF32: - return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF32, 32); - case 
SystemZ::ATOMIC_LOAD_OGR: - return emitAtomicLoadBinary(MI, MBB, SystemZ::OGR, 64); case SystemZ::ATOMIC_LOAD_OILL: - return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL, 64); + return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL, 32); case SystemZ::ATOMIC_LOAD_OILH: - return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 64); - case SystemZ::ATOMIC_LOAD_OIHL: - return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHL, 64); - case SystemZ::ATOMIC_LOAD_OIHH: - return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHH, 64); + return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 32); case SystemZ::ATOMIC_LOAD_OILF: - return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF, 64); - case SystemZ::ATOMIC_LOAD_OIHF: - return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHF, 64); + return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF, 32); + case SystemZ::ATOMIC_LOAD_OGR: + return emitAtomicLoadBinary(MI, MBB, SystemZ::OGR, 64); + case SystemZ::ATOMIC_LOAD_OILL64: + return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL64, 64); + case SystemZ::ATOMIC_LOAD_OILH64: + return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH64, 64); + case SystemZ::ATOMIC_LOAD_OIHL64: + return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHL64, 64); + case SystemZ::ATOMIC_LOAD_OIHH64: + return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHH64, 64); + case SystemZ::ATOMIC_LOAD_OILF64: + return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF64, 64); + case SystemZ::ATOMIC_LOAD_OIHF64: + return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHF64, 64); case SystemZ::ATOMIC_LOADW_XR: return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 0); case SystemZ::ATOMIC_LOADW_XILF: - return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF32, 0); + return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 0); case SystemZ::ATOMIC_LOAD_XR: return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 32); - case SystemZ::ATOMIC_LOAD_XILF32: - return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF32, 32); + case SystemZ::ATOMIC_LOAD_XILF: + return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 32); 
case SystemZ::ATOMIC_LOAD_XGR: return emitAtomicLoadBinary(MI, MBB, SystemZ::XGR, 64); - case SystemZ::ATOMIC_LOAD_XILF: - return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 64); - case SystemZ::ATOMIC_LOAD_XIHF: - return emitAtomicLoadBinary(MI, MBB, SystemZ::XIHF, 64); + case SystemZ::ATOMIC_LOAD_XILF64: + return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF64, 64); + case SystemZ::ATOMIC_LOAD_XIHF64: + return emitAtomicLoadBinary(MI, MBB, SystemZ::XIHF64, 64); case SystemZ::ATOMIC_LOADW_NRi: return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0, true); case SystemZ::ATOMIC_LOADW_NILHi: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH32, 0, true); + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 0, true); case SystemZ::ATOMIC_LOAD_NRi: return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32, true); - case SystemZ::ATOMIC_LOAD_NILL32i: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL32, 32, true); - case SystemZ::ATOMIC_LOAD_NILH32i: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH32, 32, true); - case SystemZ::ATOMIC_LOAD_NILF32i: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF32, 32, true); - case SystemZ::ATOMIC_LOAD_NGRi: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64, true); case SystemZ::ATOMIC_LOAD_NILLi: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 64, true); + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 32, true); case SystemZ::ATOMIC_LOAD_NILHi: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 64, true); - case SystemZ::ATOMIC_LOAD_NIHLi: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL, 64, true); - case SystemZ::ATOMIC_LOAD_NIHHi: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH, 64, true); + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 32, true); case SystemZ::ATOMIC_LOAD_NILFi: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 64, true); - case SystemZ::ATOMIC_LOAD_NIHFi: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF, 64, true); + return emitAtomicLoadBinary(MI, MBB, 
SystemZ::NILF, 32, true); + case SystemZ::ATOMIC_LOAD_NGRi: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64, true); + case SystemZ::ATOMIC_LOAD_NILL64i: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL64, 64, true); + case SystemZ::ATOMIC_LOAD_NILH64i: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH64, 64, true); + case SystemZ::ATOMIC_LOAD_NIHL64i: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL64, 64, true); + case SystemZ::ATOMIC_LOAD_NIHH64i: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH64, 64, true); + case SystemZ::ATOMIC_LOAD_NILF64i: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF64, 64, true); + case SystemZ::ATOMIC_LOAD_NIHF64i: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF64, 64, true); case SystemZ::ATOMIC_LOADW_MIN: return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, @@ -2227,6 +3221,27 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const { case SystemZ::ATOMIC_CMP_SWAPW: return emitAtomicCmpSwapW(MI, MBB); + case SystemZ::MVCSequence: + case SystemZ::MVCLoop: + return emitMemMemWrapper(MI, MBB, SystemZ::MVC); + case SystemZ::NCSequence: + case SystemZ::NCLoop: + return emitMemMemWrapper(MI, MBB, SystemZ::NC); + case SystemZ::OCSequence: + case SystemZ::OCLoop: + return emitMemMemWrapper(MI, MBB, SystemZ::OC); + case SystemZ::XCSequence: + case SystemZ::XCLoop: + return emitMemMemWrapper(MI, MBB, SystemZ::XC); + case SystemZ::CLCSequence: + case SystemZ::CLCLoop: + return emitMemMemWrapper(MI, MBB, SystemZ::CLC); + case SystemZ::CLSTLoop: + return emitStringWrapper(MI, MBB, SystemZ::CLST); + case SystemZ::MVSTLoop: + return emitStringWrapper(MI, MBB, SystemZ::MVST); + case SystemZ::SRSTLoop: + return emitStringWrapper(MI, MBB, SystemZ::SRST); default: llvm_unreachable("Unexpected instr type to insert"); } diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h index eea820c..c6dcca6 100644 --- 
a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -16,6 +16,7 @@ #define LLVM_TARGET_SystemZ_ISELLOWERING_H #include "SystemZ.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Target/TargetLowering.h" @@ -31,17 +32,32 @@ namespace SystemZISD { // is the target address. The arguments start at operand 2. // There is an optional glue operand at the end. CALL, + SIBCALL, // Wraps a TargetGlobalAddress that should be loaded using PC-relative // accesses (LARL). Operand 0 is the address. PCREL_WRAPPER, - // Signed integer and floating-point comparisons. The operands are the - // two values to compare. - CMP, + // Used in cases where an offset is applied to a TargetGlobalAddress. + // Operand 0 is the full TargetGlobalAddress and operand 1 is a + // PCREL_WRAPPER for an anchor point. This is used so that we can + // cheaply refer to either the full address or the anchor point + // as a register base. + PCREL_OFFSET, - // Likewise unsigned integer comparison. - UCMP, + // Integer comparisons. There are three operands: the two values + // to compare, and an integer of type SystemZICMP. + ICMP, + + // Floating-point comparisons. The two operands are the values to compare. + FCMP, + + // Test under mask. The first operand is ANDed with the second operand + // and the condition codes are set on the result. The third operand is + // a boolean that is true if the condition codes need to distinguish + // between CCMASK_TM_MIXED_MSB_0 and CCMASK_TM_MIXED_MSB_1 (which the + // register forms do but the memory forms don't). + TM, // Branches if a condition is true. Operand 0 is the chain operand; // operand 1 is the 4-bit condition-code mask, with bit N in @@ -67,10 +83,55 @@ namespace SystemZISD { // first input operands are GR128s. The trailing numbers are the // widths of the second operand in bits. 
UMUL_LOHI64, + SDIVREM32, SDIVREM64, UDIVREM32, UDIVREM64, + // Use a series of MVCs to copy bytes from one memory location to another. + // The operands are: + // - the target address + // - the source address + // - the constant length + // + // This isn't a memory opcode because we'd need to attach two + // MachineMemOperands rather than one. + MVC, + + // Like MVC, but implemented as a loop that handles X*256 bytes + // followed by straight-line code to handle the rest (if any). + // The value of X is passed as an additional operand. + MVC_LOOP, + + // Similar to MVC and MVC_LOOP, but for logic operations (AND, OR, XOR). + NC, + NC_LOOP, + OC, + OC_LOOP, + XC, + XC_LOOP, + + // Use CLC to compare two blocks of memory, with the same comments + // as for MVC and MVC_LOOP. + CLC, + CLC_LOOP, + + // Use an MVST-based sequence to implement stpcpy(). + STPCPY, + + // Use a CLST-based sequence to implement strcmp(). The two input operands + // are the addresses of the strings to compare. + STRCMP, + + // Use an SRST-based sequence to search a block of memory. The first + // operand is the end address, the second is the start, and the third + // is the character to search for. CC is set to 1 on success and 2 + // on failure. + SEARCH_STRING, + + // Store the CC value in bits 29 and 28 of an integer. + IPM, + // Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or // ATOMIC_LOAD_<op>. // @@ -102,7 +163,27 @@ namespace SystemZISD { // operand into the high bits // Operand 4: the negative of operand 2, for rotating the other way // Operand 5: the width of the field in bits (8 or 16) - ATOMIC_CMP_SWAPW + ATOMIC_CMP_SWAPW, + + // Prefetch from the second operand using the 4-bit control code in + // the first operand. The code is 1 for a load prefetch and 2 for + // a store prefetch. + PREFETCH + }; + + // Return true if OPCODE is some kind of PC-relative address. 
+ inline bool isPCREL(unsigned Opcode) { + return Opcode == PCREL_WRAPPER || Opcode == PCREL_OFFSET; + } +} + +namespace SystemZICMP { + // Describes whether an integer comparison needs to be signed or unsigned, + // or whether either type is OK. + enum { + Any, + UnsignedOnly, + SignedOnly }; } @@ -117,17 +198,19 @@ public: virtual MVT getScalarShiftAmountTy(EVT LHSTy) const LLVM_OVERRIDE { return MVT::i32; } - virtual EVT getSetCCResultType(EVT VT) const { - return MVT::i32; - } - virtual bool isFMAFasterThanMulAndAdd(EVT) const LLVM_OVERRIDE { - return true; - } - virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const; + virtual EVT getSetCCResultType(LLVMContext &, EVT) const LLVM_OVERRIDE; + virtual bool isFMAFasterThanFMulAndFAdd(EVT VT) const LLVM_OVERRIDE; + virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const LLVM_OVERRIDE; + virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const + LLVM_OVERRIDE; + virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const + LLVM_OVERRIDE; + virtual bool isTruncateFree(Type *, Type *) const LLVM_OVERRIDE; + virtual bool isTruncateFree(EVT, EVT) const LLVM_OVERRIDE; virtual const char *getTargetNodeName(unsigned Opcode) const LLVM_OVERRIDE; virtual std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const LLVM_OVERRIDE; + MVT VT) const LLVM_OVERRIDE; virtual TargetLowering::ConstraintType getConstraintType(const std::string &Constraint) const LLVM_OVERRIDE; virtual TargetLowering::ConstraintWeight @@ -143,11 +226,13 @@ public: MachineBasicBlock *BB) const LLVM_OVERRIDE; virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const LLVM_OVERRIDE; + virtual bool allowTruncateForTailCall(Type *, Type *) const LLVM_OVERRIDE; + virtual bool mayBeEmittedAsTailCall(CallInst *CI) const LLVM_OVERRIDE; virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const 
SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc DL, SelectionDAG &DAG, + SDLoc DL, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const LLVM_OVERRIDE; virtual SDValue LowerCall(CallLoweringInfo &CLI, @@ -158,13 +243,14 @@ public: CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, - DebugLoc DL, SelectionDAG &DAG) const LLVM_OVERRIDE; + SDLoc DL, SelectionDAG &DAG) const LLVM_OVERRIDE; private: const SystemZSubtarget &Subtarget; const SystemZTargetMachine &TM; // Implement LowerOperation for individual opcodes. + SDValue lowerSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue lowerBR_CC(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue lowerGlobalAddress(GlobalAddressSDNode *Node, @@ -178,6 +264,7 @@ private: SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVACOPY(SDValue Op, SelectionDAG &DAG) const; SDValue lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const; SDValue lowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSDIVREM(SDValue Op, SelectionDAG &DAG) const; SDValue lowerUDIVREM(SDValue Op, SelectionDAG &DAG) const; @@ -188,10 +275,24 @@ private: SDValue lowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const; + + // If the last instruction before MBBI in MBB was some form of COMPARE, + // try to replace it with a COMPARE AND BRANCH just before MBBI. + // CCMask and Target are the BRC-like operands for the branch. + // Return true if the change was made. 
+ bool convertPrevCompareToBranch(MachineBasicBlock *MBB, + MachineBasicBlock::iterator MBBI, + unsigned CCMask, + MachineBasicBlock *Target) const; // Implement EmitInstrWithCustomInserter for individual operation types. MachineBasicBlock *emitSelect(MachineInstr *MI, MachineBasicBlock *BB) const; + MachineBasicBlock *emitCondStore(MachineInstr *MI, + MachineBasicBlock *BB, + unsigned StoreOpcode, unsigned STOCOpcode, + bool Invert) const; MachineBasicBlock *emitExt128(MachineInstr *MI, MachineBasicBlock *MBB, bool ClearEven, unsigned SubReg) const; @@ -206,6 +307,12 @@ private: unsigned BitSize) const; MachineBasicBlock *emitAtomicCmpSwapW(MachineInstr *MI, MachineBasicBlock *BB) const; + MachineBasicBlock *emitMemMemWrapper(MachineInstr *MI, + MachineBasicBlock *BB, + unsigned Opcode) const; + MachineBasicBlock *emitStringWrapper(MachineInstr *MI, + MachineBasicBlock *BB, + unsigned Opcode) const; }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td index 7c9f0e6..6080046 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// Control-flow instructions +// Select instructions //===----------------------------------------------------------------------===// // C's ?: operator for floating-point operands. 
@@ -16,6 +16,11 @@ def SelectF32 : SelectWrapper<FP32>; def SelectF64 : SelectWrapper<FP64>; def SelectF128 : SelectWrapper<FP128>; +defm CondStoreF32 : CondStores<FP32, nonvolatile_store, + nonvolatile_load, bdxaddr20only>; +defm CondStoreF64 : CondStores<FP64, nonvolatile_store, + nonvolatile_load, bdxaddr20only>; + //===----------------------------------------------------------------------===// // Move instructions //===----------------------------------------------------------------------===// @@ -29,57 +34,69 @@ let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { // Moves between two floating-point registers. let neverHasSideEffects = 1 in { - def LER : UnaryRR <"ler", 0x38, null_frag, FP32, FP32>; - def LDR : UnaryRR <"ldr", 0x28, null_frag, FP64, FP64>; - def LXR : UnaryRRE<"lxr", 0xB365, null_frag, FP128, FP128>; + def LER : UnaryRR <"le", 0x38, null_frag, FP32, FP32>; + def LDR : UnaryRR <"ld", 0x28, null_frag, FP64, FP64>; + def LXR : UnaryRRE<"lx", 0xB365, null_frag, FP128, FP128>; +} + +// Moves between two floating-point registers that also set the condition +// codes. +let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in { + defm LTEBR : LoadAndTestRRE<"lteb", 0xB302, FP32>; + defm LTDBR : LoadAndTestRRE<"ltdb", 0xB312, FP64>; + defm LTXBR : LoadAndTestRRE<"ltxb", 0xB342, FP128>; } +def : CompareZeroFP<LTEBRCompare, FP32>; +def : CompareZeroFP<LTDBRCompare, FP64>; +def : CompareZeroFP<LTXBRCompare, FP128>; // Moves between 64-bit integer and floating-point registers. -def LGDR : UnaryRRE<"lgdr", 0xB3CD, bitconvert, GR64, FP64>; -def LDGR : UnaryRRE<"ldgr", 0xB3C1, bitconvert, FP64, GR64>; +def LGDR : UnaryRRE<"lgd", 0xB3CD, bitconvert, GR64, FP64>; +def LDGR : UnaryRRE<"ldg", 0xB3C1, bitconvert, FP64, GR64>; // fcopysign with an FP32 result. 
let isCodeGenOnly = 1 in { - def CPSDRss : BinaryRevRRF<"cpsdr", 0xB372, fcopysign, FP32, FP32>; - def CPSDRsd : BinaryRevRRF<"cpsdr", 0xB372, fcopysign, FP32, FP64>; + def CPSDRss : BinaryRRF<"cpsd", 0xB372, fcopysign, FP32, FP32>; + def CPSDRsd : BinaryRRF<"cpsd", 0xB372, fcopysign, FP32, FP64>; } -// The sign of an FP128 is in the high register. Give the CPSDRsd -// operands in R1, R2, R3 order. +// The sign of an FP128 is in the high register. def : Pat<(fcopysign FP32:$src1, FP128:$src2), - (CPSDRsd (EXTRACT_SUBREG FP128:$src2, subreg_high), FP32:$src1)>; + (CPSDRsd FP32:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>; // fcopysign with an FP64 result. let isCodeGenOnly = 1 in - def CPSDRds : BinaryRevRRF<"cpsdr", 0xB372, fcopysign, FP64, FP32>; -def CPSDRdd : BinaryRevRRF<"cpsdr", 0xB372, fcopysign, FP64, FP64>; + def CPSDRds : BinaryRRF<"cpsd", 0xB372, fcopysign, FP64, FP32>; +def CPSDRdd : BinaryRRF<"cpsd", 0xB372, fcopysign, FP64, FP64>; -// The sign of an FP128 is in the high register. Give the CPSDRdd -// operands in R1, R2, R3 order. +// The sign of an FP128 is in the high register. def : Pat<(fcopysign FP64:$src1, FP128:$src2), - (CPSDRdd (EXTRACT_SUBREG FP128:$src2, subreg_high), FP64:$src1)>; + (CPSDRdd FP64:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>; // fcopysign with an FP128 result. Use "upper" as the high half and leave // the low half as-is. class CopySign128<RegisterOperand cls, dag upper> : Pat<(fcopysign FP128:$src1, cls:$src2), - (INSERT_SUBREG FP128:$src1, upper, subreg_high)>; + (INSERT_SUBREG FP128:$src1, upper, subreg_h64)>; -// Give the CPSDR* operands in R1, R2, R3 order. 
-def : CopySign128<FP32, (CPSDRds FP32:$src2, - (EXTRACT_SUBREG FP128:$src1, subreg_high))>; -def : CopySign128<FP64, (CPSDRdd FP64:$src2, - (EXTRACT_SUBREG FP128:$src1, subreg_high))>; -def : CopySign128<FP128, (CPSDRdd (EXTRACT_SUBREG FP128:$src2, subreg_high), - (EXTRACT_SUBREG FP128:$src1, subreg_high))>; +def : CopySign128<FP32, (CPSDRds (EXTRACT_SUBREG FP128:$src1, subreg_h64), + FP32:$src2)>; +def : CopySign128<FP64, (CPSDRdd (EXTRACT_SUBREG FP128:$src1, subreg_h64), + FP64:$src2)>; +def : CopySign128<FP128, (CPSDRdd (EXTRACT_SUBREG FP128:$src1, subreg_h64), + (EXTRACT_SUBREG FP128:$src2, subreg_h64))>; + +defm LoadStoreF32 : MVCLoadStore<load, f32, MVCSequence, 4>; +defm LoadStoreF64 : MVCLoadStore<load, f64, MVCSequence, 8>; +defm LoadStoreF128 : MVCLoadStore<load, f128, MVCSequence, 16>; //===----------------------------------------------------------------------===// // Load instructions //===----------------------------------------------------------------------===// let canFoldAsLoad = 1, SimpleBDXLoad = 1 in { - defm LE : UnaryRXPair<"le", 0x78, 0xED64, load, FP32>; - defm LD : UnaryRXPair<"ld", 0x68, 0xED65, load, FP64>; + defm LE : UnaryRXPair<"le", 0x78, 0xED64, load, FP32, 4>; + defm LD : UnaryRXPair<"ld", 0x68, 0xED65, load, FP64, 8>; // These instructions are split after register allocation, so we don't // want a custom inserter. @@ -94,8 +111,8 @@ let canFoldAsLoad = 1, SimpleBDXLoad = 1 in { //===----------------------------------------------------------------------===// let SimpleBDXStore = 1 in { - defm STE : StoreRXPair<"ste", 0x70, 0xED66, store, FP32>; - defm STD : StoreRXPair<"std", 0x60, 0xED67, store, FP64>; + defm STE : StoreRXPair<"ste", 0x70, 0xED66, store, FP32, 4>; + defm STD : StoreRXPair<"std", 0x60, 0xED67, store, FP64, 8>; // These instructions are split after register allocation, so we don't // want a custom inserter. 
@@ -112,201 +129,232 @@ let SimpleBDXStore = 1 in { // Convert floating-point values to narrower representations, rounding // according to the current mode. The destination of LEXBR and LDXBR // is a 128-bit value, but only the first register of the pair is used. -def LEDBR : UnaryRRE<"ledbr", 0xB344, fround, FP32, FP64>; -def LEXBR : UnaryRRE<"lexbr", 0xB346, null_frag, FP128, FP128>; -def LDXBR : UnaryRRE<"ldxbr", 0xB345, null_frag, FP128, FP128>; +def LEDBR : UnaryRRE<"ledb", 0xB344, fround, FP32, FP64>; +def LEXBR : UnaryRRE<"lexb", 0xB346, null_frag, FP128, FP128>; +def LDXBR : UnaryRRE<"ldxb", 0xB345, null_frag, FP128, FP128>; def : Pat<(f32 (fround FP128:$src)), - (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_32bit)>; + (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_hh32)>; def : Pat<(f64 (fround FP128:$src)), - (EXTRACT_SUBREG (LDXBR FP128:$src), subreg_high)>; + (EXTRACT_SUBREG (LDXBR FP128:$src), subreg_h64)>; // Extend register floating-point values to wider representations. -def LDEBR : UnaryRRE<"ldebr", 0xB304, fextend, FP64, FP32>; -def LXEBR : UnaryRRE<"lxebr", 0xB306, fextend, FP128, FP32>; -def LXDBR : UnaryRRE<"lxdbr", 0xB305, fextend, FP128, FP64>; +def LDEBR : UnaryRRE<"ldeb", 0xB304, fextend, FP64, FP32>; +def LXEBR : UnaryRRE<"lxeb", 0xB306, fextend, FP128, FP32>; +def LXDBR : UnaryRRE<"lxdb", 0xB305, fextend, FP128, FP64>; // Extend memory floating-point values to wider representations. -def LDEB : UnaryRXE<"ldeb", 0xED04, extloadf32, FP64>; -def LXEB : UnaryRXE<"lxeb", 0xED06, extloadf32, FP128>; -def LXDB : UnaryRXE<"lxdb", 0xED05, extloadf64, FP128>; +def LDEB : UnaryRXE<"ldeb", 0xED04, extloadf32, FP64, 4>; +def LXEB : UnaryRXE<"lxeb", 0xED06, extloadf32, FP128, 4>; +def LXDB : UnaryRXE<"lxdb", 0xED05, extloadf64, FP128, 8>; // Convert a signed integer register value to a floating-point one. 
-let Defs = [PSW] in { - def CEFBR : UnaryRRE<"cefbr", 0xB394, sint_to_fp, FP32, GR32>; - def CDFBR : UnaryRRE<"cdfbr", 0xB395, sint_to_fp, FP64, GR32>; - def CXFBR : UnaryRRE<"cxfbr", 0xB396, sint_to_fp, FP128, GR32>; - - def CEGBR : UnaryRRE<"cegbr", 0xB3A4, sint_to_fp, FP32, GR64>; - def CDGBR : UnaryRRE<"cdgbr", 0xB3A5, sint_to_fp, FP64, GR64>; - def CXGBR : UnaryRRE<"cxgbr", 0xB3A6, sint_to_fp, FP128, GR64>; -} +def CEFBR : UnaryRRE<"cefb", 0xB394, sint_to_fp, FP32, GR32>; +def CDFBR : UnaryRRE<"cdfb", 0xB395, sint_to_fp, FP64, GR32>; +def CXFBR : UnaryRRE<"cxfb", 0xB396, sint_to_fp, FP128, GR32>; + +def CEGBR : UnaryRRE<"cegb", 0xB3A4, sint_to_fp, FP32, GR64>; +def CDGBR : UnaryRRE<"cdgb", 0xB3A5, sint_to_fp, FP64, GR64>; +def CXGBR : UnaryRRE<"cxgb", 0xB3A6, sint_to_fp, FP128, GR64>; // Convert a floating-point register value to a signed integer value, // with the second operand (modifier M3) specifying the rounding mode. -let Defs = [PSW] in { - def CFEBR : UnaryRRF<"cfebr", 0xB398, GR32, FP32>; - def CFDBR : UnaryRRF<"cfdbr", 0xB399, GR32, FP64>; - def CFXBR : UnaryRRF<"cfxbr", 0xB39A, GR32, FP128>; - - def CGEBR : UnaryRRF<"cgebr", 0xB3A8, GR64, FP32>; - def CGDBR : UnaryRRF<"cgdbr", 0xB3A9, GR64, FP64>; - def CGXBR : UnaryRRF<"cgxbr", 0xB3AA, GR64, FP128>; +let Defs = [CC] in { + def CFEBR : UnaryRRF<"cfeb", 0xB398, GR32, FP32>; + def CFDBR : UnaryRRF<"cfdb", 0xB399, GR32, FP64>; + def CFXBR : UnaryRRF<"cfxb", 0xB39A, GR32, FP128>; + + def CGEBR : UnaryRRF<"cgeb", 0xB3A8, GR64, FP32>; + def CGDBR : UnaryRRF<"cgdb", 0xB3A9, GR64, FP64>; + def CGXBR : UnaryRRF<"cgxb", 0xB3AA, GR64, FP128>; } // fp_to_sint always rounds towards zero, which is modifier value 5. 
-def : Pat<(i32 (fp_to_sint FP32:$src)), (CFEBR FP32:$src, 5)>; -def : Pat<(i32 (fp_to_sint FP64:$src)), (CFDBR FP64:$src, 5)>; -def : Pat<(i32 (fp_to_sint FP128:$src)), (CFXBR FP128:$src, 5)>; +def : Pat<(i32 (fp_to_sint FP32:$src)), (CFEBR 5, FP32:$src)>; +def : Pat<(i32 (fp_to_sint FP64:$src)), (CFDBR 5, FP64:$src)>; +def : Pat<(i32 (fp_to_sint FP128:$src)), (CFXBR 5, FP128:$src)>; -def : Pat<(i64 (fp_to_sint FP32:$src)), (CGEBR FP32:$src, 5)>; -def : Pat<(i64 (fp_to_sint FP64:$src)), (CGDBR FP64:$src, 5)>; -def : Pat<(i64 (fp_to_sint FP128:$src)), (CGXBR FP128:$src, 5)>; +def : Pat<(i64 (fp_to_sint FP32:$src)), (CGEBR 5, FP32:$src)>; +def : Pat<(i64 (fp_to_sint FP64:$src)), (CGDBR 5, FP64:$src)>; +def : Pat<(i64 (fp_to_sint FP128:$src)), (CGXBR 5, FP128:$src)>; //===----------------------------------------------------------------------===// // Unary arithmetic //===----------------------------------------------------------------------===// // Negation (Load Complement). -let Defs = [PSW] in { - def LCEBR : UnaryRRE<"lcebr", 0xB303, fneg, FP32, FP32>; - def LCDBR : UnaryRRE<"lcdbr", 0xB313, fneg, FP64, FP64>; - def LCXBR : UnaryRRE<"lcxbr", 0xB343, fneg, FP128, FP128>; +let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in { + def LCEBR : UnaryRRE<"lceb", 0xB303, fneg, FP32, FP32>; + def LCDBR : UnaryRRE<"lcdb", 0xB313, fneg, FP64, FP64>; + def LCXBR : UnaryRRE<"lcxb", 0xB343, fneg, FP128, FP128>; } // Absolute value (Load Positive). -let Defs = [PSW] in { - def LPEBR : UnaryRRE<"lpebr", 0xB300, fabs, FP32, FP32>; - def LPDBR : UnaryRRE<"lpdbr", 0xB310, fabs, FP64, FP64>; - def LPXBR : UnaryRRE<"lpxbr", 0xB340, fabs, FP128, FP128>; +let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in { + def LPEBR : UnaryRRE<"lpeb", 0xB300, fabs, FP32, FP32>; + def LPDBR : UnaryRRE<"lpdb", 0xB310, fabs, FP64, FP64>; + def LPXBR : UnaryRRE<"lpxb", 0xB340, fabs, FP128, FP128>; } // Negative absolute value (Load Negative). 
-let Defs = [PSW] in { - def LNEBR : UnaryRRE<"lnebr", 0xB301, fnabs, FP32, FP32>; - def LNDBR : UnaryRRE<"lndbr", 0xB311, fnabs, FP64, FP64>; - def LNXBR : UnaryRRE<"lnxbr", 0xB341, fnabs, FP128, FP128>; +let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in { + def LNEBR : UnaryRRE<"lneb", 0xB301, fnabs, FP32, FP32>; + def LNDBR : UnaryRRE<"lndb", 0xB311, fnabs, FP64, FP64>; + def LNXBR : UnaryRRE<"lnxb", 0xB341, fnabs, FP128, FP128>; } // Square root. -def SQEBR : UnaryRRE<"sqebr", 0xB314, fsqrt, FP32, FP32>; -def SQDBR : UnaryRRE<"sqdbr", 0xB315, fsqrt, FP64, FP64>; -def SQXBR : UnaryRRE<"sqxbr", 0xB316, fsqrt, FP128, FP128>; +def SQEBR : UnaryRRE<"sqeb", 0xB314, fsqrt, FP32, FP32>; +def SQDBR : UnaryRRE<"sqdb", 0xB315, fsqrt, FP64, FP64>; +def SQXBR : UnaryRRE<"sqxb", 0xB316, fsqrt, FP128, FP128>; -def SQEB : UnaryRXE<"sqeb", 0xED14, loadu<fsqrt>, FP32>; -def SQDB : UnaryRXE<"sqdb", 0xED15, loadu<fsqrt>, FP64>; +def SQEB : UnaryRXE<"sqeb", 0xED14, loadu<fsqrt>, FP32, 4>; +def SQDB : UnaryRXE<"sqdb", 0xED15, loadu<fsqrt>, FP64, 8>; // Round to an integer, with the second operand (modifier M3) specifying -// the rounding mode. -// -// These forms always check for inexact conditions. z196 added versions -// that allow this to suppressed (as for fnearbyint), but we don't yet -// support -march=z196. -let Defs = [PSW] in { - def FIEBR : UnaryRRF<"fiebr", 0xB357, FP32, FP32>; - def FIDBR : UnaryRRF<"fidbr", 0xB35F, FP64, FP64>; - def FIXBR : UnaryRRF<"fixbr", 0xB347, FP128, FP128>; -} +// the rounding mode. These forms always check for inexact conditions. +def FIEBR : UnaryRRF<"fieb", 0xB357, FP32, FP32>; +def FIDBR : UnaryRRF<"fidb", 0xB35F, FP64, FP64>; +def FIXBR : UnaryRRF<"fixb", 0xB347, FP128, FP128>; + +// Extended forms of the previous three instructions. M4 can be set to 4 +// to suppress detection of inexact conditions. 
+def FIEBRA : UnaryRRF4<"fiebra", 0xB357, FP32, FP32>, + Requires<[FeatureFPExtension]>; +def FIDBRA : UnaryRRF4<"fidbra", 0xB35F, FP64, FP64>, + Requires<[FeatureFPExtension]>; +def FIXBRA : UnaryRRF4<"fixbra", 0xB347, FP128, FP128>, + Requires<[FeatureFPExtension]>; // frint rounds according to the current mode (modifier 0) and detects // inexact conditions. -def : Pat<(frint FP32:$src), (FIEBR FP32:$src, 0)>; -def : Pat<(frint FP64:$src), (FIDBR FP64:$src, 0)>; -def : Pat<(frint FP128:$src), (FIXBR FP128:$src, 0)>; +def : Pat<(frint FP32:$src), (FIEBR 0, FP32:$src)>; +def : Pat<(frint FP64:$src), (FIDBR 0, FP64:$src)>; +def : Pat<(frint FP128:$src), (FIXBR 0, FP128:$src)>; + +let Predicates = [FeatureFPExtension] in { + // fnearbyint is like frint but does not detect inexact conditions. + def : Pat<(fnearbyint FP32:$src), (FIEBRA 0, FP32:$src, 4)>; + def : Pat<(fnearbyint FP64:$src), (FIDBRA 0, FP64:$src, 4)>; + def : Pat<(fnearbyint FP128:$src), (FIXBRA 0, FP128:$src, 4)>; + + // floor is no longer allowed to raise an inexact condition, + // so restrict it to the cases where the condition can be suppressed. + // Mode 7 is round towards -inf. + def : Pat<(ffloor FP32:$src), (FIEBRA 7, FP32:$src, 4)>; + def : Pat<(ffloor FP64:$src), (FIDBRA 7, FP64:$src, 4)>; + def : Pat<(ffloor FP128:$src), (FIXBRA 7, FP128:$src, 4)>; + + // Same idea for ceil, where mode 6 is round towards +inf. + def : Pat<(fceil FP32:$src), (FIEBRA 6, FP32:$src, 4)>; + def : Pat<(fceil FP64:$src), (FIDBRA 6, FP64:$src, 4)>; + def : Pat<(fceil FP128:$src), (FIXBRA 6, FP128:$src, 4)>; + + // Same idea for trunc, where mode 5 is round towards zero. + def : Pat<(ftrunc FP32:$src), (FIEBRA 5, FP32:$src, 4)>; + def : Pat<(ftrunc FP64:$src), (FIDBRA 5, FP64:$src, 4)>; + def : Pat<(ftrunc FP128:$src), (FIXBRA 5, FP128:$src, 4)>; + + // Same idea for round, where mode 1 is round towards nearest with + // ties away from zero. 
+ def : Pat<(frnd FP32:$src), (FIEBRA 1, FP32:$src, 4)>; + def : Pat<(frnd FP64:$src), (FIDBRA 1, FP64:$src, 4)>; + def : Pat<(frnd FP128:$src), (FIXBRA 1, FP128:$src, 4)>; +} //===----------------------------------------------------------------------===// // Binary arithmetic //===----------------------------------------------------------------------===// // Addition. -let Defs = [PSW] in { +let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in { let isCommutable = 1 in { - def AEBR : BinaryRRE<"aebr", 0xB30A, fadd, FP32, FP32>; - def ADBR : BinaryRRE<"adbr", 0xB31A, fadd, FP64, FP64>; - def AXBR : BinaryRRE<"axbr", 0xB34A, fadd, FP128, FP128>; + def AEBR : BinaryRRE<"aeb", 0xB30A, fadd, FP32, FP32>; + def ADBR : BinaryRRE<"adb", 0xB31A, fadd, FP64, FP64>; + def AXBR : BinaryRRE<"axb", 0xB34A, fadd, FP128, FP128>; } - def AEB : BinaryRXE<"aeb", 0xED0A, fadd, FP32, load>; - def ADB : BinaryRXE<"adb", 0xED1A, fadd, FP64, load>; + def AEB : BinaryRXE<"aeb", 0xED0A, fadd, FP32, load, 4>; + def ADB : BinaryRXE<"adb", 0xED1A, fadd, FP64, load, 8>; } // Subtraction. -let Defs = [PSW] in { - def SEBR : BinaryRRE<"sebr", 0xB30B, fsub, FP32, FP32>; - def SDBR : BinaryRRE<"sdbr", 0xB31B, fsub, FP64, FP64>; - def SXBR : BinaryRRE<"sxbr", 0xB34B, fsub, FP128, FP128>; +let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in { + def SEBR : BinaryRRE<"seb", 0xB30B, fsub, FP32, FP32>; + def SDBR : BinaryRRE<"sdb", 0xB31B, fsub, FP64, FP64>; + def SXBR : BinaryRRE<"sxb", 0xB34B, fsub, FP128, FP128>; - def SEB : BinaryRXE<"seb", 0xED0B, fsub, FP32, load>; - def SDB : BinaryRXE<"sdb", 0xED1B, fsub, FP64, load>; + def SEB : BinaryRXE<"seb", 0xED0B, fsub, FP32, load, 4>; + def SDB : BinaryRXE<"sdb", 0xED1B, fsub, FP64, load, 8>; } // Multiplication. 
let isCommutable = 1 in { - def MEEBR : BinaryRRE<"meebr", 0xB317, fmul, FP32, FP32>; - def MDBR : BinaryRRE<"mdbr", 0xB31C, fmul, FP64, FP64>; - def MXBR : BinaryRRE<"mxbr", 0xB34C, fmul, FP128, FP128>; + def MEEBR : BinaryRRE<"meeb", 0xB317, fmul, FP32, FP32>; + def MDBR : BinaryRRE<"mdb", 0xB31C, fmul, FP64, FP64>; + def MXBR : BinaryRRE<"mxb", 0xB34C, fmul, FP128, FP128>; } -def MEEB : BinaryRXE<"meeb", 0xED17, fmul, FP32, load>; -def MDB : BinaryRXE<"mdb", 0xED1C, fmul, FP64, load>; +def MEEB : BinaryRXE<"meeb", 0xED17, fmul, FP32, load, 4>; +def MDB : BinaryRXE<"mdb", 0xED1C, fmul, FP64, load, 8>; // f64 multiplication of two FP32 registers. -def MDEBR : BinaryRRE<"mdebr", 0xB30C, null_frag, FP64, FP32>; +def MDEBR : BinaryRRE<"mdeb", 0xB30C, null_frag, FP64, FP32>; def : Pat<(fmul (f64 (fextend FP32:$src1)), (f64 (fextend FP32:$src2))), (MDEBR (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - FP32:$src1, subreg_32bit), FP32:$src2)>; + FP32:$src1, subreg_h32), FP32:$src2)>; // f64 multiplication of an FP32 register and an f32 memory. -def MDEB : BinaryRXE<"mdeb", 0xED0C, null_frag, FP64, load>; +def MDEB : BinaryRXE<"mdeb", 0xED0C, null_frag, FP64, load, 4>; def : Pat<(fmul (f64 (fextend FP32:$src1)), (f64 (extloadf32 bdxaddr12only:$addr))), - (MDEB (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_32bit), + (MDEB (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_h32), bdxaddr12only:$addr)>; // f128 multiplication of two FP64 registers. -def MXDBR : BinaryRRE<"mxdbr", 0xB307, null_frag, FP128, FP64>; +def MXDBR : BinaryRRE<"mxdb", 0xB307, null_frag, FP128, FP64>; def : Pat<(fmul (f128 (fextend FP64:$src1)), (f128 (fextend FP64:$src2))), (MXDBR (INSERT_SUBREG (f128 (IMPLICIT_DEF)), - FP64:$src1, subreg_high), FP64:$src2)>; + FP64:$src1, subreg_h64), FP64:$src2)>; // f128 multiplication of an FP64 register and an f64 memory. 
-def MXDB : BinaryRXE<"mxdb", 0xED07, null_frag, FP128, load>; +def MXDB : BinaryRXE<"mxdb", 0xED07, null_frag, FP128, load, 8>; def : Pat<(fmul (f128 (fextend FP64:$src1)), (f128 (extloadf64 bdxaddr12only:$addr))), - (MXDB (INSERT_SUBREG (f128 (IMPLICIT_DEF)), FP64:$src1, subreg_high), + (MXDB (INSERT_SUBREG (f128 (IMPLICIT_DEF)), FP64:$src1, subreg_h64), bdxaddr12only:$addr)>; // Fused multiply-add. -def MAEBR : TernaryRRD<"maebr", 0xB30E, z_fma, FP32>; -def MADBR : TernaryRRD<"madbr", 0xB31E, z_fma, FP64>; +def MAEBR : TernaryRRD<"maeb", 0xB30E, z_fma, FP32>; +def MADBR : TernaryRRD<"madb", 0xB31E, z_fma, FP64>; -def MAEB : TernaryRXF<"maeb", 0xED0E, z_fma, FP32, load>; -def MADB : TernaryRXF<"madb", 0xED1E, z_fma, FP64, load>; +def MAEB : TernaryRXF<"maeb", 0xED0E, z_fma, FP32, load, 4>; +def MADB : TernaryRXF<"madb", 0xED1E, z_fma, FP64, load, 8>; // Fused multiply-subtract. -def MSEBR : TernaryRRD<"msebr", 0xB30F, z_fms, FP32>; -def MSDBR : TernaryRRD<"msdbr", 0xB31F, z_fms, FP64>; +def MSEBR : TernaryRRD<"mseb", 0xB30F, z_fms, FP32>; +def MSDBR : TernaryRRD<"msdb", 0xB31F, z_fms, FP64>; -def MSEB : TernaryRXF<"mseb", 0xED0F, z_fms, FP32, load>; -def MSDB : TernaryRXF<"msdb", 0xED1F, z_fms, FP64, load>; +def MSEB : TernaryRXF<"mseb", 0xED0F, z_fms, FP32, load, 4>; +def MSDB : TernaryRXF<"msdb", 0xED1F, z_fms, FP64, load, 8>; // Division. 
-def DEBR : BinaryRRE<"debr", 0xB30D, fdiv, FP32, FP32>; -def DDBR : BinaryRRE<"ddbr", 0xB31D, fdiv, FP64, FP64>; -def DXBR : BinaryRRE<"dxbr", 0xB34D, fdiv, FP128, FP128>; +def DEBR : BinaryRRE<"deb", 0xB30D, fdiv, FP32, FP32>; +def DDBR : BinaryRRE<"ddb", 0xB31D, fdiv, FP64, FP64>; +def DXBR : BinaryRRE<"dxb", 0xB34D, fdiv, FP128, FP128>; -def DEB : BinaryRXE<"deb", 0xED0D, fdiv, FP32, load>; -def DDB : BinaryRXE<"ddb", 0xED1D, fdiv, FP64, load>; +def DEB : BinaryRXE<"deb", 0xED0D, fdiv, FP32, load, 4>; +def DDB : BinaryRXE<"ddb", 0xED1D, fdiv, FP64, load, 8>; //===----------------------------------------------------------------------===// // Comparisons //===----------------------------------------------------------------------===// -let Defs = [PSW] in { - def CEBR : CompareRRE<"cebr", 0xB309, z_cmp, FP32, FP32>; - def CDBR : CompareRRE<"cdbr", 0xB319, z_cmp, FP64, FP64>; - def CXBR : CompareRRE<"cxbr", 0xB349, z_cmp, FP128, FP128>; +let Defs = [CC], CCValues = 0xF in { + def CEBR : CompareRRE<"ceb", 0xB309, z_fcmp, FP32, FP32>; + def CDBR : CompareRRE<"cdb", 0xB319, z_fcmp, FP64, FP64>; + def CXBR : CompareRRE<"cxb", 0xB349, z_fcmp, FP128, FP128>; - def CEB : CompareRXE<"ceb", 0xED09, z_cmp, FP32, load>; - def CDB : CompareRXE<"cdb", 0xED19, z_cmp, FP64, load>; + def CEB : CompareRXE<"ceb", 0xED09, z_fcmp, FP32, load, 4>; + def CDB : CompareRXE<"cdb", 0xED19, z_fcmp, FP64, load, 8>; } //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td index b32b7eb..a8efe16 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td @@ -21,12 +21,24 @@ class InstSystemZ<int size, dag outs, dag ins, string asmstr, let Pattern = pattern; let AsmString = asmstr; - // Used to identify a group of related instructions, such as ST and STY. 
- string Function = ""; - - // "12" for an instruction that has a ...Y equivalent, "20" for that - // ...Y equivalent. - string PairType = "none"; + // Some instructions come in pairs, one having a 12-bit displacement + // and the other having a 20-bit displacement. Both instructions in + // the pair have the same DispKey and their DispSizes are "12" and "20" + // respectively. + string DispKey = ""; + string DispSize = "none"; + + // Many register-based <INSN>R instructions have a memory-based <INSN> + // counterpart. OpKey uniquely identifies <INSN>, while OpType is + // "reg" for <INSN>R and "mem" for <INSN>. + string OpKey = ""; + string OpType = "none"; + + // Many distinct-operands instructions have older 2-operand equivalents. + // NumOpsKey uniquely identifies one of these 2-operand and 3-operand pairs, + // with NumOpsValue being "2" or "3" as appropriate. + string NumOpsKey = ""; + string NumOpsValue = "none"; // True if this instruction is a simple D(X,B) load of a register // (with no sign or zero extension). @@ -46,11 +58,40 @@ class InstSystemZ<int size, dag outs, dag ins, string asmstr, // operations. bit Is128Bit = 0; - let TSFlags{0} = SimpleBDXLoad; - let TSFlags{1} = SimpleBDXStore; - let TSFlags{2} = Has20BitOffset; - let TSFlags{3} = HasIndex; - let TSFlags{4} = Is128Bit; + // The access size of all memory operands in bytes, or 0 if not known. + bits<5> AccessBytes = 0; + + // If the instruction sets CC to a useful value, this gives the mask + // of all possible CC results. The mask has the same form as + // SystemZ::CCMASK_*. + bits<4> CCValues = 0; + + // The subset of CCValues that have the same meaning as they would after + // a comparison of the first operand against zero. + bits<4> CompareZeroCCMask = 0; + + // True if the instruction is conditional and if the CC mask operand + // comes first (as for BRC, etc.). + bit CCMaskFirst = 0; + + // Similar, but true if the CC mask operand comes last (as for LOC, etc.). 
+ bit CCMaskLast = 0; + + // True if the instruction is the "logical" rather than "arithmetic" form, + // in cases where a distinction exists. + bit IsLogical = 0; + + let TSFlags{0} = SimpleBDXLoad; + let TSFlags{1} = SimpleBDXStore; + let TSFlags{2} = Has20BitOffset; + let TSFlags{3} = HasIndex; + let TSFlags{4} = Is128Bit; + let TSFlags{9-5} = AccessBytes; + let TSFlags{13-10} = CCValues; + let TSFlags{17-14} = CompareZeroCCMask; + let TSFlags{18} = CCMaskFirst; + let TSFlags{19} = CCMaskLast; + let TSFlags{20} = IsLogical; } //===----------------------------------------------------------------------===// @@ -61,8 +102,8 @@ class InstSystemZ<int size, dag outs, dag ins, string asmstr, // displacement. def getDisp12Opcode : InstrMapping { let FilterClass = "InstSystemZ"; - let RowFields = ["Function"]; - let ColFields = ["PairType"]; + let RowFields = ["DispKey"]; + let ColFields = ["DispSize"]; let KeyCol = ["20"]; let ValueCols = [["12"]]; } @@ -70,37 +111,54 @@ def getDisp12Opcode : InstrMapping { // Return the version of an instruction that has a signed 20-bit displacement. def getDisp20Opcode : InstrMapping { let FilterClass = "InstSystemZ"; - let RowFields = ["Function"]; - let ColFields = ["PairType"]; + let RowFields = ["DispKey"]; + let ColFields = ["DispSize"]; let KeyCol = ["12"]; let ValueCols = [["20"]]; } +// Return the memory form of a register instruction. +def getMemOpcode : InstrMapping { + let FilterClass = "InstSystemZ"; + let RowFields = ["OpKey"]; + let ColFields = ["OpType"]; + let KeyCol = ["reg"]; + let ValueCols = [["mem"]]; +} + +// Return the 3-operand form of a 2-operand instruction. 
+def getThreeOperandOpcode : InstrMapping { + let FilterClass = "InstSystemZ"; + let RowFields = ["NumOpsKey"]; + let ColFields = ["NumOpsValue"]; + let KeyCol = ["2"]; + let ValueCols = [["3"]]; +} + //===----------------------------------------------------------------------===// // Instruction formats //===----------------------------------------------------------------------===// // // Formats are specified using operand field declarations of the form: // -// bits<4> Rn : register input or output for operand n -// bits<m> In : immediate value of width m for operand n -// bits<4> Bn : base register for address operand n -// bits<m> Dn : displacement value of width m for address operand n -// bits<4> Xn : index register for address operand n -// bits<4> Mn : mode value for operand n +// bits<4> Rn : register input or output for operand n +// bits<m> In : immediate value of width m for operand n +// bits<4> BDn : address operand n, which has a base and a displacement +// bits<m> XBDn : address operand n, which has an index, a base and a +// displacement +// bits<4> Xn : index register for address operand n +// bits<4> Mn : mode value for operand n // -// The operand numbers ("n" in the list above) follow the architecture manual, -// but the fields are always declared in assembly order, so there are some -// cases where operand "2" comes after operand "3". For address operands, -// the base register field is declared first, followed by the displacement, -// followed by the index (if any). This matches the bdaddr* and bdxaddr* -// orders. +// The operand numbers ("n" in the list above) follow the architecture manual. +// Assembly operands sometimes have a different order; in particular, R3 often +// is often written between operands 1 and 2. 
// //===----------------------------------------------------------------------===// class InstRI<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<4, outs, ins, asmstr, pattern> { field bits<32> Inst; + field bits<32> SoftFail = 0; bits<4> R1; bits<16> I2; @@ -111,9 +169,64 @@ class InstRI<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern> let Inst{15-0} = I2; } +class InstRIEb<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> R1; + bits<4> R2; + bits<4> M3; + bits<16> RI4; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = R1; + let Inst{35-32} = R2; + let Inst{31-16} = RI4; + let Inst{15-12} = M3; + let Inst{11-8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstRIEc<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> R1; + bits<8> I2; + bits<4> M3; + bits<16> RI4; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = R1; + let Inst{35-32} = M3; + let Inst{31-16} = RI4; + let Inst{15-8} = I2; + let Inst{7-0} = op{7-0}; +} + +class InstRIEd<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> R1; + bits<4> R3; + bits<16> I2; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = R1; + let Inst{35-32} = R3; + let Inst{31-16} = I2; + let Inst{15-8} = 0; + let Inst{7-0} = op{7-0}; +} + class InstRIEf<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<6, outs, ins, asmstr, pattern> { field bits<48> Inst; + field bits<48> SoftFail = 0; bits<4> R1; bits<4> R2; @@ -133,6 +246,7 @@ class InstRIEf<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> class InstRIL<bits<12> op, dag outs, dag ins, string asmstr, 
list<dag> pattern> : InstSystemZ<6, outs, ins, asmstr, pattern> { field bits<48> Inst; + field bits<48> SoftFail = 0; bits<4> R1; bits<32> I2; @@ -146,6 +260,7 @@ class InstRIL<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern> class InstRR<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<2, outs, ins, asmstr, pattern> { field bits<16> Inst; + field bits<16> SoftFail = 0; bits<4> R1; bits<4> R2; @@ -158,6 +273,7 @@ class InstRR<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> class InstRRD<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<4, outs, ins, asmstr, pattern> { field bits<32> Inst; + field bits<32> SoftFail = 0; bits<4> R1; bits<4> R3; @@ -173,6 +289,7 @@ class InstRRD<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> class InstRRE<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<4, outs, ins, asmstr, pattern> { field bits<32> Inst; + field bits<32> SoftFail = 0; bits<4> R1; bits<4> R2; @@ -186,14 +303,16 @@ class InstRRE<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> class InstRRF<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<4, outs, ins, asmstr, pattern> { field bits<32> Inst; + field bits<32> SoftFail = 0; bits<4> R1; bits<4> R2; bits<4> R3; + bits<4> R4; let Inst{31-16} = op; let Inst{15-12} = R3; - let Inst{11-8} = 0; + let Inst{11-8} = R4; let Inst{7-4} = R1; let Inst{3-0} = R2; } @@ -201,17 +320,14 @@ class InstRRF<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> class InstRX<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<4, outs, ins, asmstr, pattern> { field bits<32> Inst; + field bits<32> SoftFail = 0; bits<4> R1; - bits<4> B2; - bits<12> D2; - bits<4> X2; + bits<20> XBD2; let Inst{31-24} = op; let Inst{23-20} = R1; - let Inst{19-16} = X2; - let Inst{15-12} = B2; - let Inst{11-0} = D2; + let Inst{19-0} = 
XBD2; let HasIndex = 1; } @@ -219,17 +335,14 @@ class InstRX<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> class InstRXE<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<6, outs, ins, asmstr, pattern> { field bits<48> Inst; + field bits<48> SoftFail = 0; bits<4> R1; - bits<4> B2; - bits<12> D2; - bits<4> X2; + bits<20> XBD2; let Inst{47-40} = op{15-8}; let Inst{39-36} = R1; - let Inst{35-32} = X2; - let Inst{31-28} = B2; - let Inst{27-16} = D2; + let Inst{35-16} = XBD2; let Inst{15-8} = 0; let Inst{7-0} = op{7-0}; @@ -239,18 +352,15 @@ class InstRXE<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> class InstRXF<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<6, outs, ins, asmstr, pattern> { field bits<48> Inst; + field bits<48> SoftFail = 0; bits<4> R1; bits<4> R3; - bits<4> B2; - bits<12> D2; - bits<4> X2; + bits<20> XBD2; let Inst{47-40} = op{15-8}; let Inst{39-36} = R3; - let Inst{35-32} = X2; - let Inst{31-28} = B2; - let Inst{27-16} = D2; + let Inst{35-16} = XBD2; let Inst{15-12} = R1; let Inst{11-8} = 0; let Inst{7-0} = op{7-0}; @@ -261,18 +371,14 @@ class InstRXF<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> class InstRXY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<6, outs, ins, asmstr, pattern> { field bits<48> Inst; + field bits<48> SoftFail = 0; bits<4> R1; - bits<4> B2; - bits<20> D2; - bits<4> X2; + bits<28> XBD2; let Inst{47-40} = op{15-8}; let Inst{39-36} = R1; - let Inst{35-32} = X2; - let Inst{31-28} = B2; - let Inst{27-16} = D2{11-0}; - let Inst{15-8} = D2{19-12}; + let Inst{35-8} = XBD2; let Inst{7-0} = op{7-0}; let Has20BitOffset = 1; @@ -282,34 +388,31 @@ class InstRXY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> class InstRS<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<4, outs, ins, asmstr, pattern> { field bits<32> Inst; + field bits<32> 
SoftFail = 0; bits<4> R1; bits<4> R3; - bits<4> B2; - bits<12> D2; + bits<16> BD2; let Inst{31-24} = op; let Inst{23-20} = R1; let Inst{19-16} = R3; - let Inst{15-12} = B2; - let Inst{11-0} = D2; + let Inst{15-0} = BD2; } class InstRSY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<6, outs, ins, asmstr, pattern> { field bits<48> Inst; + field bits<48> SoftFail = 0; bits<4> R1; bits<4> R3; - bits<4> B2; - bits<20> D2; + bits<24> BD2; let Inst{47-40} = op{15-8}; let Inst{39-36} = R1; let Inst{35-32} = R3; - let Inst{31-28} = B2; - let Inst{27-16} = D2{11-0}; - let Inst{15-8} = D2{19-12}; + let Inst{31-8} = BD2; let Inst{7-0} = op{7-0}; let Has20BitOffset = 1; @@ -318,60 +421,77 @@ class InstRSY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> class InstSI<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<4, outs, ins, asmstr, pattern> { field bits<32> Inst; + field bits<32> SoftFail = 0; - bits<4> B1; - bits<12> D1; + bits<16> BD1; bits<8> I2; let Inst{31-24} = op; let Inst{23-16} = I2; - let Inst{15-12} = B1; - let Inst{11-0} = D1; + let Inst{15-0} = BD1; } class InstSIL<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<6, outs, ins, asmstr, pattern> { field bits<48> Inst; + field bits<48> SoftFail = 0; - bits<4> B1; - bits<12> D1; + bits<16> BD1; bits<16> I2; let Inst{47-32} = op; - let Inst{31-28} = B1; - let Inst{27-16} = D1; + let Inst{31-16} = BD1; let Inst{15-0} = I2; } class InstSIY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<6, outs, ins, asmstr, pattern> { field bits<48> Inst; + field bits<48> SoftFail = 0; - bits<4> B1; - bits<20> D1; + bits<24> BD1; bits<8> I2; let Inst{47-40} = op{15-8}; let Inst{39-32} = I2; - let Inst{31-28} = B1; - let Inst{27-16} = D1{11-0}; - let Inst{15-8} = D1{19-12}; + let Inst{31-8} = BD1; let Inst{7-0} = op{7-0}; let Has20BitOffset = 1; } +class InstSS<bits<8> op, dag outs, dag ins, 
string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<24> BDL1; + bits<16> BD2; + + let Inst{47-40} = op; + let Inst{39-16} = BDL1; + let Inst{15-0} = BD2; +} + //===----------------------------------------------------------------------===// // Instruction definitions with semantics //===----------------------------------------------------------------------===// // -// These classes have the form <Category><Format>, where <Format> is one +// These classes have the form [Cond]<Category><Format>, where <Format> is one // of the formats defined above and where <Category> describes the inputs -// and outputs. <Category> can be one of: +// and outputs. "Cond" is used if the instruction is conditional, +// in which case the 4-bit condition-code mask is added as a final operand. +// <Category> can be one of: // // Inherent: // One register output operand and no input operands. // +// BranchUnary: +// One register output operand, one register input operand and +// one branch displacement. The instructions stores a modified +// form of the source register in the destination register and +// branches on the result. +// // Store: // One register or immediate input operand and one address input operand. // The instruction stores the first operand to the address. @@ -420,6 +540,10 @@ class InstSIY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> // One output operand and five input operands. The first two operands // are registers and the other three are immediates. // +// Prefetch: +// One 4-bit immediate operand and one address operand. The immediate +// operand is 1 for a load prefetch and 2 for a store prefetch. +// // The format determines which input operands are tied to output operands, // and also determines the shape of any address operand. 
// @@ -432,23 +556,32 @@ class InstSIY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> class InherentRRE<string mnemonic, bits<16> opcode, RegisterOperand cls, dag src> - : InstRRE<opcode, (outs cls:$dst), (ins), - mnemonic#"\t$dst", - [(set cls:$dst, src)]> { + : InstRRE<opcode, (outs cls:$R1), (ins), + mnemonic#"\t$R1", + [(set cls:$R1, src)]> { let R2 = 0; } +class BranchUnaryRI<string mnemonic, bits<12> opcode, RegisterOperand cls> + : InstRI<opcode, (outs cls:$R1), (ins cls:$R1src, brtarget16:$I2), + mnemonic##"\t$R1, $I2", []> { + let isBranch = 1; + let isTerminator = 1; + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; +} + class LoadMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls> - : InstRSY<opcode, (outs cls:$dst1, cls:$dst2), (ins bdaddr20only:$addr), - mnemonic#"\t$dst1, $dst2, $addr", []> { + : InstRSY<opcode, (outs cls:$R1, cls:$R3), (ins bdaddr20only:$BD2), + mnemonic#"\t$R1, $R3, $BD2", []> { let mayLoad = 1; } class StoreRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator, RegisterOperand cls> - : InstRIL<opcode, (outs), (ins cls:$src, pcrel32:$addr), - mnemonic#"\t$src, $addr", - [(operator cls:$src, pcrel32:$addr)]> { + : InstRIL<opcode, (outs), (ins cls:$R1, pcrel32:$I2), + mnemonic#"\t$R1, $I2", + [(operator cls:$R1, pcrel32:$I2)]> { let mayStore = 1; // We want PC-relative addresses to be tried ahead of BD and BDX addresses. 
// However, BDXs have two extra operands and are therefore 6 units more @@ -457,105 +590,206 @@ class StoreRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator, } class StoreRX<string mnemonic, bits<8> opcode, SDPatternOperator operator, - RegisterOperand cls, AddressingMode mode = bdxaddr12only> - : InstRX<opcode, (outs), (ins cls:$src, mode:$addr), - mnemonic#"\t$src, $addr", - [(operator cls:$src, mode:$addr)]> { + RegisterOperand cls, bits<5> bytes, + AddressingMode mode = bdxaddr12only> + : InstRX<opcode, (outs), (ins cls:$R1, mode:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(operator cls:$R1, mode:$XBD2)]> { + let OpKey = mnemonic ## cls; + let OpType = "mem"; let mayStore = 1; + let AccessBytes = bytes; } class StoreRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator, - RegisterOperand cls, AddressingMode mode = bdxaddr20only> - : InstRXY<opcode, (outs), (ins cls:$src, mode:$addr), - mnemonic#"\t$src, $addr", - [(operator cls:$src, mode:$addr)]> { + RegisterOperand cls, bits<5> bytes, + AddressingMode mode = bdxaddr20only> + : InstRXY<opcode, (outs), (ins cls:$R1, mode:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(operator cls:$R1, mode:$XBD2)]> { + let OpKey = mnemonic ## cls; + let OpType = "mem"; let mayStore = 1; + let AccessBytes = bytes; } multiclass StoreRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode, - SDPatternOperator operator, RegisterOperand cls> { - let Function = mnemonic ## #cls in { - let PairType = "12" in - def "" : StoreRX<mnemonic, rxOpcode, operator, cls, bdxaddr12pair>; - let PairType = "20" in - def Y : StoreRXY<mnemonic#"y", rxyOpcode, operator, cls, bdxaddr20pair>; + SDPatternOperator operator, RegisterOperand cls, + bits<5> bytes> { + let DispKey = mnemonic ## #cls in { + let DispSize = "12" in + def "" : StoreRX<mnemonic, rxOpcode, operator, cls, bytes, bdxaddr12pair>; + let DispSize = "20" in + def Y : StoreRXY<mnemonic#"y", rxyOpcode, operator, cls, bytes, + bdxaddr20pair>; } } class 
StoreMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls> - : InstRSY<opcode, (outs), (ins cls:$from, cls:$to, bdaddr20only:$addr), - mnemonic#"\t$from, $to, $addr", []> { + : InstRSY<opcode, (outs), (ins cls:$R1, cls:$R3, bdaddr20only:$BD2), + mnemonic#"\t$R1, $R3, $BD2", []> { let mayStore = 1; } +// StoreSI* instructions are used to store an integer to memory, but the +// addresses are more restricted than for normal stores. If we are in the +// situation of having to force either the address into a register or the +// constant into a register, it's usually better to do the latter. +// We therefore match the address in the same way as a normal store and +// only use the StoreSI* instruction if the matched address is suitable. class StoreSI<string mnemonic, bits<8> opcode, SDPatternOperator operator, - Immediate imm, AddressingMode mode = bdaddr12only> - : InstSI<opcode, (outs), (ins mode:$addr, imm:$src), - mnemonic#"\t$addr, $src", - [(operator imm:$src, mode:$addr)]> { + Immediate imm> + : InstSI<opcode, (outs), (ins mviaddr12pair:$BD1, imm:$I2), + mnemonic#"\t$BD1, $I2", + [(operator imm:$I2, mviaddr12pair:$BD1)]> { let mayStore = 1; } class StoreSIY<string mnemonic, bits<16> opcode, SDPatternOperator operator, - Immediate imm, AddressingMode mode = bdaddr20only> - : InstSIY<opcode, (outs), (ins mode:$addr, imm:$src), - mnemonic#"\t$addr, $src", - [(operator imm:$src, mode:$addr)]> { + Immediate imm> + : InstSIY<opcode, (outs), (ins mviaddr20pair:$BD1, imm:$I2), + mnemonic#"\t$BD1, $I2", + [(operator imm:$I2, mviaddr20pair:$BD1)]> { let mayStore = 1; } class StoreSIL<string mnemonic, bits<16> opcode, SDPatternOperator operator, Immediate imm> - : InstSIL<opcode, (outs), (ins bdaddr12only:$addr, imm:$src), - mnemonic#"\t$addr, $src", - [(operator imm:$src, bdaddr12only:$addr)]> { + : InstSIL<opcode, (outs), (ins mviaddr12pair:$BD1, imm:$I2), + mnemonic#"\t$BD1, $I2", + [(operator imm:$I2, mviaddr12pair:$BD1)]> { let mayStore = 1; } multiclass 
StoreSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode, SDPatternOperator operator, Immediate imm> { - let Function = mnemonic in { - let PairType = "12" in - def "" : StoreSI<mnemonic, siOpcode, operator, imm, bdaddr12pair>; - let PairType = "20" in - def Y : StoreSIY<mnemonic#"y", siyOpcode, operator, imm, bdaddr20pair>; + let DispKey = mnemonic in { + let DispSize = "12" in + def "" : StoreSI<mnemonic, siOpcode, operator, imm>; + let DispSize = "20" in + def Y : StoreSIY<mnemonic#"y", siyOpcode, operator, imm>; } } +class CondStoreRSY<string mnemonic, bits<16> opcode, + RegisterOperand cls, bits<5> bytes, + AddressingMode mode = bdaddr20only> + : InstRSY<opcode, (outs), (ins cls:$R1, mode:$BD2, cond4:$valid, cond4:$R3), + mnemonic#"$R3\t$R1, $BD2", []>, + Requires<[FeatureLoadStoreOnCond]> { + let mayStore = 1; + let AccessBytes = bytes; + let CCMaskLast = 1; +} + +// Like CondStoreRSY, but used for the raw assembly form. The condition-code +// mask is the third operand rather than being part of the mnemonic. +class AsmCondStoreRSY<string mnemonic, bits<16> opcode, + RegisterOperand cls, bits<5> bytes, + AddressingMode mode = bdaddr20only> + : InstRSY<opcode, (outs), (ins cls:$R1, mode:$BD2, uimm8zx4:$R3), + mnemonic#"\t$R1, $BD2, $R3", []>, + Requires<[FeatureLoadStoreOnCond]> { + let mayStore = 1; + let AccessBytes = bytes; +} + +// Like CondStoreRSY, but with a fixed CC mask. 
+class FixedCondStoreRSY<string mnemonic, bits<16> opcode, + RegisterOperand cls, bits<4> ccmask, bits<5> bytes, + AddressingMode mode = bdaddr20only> + : InstRSY<opcode, (outs), (ins cls:$R1, mode:$BD2), + mnemonic#"\t$R1, $BD2", []>, + Requires<[FeatureLoadStoreOnCond]> { + let mayStore = 1; + let AccessBytes = bytes; + let R3 = ccmask; +} + class UnaryRR<string mnemonic, bits<8> opcode, SDPatternOperator operator, RegisterOperand cls1, RegisterOperand cls2> - : InstRR<opcode, (outs cls1:$dst), (ins cls2:$src), - mnemonic#"\t$dst, $src", - [(set cls1:$dst, (operator cls2:$src))]>; + : InstRR<opcode, (outs cls1:$R1), (ins cls2:$R2), + mnemonic#"r\t$R1, $R2", + [(set cls1:$R1, (operator cls2:$R2))]> { + let OpKey = mnemonic ## cls1; + let OpType = "reg"; +} class UnaryRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls1, RegisterOperand cls2> - : InstRRE<opcode, (outs cls1:$dst), (ins cls2:$src), - mnemonic#"\t$dst, $src", - [(set cls1:$dst, (operator cls2:$src))]>; + : InstRRE<opcode, (outs cls1:$R1), (ins cls2:$R2), + mnemonic#"r\t$R1, $R2", + [(set cls1:$R1, (operator cls2:$R2))]> { + let OpKey = mnemonic ## cls1; + let OpType = "reg"; +} class UnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1, RegisterOperand cls2> - : InstRRF<opcode, (outs cls1:$dst), (ins cls2:$src, uimm8zx4:$mode), - mnemonic#"\t$dst, $mode, $src", []>; + : InstRRF<opcode, (outs cls1:$R1), (ins uimm8zx4:$R3, cls2:$R2), + mnemonic#"r\t$R1, $R3, $R2", []> { + let OpKey = mnemonic ## cls1; + let OpType = "reg"; + let R4 = 0; +} + +class UnaryRRF4<string mnemonic, bits<16> opcode, RegisterOperand cls1, + RegisterOperand cls2> + : InstRRF<opcode, (outs cls1:$R1), (ins uimm8zx4:$R3, cls2:$R2, uimm8zx4:$R4), + mnemonic#"\t$R1, $R3, $R2, $R4", []>; + +// These instructions are generated by if conversion. The old value of R1 +// is added as an implicit use. 
+class CondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1, + RegisterOperand cls2> + : InstRRF<opcode, (outs cls1:$R1), (ins cls2:$R2, cond4:$valid, cond4:$R3), + mnemonic#"r$R3\t$R1, $R2", []>, + Requires<[FeatureLoadStoreOnCond]> { + let CCMaskLast = 1; + let R4 = 0; +} + +// Like CondUnaryRRF, but used for the raw assembly form. The condition-code +// mask is the third operand rather than being part of the mnemonic. +class AsmCondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1, + RegisterOperand cls2> + : InstRRF<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2, uimm8zx4:$R3), + mnemonic#"r\t$R1, $R2, $R3", []>, + Requires<[FeatureLoadStoreOnCond]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let R4 = 0; +} + +// Like CondUnaryRRF, but with a fixed CC mask. +class FixedCondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1, + RegisterOperand cls2, bits<4> ccmask> + : InstRRF<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2), + mnemonic#"\t$R1, $R2", []>, + Requires<[FeatureLoadStoreOnCond]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let R3 = ccmask; + let R4 = 0; +} class UnaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator, RegisterOperand cls, Immediate imm> - : InstRI<opcode, (outs cls:$dst), (ins imm:$src), - mnemonic#"\t$dst, $src", - [(set cls:$dst, (operator imm:$src))]>; + : InstRI<opcode, (outs cls:$R1), (ins imm:$I2), + mnemonic#"\t$R1, $I2", + [(set cls:$R1, (operator imm:$I2))]>; class UnaryRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator, RegisterOperand cls, Immediate imm> - : InstRIL<opcode, (outs cls:$dst), (ins imm:$src), - mnemonic#"\t$dst, $src", - [(set cls:$dst, (operator imm:$src))]>; + : InstRIL<opcode, (outs cls:$R1), (ins imm:$I2), + mnemonic#"\t$R1, $I2", + [(set cls:$R1, (operator imm:$I2))]>; class UnaryRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator, RegisterOperand cls> - 
: InstRIL<opcode, (outs cls:$dst), (ins pcrel32:$addr), - mnemonic#"\t$dst, $addr", - [(set cls:$dst, (operator pcrel32:$addr))]> { + : InstRIL<opcode, (outs cls:$R1), (ins pcrel32:$I2), + mnemonic#"\t$R1, $I2", + [(set cls:$R1, (operator pcrel32:$I2))]> { let mayLoad = 1; // We want PC-relative addresses to be tried ahead of BD and BDX addresses. // However, BDXs have two extra operands and are therefore 6 units more @@ -563,148 +797,267 @@ class UnaryRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator, let AddedComplexity = 7; } +class CondUnaryRSY<string mnemonic, bits<16> opcode, + SDPatternOperator operator, RegisterOperand cls, + bits<5> bytes, AddressingMode mode = bdaddr20only> + : InstRSY<opcode, (outs cls:$R1), + (ins cls:$R1src, mode:$BD2, cond4:$valid, cond4:$R3), + mnemonic#"$R3\t$R1, $BD2", + [(set cls:$R1, + (z_select_ccmask (load bdaddr20only:$BD2), cls:$R1src, + cond4:$valid, cond4:$R3))]>, + Requires<[FeatureLoadStoreOnCond]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let mayLoad = 1; + let AccessBytes = bytes; + let CCMaskLast = 1; +} + +// Like CondUnaryRSY, but used for the raw assembly form. The condition-code +// mask is the third operand rather than being part of the mnemonic. +class AsmCondUnaryRSY<string mnemonic, bits<16> opcode, + RegisterOperand cls, bits<5> bytes, + AddressingMode mode = bdaddr20only> + : InstRSY<opcode, (outs cls:$R1), (ins cls:$R1src, mode:$BD2, uimm8zx4:$R3), + mnemonic#"\t$R1, $BD2, $R3", []>, + Requires<[FeatureLoadStoreOnCond]> { + let mayLoad = 1; + let AccessBytes = bytes; + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; +} + +// Like CondUnaryRSY, but with a fixed CC mask. 
+class FixedCondUnaryRSY<string mnemonic, bits<16> opcode, + RegisterOperand cls, bits<4> ccmask, bits<5> bytes, + AddressingMode mode = bdaddr20only> + : InstRSY<opcode, (outs cls:$R1), (ins cls:$R1src, mode:$BD2), + mnemonic#"\t$R1, $BD2", []>, + Requires<[FeatureLoadStoreOnCond]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let R3 = ccmask; + let mayLoad = 1; + let AccessBytes = bytes; +} + class UnaryRX<string mnemonic, bits<8> opcode, SDPatternOperator operator, - RegisterOperand cls, AddressingMode mode = bdxaddr12only> - : InstRX<opcode, (outs cls:$dst), (ins mode:$addr), - mnemonic#"\t$dst, $addr", - [(set cls:$dst, (operator mode:$addr))]> { + RegisterOperand cls, bits<5> bytes, + AddressingMode mode = bdxaddr12only> + : InstRX<opcode, (outs cls:$R1), (ins mode:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(set cls:$R1, (operator mode:$XBD2))]> { + let OpKey = mnemonic ## cls; + let OpType = "mem"; let mayLoad = 1; + let AccessBytes = bytes; } class UnaryRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator, - RegisterOperand cls> - : InstRXE<opcode, (outs cls:$dst), (ins bdxaddr12only:$addr), - mnemonic#"\t$dst, $addr", - [(set cls:$dst, (operator bdxaddr12only:$addr))]> { + RegisterOperand cls, bits<5> bytes> + : InstRXE<opcode, (outs cls:$R1), (ins bdxaddr12only:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(set cls:$R1, (operator bdxaddr12only:$XBD2))]> { + let OpKey = mnemonic ## cls; + let OpType = "mem"; let mayLoad = 1; + let AccessBytes = bytes; } class UnaryRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator, - RegisterOperand cls, AddressingMode mode = bdxaddr20only> - : InstRXY<opcode, (outs cls:$dst), (ins mode:$addr), - mnemonic#"\t$dst, $addr", - [(set cls:$dst, (operator mode:$addr))]> { + RegisterOperand cls, bits<5> bytes, + AddressingMode mode = bdxaddr20only> + : InstRXY<opcode, (outs cls:$R1), (ins mode:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(set cls:$R1, (operator mode:$XBD2))]> { + let OpKey = 
mnemonic ## cls; + let OpType = "mem"; let mayLoad = 1; + let AccessBytes = bytes; } multiclass UnaryRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode, - SDPatternOperator operator, RegisterOperand cls> { - let Function = mnemonic ## #cls in { - let PairType = "12" in - def "" : UnaryRX<mnemonic, rxOpcode, operator, cls, bdxaddr12pair>; - let PairType = "20" in - def Y : UnaryRXY<mnemonic#"y", rxyOpcode, operator, cls, bdxaddr20pair>; + SDPatternOperator operator, RegisterOperand cls, + bits<5> bytes> { + let DispKey = mnemonic ## #cls in { + let DispSize = "12" in + def "" : UnaryRX<mnemonic, rxOpcode, operator, cls, bytes, bdxaddr12pair>; + let DispSize = "20" in + def Y : UnaryRXY<mnemonic#"y", rxyOpcode, operator, cls, bytes, + bdxaddr20pair>; } } class BinaryRR<string mnemonic, bits<8> opcode, SDPatternOperator operator, RegisterOperand cls1, RegisterOperand cls2> - : InstRR<opcode, (outs cls1:$dst), (ins cls1:$src1, cls2:$src2), - mnemonic#"\t$dst, $src2", - [(set cls1:$dst, (operator cls1:$src1, cls2:$src2))]> { - let Constraints = "$src1 = $dst"; - let DisableEncoding = "$src1"; + : InstRR<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2), + mnemonic#"r\t$R1, $R2", + [(set cls1:$R1, (operator cls1:$R1src, cls2:$R2))]> { + let OpKey = mnemonic ## cls1; + let OpType = "reg"; + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; } class BinaryRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls1, RegisterOperand cls2> - : InstRRE<opcode, (outs cls1:$dst), (ins cls1:$src1, cls2:$src2), - mnemonic#"\t$dst, $src2", - [(set cls1:$dst, (operator cls1:$src1, cls2:$src2))]> { - let Constraints = "$src1 = $dst"; - let DisableEncoding = "$src1"; + : InstRRE<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2), + mnemonic#"r\t$R1, $R2", + [(set cls1:$R1, (operator cls1:$R1src, cls2:$R2))]> { + let OpKey = mnemonic ## cls1; + let OpType = "reg"; + let Constraints = "$R1 = $R1src"; + let DisableEncoding = 
"$R1src"; } -// Here the assembly and dag operands are in natural order, -// but the first input operand maps to R3 and the second to R2. -// This is used for "CPSDR R1, R3, R2", which is equivalent to -// R1 = copysign (R3, R2). -// -// Direct uses of the instruction must pass operands in encoding order -- -// R1, R2, R3 -- so they must pass the source operands in reverse order. -class BinaryRevRRF<string mnemonic, bits<16> opcode, SDPatternOperator operator, - RegisterOperand cls1, RegisterOperand cls2> - : InstRRF<opcode, (outs cls1:$dst), (ins cls2:$src2, cls1:$src1), - mnemonic#"\t$dst, $src1, $src2", - [(set cls1:$dst, (operator cls1:$src1, cls2:$src2))]>; +class BinaryRRF<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRF<opcode, (outs cls1:$R1), (ins cls1:$R3, cls2:$R2), + mnemonic#"r\t$R1, $R3, $R2", + [(set cls1:$R1, (operator cls1:$R3, cls2:$R2))]> { + let OpKey = mnemonic ## cls1; + let OpType = "reg"; + let R4 = 0; +} + +class BinaryRRFK<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRF<opcode, (outs cls1:$R1), (ins cls1:$R2, cls2:$R3), + mnemonic#"rk\t$R1, $R2, $R3", + [(set cls1:$R1, (operator cls1:$R2, cls2:$R3))]> { + let R4 = 0; +} + +multiclass BinaryRRAndK<string mnemonic, bits<8> opcode1, bits<16> opcode2, + SDPatternOperator operator, RegisterOperand cls1, + RegisterOperand cls2> { + let NumOpsKey = mnemonic in { + let NumOpsValue = "3" in + def K : BinaryRRFK<mnemonic, opcode2, null_frag, cls1, cls2>, + Requires<[FeatureDistinctOps]>; + let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in + def "" : BinaryRR<mnemonic, opcode1, operator, cls1, cls2>; + } +} + +multiclass BinaryRREAndK<string mnemonic, bits<16> opcode1, bits<16> opcode2, + SDPatternOperator operator, RegisterOperand cls1, + RegisterOperand cls2> { + let NumOpsKey = mnemonic in { + let NumOpsValue = "3" in + def K : BinaryRRFK<mnemonic, 
opcode2, null_frag, cls1, cls2>, + Requires<[FeatureDistinctOps]>; + let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in + def "" : BinaryRRE<mnemonic, opcode1, operator, cls1, cls2>; + } +} class BinaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator, RegisterOperand cls, Immediate imm> - : InstRI<opcode, (outs cls:$dst), (ins cls:$src1, imm:$src2), - mnemonic#"\t$dst, $src2", - [(set cls:$dst, (operator cls:$src1, imm:$src2))]> { - let Constraints = "$src1 = $dst"; - let DisableEncoding = "$src1"; + : InstRI<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2), + mnemonic#"\t$R1, $I2", + [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; +} + +class BinaryRIE<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls, Immediate imm> + : InstRIEd<opcode, (outs cls:$R1), (ins cls:$R3, imm:$I2), + mnemonic#"\t$R1, $R3, $I2", + [(set cls:$R1, (operator cls:$R3, imm:$I2))]>; + +multiclass BinaryRIAndK<string mnemonic, bits<12> opcode1, bits<16> opcode2, + SDPatternOperator operator, RegisterOperand cls, + Immediate imm> { + let NumOpsKey = mnemonic in { + let NumOpsValue = "3" in + def K : BinaryRIE<mnemonic##"k", opcode2, null_frag, cls, imm>, + Requires<[FeatureDistinctOps]>; + let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in + def "" : BinaryRI<mnemonic, opcode1, operator, cls, imm>; + } } class BinaryRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator, RegisterOperand cls, Immediate imm> - : InstRIL<opcode, (outs cls:$dst), (ins cls:$src1, imm:$src2), - mnemonic#"\t$dst, $src2", - [(set cls:$dst, (operator cls:$src1, imm:$src2))]> { - let Constraints = "$src1 = $dst"; - let DisableEncoding = "$src1"; + : InstRIL<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2), + mnemonic#"\t$R1, $I2", + [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; } class BinaryRX<string 
mnemonic, bits<8> opcode, SDPatternOperator operator, - RegisterOperand cls, SDPatternOperator load, + RegisterOperand cls, SDPatternOperator load, bits<5> bytes, AddressingMode mode = bdxaddr12only> - : InstRX<opcode, (outs cls:$dst), (ins cls:$src1, mode:$src2), - mnemonic#"\t$dst, $src2", - [(set cls:$dst, (operator cls:$src1, (load mode:$src2)))]> { - let Constraints = "$src1 = $dst"; - let DisableEncoding = "$src1"; + : InstRX<opcode, (outs cls:$R1), (ins cls:$R1src, mode:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(set cls:$R1, (operator cls:$R1src, (load mode:$XBD2)))]> { + let OpKey = mnemonic ## cls; + let OpType = "mem"; + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; let mayLoad = 1; + let AccessBytes = bytes; } class BinaryRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator, - RegisterOperand cls, SDPatternOperator load> - : InstRXE<opcode, (outs cls:$dst), (ins cls:$src1, bdxaddr12only:$src2), - mnemonic#"\t$dst, $src2", - [(set cls:$dst, (operator cls:$src1, - (load bdxaddr12only:$src2)))]> { - let Constraints = "$src1 = $dst"; - let DisableEncoding = "$src1"; + RegisterOperand cls, SDPatternOperator load, bits<5> bytes> + : InstRXE<opcode, (outs cls:$R1), (ins cls:$R1src, bdxaddr12only:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(set cls:$R1, (operator cls:$R1src, + (load bdxaddr12only:$XBD2)))]> { + let OpKey = mnemonic ## cls; + let OpType = "mem"; + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; let mayLoad = 1; + let AccessBytes = bytes; } class BinaryRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator, - RegisterOperand cls, SDPatternOperator load, + RegisterOperand cls, SDPatternOperator load, bits<5> bytes, AddressingMode mode = bdxaddr20only> - : InstRXY<opcode, (outs cls:$dst), (ins cls:$src1, mode:$src2), - mnemonic#"\t$dst, $src2", - [(set cls:$dst, (operator cls:$src1, (load mode:$src2)))]> { - let Constraints = "$src1 = $dst"; - let DisableEncoding = "$src1"; + : InstRXY<opcode, 
(outs cls:$R1), (ins cls:$R1src, mode:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(set cls:$R1, (operator cls:$R1src, (load mode:$XBD2)))]> { + let OpKey = mnemonic ## cls; + let OpType = "mem"; + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; let mayLoad = 1; + let AccessBytes = bytes; } multiclass BinaryRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode, SDPatternOperator operator, RegisterOperand cls, - SDPatternOperator load> { - let Function = mnemonic ## #cls in { - let PairType = "12" in - def "" : BinaryRX<mnemonic, rxOpcode, operator, cls, load, bdxaddr12pair>; - let PairType = "20" in - def Y : BinaryRXY<mnemonic#"y", rxyOpcode, operator, cls, load, + SDPatternOperator load, bits<5> bytes> { + let DispKey = mnemonic ## #cls in { + let DispSize = "12" in + def "" : BinaryRX<mnemonic, rxOpcode, operator, cls, load, bytes, + bdxaddr12pair>; + let DispSize = "20" in + def Y : BinaryRXY<mnemonic#"y", rxyOpcode, operator, cls, load, bytes, bdxaddr20pair>; } } class BinarySI<string mnemonic, bits<8> opcode, SDPatternOperator operator, Operand imm, AddressingMode mode = bdaddr12only> - : InstSI<opcode, (outs), (ins mode:$addr, imm:$src), - mnemonic#"\t$addr, $src", - [(store (operator (load mode:$addr), imm:$src), mode:$addr)]> { + : InstSI<opcode, (outs), (ins mode:$BD1, imm:$I2), + mnemonic#"\t$BD1, $I2", + [(store (operator (load mode:$BD1), imm:$I2), mode:$BD1)]> { let mayLoad = 1; let mayStore = 1; } class BinarySIY<string mnemonic, bits<16> opcode, SDPatternOperator operator, Operand imm, AddressingMode mode = bdaddr20only> - : InstSIY<opcode, (outs), (ins mode:$addr, imm:$src), - mnemonic#"\t$addr, $src", - [(store (operator (load mode:$addr), imm:$src), mode:$addr)]> { + : InstSIY<opcode, (outs), (ins mode:$BD1, imm:$I2), + mnemonic#"\t$BD1, $I2", + [(store (operator (load mode:$BD1), imm:$I2), mode:$BD1)]> { let mayLoad = 1; let mayStore = 1; } @@ -712,59 +1065,83 @@ class BinarySIY<string mnemonic, bits<16> opcode, 
SDPatternOperator operator, multiclass BinarySIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode, SDPatternOperator operator, Operand imm> { - let Function = mnemonic ## #cls in { - let PairType = "12" in + let DispKey = mnemonic ## #cls in { + let DispSize = "12" in def "" : BinarySI<mnemonic, siOpcode, operator, imm, bdaddr12pair>; - let PairType = "20" in + let DispSize = "20" in def Y : BinarySIY<mnemonic#"y", siyOpcode, operator, imm, bdaddr20pair>; } } class ShiftRS<string mnemonic, bits<8> opcode, SDPatternOperator operator, - RegisterOperand cls, AddressingMode mode> - : InstRS<opcode, (outs cls:$dst), (ins cls:$src1, mode:$src2), - mnemonic#"\t$dst, $src2", - [(set cls:$dst, (operator cls:$src1, mode:$src2))]> { + RegisterOperand cls> + : InstRS<opcode, (outs cls:$R1), (ins cls:$R1src, shift12only:$BD2), + mnemonic#"\t$R1, $BD2", + [(set cls:$R1, (operator cls:$R1src, shift12only:$BD2))]> { let R3 = 0; - let Constraints = "$src1 = $dst"; - let DisableEncoding = "$src1"; + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; } class ShiftRSY<string mnemonic, bits<16> opcode, SDPatternOperator operator, - RegisterOperand cls, AddressingMode mode> - : InstRSY<opcode, (outs cls:$dst), (ins cls:$src1, mode:$src2), - mnemonic#"\t$dst, $src1, $src2", - [(set cls:$dst, (operator cls:$src1, mode:$src2))]>; + RegisterOperand cls> + : InstRSY<opcode, (outs cls:$R1), (ins cls:$R3, shift20only:$BD2), + mnemonic#"\t$R1, $R3, $BD2", + [(set cls:$R1, (operator cls:$R3, shift20only:$BD2))]>; + +multiclass ShiftRSAndK<string mnemonic, bits<8> opcode1, bits<16> opcode2, + SDPatternOperator operator, RegisterOperand cls> { + let NumOpsKey = mnemonic in { + let NumOpsValue = "3" in + def K : ShiftRSY<mnemonic##"k", opcode2, null_frag, cls>, + Requires<[FeatureDistinctOps]>; + let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in + def "" : ShiftRS<mnemonic, opcode1, operator, cls>; + } +} class CompareRR<string mnemonic, bits<8> opcode, 
SDPatternOperator operator, RegisterOperand cls1, RegisterOperand cls2> - : InstRR<opcode, (outs), (ins cls1:$src1, cls2:$src2), - mnemonic#"\t$src1, $src2", - [(operator cls1:$src1, cls2:$src2)]>; + : InstRR<opcode, (outs), (ins cls1:$R1, cls2:$R2), + mnemonic#"r\t$R1, $R2", + [(operator cls1:$R1, cls2:$R2)]> { + let OpKey = mnemonic ## cls1; + let OpType = "reg"; + let isCompare = 1; +} class CompareRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls1, RegisterOperand cls2> - : InstRRE<opcode, (outs), (ins cls1:$src1, cls2:$src2), - mnemonic#"\t$src1, $src2", - [(operator cls1:$src1, cls2:$src2)]>; + : InstRRE<opcode, (outs), (ins cls1:$R1, cls2:$R2), + mnemonic#"r\t$R1, $R2", + [(operator cls1:$R1, cls2:$R2)]> { + let OpKey = mnemonic ## cls1; + let OpType = "reg"; + let isCompare = 1; +} class CompareRI<string mnemonic, bits<12> opcode, SDPatternOperator operator, RegisterOperand cls, Immediate imm> - : InstRI<opcode, (outs), (ins cls:$src1, imm:$src2), - mnemonic#"\t$src1, $src2", - [(operator cls:$src1, imm:$src2)]>; + : InstRI<opcode, (outs), (ins cls:$R1, imm:$I2), + mnemonic#"\t$R1, $I2", + [(operator cls:$R1, imm:$I2)]> { + let isCompare = 1; +} class CompareRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator, RegisterOperand cls, Immediate imm> - : InstRIL<opcode, (outs), (ins cls:$src1, imm:$src2), - mnemonic#"\t$src1, $src2", - [(operator cls:$src1, imm:$src2)]>; + : InstRIL<opcode, (outs), (ins cls:$R1, imm:$I2), + mnemonic#"\t$R1, $I2", + [(operator cls:$R1, imm:$I2)]> { + let isCompare = 1; +} class CompareRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator, RegisterOperand cls, SDPatternOperator load> - : InstRIL<opcode, (outs), (ins cls:$src1, pcrel32:$src2), - mnemonic#"\t$src1, $src2", - [(operator cls:$src1, (load pcrel32:$src2))]> { + : InstRIL<opcode, (outs), (ins cls:$R1, pcrel32:$I2), + mnemonic#"\t$R1, $I2", + [(operator cls:$R1, (load pcrel32:$I2))]> { + let isCompare = 1; let 
mayLoad = 1; // We want PC-relative addresses to be tried ahead of BD and BDX addresses. // However, BDXs have two extra operands and are therefore 6 units more @@ -773,77 +1150,92 @@ class CompareRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator, } class CompareRX<string mnemonic, bits<8> opcode, SDPatternOperator operator, - RegisterOperand cls, SDPatternOperator load, + RegisterOperand cls, SDPatternOperator load, bits<5> bytes, AddressingMode mode = bdxaddr12only> - : InstRX<opcode, (outs), (ins cls:$src1, mode:$src2), - mnemonic#"\t$src1, $src2", - [(operator cls:$src1, (load mode:$src2))]> { + : InstRX<opcode, (outs), (ins cls:$R1, mode:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(operator cls:$R1, (load mode:$XBD2))]> { + let OpKey = mnemonic ## cls; + let OpType = "mem"; + let isCompare = 1; let mayLoad = 1; + let AccessBytes = bytes; } class CompareRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator, - RegisterOperand cls, SDPatternOperator load> - : InstRXE<opcode, (outs), (ins cls:$src1, bdxaddr12only:$src2), - mnemonic#"\t$src1, $src2", - [(operator cls:$src1, (load bdxaddr12only:$src2))]> { + RegisterOperand cls, SDPatternOperator load, bits<5> bytes> + : InstRXE<opcode, (outs), (ins cls:$R1, bdxaddr12only:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(operator cls:$R1, (load bdxaddr12only:$XBD2))]> { + let OpKey = mnemonic ## cls; + let OpType = "mem"; + let isCompare = 1; let mayLoad = 1; + let AccessBytes = bytes; } class CompareRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator, - RegisterOperand cls, SDPatternOperator load, + RegisterOperand cls, SDPatternOperator load, bits<5> bytes, AddressingMode mode = bdxaddr20only> - : InstRXY<opcode, (outs), (ins cls:$src1, mode:$src2), - mnemonic#"\t$src1, $src2", - [(operator cls:$src1, (load mode:$src2))]> { + : InstRXY<opcode, (outs), (ins cls:$R1, mode:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(operator cls:$R1, (load mode:$XBD2))]> { + let OpKey = mnemonic ## cls; + let 
OpType = "mem"; + let isCompare = 1; let mayLoad = 1; + let AccessBytes = bytes; } multiclass CompareRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode, SDPatternOperator operator, RegisterOperand cls, - SDPatternOperator load> { - let Function = mnemonic ## #cls in { - let PairType = "12" in + SDPatternOperator load, bits<5> bytes> { + let DispKey = mnemonic ## #cls in { + let DispSize = "12" in def "" : CompareRX<mnemonic, rxOpcode, operator, cls, - load, bdxaddr12pair>; - let PairType = "20" in + load, bytes, bdxaddr12pair>; + let DispSize = "20" in def Y : CompareRXY<mnemonic#"y", rxyOpcode, operator, cls, - load, bdxaddr20pair>; + load, bytes, bdxaddr20pair>; } } class CompareSI<string mnemonic, bits<8> opcode, SDPatternOperator operator, SDPatternOperator load, Immediate imm, AddressingMode mode = bdaddr12only> - : InstSI<opcode, (outs), (ins mode:$addr, imm:$src), - mnemonic#"\t$addr, $src", - [(operator (load mode:$addr), imm:$src)]> { + : InstSI<opcode, (outs), (ins mode:$BD1, imm:$I2), + mnemonic#"\t$BD1, $I2", + [(operator (load mode:$BD1), imm:$I2)]> { + let isCompare = 1; let mayLoad = 1; } class CompareSIL<string mnemonic, bits<16> opcode, SDPatternOperator operator, SDPatternOperator load, Immediate imm> - : InstSIL<opcode, (outs), (ins bdaddr12only:$addr, imm:$src), - mnemonic#"\t$addr, $src", - [(operator (load bdaddr12only:$addr), imm:$src)]> { + : InstSIL<opcode, (outs), (ins bdaddr12only:$BD1, imm:$I2), + mnemonic#"\t$BD1, $I2", + [(operator (load bdaddr12only:$BD1), imm:$I2)]> { + let isCompare = 1; let mayLoad = 1; } class CompareSIY<string mnemonic, bits<16> opcode, SDPatternOperator operator, SDPatternOperator load, Immediate imm, AddressingMode mode = bdaddr20only> - : InstSIY<opcode, (outs), (ins mode:$addr, imm:$src), - mnemonic#"\t$addr, $src", - [(operator (load mode:$addr), imm:$src)]> { + : InstSIY<opcode, (outs), (ins mode:$BD1, imm:$I2), + mnemonic#"\t$BD1, $I2", + [(operator (load mode:$BD1), imm:$I2)]> { + let isCompare 
= 1; let mayLoad = 1; } multiclass CompareSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode, SDPatternOperator operator, SDPatternOperator load, Immediate imm> { - let Function = mnemonic in { - let PairType = "12" in + let DispKey = mnemonic in { + let DispSize = "12" in def "" : CompareSI<mnemonic, siOpcode, operator, load, imm, bdaddr12pair>; - let PairType = "20" in + let DispSize = "20" in def Y : CompareSIY<mnemonic#"y", siyOpcode, operator, load, imm, bdaddr20pair>; } @@ -851,65 +1243,94 @@ multiclass CompareSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode, class TernaryRRD<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls> - : InstRRD<opcode, (outs cls:$dst), (ins cls:$src1, cls:$src2, cls:$src3), - mnemonic#"\t$dst, $src2, $src3", - [(set cls:$dst, (operator cls:$src1, cls:$src2, cls:$src3))]> { - let Constraints = "$src1 = $dst"; - let DisableEncoding = "$src1"; + : InstRRD<opcode, (outs cls:$R1), (ins cls:$R1src, cls:$R3, cls:$R2), + mnemonic#"r\t$R1, $R3, $R2", + [(set cls:$R1, (operator cls:$R1src, cls:$R3, cls:$R2))]> { + let OpKey = mnemonic ## cls; + let OpType = "reg"; + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; } class TernaryRXF<string mnemonic, bits<16> opcode, SDPatternOperator operator, - RegisterOperand cls, SDPatternOperator load> - : InstRXF<opcode, (outs cls:$dst), - (ins cls:$src1, cls:$src2, bdxaddr12only:$src3), - mnemonic#"\t$dst, $src2, $src3", - [(set cls:$dst, (operator cls:$src1, cls:$src2, - (load bdxaddr12only:$src3)))]> { - let Constraints = "$src1 = $dst"; - let DisableEncoding = "$src1"; + RegisterOperand cls, SDPatternOperator load, bits<5> bytes> + : InstRXF<opcode, (outs cls:$R1), + (ins cls:$R1src, cls:$R3, bdxaddr12only:$XBD2), + mnemonic#"\t$R1, $R3, $XBD2", + [(set cls:$R1, (operator cls:$R1src, cls:$R3, + (load bdxaddr12only:$XBD2)))]> { + let OpKey = mnemonic ## cls; + let OpType = "mem"; + let Constraints = "$R1 = $R1src"; + let 
DisableEncoding = "$R1src"; let mayLoad = 1; + let AccessBytes = bytes; } class CmpSwapRS<string mnemonic, bits<8> opcode, SDPatternOperator operator, RegisterOperand cls, AddressingMode mode = bdaddr12only> - : InstRS<opcode, (outs cls:$dst), (ins cls:$old, cls:$new, mode:$ptr), - mnemonic#"\t$dst, $new, $ptr", - [(set cls:$dst, (operator mode:$ptr, cls:$old, cls:$new))]> { - let Constraints = "$old = $dst"; - let DisableEncoding = "$old"; + : InstRS<opcode, (outs cls:$R1), (ins cls:$R1src, cls:$R3, mode:$BD2), + mnemonic#"\t$R1, $R3, $BD2", + [(set cls:$R1, (operator mode:$BD2, cls:$R1src, cls:$R3))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; let mayLoad = 1; let mayStore = 1; } class CmpSwapRSY<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls, AddressingMode mode = bdaddr20only> - : InstRSY<opcode, (outs cls:$dst), (ins cls:$old, cls:$new, mode:$ptr), - mnemonic#"\t$dst, $new, $ptr", - [(set cls:$dst, (operator mode:$ptr, cls:$old, cls:$new))]> { - let Constraints = "$old = $dst"; - let DisableEncoding = "$old"; + : InstRSY<opcode, (outs cls:$R1), (ins cls:$R1src, cls:$R3, mode:$BD2), + mnemonic#"\t$R1, $R3, $BD2", + [(set cls:$R1, (operator mode:$BD2, cls:$R1src, cls:$R3))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; let mayLoad = 1; let mayStore = 1; } multiclass CmpSwapRSPair<string mnemonic, bits<8> rsOpcode, bits<16> rsyOpcode, SDPatternOperator operator, RegisterOperand cls> { - let Function = mnemonic ## #cls in { - let PairType = "12" in + let DispKey = mnemonic ## #cls in { + let DispSize = "12" in def "" : CmpSwapRS<mnemonic, rsOpcode, operator, cls, bdaddr12pair>; - let PairType = "20" in + let DispSize = "20" in def Y : CmpSwapRSY<mnemonic#"y", rsyOpcode, operator, cls, bdaddr20pair>; } } class RotateSelectRIEf<string mnemonic, bits<16> opcode, RegisterOperand cls1, RegisterOperand cls2> - : InstRIEf<opcode, (outs cls1:$dst), - (ins cls1:$src1, cls2:$src2, - 
uimm8zx6:$imm1, uimm8zx6:$imm2, uimm8zx6:$imm3), - mnemonic#"\t$dst, $src2, $imm1, $imm2, $imm3", []> { - let Constraints = "$src1 = $dst"; - let DisableEncoding = "$src1"; + : InstRIEf<opcode, (outs cls1:$R1), + (ins cls1:$R1src, cls2:$R2, uimm8:$I3, uimm8:$I4, uimm8zx6:$I5), + mnemonic#"\t$R1, $R2, $I3, $I4, $I5", []> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; +} + +class PrefetchRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator> + : InstRXY<opcode, (outs), (ins uimm8zx4:$R1, bdxaddr20only:$XBD2), + mnemonic##"\t$R1, $XBD2", + [(operator uimm8zx4:$R1, bdxaddr20only:$XBD2)]>; + +class PrefetchRILPC<string mnemonic, bits<12> opcode, + SDPatternOperator operator> + : InstRIL<opcode, (outs), (ins uimm8zx4:$R1, pcrel32:$I2), + mnemonic##"\t$R1, $I2", + [(operator uimm8zx4:$R1, pcrel32:$I2)]> { + // We want PC-relative addresses to be tried ahead of BD and BDX addresses. + // However, BDXs have two extra operands and are therefore 6 units more + // complex. + let AddedComplexity = 7; +} + +// A floating-point load-and test operation. Create both a normal unary +// operation and one that acts as a comparison against zero. +multiclass LoadAndTestRRE<string mnemonic, bits<16> opcode, + RegisterOperand cls> { + def "" : UnaryRRE<mnemonic, opcode, null_frag, cls, cls>; + let isCodeGenOnly = 1 in + def Compare : CompareRRE<mnemonic, opcode, null_frag, cls, cls>; } //===----------------------------------------------------------------------===// @@ -928,17 +1349,130 @@ class Pseudo<dag outs, dag ins, list<dag> pattern> let isCodeGenOnly = 1; } +// Like UnaryRI, but expanded after RA depending on the choice of register. +class UnaryRIPseudo<SDPatternOperator operator, RegisterOperand cls, + Immediate imm> + : Pseudo<(outs cls:$R1), (ins imm:$I2), + [(set cls:$R1, (operator imm:$I2))]>; + +// Like UnaryRXY, but expanded after RA depending on the choice of register. 
+class UnaryRXYPseudo<string key, SDPatternOperator operator, + RegisterOperand cls, bits<5> bytes, + AddressingMode mode = bdxaddr20only> + : Pseudo<(outs cls:$R1), (ins mode:$XBD2), + [(set cls:$R1, (operator mode:$XBD2))]> { + let OpKey = key ## cls; + let OpType = "mem"; + let mayLoad = 1; + let Has20BitOffset = 1; + let HasIndex = 1; + let AccessBytes = bytes; +} + +// Like UnaryRR, but expanded after RA depending on the choice of registers. +class UnaryRRPseudo<string key, SDPatternOperator operator, + RegisterOperand cls1, RegisterOperand cls2> + : Pseudo<(outs cls1:$R1), (ins cls2:$R2), + [(set cls1:$R1, (operator cls2:$R2))]> { + let OpKey = key ## cls1; + let OpType = "reg"; +} + +// Like BinaryRI, but expanded after RA depending on the choice of register. +class BinaryRIPseudo<SDPatternOperator operator, RegisterOperand cls, + Immediate imm> + : Pseudo<(outs cls:$R1), (ins cls:$R1src, imm:$I2), + [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> { + let Constraints = "$R1 = $R1src"; +} + +// Like BinaryRIE, but expanded after RA depending on the choice of register. +class BinaryRIEPseudo<SDPatternOperator operator, RegisterOperand cls, + Immediate imm> + : Pseudo<(outs cls:$R1), (ins cls:$R3, imm:$I2), + [(set cls:$R1, (operator cls:$R3, imm:$I2))]>; + +// Like BinaryRIAndK, but expanded after RA depending on the choice of register. +multiclass BinaryRIAndKPseudo<string key, SDPatternOperator operator, + RegisterOperand cls, Immediate imm> { + let NumOpsKey = key in { + let NumOpsValue = "3" in + def K : BinaryRIEPseudo<null_frag, cls, imm>, + Requires<[FeatureHighWord, FeatureDistinctOps]>; + let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in + def "" : BinaryRIPseudo<operator, cls, imm>, + Requires<[FeatureHighWord]>; + } +} + +// Like CompareRI, but expanded after RA depending on the choice of register. 
+class CompareRIPseudo<SDPatternOperator operator, RegisterOperand cls, + Immediate imm> + : Pseudo<(outs), (ins cls:$R1, imm:$I2), [(operator cls:$R1, imm:$I2)]>; + +// Like CompareRXY, but expanded after RA depending on the choice of register. +class CompareRXYPseudo<SDPatternOperator operator, RegisterOperand cls, + SDPatternOperator load, bits<5> bytes, + AddressingMode mode = bdxaddr20only> + : Pseudo<(outs), (ins cls:$R1, mode:$XBD2), + [(operator cls:$R1, (load mode:$XBD2))]> { + let mayLoad = 1; + let Has20BitOffset = 1; + let HasIndex = 1; + let AccessBytes = bytes; +} + +// Like StoreRXY, but expanded after RA depending on the choice of register. +class StoreRXYPseudo<SDPatternOperator operator, RegisterOperand cls, + bits<5> bytes, AddressingMode mode = bdxaddr20only> + : Pseudo<(outs), (ins cls:$R1, mode:$XBD2), + [(operator cls:$R1, mode:$XBD2)]> { + let mayStore = 1; + let Has20BitOffset = 1; + let HasIndex = 1; + let AccessBytes = bytes; +} + +// Like RotateSelectRIEf, but expanded after RA depending on the choice +// of registers. +class RotateSelectRIEfPseudo<RegisterOperand cls1, RegisterOperand cls2> + : Pseudo<(outs cls1:$R1), + (ins cls1:$R1src, cls2:$R2, uimm8:$I3, uimm8:$I4, uimm8zx6:$I5), + []> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; +} + // Implements "$dst = $cc & (8 >> CC) ? $src1 : $src2", where CC is // the value of the PSW's 2-bit condition code field. class SelectWrapper<RegisterOperand cls> - : Pseudo<(outs cls:$dst), (ins cls:$src1, cls:$src2, i8imm:$cc), - [(set cls:$dst, (z_select_ccmask cls:$src1, cls:$src2, imm:$cc))]> { + : Pseudo<(outs cls:$dst), + (ins cls:$src1, cls:$src2, uimm8zx4:$valid, uimm8zx4:$cc), + [(set cls:$dst, (z_select_ccmask cls:$src1, cls:$src2, + uimm8zx4:$valid, uimm8zx4:$cc))]> { let usesCustomInserter = 1; // Although the instructions used by these nodes do not in themselves - // change the PSW, the insertion requires new blocks, and the PSW cannot - // be live across them. 
- let Defs = [PSW]; - let Uses = [PSW]; + // change CC, the insertion requires new blocks, and CC cannot be live + // across them. + let Defs = [CC]; + let Uses = [CC]; +} + +// Stores $new to $addr if $cc is true ("" case) or false (Inv case). +multiclass CondStores<RegisterOperand cls, SDPatternOperator store, + SDPatternOperator load, AddressingMode mode> { + let Defs = [CC], Uses = [CC], usesCustomInserter = 1 in { + def "" : Pseudo<(outs), + (ins cls:$new, mode:$addr, uimm8zx4:$valid, uimm8zx4:$cc), + [(store (z_select_ccmask cls:$new, (load mode:$addr), + uimm8zx4:$valid, uimm8zx4:$cc), + mode:$addr)]>; + def Inv : Pseudo<(outs), + (ins cls:$new, mode:$addr, uimm8zx4:$valid, uimm8zx4:$cc), + [(store (z_select_ccmask (load mode:$addr), cls:$new, + uimm8zx4:$valid, uimm8zx4:$cc), + mode:$addr)]>; + } } // OPERATOR is ATOMIC_SWAP or an ATOMIC_LOAD_* operation. PAT and OPERAND @@ -947,7 +1481,7 @@ class AtomicLoadBinary<SDPatternOperator operator, RegisterOperand cls, dag pat, DAGOperand operand> : Pseudo<(outs cls:$dst), (ins bdaddr20only:$ptr, operand:$src2), [(set cls:$dst, (operator bdaddr20only:$ptr, pat))]> { - let Defs = [PSW]; + let Defs = [CC]; let Has20BitOffset = 1; let mayLoad = 1; let mayStore = 1; @@ -973,7 +1507,7 @@ class AtomicLoadWBinary<SDPatternOperator operator, dag pat, ADDR32:$negbitshift, uimm32:$bitsize), [(set GR32:$dst, (operator bdaddr20only:$ptr, pat, ADDR32:$bitshift, ADDR32:$negbitshift, uimm32:$bitsize))]> { - let Defs = [PSW]; + let Defs = [CC]; let Has20BitOffset = 1; let mayLoad = 1; let mayStore = 1; @@ -985,3 +1519,85 @@ class AtomicLoadWBinaryReg<SDPatternOperator operator> : AtomicLoadWBinary<operator, (i32 GR32:$src2), GR32>; class AtomicLoadWBinaryImm<SDPatternOperator operator, Immediate imm> : AtomicLoadWBinary<operator, (i32 imm:$src2), imm>; + +// Define an instruction that operates on two fixed-length blocks of memory, +// and associated pseudo instructions for operating on blocks of any size. 
+// The Sequence form uses a straight-line sequence of instructions and +// the Loop form uses a loop of length-256 instructions followed by +// another instruction to handle the excess. +multiclass MemorySS<string mnemonic, bits<8> opcode, + SDPatternOperator sequence, SDPatternOperator loop> { + def "" : InstSS<opcode, (outs), (ins bdladdr12onlylen8:$BDL1, + bdaddr12only:$BD2), + mnemonic##"\t$BDL1, $BD2", []>; + let usesCustomInserter = 1 in { + def Sequence : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src, + imm64:$length), + [(sequence bdaddr12only:$dest, bdaddr12only:$src, + imm64:$length)]>; + def Loop : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src, + imm64:$length, GR64:$count256), + [(loop bdaddr12only:$dest, bdaddr12only:$src, + imm64:$length, GR64:$count256)]>; + } +} + +// Define an instruction that operates on two strings, both terminated +// by the character in R0. The instruction processes a CPU-determinated +// number of bytes at a time and sets CC to 3 if the instruction needs +// to be repeated. Also define a pseudo instruction that represents +// the full loop (the main instruction plus the branch on CC==3). +multiclass StringRRE<string mnemonic, bits<16> opcode, + SDPatternOperator operator> { + def "" : InstRRE<opcode, (outs GR64:$R1, GR64:$R2), + (ins GR64:$R1src, GR64:$R2src), + mnemonic#"\t$R1, $R2", []> { + let Constraints = "$R1 = $R1src, $R2 = $R2src"; + let DisableEncoding = "$R1src, $R2src"; + } + let usesCustomInserter = 1 in + def Loop : Pseudo<(outs GR64:$end), + (ins GR64:$start1, GR64:$start2, GR32:$char), + [(set GR64:$end, (operator GR64:$start1, GR64:$start2, + GR32:$char))]>; +} + +// A pseudo instruction that is a direct alias of a real instruction. +// These aliases are used in cases where a particular register operand is +// fixed or where the same instruction is used with different register sizes. +// The size parameter is the size in bytes of the associated real instruction. 
+class Alias<int size, dag outs, dag ins, list<dag> pattern> + : InstSystemZ<size, outs, ins, "", pattern> { + let isPseudo = 1; + let isCodeGenOnly = 1; +} + +// An alias of a BinaryRI, but with different register sizes. +class BinaryAliasRI<SDPatternOperator operator, RegisterOperand cls, + Immediate imm> + : Alias<4, (outs cls:$R1), (ins cls:$R1src, imm:$I2), + [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> { + let Constraints = "$R1 = $R1src"; +} + +// An alias of a BinaryRIL, but with different register sizes. +class BinaryAliasRIL<SDPatternOperator operator, RegisterOperand cls, + Immediate imm> + : Alias<6, (outs cls:$R1), (ins cls:$R1src, imm:$I2), + [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> { + let Constraints = "$R1 = $R1src"; +} + +// An alias of a CompareRI, but with different register sizes. +class CompareAliasRI<SDPatternOperator operator, RegisterOperand cls, + Immediate imm> + : Alias<4, (outs), (ins cls:$R1, imm:$I2), [(operator cls:$R1, imm:$I2)]> { + let isCompare = 1; +} + +// An alias of a RotateSelectRIEf, but with different register sizes. 
+class RotateSelectAliasRIEf<RegisterOperand cls1, RegisterOperand cls2> + : Alias<6, (outs cls1:$R1), + (ins cls1:$R1src, cls2:$R2, uimm8:$I3, uimm8:$I4, uimm8zx6:$I5), []> { + let Constraints = "$R1 = $R1src"; +} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp index 0718c83..acfeed8 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -12,17 +12,37 @@ //===----------------------------------------------------------------------===// #include "SystemZInstrInfo.h" +#include "SystemZTargetMachine.h" #include "SystemZInstrBuilder.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" -#define GET_INSTRINFO_CTOR +#define GET_INSTRINFO_CTOR_DTOR #define GET_INSTRMAP_INFO #include "SystemZGenInstrInfo.inc" using namespace llvm; +// Return a mask with Count low bits set. +static uint64_t allOnes(unsigned int Count) { + return Count == 0 ? 0 : (uint64_t(1) << (Count - 1) << 1) - 1; +} + +// Reg should be a 32-bit GPR. Return true if it is a high register rather +// than a low register. +static bool isHighReg(unsigned int Reg) { + if (SystemZ::GRH32BitRegClass.contains(Reg)) + return true; + assert(SystemZ::GR32BitRegClass.contains(Reg) && "Invalid GRX32"); + return false; +} + +// Pin the vtable to this file. +void SystemZInstrInfo::anchor() {} + SystemZInstrInfo::SystemZInstrInfo(SystemZTargetMachine &tm) : SystemZGenInstrInfo(SystemZ::ADJCALLSTACKDOWN, SystemZ::ADJCALLSTACKUP), - RI(tm, *this) { + RI(tm), TM(tm) { } // MI is a 128-bit load or store. Split it into two 64-bit loads or stores, @@ -40,8 +60,8 @@ void SystemZInstrInfo::splitMove(MachineBasicBlock::iterator MI, // Set up the two 64-bit registers. 
MachineOperand &HighRegOp = EarlierMI->getOperand(0); MachineOperand &LowRegOp = MI->getOperand(0); - HighRegOp.setReg(RI.getSubReg(HighRegOp.getReg(), SystemZ::subreg_high)); - LowRegOp.setReg(RI.getSubReg(LowRegOp.getReg(), SystemZ::subreg_low)); + HighRegOp.setReg(RI.getSubReg(HighRegOp.getReg(), SystemZ::subreg_h64)); + LowRegOp.setReg(RI.getSubReg(LowRegOp.getReg(), SystemZ::subreg_l64)); // The address in the first (high) instruction is already correct. // Adjust the offset in the second (low) instruction. @@ -74,12 +94,104 @@ void SystemZInstrInfo::splitAdjDynAlloc(MachineBasicBlock::iterator MI) const { OffsetMO.setImm(Offset); } +// MI is an RI-style pseudo instruction. Replace it with LowOpcode +// if the first operand is a low GR32 and HighOpcode if the first operand +// is a high GR32. ConvertHigh is true if LowOpcode takes a signed operand +// and HighOpcode takes an unsigned 32-bit operand. In those cases, +// MI has the same kind of operand as LowOpcode, so needs to be converted +// if HighOpcode is used. +void SystemZInstrInfo::expandRIPseudo(MachineInstr *MI, unsigned LowOpcode, + unsigned HighOpcode, + bool ConvertHigh) const { + unsigned Reg = MI->getOperand(0).getReg(); + bool IsHigh = isHighReg(Reg); + MI->setDesc(get(IsHigh ? HighOpcode : LowOpcode)); + if (IsHigh && ConvertHigh) + MI->getOperand(1).setImm(uint32_t(MI->getOperand(1).getImm())); +} + +// MI is a three-operand RIE-style pseudo instruction. Replace it with +// LowOpcode3 if the registers are both low GR32s, otherwise use a move +// followed by HighOpcode or LowOpcode, depending on whether the target +// is a high or low GR32. 
+void SystemZInstrInfo::expandRIEPseudo(MachineInstr *MI, unsigned LowOpcode, + unsigned LowOpcodeK, + unsigned HighOpcode) const { + unsigned DestReg = MI->getOperand(0).getReg(); + unsigned SrcReg = MI->getOperand(1).getReg(); + bool DestIsHigh = isHighReg(DestReg); + bool SrcIsHigh = isHighReg(SrcReg); + if (!DestIsHigh && !SrcIsHigh) + MI->setDesc(get(LowOpcodeK)); + else { + emitGRX32Move(*MI->getParent(), MI, MI->getDebugLoc(), + DestReg, SrcReg, SystemZ::LR, 32, + MI->getOperand(1).isKill()); + MI->setDesc(get(DestIsHigh ? HighOpcode : LowOpcode)); + MI->getOperand(1).setReg(DestReg); + } +} + +// MI is an RXY-style pseudo instruction. Replace it with LowOpcode +// if the first operand is a low GR32 and HighOpcode if the first operand +// is a high GR32. +void SystemZInstrInfo::expandRXYPseudo(MachineInstr *MI, unsigned LowOpcode, + unsigned HighOpcode) const { + unsigned Reg = MI->getOperand(0).getReg(); + unsigned Opcode = getOpcodeForOffset(isHighReg(Reg) ? HighOpcode : LowOpcode, + MI->getOperand(2).getImm()); + MI->setDesc(get(Opcode)); +} + +// MI is an RR-style pseudo instruction that zero-extends the low Size bits +// of one GRX32 into another. Replace it with LowOpcode if both operands +// are low registers, otherwise use RISB[LH]G. +void SystemZInstrInfo::expandZExtPseudo(MachineInstr *MI, unsigned LowOpcode, + unsigned Size) const { + emitGRX32Move(*MI->getParent(), MI, MI->getDebugLoc(), + MI->getOperand(0).getReg(), MI->getOperand(1).getReg(), + LowOpcode, Size, MI->getOperand(1).isKill()); + MI->eraseFromParent(); +} + +// Emit a zero-extending move from 32-bit GPR SrcReg to 32-bit GPR +// DestReg before MBBI in MBB. Use LowLowOpcode when both DestReg and SrcReg +// are low registers, otherwise use RISB[LH]G. Size is the number of bits +// taken from the low end of SrcReg (8 for LLCR, 16 for LLHR and 32 for LR). +// KillSrc is true if this move is the last use of SrcReg. 
+void SystemZInstrInfo::emitGRX32Move(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc DL, unsigned DestReg, + unsigned SrcReg, unsigned LowLowOpcode, + unsigned Size, bool KillSrc) const { + unsigned Opcode; + bool DestIsHigh = isHighReg(DestReg); + bool SrcIsHigh = isHighReg(SrcReg); + if (DestIsHigh && SrcIsHigh) + Opcode = SystemZ::RISBHH; + else if (DestIsHigh && !SrcIsHigh) + Opcode = SystemZ::RISBHL; + else if (!DestIsHigh && SrcIsHigh) + Opcode = SystemZ::RISBLH; + else { + BuildMI(MBB, MBBI, DL, get(LowLowOpcode), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; + } + unsigned Rotate = (DestIsHigh != SrcIsHigh ? 32 : 0); + BuildMI(MBB, MBBI, DL, get(Opcode), DestReg) + .addReg(DestReg, RegState::Undef) + .addReg(SrcReg, getKillRegState(KillSrc)) + .addImm(32 - Size).addImm(128 + 31).addImm(Rotate); +} + // If MI is a simple load or store for a frame object, return the register // it loads or stores and set FrameIndex to the index of the frame object. // Return 0 otherwise. // // Flag is SimpleBDXLoad for loads and SimpleBDXStore for stores. -static int isSimpleMove(const MachineInstr *MI, int &FrameIndex, int Flag) { +static int isSimpleMove(const MachineInstr *MI, int &FrameIndex, + unsigned Flag) { const MCInstrDesc &MCID = MI->getDesc(); if ((MCID.TSFlags & Flag) && MI->getOperand(1).isFI() && @@ -101,6 +213,31 @@ unsigned SystemZInstrInfo::isStoreToStackSlot(const MachineInstr *MI, return isSimpleMove(MI, FrameIndex, SystemZII::SimpleBDXStore); } +bool SystemZInstrInfo::isStackSlotCopy(const MachineInstr *MI, + int &DestFrameIndex, + int &SrcFrameIndex) const { + // Check for MVC 0(Length,FI1),0(FI2) + const MachineFrameInfo *MFI = MI->getParent()->getParent()->getFrameInfo(); + if (MI->getOpcode() != SystemZ::MVC || + !MI->getOperand(0).isFI() || + MI->getOperand(1).getImm() != 0 || + !MI->getOperand(3).isFI() || + MI->getOperand(4).getImm() != 0) + return false; + + // Check that Length covers the full slots. 
+ int64_t Length = MI->getOperand(2).getImm(); + unsigned FI1 = MI->getOperand(0).getIndex(); + unsigned FI2 = MI->getOperand(3).getIndex(); + if (MFI->getObjectSize(FI1) != Length || + MFI->getObjectSize(FI2) != Length) + return false; + + DestFrameIndex = FI1; + SrcFrameIndex = FI2; + return true; +} + bool SystemZInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, @@ -123,19 +260,22 @@ bool SystemZInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, // A terminator that isn't a branch can't easily be handled by this // analysis. - unsigned ThisCond; - const MachineOperand *ThisTarget; - if (!isBranch(I, ThisCond, ThisTarget)) + if (!I->isBranch()) return true; // Can't handle indirect branches. - if (!ThisTarget->isMBB()) + SystemZII::Branch Branch(getBranchInfo(I)); + if (!Branch.Target->isMBB()) + return true; + + // Punt on compound branches. + if (Branch.Type != SystemZII::BranchNormal) return true; - if (ThisCond == SystemZ::CCMASK_ANY) { + if (Branch.CCMask == SystemZ::CCMASK_ANY) { // Handle unconditional branches. if (!AllowModify) { - TBB = ThisTarget->getMBB(); + TBB = Branch.Target->getMBB(); continue; } @@ -147,7 +287,7 @@ bool SystemZInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, FBB = 0; // Delete the JMP if it's equivalent to a fall-through. - if (MBB.isLayoutSuccessor(ThisTarget->getMBB())) { + if (MBB.isLayoutSuccessor(Branch.Target->getMBB())) { TBB = 0; I->eraseFromParent(); I = MBB.end(); @@ -155,7 +295,7 @@ bool SystemZInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, } // TBB is used to indicate the unconditinal destination. 
- TBB = ThisTarget->getMBB(); + TBB = Branch.Target->getMBB(); continue; } @@ -163,26 +303,28 @@ bool SystemZInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, if (Cond.empty()) { // FIXME: add X86-style branch swap FBB = TBB; - TBB = ThisTarget->getMBB(); - Cond.push_back(MachineOperand::CreateImm(ThisCond)); + TBB = Branch.Target->getMBB(); + Cond.push_back(MachineOperand::CreateImm(Branch.CCValid)); + Cond.push_back(MachineOperand::CreateImm(Branch.CCMask)); continue; } // Handle subsequent conditional branches. - assert(Cond.size() == 1); - assert(TBB); + assert(Cond.size() == 2 && TBB && "Should have seen a conditional branch"); // Only handle the case where all conditional branches branch to the same // destination. - if (TBB != ThisTarget->getMBB()) + if (TBB != Branch.Target->getMBB()) return true; // If the conditions are the same, we can leave them alone. - unsigned OldCond = Cond[0].getImm(); - if (OldCond == ThisCond) + unsigned OldCCValid = Cond[0].getImm(); + unsigned OldCCMask = Cond[1].getImm(); + if (OldCCValid == Branch.CCValid && OldCCMask == Branch.CCMask) continue; // FIXME: Try combining conditions like X86 does. Should be easy on Z! + return false; } return false; @@ -197,11 +339,9 @@ unsigned SystemZInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { --I; if (I->isDebugValue()) continue; - unsigned Cond; - const MachineOperand *Target; - if (!isBranch(I, Cond, Target)) + if (!I->isBranch()) break; - if (!Target->isMBB()) + if (!getBranchInfo(I).Target->isMBB()) break; // Remove the branch. 
I->eraseFromParent(); @@ -212,6 +352,13 @@ unsigned SystemZInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { return Count; } +bool SystemZInstrInfo:: +ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { + assert(Cond.size() == 2 && "Invalid condition"); + Cond[1].setImm(Cond[1].getImm() ^ Cond[0].getImm()); + return false; +} + unsigned SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, @@ -223,30 +370,185 @@ SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, // Shouldn't be a fall through. assert(TBB && "InsertBranch must not be told to insert a fallthrough"); - assert((Cond.size() == 1 || Cond.size() == 0) && + assert((Cond.size() == 2 || Cond.size() == 0) && "SystemZ branch conditions have one component!"); if (Cond.empty()) { // Unconditional branch? assert(!FBB && "Unconditional branch with multiple successors!"); - BuildMI(&MBB, DL, get(SystemZ::JG)).addMBB(TBB); + BuildMI(&MBB, DL, get(SystemZ::J)).addMBB(TBB); return 1; } // Conditional branch. unsigned Count = 0; - unsigned CC = Cond[0].getImm(); - BuildMI(&MBB, DL, get(SystemZ::BRCL)).addImm(CC).addMBB(TBB); + unsigned CCValid = Cond[0].getImm(); + unsigned CCMask = Cond[1].getImm(); + BuildMI(&MBB, DL, get(SystemZ::BRC)) + .addImm(CCValid).addImm(CCMask).addMBB(TBB); ++Count; if (FBB) { // Two-way Conditional branch. Insert the second branch. 
- BuildMI(&MBB, DL, get(SystemZ::JG)).addMBB(FBB); + BuildMI(&MBB, DL, get(SystemZ::J)).addMBB(FBB); ++Count; } return Count; } +bool SystemZInstrInfo::analyzeCompare(const MachineInstr *MI, + unsigned &SrcReg, unsigned &SrcReg2, + int &Mask, int &Value) const { + assert(MI->isCompare() && "Caller should have checked for a comparison"); + + if (MI->getNumExplicitOperands() == 2 && + MI->getOperand(0).isReg() && + MI->getOperand(1).isImm()) { + SrcReg = MI->getOperand(0).getReg(); + SrcReg2 = 0; + Value = MI->getOperand(1).getImm(); + Mask = ~0; + return true; + } + + return false; +} + +// If Reg is a virtual register, return its definition, otherwise return null. +static MachineInstr *getDef(unsigned Reg, + const MachineRegisterInfo *MRI) { + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + return 0; + return MRI->getUniqueVRegDef(Reg); +} + +// Return true if MI is a shift of type Opcode by Imm bits. +static bool isShift(MachineInstr *MI, int Opcode, int64_t Imm) { + return (MI->getOpcode() == Opcode && + !MI->getOperand(2).getReg() && + MI->getOperand(3).getImm() == Imm); +} + +// If the destination of MI has no uses, delete it as dead. +static void eraseIfDead(MachineInstr *MI, const MachineRegisterInfo *MRI) { + if (MRI->use_nodbg_empty(MI->getOperand(0).getReg())) + MI->eraseFromParent(); +} + +// Compare compares SrcReg against zero. Check whether SrcReg contains +// the result of an IPM sequence whose input CC survives until Compare, +// and whether Compare is therefore redundant. Delete it and return +// true if so. 
+static bool removeIPMBasedCompare(MachineInstr *Compare, unsigned SrcReg, + const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI) { + MachineInstr *LGFR = 0; + MachineInstr *RLL = getDef(SrcReg, MRI); + if (RLL && RLL->getOpcode() == SystemZ::LGFR) { + LGFR = RLL; + RLL = getDef(LGFR->getOperand(1).getReg(), MRI); + } + if (!RLL || !isShift(RLL, SystemZ::RLL, 31)) + return false; + + MachineInstr *SRL = getDef(RLL->getOperand(1).getReg(), MRI); + if (!SRL || !isShift(SRL, SystemZ::SRL, SystemZ::IPM_CC)) + return false; + + MachineInstr *IPM = getDef(SRL->getOperand(1).getReg(), MRI); + if (!IPM || IPM->getOpcode() != SystemZ::IPM) + return false; + + // Check that there are no assignments to CC between the IPM and Compare, + if (IPM->getParent() != Compare->getParent()) + return false; + MachineBasicBlock::iterator MBBI = IPM, MBBE = Compare; + for (++MBBI; MBBI != MBBE; ++MBBI) { + MachineInstr *MI = MBBI; + if (MI->modifiesRegister(SystemZ::CC, TRI)) + return false; + } + + Compare->eraseFromParent(); + if (LGFR) + eraseIfDead(LGFR, MRI); + eraseIfDead(RLL, MRI); + eraseIfDead(SRL, MRI); + eraseIfDead(IPM, MRI); + + return true; +} + +bool +SystemZInstrInfo::optimizeCompareInstr(MachineInstr *Compare, + unsigned SrcReg, unsigned SrcReg2, + int Mask, int Value, + const MachineRegisterInfo *MRI) const { + assert(!SrcReg2 && "Only optimizing constant comparisons so far"); + bool IsLogical = (Compare->getDesc().TSFlags & SystemZII::IsLogical) != 0; + if (Value == 0 && + !IsLogical && + removeIPMBasedCompare(Compare, SrcReg, MRI, TM.getRegisterInfo())) + return true; + return false; +} + +// If Opcode is a move that has a conditional variant, return that variant, +// otherwise return 0. 
+static unsigned getConditionalMove(unsigned Opcode) { + switch (Opcode) { + case SystemZ::LR: return SystemZ::LOCR; + case SystemZ::LGR: return SystemZ::LOCGR; + default: return 0; + } +} + +bool SystemZInstrInfo::isPredicable(MachineInstr *MI) const { + unsigned Opcode = MI->getOpcode(); + if (TM.getSubtargetImpl()->hasLoadStoreOnCond() && + getConditionalMove(Opcode)) + return true; + return false; +} + +bool SystemZInstrInfo:: +isProfitableToIfCvt(MachineBasicBlock &MBB, + unsigned NumCycles, unsigned ExtraPredCycles, + const BranchProbability &Probability) const { + // For now only convert single instructions. + return NumCycles == 1; +} + +bool SystemZInstrInfo:: +isProfitableToIfCvt(MachineBasicBlock &TMBB, + unsigned NumCyclesT, unsigned ExtraPredCyclesT, + MachineBasicBlock &FMBB, + unsigned NumCyclesF, unsigned ExtraPredCyclesF, + const BranchProbability &Probability) const { + // For now avoid converting mutually-exclusive cases. + return false; +} + +bool SystemZInstrInfo:: +PredicateInstruction(MachineInstr *MI, + const SmallVectorImpl<MachineOperand> &Pred) const { + assert(Pred.size() == 2 && "Invalid condition"); + unsigned CCValid = Pred[0].getImm(); + unsigned CCMask = Pred[1].getImm(); + assert(CCMask > 0 && CCMask < 15 && "Invalid predicate"); + unsigned Opcode = MI->getOpcode(); + if (TM.getSubtargetImpl()->hasLoadStoreOnCond()) { + if (unsigned CondOpcode = getConditionalMove(Opcode)) { + MI->setDesc(get(CondOpcode)); + MachineInstrBuilder(*MI->getParent()->getParent(), MI) + .addImm(CCValid).addImm(CCMask) + .addReg(SystemZ::CC, RegState::Implicit);; + return true; + } + } + return false; +} + void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc DL, @@ -254,18 +556,21 @@ SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB, bool KillSrc) const { // Split 128-bit GPR moves into two 64-bit moves. This handles ADDR128 too. 
if (SystemZ::GR128BitRegClass.contains(DestReg, SrcReg)) { - copyPhysReg(MBB, MBBI, DL, RI.getSubReg(DestReg, SystemZ::subreg_high), - RI.getSubReg(SrcReg, SystemZ::subreg_high), KillSrc); - copyPhysReg(MBB, MBBI, DL, RI.getSubReg(DestReg, SystemZ::subreg_low), - RI.getSubReg(SrcReg, SystemZ::subreg_low), KillSrc); + copyPhysReg(MBB, MBBI, DL, RI.getSubReg(DestReg, SystemZ::subreg_h64), + RI.getSubReg(SrcReg, SystemZ::subreg_h64), KillSrc); + copyPhysReg(MBB, MBBI, DL, RI.getSubReg(DestReg, SystemZ::subreg_l64), + RI.getSubReg(SrcReg, SystemZ::subreg_l64), KillSrc); + return; + } + + if (SystemZ::GRX32BitRegClass.contains(DestReg, SrcReg)) { + emitGRX32Move(MBB, MBBI, DL, DestReg, SrcReg, SystemZ::LR, 32, KillSrc); return; } // Everything else needs only one instruction. unsigned Opcode; - if (SystemZ::GR32BitRegClass.contains(DestReg, SrcReg)) - Opcode = SystemZ::LR; - else if (SystemZ::GR64BitRegClass.contains(DestReg, SrcReg)) + if (SystemZ::GR64BitRegClass.contains(DestReg, SrcReg)) Opcode = SystemZ::LGR; else if (SystemZ::FP32BitRegClass.contains(DestReg, SrcReg)) Opcode = SystemZ::LER; @@ -313,6 +618,256 @@ SystemZInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, FrameIdx); } +// Return true if MI is a simple load or store with a 12-bit displacement +// and no index. Flag is SimpleBDXLoad for loads and SimpleBDXStore for stores. 
+static bool isSimpleBD12Move(const MachineInstr *MI, unsigned Flag) { + const MCInstrDesc &MCID = MI->getDesc(); + return ((MCID.TSFlags & Flag) && + isUInt<12>(MI->getOperand(2).getImm()) && + MI->getOperand(3).getReg() == 0); +} + +namespace { + struct LogicOp { + LogicOp() : RegSize(0), ImmLSB(0), ImmSize(0) {} + LogicOp(unsigned regSize, unsigned immLSB, unsigned immSize) + : RegSize(regSize), ImmLSB(immLSB), ImmSize(immSize) {} + + operator bool() const { return RegSize; } + + unsigned RegSize, ImmLSB, ImmSize; + }; +} + +static LogicOp interpretAndImmediate(unsigned Opcode) { + switch (Opcode) { + case SystemZ::NILMux: return LogicOp(32, 0, 16); + case SystemZ::NIHMux: return LogicOp(32, 16, 16); + case SystemZ::NILL64: return LogicOp(64, 0, 16); + case SystemZ::NILH64: return LogicOp(64, 16, 16); + case SystemZ::NIHL64: return LogicOp(64, 32, 16); + case SystemZ::NIHH64: return LogicOp(64, 48, 16); + case SystemZ::NIFMux: return LogicOp(32, 0, 32); + case SystemZ::NILF64: return LogicOp(64, 0, 32); + case SystemZ::NIHF64: return LogicOp(64, 32, 32); + default: return LogicOp(); + } +} + +// Used to return from convertToThreeAddress after replacing two-address +// instruction OldMI with three-address instruction NewMI. 
+static MachineInstr *finishConvertToThreeAddress(MachineInstr *OldMI, + MachineInstr *NewMI, + LiveVariables *LV) { + if (LV) { + unsigned NumOps = OldMI->getNumOperands(); + for (unsigned I = 1; I < NumOps; ++I) { + MachineOperand &Op = OldMI->getOperand(I); + if (Op.isReg() && Op.isKill()) + LV->replaceKillInstruction(Op.getReg(), OldMI, NewMI); + } + } + return NewMI; +} + +MachineInstr * +SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, + MachineBasicBlock::iterator &MBBI, + LiveVariables *LV) const { + MachineInstr *MI = MBBI; + MachineBasicBlock *MBB = MI->getParent(); + MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + + unsigned Opcode = MI->getOpcode(); + unsigned NumOps = MI->getNumOperands(); + + // Try to convert something like SLL into SLLK, if supported. + // We prefer to keep the two-operand form where possible both + // because it tends to be shorter and because some instructions + // have memory forms that can be used during spilling. + if (TM.getSubtargetImpl()->hasDistinctOps()) { + MachineOperand &Dest = MI->getOperand(0); + MachineOperand &Src = MI->getOperand(1); + unsigned DestReg = Dest.getReg(); + unsigned SrcReg = Src.getReg(); + // AHIMux is only really a three-operand instruction when both operands + // are low registers. Try to constrain both operands to be low if + // possible. + if (Opcode == SystemZ::AHIMux && + TargetRegisterInfo::isVirtualRegister(DestReg) && + TargetRegisterInfo::isVirtualRegister(SrcReg) && + MRI.getRegClass(DestReg)->contains(SystemZ::R1L) && + MRI.getRegClass(SrcReg)->contains(SystemZ::R1L)) { + MRI.constrainRegClass(DestReg, &SystemZ::GR32BitRegClass); + MRI.constrainRegClass(SrcReg, &SystemZ::GR32BitRegClass); + } + int ThreeOperandOpcode = SystemZ::getThreeOperandOpcode(Opcode); + if (ThreeOperandOpcode >= 0) { + MachineInstrBuilder MIB = + BuildMI(*MBB, MBBI, MI->getDebugLoc(), get(ThreeOperandOpcode)) + .addOperand(Dest); + // Keep the kill state, but drop the tied flag. 
+ MIB.addReg(Src.getReg(), getKillRegState(Src.isKill()), Src.getSubReg()); + // Keep the remaining operands as-is. + for (unsigned I = 2; I < NumOps; ++I) + MIB.addOperand(MI->getOperand(I)); + return finishConvertToThreeAddress(MI, MIB, LV); + } + } + + // Try to convert an AND into an RISBG-type instruction. + if (LogicOp And = interpretAndImmediate(Opcode)) { + uint64_t Imm = MI->getOperand(2).getImm() << And.ImmLSB; + // AND IMMEDIATE leaves the other bits of the register unchanged. + Imm |= allOnes(And.RegSize) & ~(allOnes(And.ImmSize) << And.ImmLSB); + unsigned Start, End; + if (isRxSBGMask(Imm, And.RegSize, Start, End)) { + unsigned NewOpcode; + if (And.RegSize == 64) + NewOpcode = SystemZ::RISBG; + else { + NewOpcode = SystemZ::RISBMux; + Start &= 31; + End &= 31; + } + MachineOperand &Dest = MI->getOperand(0); + MachineOperand &Src = MI->getOperand(1); + MachineInstrBuilder MIB = + BuildMI(*MBB, MI, MI->getDebugLoc(), get(NewOpcode)) + .addOperand(Dest).addReg(0) + .addReg(Src.getReg(), getKillRegState(Src.isKill()), Src.getSubReg()) + .addImm(Start).addImm(End + 128).addImm(0); + return finishConvertToThreeAddress(MI, MIB, LV); + } + } + return 0; +} + +MachineInstr * +SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr *MI, + const SmallVectorImpl<unsigned> &Ops, + int FrameIndex) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + unsigned Size = MFI->getObjectSize(FrameIndex); + unsigned Opcode = MI->getOpcode(); + + if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) { + if ((Opcode == SystemZ::LA || Opcode == SystemZ::LAY) && + isInt<8>(MI->getOperand(2).getImm()) && + !MI->getOperand(3).getReg()) { + // LA(Y) %reg, CONST(%reg) -> AGSI %mem, CONST + return BuildMI(MF, MI->getDebugLoc(), get(SystemZ::AGSI)) + .addFrameIndex(FrameIndex).addImm(0) + .addImm(MI->getOperand(2).getImm()); + } + return 0; + } + + // All other cases require a single operand. 
+ if (Ops.size() != 1) + return 0; + + unsigned OpNum = Ops[0]; + assert(Size == MF.getRegInfo() + .getRegClass(MI->getOperand(OpNum).getReg())->getSize() && + "Invalid size combination"); + + if ((Opcode == SystemZ::AHI || Opcode == SystemZ::AGHI) && + OpNum == 0 && + isInt<8>(MI->getOperand(2).getImm())) { + // A(G)HI %reg, CONST -> A(G)SI %mem, CONST + Opcode = (Opcode == SystemZ::AHI ? SystemZ::ASI : SystemZ::AGSI); + return BuildMI(MF, MI->getDebugLoc(), get(Opcode)) + .addFrameIndex(FrameIndex).addImm(0) + .addImm(MI->getOperand(2).getImm()); + } + + if (Opcode == SystemZ::LGDR || Opcode == SystemZ::LDGR) { + bool Op0IsGPR = (Opcode == SystemZ::LGDR); + bool Op1IsGPR = (Opcode == SystemZ::LDGR); + // If we're spilling the destination of an LDGR or LGDR, store the + // source register instead. + if (OpNum == 0) { + unsigned StoreOpcode = Op1IsGPR ? SystemZ::STG : SystemZ::STD; + return BuildMI(MF, MI->getDebugLoc(), get(StoreOpcode)) + .addOperand(MI->getOperand(1)).addFrameIndex(FrameIndex) + .addImm(0).addReg(0); + } + // If we're spilling the source of an LDGR or LGDR, load the + // destination register instead. + if (OpNum == 1) { + unsigned LoadOpcode = Op0IsGPR ? SystemZ::LG : SystemZ::LD; + unsigned Dest = MI->getOperand(0).getReg(); + return BuildMI(MF, MI->getDebugLoc(), get(LoadOpcode), Dest) + .addFrameIndex(FrameIndex).addImm(0).addReg(0); + } + } + + // Look for cases where the source of a simple store or the destination + // of a simple load is being spilled. Try to use MVC instead. + // + // Although MVC is in practice a fast choice in these cases, it is still + // logically a bytewise copy. This means that we cannot use it if the + // load or store is volatile. We also wouldn't be able to use MVC if + // the two memories partially overlap, but that case cannot occur here, + // because we know that one of the memories is a full frame index. + // + // For performance reasons, we also want to avoid using MVC if the addresses + // might be equal. 
We don't worry about that case here, because spill slot + // coloring happens later, and because we have special code to remove + // MVCs that turn out to be redundant. + if (OpNum == 0 && MI->hasOneMemOperand()) { + MachineMemOperand *MMO = *MI->memoperands_begin(); + if (MMO->getSize() == Size && !MMO->isVolatile()) { + // Handle conversion of loads. + if (isSimpleBD12Move(MI, SystemZII::SimpleBDXLoad)) { + return BuildMI(MF, MI->getDebugLoc(), get(SystemZ::MVC)) + .addFrameIndex(FrameIndex).addImm(0).addImm(Size) + .addOperand(MI->getOperand(1)).addImm(MI->getOperand(2).getImm()) + .addMemOperand(MMO); + } + // Handle conversion of stores. + if (isSimpleBD12Move(MI, SystemZII::SimpleBDXStore)) { + return BuildMI(MF, MI->getDebugLoc(), get(SystemZ::MVC)) + .addOperand(MI->getOperand(1)).addImm(MI->getOperand(2).getImm()) + .addImm(Size).addFrameIndex(FrameIndex).addImm(0) + .addMemOperand(MMO); + } + } + } + + // If the spilled operand is the final one, try to change <INSN>R + // into <INSN>. 
+ int MemOpcode = SystemZ::getMemOpcode(Opcode); + if (MemOpcode >= 0) { + unsigned NumOps = MI->getNumExplicitOperands(); + if (OpNum == NumOps - 1) { + const MCInstrDesc &MemDesc = get(MemOpcode); + uint64_t AccessBytes = SystemZII::getAccessSize(MemDesc.TSFlags); + assert(AccessBytes != 0 && "Size of access should be known"); + assert(AccessBytes <= Size && "Access outside the frame index"); + uint64_t Offset = Size - AccessBytes; + MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(MemOpcode)); + for (unsigned I = 0; I < OpNum; ++I) + MIB.addOperand(MI->getOperand(I)); + MIB.addFrameIndex(FrameIndex).addImm(Offset); + if (MemDesc.TSFlags & SystemZII::HasIndex) + MIB.addReg(0); + return MIB; + } + } + + return 0; +} + +MachineInstr * +SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr* MI, + const SmallVectorImpl<unsigned> &Ops, + MachineInstr* LoadMI) const { + return 0; +} + bool SystemZInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { switch (MI->getOpcode()) { @@ -332,6 +887,138 @@ SystemZInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { splitMove(MI, SystemZ::STD); return true; + case SystemZ::LBMux: + expandRXYPseudo(MI, SystemZ::LB, SystemZ::LBH); + return true; + + case SystemZ::LHMux: + expandRXYPseudo(MI, SystemZ::LH, SystemZ::LHH); + return true; + + case SystemZ::LLCRMux: + expandZExtPseudo(MI, SystemZ::LLCR, 8); + return true; + + case SystemZ::LLHRMux: + expandZExtPseudo(MI, SystemZ::LLHR, 16); + return true; + + case SystemZ::LLCMux: + expandRXYPseudo(MI, SystemZ::LLC, SystemZ::LLCH); + return true; + + case SystemZ::LLHMux: + expandRXYPseudo(MI, SystemZ::LLH, SystemZ::LLHH); + return true; + + case SystemZ::LMux: + expandRXYPseudo(MI, SystemZ::L, SystemZ::LFH); + return true; + + case SystemZ::STCMux: + expandRXYPseudo(MI, SystemZ::STC, SystemZ::STCH); + return true; + + case SystemZ::STHMux: + expandRXYPseudo(MI, SystemZ::STH, SystemZ::STHH); + return true; + + case 
SystemZ::STMux: + expandRXYPseudo(MI, SystemZ::ST, SystemZ::STFH); + return true; + + case SystemZ::LHIMux: + expandRIPseudo(MI, SystemZ::LHI, SystemZ::IIHF, true); + return true; + + case SystemZ::IIFMux: + expandRIPseudo(MI, SystemZ::IILF, SystemZ::IIHF, false); + return true; + + case SystemZ::IILMux: + expandRIPseudo(MI, SystemZ::IILL, SystemZ::IIHL, false); + return true; + + case SystemZ::IIHMux: + expandRIPseudo(MI, SystemZ::IILH, SystemZ::IIHH, false); + return true; + + case SystemZ::NIFMux: + expandRIPseudo(MI, SystemZ::NILF, SystemZ::NIHF, false); + return true; + + case SystemZ::NILMux: + expandRIPseudo(MI, SystemZ::NILL, SystemZ::NIHL, false); + return true; + + case SystemZ::NIHMux: + expandRIPseudo(MI, SystemZ::NILH, SystemZ::NIHH, false); + return true; + + case SystemZ::OIFMux: + expandRIPseudo(MI, SystemZ::OILF, SystemZ::OIHF, false); + return true; + + case SystemZ::OILMux: + expandRIPseudo(MI, SystemZ::OILL, SystemZ::OIHL, false); + return true; + + case SystemZ::OIHMux: + expandRIPseudo(MI, SystemZ::OILH, SystemZ::OIHH, false); + return true; + + case SystemZ::XIFMux: + expandRIPseudo(MI, SystemZ::XILF, SystemZ::XIHF, false); + return true; + + case SystemZ::TMLMux: + expandRIPseudo(MI, SystemZ::TMLL, SystemZ::TMHL, false); + return true; + + case SystemZ::TMHMux: + expandRIPseudo(MI, SystemZ::TMLH, SystemZ::TMHH, false); + return true; + + case SystemZ::AHIMux: + expandRIPseudo(MI, SystemZ::AHI, SystemZ::AIH, false); + return true; + + case SystemZ::AHIMuxK: + expandRIEPseudo(MI, SystemZ::AHI, SystemZ::AHIK, SystemZ::AIH); + return true; + + case SystemZ::AFIMux: + expandRIPseudo(MI, SystemZ::AFI, SystemZ::AIH, false); + return true; + + case SystemZ::CFIMux: + expandRIPseudo(MI, SystemZ::CFI, SystemZ::CIH, false); + return true; + + case SystemZ::CLFIMux: + expandRIPseudo(MI, SystemZ::CLFI, SystemZ::CLIH, false); + return true; + + case SystemZ::CMux: + expandRXYPseudo(MI, SystemZ::C, SystemZ::CHF); + return true; + + case SystemZ::CLMux: + 
expandRXYPseudo(MI, SystemZ::CL, SystemZ::CLHF); + return true; + + case SystemZ::RISBMux: { + bool DestIsHigh = isHighReg(MI->getOperand(0).getReg()); + bool SrcIsHigh = isHighReg(MI->getOperand(2).getReg()); + if (SrcIsHigh == DestIsHigh) + MI->setDesc(get(DestIsHigh ? SystemZ::RISBHH : SystemZ::RISBLL)); + else { + MI->setDesc(get(DestIsHigh ? SystemZ::RISBHL : SystemZ::RISBLH)); + MI->getOperand(5).setImm(MI->getOperand(5).getImm() ^ 32); + } + return true; + } + case SystemZ::ADJDYNALLOC: splitAdjDynAlloc(MI); return true; @@ -341,32 +1028,60 @@ SystemZInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { } } -bool SystemZInstrInfo:: -ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { - assert(Cond.size() == 1 && "Invalid branch condition!"); - Cond[0].setImm(Cond[0].getImm() ^ SystemZ::CCMASK_ANY); - return false; +uint64_t SystemZInstrInfo::getInstSizeInBytes(const MachineInstr *MI) const { + if (MI->getOpcode() == TargetOpcode::INLINEASM) { + const MachineFunction *MF = MI->getParent()->getParent(); + const char *AsmStr = MI->getOperand(0).getSymbolName(); + return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo()); + } + return MI->getDesc().getSize(); } -bool SystemZInstrInfo::isBranch(const MachineInstr *MI, unsigned &Cond, - const MachineOperand *&Target) const { +SystemZII::Branch +SystemZInstrInfo::getBranchInfo(const MachineInstr *MI) const { switch (MI->getOpcode()) { case SystemZ::BR: case SystemZ::J: case SystemZ::JG: - Cond = SystemZ::CCMASK_ANY; - Target = &MI->getOperand(0); - return true; + return SystemZII::Branch(SystemZII::BranchNormal, SystemZ::CCMASK_ANY, + SystemZ::CCMASK_ANY, &MI->getOperand(0)); case SystemZ::BRC: case SystemZ::BRCL: - Cond = MI->getOperand(0).getImm(); - Target = &MI->getOperand(1); - return true; + return SystemZII::Branch(SystemZII::BranchNormal, + MI->getOperand(0).getImm(), + MI->getOperand(1).getImm(), &MI->getOperand(2)); + + case SystemZ::BRCT: + return 
SystemZII::Branch(SystemZII::BranchCT, SystemZ::CCMASK_ICMP, + SystemZ::CCMASK_CMP_NE, &MI->getOperand(2)); + + case SystemZ::BRCTG: + return SystemZII::Branch(SystemZII::BranchCTG, SystemZ::CCMASK_ICMP, + SystemZ::CCMASK_CMP_NE, &MI->getOperand(2)); + + case SystemZ::CIJ: + case SystemZ::CRJ: + return SystemZII::Branch(SystemZII::BranchC, SystemZ::CCMASK_ICMP, + MI->getOperand(2).getImm(), &MI->getOperand(3)); + + case SystemZ::CLIJ: + case SystemZ::CLRJ: + return SystemZII::Branch(SystemZII::BranchCL, SystemZ::CCMASK_ICMP, + MI->getOperand(2).getImm(), &MI->getOperand(3)); + + case SystemZ::CGIJ: + case SystemZ::CGRJ: + return SystemZII::Branch(SystemZII::BranchCG, SystemZ::CCMASK_ICMP, + MI->getOperand(2).getImm(), &MI->getOperand(3)); + + case SystemZ::CLGIJ: + case SystemZ::CLGRJ: + return SystemZII::Branch(SystemZII::BranchCLG, SystemZ::CCMASK_ICMP, + MI->getOperand(2).getImm(), &MI->getOperand(3)); default: - assert(!MI->getDesc().isBranch() && "Unknown branch opcode"); - return false; + llvm_unreachable("Unrecognized branch opcode"); } } @@ -375,7 +1090,13 @@ void SystemZInstrInfo::getLoadStoreOpcodes(const TargetRegisterClass *RC, unsigned &StoreOpcode) const { if (RC == &SystemZ::GR32BitRegClass || RC == &SystemZ::ADDR32BitRegClass) { LoadOpcode = SystemZ::L; - StoreOpcode = SystemZ::ST32; + StoreOpcode = SystemZ::ST; + } else if (RC == &SystemZ::GRH32BitRegClass) { + LoadOpcode = SystemZ::LFH; + StoreOpcode = SystemZ::STFH; + } else if (RC == &SystemZ::GRX32BitRegClass) { + LoadOpcode = SystemZ::LMux; + StoreOpcode = SystemZ::STMux; } else if (RC == &SystemZ::GR64BitRegClass || RC == &SystemZ::ADDR64BitRegClass) { LoadOpcode = SystemZ::LG; @@ -424,6 +1145,88 @@ unsigned SystemZInstrInfo::getOpcodeForOffset(unsigned Opcode, return 0; } +unsigned SystemZInstrInfo::getLoadAndTest(unsigned Opcode) const { + switch (Opcode) { + case SystemZ::L: return SystemZ::LT; + case SystemZ::LY: return SystemZ::LT; + case SystemZ::LG: return SystemZ::LTG; + case 
SystemZ::LGF: return SystemZ::LTGF; + case SystemZ::LR: return SystemZ::LTR; + case SystemZ::LGFR: return SystemZ::LTGFR; + case SystemZ::LGR: return SystemZ::LTGR; + case SystemZ::LER: return SystemZ::LTEBR; + case SystemZ::LDR: return SystemZ::LTDBR; + case SystemZ::LXR: return SystemZ::LTXBR; + default: return 0; + } +} + +// Return true if Mask matches the regexp 0*1+0*, given that zero masks +// have already been filtered out. Store the first set bit in LSB and +// the number of set bits in Length if so. +static bool isStringOfOnes(uint64_t Mask, unsigned &LSB, unsigned &Length) { + unsigned First = findFirstSet(Mask); + uint64_t Top = (Mask >> First) + 1; + if ((Top & -Top) == Top) { + LSB = First; + Length = findFirstSet(Top); + return true; + } + return false; +} + +bool SystemZInstrInfo::isRxSBGMask(uint64_t Mask, unsigned BitSize, + unsigned &Start, unsigned &End) const { + // Reject trivial all-zero masks. + if (Mask == 0) + return false; + + // Handle the 1+0+ or 0+1+0* cases. Start then specifies the index of + // the msb and End specifies the index of the lsb. + unsigned LSB, Length; + if (isStringOfOnes(Mask, LSB, Length)) { + Start = 63 - (LSB + Length - 1); + End = 63 - LSB; + return true; + } + + // Handle the wrap-around 1+0+1+ cases. Start then specifies the msb + // of the low 1s and End specifies the lsb of the high 1s. + if (isStringOfOnes(Mask ^ allOnes(BitSize), LSB, Length)) { + assert(LSB > 0 && "Bottom bit must be set"); + assert(LSB + Length < BitSize && "Top bit must be set"); + Start = 63 - (LSB - 1); + End = 63 - (LSB + Length); + return true; + } + + return false; +} + +unsigned SystemZInstrInfo::getCompareAndBranch(unsigned Opcode, + const MachineInstr *MI) const { + switch (Opcode) { + case SystemZ::CR: + return SystemZ::CRJ; + case SystemZ::CGR: + return SystemZ::CGRJ; + case SystemZ::CHI: + return MI && isInt<8>(MI->getOperand(1).getImm()) ? 
SystemZ::CIJ : 0; + case SystemZ::CGHI: + return MI && isInt<8>(MI->getOperand(1).getImm()) ? SystemZ::CGIJ : 0; + case SystemZ::CLR: + return SystemZ::CLRJ; + case SystemZ::CLGR: + return SystemZ::CLGRJ; + case SystemZ::CLFI: + return MI && isUInt<8>(MI->getOperand(1).getImm()) ? SystemZ::CLIJ : 0; + case SystemZ::CLGFI: + return MI && isUInt<8>(MI->getOperand(1).getImm()) ? SystemZ::CLGIJ : 0; + default: + return 0; + } +} + void SystemZInstrInfo::loadImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned Reg, uint64_t Value) const { diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h index 0fc4761..be4c8fe 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h @@ -28,12 +28,31 @@ class SystemZTargetMachine; namespace SystemZII { enum { // See comments in SystemZInstrFormats.td. - SimpleBDXLoad = (1 << 0), - SimpleBDXStore = (1 << 1), - Has20BitOffset = (1 << 2), - HasIndex = (1 << 3), - Is128Bit = (1 << 4) + SimpleBDXLoad = (1 << 0), + SimpleBDXStore = (1 << 1), + Has20BitOffset = (1 << 2), + HasIndex = (1 << 3), + Is128Bit = (1 << 4), + AccessSizeMask = (31 << 5), + AccessSizeShift = 5, + CCValuesMask = (15 << 10), + CCValuesShift = 10, + CompareZeroCCMaskMask = (15 << 14), + CompareZeroCCMaskShift = 14, + CCMaskFirst = (1 << 18), + CCMaskLast = (1 << 19), + IsLogical = (1 << 20) }; + static inline unsigned getAccessSize(unsigned int Flags) { + return (Flags & AccessSizeMask) >> AccessSizeShift; + } + static inline unsigned getCCValues(unsigned int Flags) { + return (Flags & CCValuesMask) >> CCValuesShift; + } + static inline unsigned getCompareZeroCCMask(unsigned int Flags) { + return (Flags & CompareZeroCCMaskMask) >> CompareZeroCCMaskShift; + } + // SystemZ MachineOperand target flags. enum { // Masks out the bits for the access model. 
@@ -42,14 +61,74 @@ namespace SystemZII { // @GOT (aka @GOTENT) MO_GOT = (1 << 0) }; + // Classifies a branch. + enum BranchType { + // An instruction that branches on the current value of CC. + BranchNormal, + + // An instruction that performs a 32-bit signed comparison and branches + // on the result. + BranchC, + + // An instruction that performs a 32-bit unsigned comparison and branches + // on the result. + BranchCL, + + // An instruction that performs a 64-bit signed comparison and branches + // on the result. + BranchCG, + + // An instruction that performs a 64-bit unsigned comparison and branches + // on the result. + BranchCLG, + + // An instruction that decrements a 32-bit register and branches if + // the result is nonzero. + BranchCT, + + // An instruction that decrements a 64-bit register and branches if + // the result is nonzero. + BranchCTG + }; + // Information about a branch instruction. + struct Branch { + // The type of the branch. + BranchType Type; + + // CCMASK_<N> is set if CC might be equal to N. + unsigned CCValid; + + // CCMASK_<N> is set if the branch should be taken when CC == N. + unsigned CCMask; + + // The target of the branch. 
+ const MachineOperand *Target; + + Branch(BranchType type, unsigned ccValid, unsigned ccMask, + const MachineOperand *target) + : Type(type), CCValid(ccValid), CCMask(ccMask), Target(target) {} + }; } class SystemZInstrInfo : public SystemZGenInstrInfo { const SystemZRegisterInfo RI; + SystemZTargetMachine &TM; void splitMove(MachineBasicBlock::iterator MI, unsigned NewOpcode) const; void splitAdjDynAlloc(MachineBasicBlock::iterator MI) const; - + void expandRIPseudo(MachineInstr *MI, unsigned LowOpcode, + unsigned HighOpcode, bool ConvertHigh) const; + void expandRIEPseudo(MachineInstr *MI, unsigned LowOpcode, + unsigned LowOpcodeK, unsigned HighOpcode) const; + void expandRXYPseudo(MachineInstr *MI, unsigned LowOpcode, + unsigned HighOpcode) const; + void expandZExtPseudo(MachineInstr *MI, unsigned LowOpcode, + unsigned Size) const; + void emitGRX32Move(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + DebugLoc DL, unsigned DestReg, unsigned SrcReg, + unsigned LowLowOpcode, unsigned Size, bool KillSrc) const; + virtual void anchor(); + public: explicit SystemZInstrInfo(SystemZTargetMachine &TM); @@ -58,6 +137,8 @@ public: int &FrameIndex) const LLVM_OVERRIDE; virtual unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const LLVM_OVERRIDE; + virtual bool isStackSlotCopy(const MachineInstr *MI, int &DestFrameIndex, + int &SrcFrameIndex) const LLVM_OVERRIDE; virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, @@ -68,6 +149,29 @@ public: MachineBasicBlock *FBB, const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const LLVM_OVERRIDE; + bool analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, + unsigned &SrcReg2, int &Mask, int &Value) const + LLVM_OVERRIDE; + bool optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, + unsigned SrcReg2, int Mask, int Value, + const MachineRegisterInfo *MRI) const LLVM_OVERRIDE; + virtual bool isPredicable(MachineInstr *MI) const 
LLVM_OVERRIDE; + virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, + unsigned ExtraPredCycles, + const BranchProbability &Probability) const + LLVM_OVERRIDE; + virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB, + unsigned NumCyclesT, + unsigned ExtraPredCyclesT, + MachineBasicBlock &FMBB, + unsigned NumCyclesF, + unsigned ExtraPredCyclesF, + const BranchProbability &Probability) const + LLVM_OVERRIDE; + virtual bool + PredicateInstruction(MachineInstr *MI, + const SmallVectorImpl<MachineOperand> &Pred) const + LLVM_OVERRIDE; virtual void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc DL, unsigned DestReg, unsigned SrcReg, @@ -84,6 +188,18 @@ public: unsigned DestReg, int FrameIdx, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const LLVM_OVERRIDE; + virtual MachineInstr * + convertToThreeAddress(MachineFunction::iterator &MFI, + MachineBasicBlock::iterator &MBBI, + LiveVariables *LV) const; + virtual MachineInstr * + foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, + const SmallVectorImpl<unsigned> &Ops, + int FrameIndex) const; + virtual MachineInstr * + foldMemoryOperandImpl(MachineFunction &MF, MachineInstr* MI, + const SmallVectorImpl<unsigned> &Ops, + MachineInstr* LoadMI) const; virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MBBI) const LLVM_OVERRIDE; virtual bool @@ -93,13 +209,15 @@ public: // Return the SystemZRegisterInfo, which this class owns. const SystemZRegisterInfo &getRegisterInfo() const { return RI; } + // Return the size in bytes of MI. + uint64_t getInstSizeInBytes(const MachineInstr *MI) const; + // Return true if MI is a conditional or unconditional branch. // When returning true, set Cond to the mask of condition-code // values on which the instruction will branch, and set Target // to the operand that contains the branch target. This target // can be a register or a basic block. 
- bool isBranch(const MachineInstr *MI, unsigned &Cond, - const MachineOperand *&Target) const; + SystemZII::Branch getBranchInfo(const MachineInstr *MI) const; // Get the load and store opcodes for a given register class. void getLoadStoreOpcodes(const TargetRegisterClass *RC, @@ -112,6 +230,22 @@ public: // exists. unsigned getOpcodeForOffset(unsigned Opcode, int64_t Offset) const; + // If Opcode is a load instruction that has a LOAD AND TEST form, + // return the opcode for the testing form, otherwise return 0. + unsigned getLoadAndTest(unsigned Opcode) const; + + // Return true if ROTATE AND ... SELECTED BITS can be used to select bits + // Mask of the R2 operand, given that only the low BitSize bits of Mask are + // significant. Set Start and End to the I3 and I4 operands if so. + bool isRxSBGMask(uint64_t Mask, unsigned BitSize, + unsigned &Start, unsigned &End) const; + + // If Opcode is a COMPARE opcode for which an associated COMPARE AND + // BRANCH exists, return the opcode for the latter, otherwise return 0. + // MI, if nonnull, is the compare instruction. + unsigned getCompareAndBranch(unsigned Opcode, + const MachineInstr *MI = 0) const; + // Emit code before MBBI in MI to move immediate value Value into // physical register Reg. void loadImmediate(MachineBasicBlock &MBB, diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td index 7ffa382..6524e44 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td @@ -32,77 +32,202 @@ let neverHasSideEffects = 1 in { // Control flow instructions //===----------------------------------------------------------------------===// -// A return instruction. R1 is the condition-code mask (all 1s) -// and R2 is the target address, which is always stored in %r14. 
-let isReturn = 1, isTerminator = 1, isBarrier = 1, hasCtrlDep = 1, - R1 = 15, R2 = 14, isCodeGenOnly = 1 in { - def RET : InstRR<0x07, (outs), (ins), "br\t%r14", [(z_retflag)]>; -} +// A return instruction (br %r14). +let isReturn = 1, isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in + def Return : Alias<2, (outs), (ins), [(z_retflag)]>; // Unconditional branches. R1 is the condition-code mask (all 1s). let isBranch = 1, isTerminator = 1, isBarrier = 1, R1 = 15 in { let isIndirectBranch = 1 in - def BR : InstRR<0x07, (outs), (ins ADDR64:$dst), - "br\t$dst", [(brind ADDR64:$dst)]>; + def BR : InstRR<0x07, (outs), (ins ADDR64:$R2), + "br\t$R2", [(brind ADDR64:$R2)]>; - // An assembler extended mnemonic for BRC. Use a separate instruction for - // the asm parser, so that we don't relax Js to external symbols into JGs. - let isCodeGenOnly = 1 in - def J : InstRI<0xA74, (outs), (ins brtarget16:$dst), "j\t$dst", []>; - let isAsmParserOnly = 1 in - def AsmJ : InstRI<0xA74, (outs), (ins brtarget16:$dst), "j\t$dst", []>; + // An assembler extended mnemonic for BRC. + def J : InstRI<0xA74, (outs), (ins brtarget16:$I2), "j\t$I2", + [(br bb:$I2)]>; // An assembler extended mnemonic for BRCL. (The extension is "G" // rather than "L" because "JL" is "Jump if Less".) - def JG : InstRIL<0xC04, (outs), (ins brtarget32:$dst), - "jg\t$dst", [(br bb:$dst)]>; + def JG : InstRIL<0xC04, (outs), (ins brtarget32:$I2), "jg\t$I2", []>; } // Conditional branches. It's easier for LLVM to handle these branches // in their raw BRC/BRCL form, with the 4-bit condition-code mask being // the first operand. It seems friendlier to use mnemonic forms like // JE and JLH when writing out the assembly though. 
-multiclass CondBranches<Operand imm, string short, string long> { - let isBranch = 1, isTerminator = 1, Uses = [PSW] in { - def "" : InstRI<0xA74, (outs), (ins imm:$cond, brtarget16:$dst), short, []>; - def L : InstRIL<0xC04, (outs), (ins imm:$cond, brtarget32:$dst), long, []>; +let isBranch = 1, isTerminator = 1, Uses = [CC] in { + let isCodeGenOnly = 1, CCMaskFirst = 1 in { + def BRC : InstRI<0xA74, (outs), (ins cond4:$valid, cond4:$R1, + brtarget16:$I2), "j$R1\t$I2", + [(z_br_ccmask cond4:$valid, cond4:$R1, bb:$I2)]>; + def BRCL : InstRIL<0xC04, (outs), (ins cond4:$valid, cond4:$R1, + brtarget32:$I2), "jg$R1\t$I2", []>; + } + def AsmBRC : InstRI<0xA74, (outs), (ins uimm8zx4:$R1, brtarget16:$I2), + "brc\t$R1, $I2", []>; + def AsmBRCL : InstRIL<0xC04, (outs), (ins uimm8zx4:$R1, brtarget32:$I2), + "brcl\t$R1, $I2", []>; + def AsmBCR : InstRR<0x07, (outs), (ins uimm8zx4:$R1, GR64:$R2), + "bcr\t$R1, $R2", []>; +} + +// Fused compare-and-branch instructions. As for normal branches, +// we handle these instructions internally in their raw CRJ-like form, +// but use assembly macros like CRJE when writing them out. +// +// These instructions do not use or clobber the condition codes. +// We nevertheless pretend that they clobber CC, so that we can lower +// them to separate comparisons and BRCLs if the branch ends up being +// out of range. 
+multiclass CompareBranches<Operand ccmask, string pos1, string pos2> { + let isBranch = 1, isTerminator = 1, Defs = [CC] in { + def RJ : InstRIEb<0xEC76, (outs), (ins GR32:$R1, GR32:$R2, ccmask:$M3, + brtarget16:$RI4), + "crj"##pos1##"\t$R1, $R2, "##pos2##"$RI4", []>; + def GRJ : InstRIEb<0xEC64, (outs), (ins GR64:$R1, GR64:$R2, ccmask:$M3, + brtarget16:$RI4), + "cgrj"##pos1##"\t$R1, $R2, "##pos2##"$RI4", []>; + def IJ : InstRIEc<0xEC7E, (outs), (ins GR32:$R1, imm32sx8:$I2, ccmask:$M3, + brtarget16:$RI4), + "cij"##pos1##"\t$R1, $I2, "##pos2##"$RI4", []>; + def GIJ : InstRIEc<0xEC7C, (outs), (ins GR64:$R1, imm64sx8:$I2, ccmask:$M3, + brtarget16:$RI4), + "cgij"##pos1##"\t$R1, $I2, "##pos2##"$RI4", []>; + def LRJ : InstRIEb<0xEC77, (outs), (ins GR32:$R1, GR32:$R2, ccmask:$M3, + brtarget16:$RI4), + "clrj"##pos1##"\t$R1, $R2, "##pos2##"$RI4", []>; + def LGRJ : InstRIEb<0xEC65, (outs), (ins GR64:$R1, GR64:$R2, ccmask:$M3, + brtarget16:$RI4), + "clgrj"##pos1##"\t$R1, $R2, "##pos2##"$RI4", []>; + def LIJ : InstRIEc<0xEC7F, (outs), (ins GR32:$R1, imm32zx8:$I2, ccmask:$M3, + brtarget16:$RI4), + "clij"##pos1##"\t$R1, $I2, "##pos2##"$RI4", []>; + def LGIJ : InstRIEc<0xEC7D, (outs), (ins GR64:$R1, imm64zx8:$I2, ccmask:$M3, + brtarget16:$RI4), + "clgij"##pos1##"\t$R1, $I2, "##pos2##"$RI4", []>; } } let isCodeGenOnly = 1 in - defm BRC : CondBranches<cond4, "j$cond\t$dst", "jg$cond\t$dst">; -let isAsmParserOnly = 1 in - defm AsmBRC : CondBranches<uimm8zx4, "brc\t$cond, $dst", "brcl\t$cond, $dst">; - -def : Pat<(z_br_ccmask cond4:$cond, bb:$dst), (BRCL cond4:$cond, bb:$dst)>; - -// Define AsmParser mnemonics for each condition code. 
-multiclass CondExtendedMnemonic<bits<4> Cond, string name> { - let R1 = Cond in { - def "" : InstRI<0xA74, (outs), (ins brtarget16:$dst), - "j"##name##"\t$dst", []>; - def L : InstRIL<0xC04, (outs), (ins brtarget32:$dst), - "jg"##name##"\t$dst", []>; + defm C : CompareBranches<cond4, "$M3", "">; +defm AsmC : CompareBranches<uimm8zx4, "", "$M3, ">; + +// Define AsmParser mnemonics for each general condition-code mask +// (integer or floating-point) +multiclass CondExtendedMnemonic<bits<4> ccmask, string name> { + let R1 = ccmask in { + def J : InstRI<0xA74, (outs), (ins brtarget16:$I2), + "j"##name##"\t$I2", []>; + def JG : InstRIL<0xC04, (outs), (ins brtarget32:$I2), + "jg"##name##"\t$I2", []>; + def BR : InstRR<0x07, (outs), (ins ADDR64:$R2), "b"##name##"r\t$R2", []>; + } + def LOCR : FixedCondUnaryRRF<"locr"##name, 0xB9F2, GR32, GR32, ccmask>; + def LOCGR : FixedCondUnaryRRF<"locgr"##name, 0xB9E2, GR64, GR64, ccmask>; + def LOC : FixedCondUnaryRSY<"loc"##name, 0xEBF2, GR32, ccmask, 4>; + def LOCG : FixedCondUnaryRSY<"locg"##name, 0xEBE2, GR64, ccmask, 8>; + def STOC : FixedCondStoreRSY<"stoc"##name, 0xEBF3, GR32, ccmask, 4>; + def STOCG : FixedCondStoreRSY<"stocg"##name, 0xEBE3, GR64, ccmask, 8>; +} +defm AsmO : CondExtendedMnemonic<1, "o">; +defm AsmH : CondExtendedMnemonic<2, "h">; +defm AsmNLE : CondExtendedMnemonic<3, "nle">; +defm AsmL : CondExtendedMnemonic<4, "l">; +defm AsmNHE : CondExtendedMnemonic<5, "nhe">; +defm AsmLH : CondExtendedMnemonic<6, "lh">; +defm AsmNE : CondExtendedMnemonic<7, "ne">; +defm AsmE : CondExtendedMnemonic<8, "e">; +defm AsmNLH : CondExtendedMnemonic<9, "nlh">; +defm AsmHE : CondExtendedMnemonic<10, "he">; +defm AsmNL : CondExtendedMnemonic<11, "nl">; +defm AsmLE : CondExtendedMnemonic<12, "le">; +defm AsmNH : CondExtendedMnemonic<13, "nh">; +defm AsmNO : CondExtendedMnemonic<14, "no">; + +// Define AsmParser mnemonics for each integer condition-code mask. 
+// This is like the list above, except that condition 3 is not possible +// and that the low bit of the mask is therefore always 0. This means +// that each condition has two names. Conditions "o" and "no" are not used. +// +// We don't make one of the two names an alias of the other because +// we need the custom parsing routines to select the correct register class. +multiclass IntCondExtendedMnemonicA<bits<4> ccmask, string name> { + let M3 = ccmask in { + def CR : InstRIEb<0xEC76, (outs), (ins GR32:$R1, GR32:$R2, + brtarget16:$RI4), + "crj"##name##"\t$R1, $R2, $RI4", []>; + def CGR : InstRIEb<0xEC64, (outs), (ins GR64:$R1, GR64:$R2, + brtarget16:$RI4), + "cgrj"##name##"\t$R1, $R2, $RI4", []>; + def CI : InstRIEc<0xEC7E, (outs), (ins GR32:$R1, imm32sx8:$I2, + brtarget16:$RI4), + "cij"##name##"\t$R1, $I2, $RI4", []>; + def CGI : InstRIEc<0xEC7C, (outs), (ins GR64:$R1, imm64sx8:$I2, + brtarget16:$RI4), + "cgij"##name##"\t$R1, $I2, $RI4", []>; + def CLR : InstRIEb<0xEC77, (outs), (ins GR32:$R1, GR32:$R2, + brtarget16:$RI4), + "clrj"##name##"\t$R1, $R2, $RI4", []>; + def CLGR : InstRIEb<0xEC65, (outs), (ins GR64:$R1, GR64:$R2, + brtarget16:$RI4), + "clgrj"##name##"\t$R1, $R2, $RI4", []>; + def CLI : InstRIEc<0xEC7F, (outs), (ins GR32:$R1, imm32zx8:$I2, + brtarget16:$RI4), + "clij"##name##"\t$R1, $I2, $RI4", []>; + def CLGI : InstRIEc<0xEC7D, (outs), (ins GR64:$R1, imm64zx8:$I2, + brtarget16:$RI4), + "clgij"##name##"\t$R1, $I2, $RI4", []>; } } -let isAsmParserOnly = 1 in { - defm AsmJO : CondExtendedMnemonic<1, "o">; - defm AsmJH : CondExtendedMnemonic<2, "h">; - defm AsmJNLE : CondExtendedMnemonic<3, "nle">; - defm AsmJL : CondExtendedMnemonic<4, "l">; - defm AsmJNHE : CondExtendedMnemonic<5, "nhe">; - defm AsmJLH : CondExtendedMnemonic<6, "lh">; - defm AsmJNE : CondExtendedMnemonic<7, "ne">; - defm AsmJE : CondExtendedMnemonic<8, "e">; - defm AsmJNLH : CondExtendedMnemonic<9, "nlh">; - defm AsmJHE : CondExtendedMnemonic<10, "he">; - defm AsmJNL : 
CondExtendedMnemonic<11, "nl">; - defm AsmJLE : CondExtendedMnemonic<12, "le">; - defm AsmJNH : CondExtendedMnemonic<13, "nh">; - defm AsmJNO : CondExtendedMnemonic<14, "no">; +multiclass IntCondExtendedMnemonic<bits<4> ccmask, string name1, string name2> + : IntCondExtendedMnemonicA<ccmask, name1> { + let isAsmParserOnly = 1 in + defm Alt : IntCondExtendedMnemonicA<ccmask, name2>; +} +defm AsmJH : IntCondExtendedMnemonic<2, "h", "nle">; +defm AsmJL : IntCondExtendedMnemonic<4, "l", "nhe">; +defm AsmJLH : IntCondExtendedMnemonic<6, "lh", "ne">; +defm AsmJE : IntCondExtendedMnemonic<8, "e", "nlh">; +defm AsmJHE : IntCondExtendedMnemonic<10, "he", "nl">; +defm AsmJLE : IntCondExtendedMnemonic<12, "le", "nh">; + +// Decrement a register and branch if it is nonzero. These don't clobber CC, +// but we might need to split long branches into sequences that do. +let Defs = [CC] in { + def BRCT : BranchUnaryRI<"brct", 0xA76, GR32>; + def BRCTG : BranchUnaryRI<"brctg", 0xA77, GR64>; } -def Select32 : SelectWrapper<GR32>; -def Select64 : SelectWrapper<GR64>; +//===----------------------------------------------------------------------===// +// Select instructions +//===----------------------------------------------------------------------===// + +def Select32Mux : SelectWrapper<GRX32>, Requires<[FeatureHighWord]>; +def Select32 : SelectWrapper<GR32>; +def Select64 : SelectWrapper<GR64>; + +// We don't define 32-bit Mux stores because the low-only STOC should +// always be used if possible. 
+defm CondStore8Mux : CondStores<GRX32, nonvolatile_truncstorei8, + nonvolatile_anyextloadi8, bdxaddr20only>, + Requires<[FeatureHighWord]>; +defm CondStore16Mux : CondStores<GRX32, nonvolatile_truncstorei16, + nonvolatile_anyextloadi16, bdxaddr20only>, + Requires<[FeatureHighWord]>; +defm CondStore8 : CondStores<GR32, nonvolatile_truncstorei8, + nonvolatile_anyextloadi8, bdxaddr20only>; +defm CondStore16 : CondStores<GR32, nonvolatile_truncstorei16, + nonvolatile_anyextloadi16, bdxaddr20only>; +defm CondStore32 : CondStores<GR32, nonvolatile_store, + nonvolatile_load, bdxaddr20only>; + +defm : CondStores64<CondStore8, CondStore8Inv, nonvolatile_truncstorei8, + nonvolatile_anyextloadi8, bdxaddr20only>; +defm : CondStores64<CondStore16, CondStore16Inv, nonvolatile_truncstorei16, + nonvolatile_anyextloadi16, bdxaddr20only>; +defm : CondStores64<CondStore32, CondStore32Inv, nonvolatile_truncstorei32, + nonvolatile_anyextloadi32, bdxaddr20only>; +defm CondStore64 : CondStores<GR64, nonvolatile_store, + nonvolatile_load, bdxaddr20only>; //===----------------------------------------------------------------------===// // Call instructions @@ -110,26 +235,30 @@ def Select64 : SelectWrapper<GR64>; // The definitions here are for the call-clobbered registers. 
let isCall = 1, Defs = [R0D, R1D, R2D, R3D, R4D, R5D, R14D, - F0D, F1D, F2D, F3D, F4D, F5D, F6D, F7D], - R1 = 14, isCodeGenOnly = 1 in { - def BRAS : InstRI<0xA75, (outs), (ins pcrel16call:$dst, variable_ops), - "bras\t%r14, $dst", []>; - def BRASL : InstRIL<0xC05, (outs), (ins pcrel32call:$dst, variable_ops), - "brasl\t%r14, $dst", [(z_call pcrel32call:$dst)]>; - def BASR : InstRR<0x0D, (outs), (ins ADDR64:$dst, variable_ops), - "basr\t%r14, $dst", [(z_call ADDR64:$dst)]>; + F0D, F1D, F2D, F3D, F4D, F5D, F6D, F7D, CC] in { + def CallBRASL : Alias<6, (outs), (ins pcrel32:$I2, variable_ops), + [(z_call pcrel32:$I2)]>; + def CallBASR : Alias<2, (outs), (ins ADDR64:$R2, variable_ops), + [(z_call ADDR64:$R2)]>; +} + +// Sibling calls. Indirect sibling calls must be via R1, since R2 upwards +// are argument registers and since branching to R0 is a no-op. +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { + def CallJG : Alias<6, (outs), (ins pcrel32:$I2), + [(z_sibcall pcrel32:$I2)]>; + let Uses = [R1D] in + def CallBR : Alias<2, (outs), (ins), [(z_sibcall R1D)]>; } // Define the general form of the call instructions for the asm parser. // These instructions don't hard-code %r14 as the return address register. 
-let isAsmParserOnly = 1 in { - def AsmBRAS : InstRI<0xA75, (outs), (ins GR64:$save, brtarget16:$dst), - "bras\t$save, $dst", []>; - def AsmBRASL : InstRIL<0xC05, (outs), (ins GR64:$save, brtarget32:$dst), - "brasl\t$save, $dst", []>; - def AsmBASR : InstRR<0x0D, (outs), (ins GR64:$save, ADDR64:$dst), - "basr\t$save, $dst", []>; -} +def BRAS : InstRI<0xA75, (outs), (ins GR64:$R1, brtarget16:$I2), + "bras\t$R1, $I2", []>; +def BRASL : InstRIL<0xC05, (outs), (ins GR64:$R1, brtarget32:$I2), + "brasl\t$R1, $I2", []>; +def BASR : InstRR<0x0D, (outs), (ins GR64:$R1, ADDR64:$R2), + "basr\t$R1, $R2", []>; //===----------------------------------------------------------------------===// // Move instructions @@ -137,13 +266,34 @@ let isAsmParserOnly = 1 in { // Register moves. let neverHasSideEffects = 1 in { - def LR : UnaryRR <"lr", 0x18, null_frag, GR32, GR32>; - def LGR : UnaryRRE<"lgr", 0xB904, null_frag, GR64, GR64>; + // Expands to LR, RISBHG or RISBLG, depending on the choice of registers. + def LRMux : UnaryRRPseudo<"l", null_frag, GRX32, GRX32>, + Requires<[FeatureHighWord]>; + def LR : UnaryRR <"l", 0x18, null_frag, GR32, GR32>; + def LGR : UnaryRRE<"lg", 0xB904, null_frag, GR64, GR64>; +} +let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in { + def LTR : UnaryRR <"lt", 0x12, null_frag, GR32, GR32>; + def LTGR : UnaryRRE<"ltg", 0xB902, null_frag, GR64, GR64>; +} + +// Move on condition. +let isCodeGenOnly = 1, Uses = [CC] in { + def LOCR : CondUnaryRRF<"loc", 0xB9F2, GR32, GR32>; + def LOCGR : CondUnaryRRF<"locg", 0xB9E2, GR64, GR64>; +} +let Uses = [CC] in { + def AsmLOCR : AsmCondUnaryRRF<"loc", 0xB9F2, GR32, GR32>; + def AsmLOCGR : AsmCondUnaryRRF<"locg", 0xB9E2, GR64, GR64>; } // Immediate moves. -let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { - // 16-bit sign-extended immediates. +let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isMoveImm = 1, + isReMaterializable = 1 in { + // 16-bit sign-extended immediates. 
LHIMux expands to LHI or IIHF, + // deopending on the choice of register. + def LHIMux : UnaryRIPseudo<bitconvert, GRX32, imm32sx16>, + Requires<[FeatureHighWord]>; def LHI : UnaryRI<"lhi", 0xA78, bitconvert, GR32, imm32sx16>; def LGHI : UnaryRI<"lghi", 0xA79, bitconvert, GR64, imm64sx16>; @@ -161,11 +311,13 @@ let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { // Register loads. let canFoldAsLoad = 1, SimpleBDXLoad = 1 in { - defm L : UnaryRXPair<"l", 0x58, 0xE358, load, GR32>; - def LRL : UnaryRILPC<"lrl", 0xC4D, aligned_load, GR32>; - - def LG : UnaryRXY<"lg", 0xE304, load, GR64>; - def LGRL : UnaryRILPC<"lgrl", 0xC48, aligned_load, GR64>; + // Expands to L, LY or LFH, depending on the choice of register. + def LMux : UnaryRXYPseudo<"l", load, GRX32, 4>, + Requires<[FeatureHighWord]>; + defm L : UnaryRXPair<"l", 0x58, 0xE358, load, GR32, 4>; + def LFH : UnaryRXY<"lfh", 0xE3CA, load, GRH32, 4>, + Requires<[FeatureHighWord]>; + def LG : UnaryRXY<"lg", 0xE304, load, GR64, 8>; // These instructions are split after register allocation, so we don't // want a custom inserter. @@ -174,16 +326,35 @@ let canFoldAsLoad = 1, SimpleBDXLoad = 1 in { [(set GR128:$dst, (load bdxaddr20only128:$src))]>; } } +let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in { + def LT : UnaryRXY<"lt", 0xE312, load, GR32, 4>; + def LTG : UnaryRXY<"ltg", 0xE302, load, GR64, 8>; +} + +let canFoldAsLoad = 1 in { + def LRL : UnaryRILPC<"lrl", 0xC4D, aligned_load, GR32>; + def LGRL : UnaryRILPC<"lgrl", 0xC48, aligned_load, GR64>; +} + +// Load on condition. +let isCodeGenOnly = 1, Uses = [CC] in { + def LOC : CondUnaryRSY<"loc", 0xEBF2, nonvolatile_load, GR32, 4>; + def LOCG : CondUnaryRSY<"locg", 0xEBE2, nonvolatile_load, GR64, 8>; +} +let Uses = [CC] in { + def AsmLOC : AsmCondUnaryRSY<"loc", 0xEBF2, GR32, 4>; + def AsmLOCG : AsmCondUnaryRSY<"locg", 0xEBE2, GR64, 8>; +} // Register stores. 
let SimpleBDXStore = 1 in { - let isCodeGenOnly = 1 in { - defm ST32 : StoreRXPair<"st", 0x50, 0xE350, store, GR32>; - def STRL32 : StoreRILPC<"strl", 0xC4F, aligned_store, GR32>; - } - - def STG : StoreRXY<"stg", 0xE324, store, GR64>; - def STGRL : StoreRILPC<"stgrl", 0xC4B, aligned_store, GR64>; + // Expands to ST, STY or STFH, depending on the choice of register. + def STMux : StoreRXYPseudo<store, GRX32, 4>, + Requires<[FeatureHighWord]>; + defm ST : StoreRXPair<"st", 0x50, 0xE350, store, GR32, 4>; + def STFH : StoreRXY<"stfh", 0xE3CB, store, GRH32, 4>, + Requires<[FeatureHighWord]>; + def STG : StoreRXY<"stg", 0xE324, store, GR64, 8>; // These instructions are split after register allocation, so we don't // want a custom inserter. @@ -192,6 +363,18 @@ let SimpleBDXStore = 1 in { [(store GR128:$src, bdxaddr20only128:$dst)]>; } } +def STRL : StoreRILPC<"strl", 0xC4F, aligned_store, GR32>; +def STGRL : StoreRILPC<"stgrl", 0xC4B, aligned_store, GR64>; + +// Store on condition. +let isCodeGenOnly = 1, Uses = [CC] in { + def STOC : CondStoreRSY<"stoc", 0xEBF3, GR32, 4>; + def STOCG : CondStoreRSY<"stocg", 0xEBE3, GR64, 8>; +} +let Uses = [CC] in { + def AsmSTOC : AsmCondStoreRSY<"stoc", 0xEBF3, GR32, 4>; + def AsmSTOCG : AsmCondStoreRSY<"stocg", 0xEBE3, GR64, 8>; +} // 8-bit immediate stores to 8-bit fields. defm MVI : StoreSIPair<"mvi", 0x92, 0xEB52, truncstorei8, imm32zx8trunc>; @@ -201,50 +384,70 @@ def MVHHI : StoreSIL<"mvhhi", 0xE544, truncstorei16, imm32sx16trunc>; def MVHI : StoreSIL<"mvhi", 0xE54C, store, imm32sx16>; def MVGHI : StoreSIL<"mvghi", 0xE548, store, imm64sx16>; +// Memory-to-memory moves. +let mayLoad = 1, mayStore = 1 in + defm MVC : MemorySS<"mvc", 0xD2, z_mvc, z_mvc_loop>; + +// String moves. 
+let mayLoad = 1, mayStore = 1, Defs = [CC], Uses = [R0L] in + defm MVST : StringRRE<"mvst", 0xB255, z_stpcpy>; + //===----------------------------------------------------------------------===// // Sign extensions //===----------------------------------------------------------------------===// +// +// Note that putting these before zero extensions mean that we will prefer +// them for anyextload*. There's not really much to choose between the two +// either way, but signed-extending loads have a short LH and a long LHY, +// while zero-extending loads have only the long LLH. +// +//===----------------------------------------------------------------------===// // 32-bit extensions from registers. let neverHasSideEffects = 1 in { - def LBR : UnaryRRE<"lbr", 0xB926, sext8, GR32, GR32>; - def LHR : UnaryRRE<"lhr", 0xB927, sext16, GR32, GR32>; + def LBR : UnaryRRE<"lb", 0xB926, sext8, GR32, GR32>; + def LHR : UnaryRRE<"lh", 0xB927, sext16, GR32, GR32>; } // 64-bit extensions from registers. let neverHasSideEffects = 1 in { - def LGBR : UnaryRRE<"lgbr", 0xB906, sext8, GR64, GR64>; - def LGHR : UnaryRRE<"lghr", 0xB907, sext16, GR64, GR64>; - def LGFR : UnaryRRE<"lgfr", 0xB914, sext32, GR64, GR32>; + def LGBR : UnaryRRE<"lgb", 0xB906, sext8, GR64, GR64>; + def LGHR : UnaryRRE<"lgh", 0xB907, sext16, GR64, GR64>; + def LGFR : UnaryRRE<"lgf", 0xB914, sext32, GR64, GR32>; } +let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in + def LTGFR : UnaryRRE<"ltgf", 0xB912, null_frag, GR64, GR64>; // Match 32-to-64-bit sign extensions in which the source is already // in a 64-bit register. def : Pat<(sext_inreg GR64:$src, i32), - (LGFR (EXTRACT_SUBREG GR64:$src, subreg_32bit))>; - -// 32-bit extensions from memory. 
-def LB : UnaryRXY<"lb", 0xE376, sextloadi8, GR32>; -defm LH : UnaryRXPair<"lh", 0x48, 0xE378, sextloadi16, GR32>; -def LHRL : UnaryRILPC<"lhrl", 0xC45, aligned_sextloadi16, GR32>; + (LGFR (EXTRACT_SUBREG GR64:$src, subreg_l32))>; + +// 32-bit extensions from 8-bit memory. LBMux expands to LB or LBH, +// depending on the choice of register. +def LBMux : UnaryRXYPseudo<"lb", asextloadi8, GRX32, 1>, + Requires<[FeatureHighWord]>; +def LB : UnaryRXY<"lb", 0xE376, asextloadi8, GR32, 1>; +def LBH : UnaryRXY<"lbh", 0xE3C0, asextloadi8, GRH32, 1>, + Requires<[FeatureHighWord]>; + +// 32-bit extensions from 16-bit memory. LHMux expands to LH or LHH, +// depending on the choice of register. +def LHMux : UnaryRXYPseudo<"lh", asextloadi16, GRX32, 2>, + Requires<[FeatureHighWord]>; +defm LH : UnaryRXPair<"lh", 0x48, 0xE378, asextloadi16, GR32, 2>; +def LHH : UnaryRXY<"lhh", 0xE3C4, asextloadi16, GRH32, 2>, + Requires<[FeatureHighWord]>; +def LHRL : UnaryRILPC<"lhrl", 0xC45, aligned_asextloadi16, GR32>; // 64-bit extensions from memory. -def LGB : UnaryRXY<"lgb", 0xE377, sextloadi8, GR64>; -def LGH : UnaryRXY<"lgh", 0xE315, sextloadi16, GR64>; -def LGF : UnaryRXY<"lgf", 0xE314, sextloadi32, GR64>; -def LGHRL : UnaryRILPC<"lghrl", 0xC44, aligned_sextloadi16, GR64>; -def LGFRL : UnaryRILPC<"lgfrl", 0xC4C, aligned_sextloadi32, GR64>; - -// If the sign of a load-extend operation doesn't matter, use the signed ones. -// There's not really much to choose between the sign and zero extensions, -// but LH is more compact than LLH for small offsets. 
-def : Pat<(i32 (extloadi8 bdxaddr20only:$src)), (LB bdxaddr20only:$src)>; -def : Pat<(i32 (extloadi16 bdxaddr12pair:$src)), (LH bdxaddr12pair:$src)>; -def : Pat<(i32 (extloadi16 bdxaddr20pair:$src)), (LHY bdxaddr20pair:$src)>; - -def : Pat<(i64 (extloadi8 bdxaddr20only:$src)), (LGB bdxaddr20only:$src)>; -def : Pat<(i64 (extloadi16 bdxaddr20only:$src)), (LGH bdxaddr20only:$src)>; -def : Pat<(i64 (extloadi32 bdxaddr20only:$src)), (LGF bdxaddr20only:$src)>; +def LGB : UnaryRXY<"lgb", 0xE377, asextloadi8, GR64, 1>; +def LGH : UnaryRXY<"lgh", 0xE315, asextloadi16, GR64, 2>; +def LGF : UnaryRXY<"lgf", 0xE314, asextloadi32, GR64, 4>; +def LGHRL : UnaryRILPC<"lghrl", 0xC44, aligned_asextloadi16, GR64>; +def LGFRL : UnaryRILPC<"lgfrl", 0xC4C, aligned_asextloadi32, GR64>; +let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in + def LTGF : UnaryRXY<"ltgf", 0xE332, asextloadi32, GR64, 4>; //===----------------------------------------------------------------------===// // Zero extensions @@ -252,33 +455,51 @@ def : Pat<(i64 (extloadi32 bdxaddr20only:$src)), (LGF bdxaddr20only:$src)>; // 32-bit extensions from registers. let neverHasSideEffects = 1 in { - def LLCR : UnaryRRE<"llcr", 0xB994, zext8, GR32, GR32>; - def LLHR : UnaryRRE<"llhr", 0xB995, zext16, GR32, GR32>; + // Expands to LLCR or RISB[LH]G, depending on the choice of registers. + def LLCRMux : UnaryRRPseudo<"llc", zext8, GRX32, GRX32>, + Requires<[FeatureHighWord]>; + def LLCR : UnaryRRE<"llc", 0xB994, zext8, GR32, GR32>; + // Expands to LLHR or RISB[LH]G, depending on the choice of registers. + def LLHRMux : UnaryRRPseudo<"llh", zext16, GRX32, GRX32>, + Requires<[FeatureHighWord]>; + def LLHR : UnaryRRE<"llh", 0xB995, zext16, GR32, GR32>; } // 64-bit extensions from registers. 
let neverHasSideEffects = 1 in { - def LLGCR : UnaryRRE<"llgcr", 0xB984, zext8, GR64, GR64>; - def LLGHR : UnaryRRE<"llghr", 0xB985, zext16, GR64, GR64>; - def LLGFR : UnaryRRE<"llgfr", 0xB916, zext32, GR64, GR32>; + def LLGCR : UnaryRRE<"llgc", 0xB984, zext8, GR64, GR64>; + def LLGHR : UnaryRRE<"llgh", 0xB985, zext16, GR64, GR64>; + def LLGFR : UnaryRRE<"llgf", 0xB916, zext32, GR64, GR32>; } // Match 32-to-64-bit zero extensions in which the source is already // in a 64-bit register. def : Pat<(and GR64:$src, 0xffffffff), - (LLGFR (EXTRACT_SUBREG GR64:$src, subreg_32bit))>; - -// 32-bit extensions from memory. -def LLC : UnaryRXY<"llc", 0xE394, zextloadi8, GR32>; -def LLH : UnaryRXY<"llh", 0xE395, zextloadi16, GR32>; -def LLHRL : UnaryRILPC<"llhrl", 0xC42, aligned_zextloadi16, GR32>; + (LLGFR (EXTRACT_SUBREG GR64:$src, subreg_l32))>; + +// 32-bit extensions from 8-bit memory. LLCMux expands to LLC or LLCH, +// depending on the choice of register. +def LLCMux : UnaryRXYPseudo<"llc", azextloadi8, GRX32, 1>, + Requires<[FeatureHighWord]>; +def LLC : UnaryRXY<"llc", 0xE394, azextloadi8, GR32, 1>; +def LLCH : UnaryRXY<"llch", 0xE3C2, azextloadi8, GR32, 1>, + Requires<[FeatureHighWord]>; + +// 32-bit extensions from 16-bit memory. LLHMux expands to LLH or LLHH, +// depending on the choice of register. +def LLHMux : UnaryRXYPseudo<"llh", azextloadi16, GRX32, 2>, + Requires<[FeatureHighWord]>; +def LLH : UnaryRXY<"llh", 0xE395, azextloadi16, GR32, 2>; +def LLHH : UnaryRXY<"llhh", 0xE3C6, azextloadi16, GR32, 2>, + Requires<[FeatureHighWord]>; +def LLHRL : UnaryRILPC<"llhrl", 0xC42, aligned_azextloadi16, GR32>; // 64-bit extensions from memory. 
-def LLGC : UnaryRXY<"llgc", 0xE390, zextloadi8, GR64>; -def LLGH : UnaryRXY<"llgh", 0xE391, zextloadi16, GR64>; -def LLGF : UnaryRXY<"llgf", 0xE316, zextloadi32, GR64>; -def LLGHRL : UnaryRILPC<"llghrl", 0xC46, aligned_zextloadi16, GR64>; -def LLGFRL : UnaryRILPC<"llgfrl", 0xC4E, aligned_zextloadi32, GR64>; +def LLGC : UnaryRXY<"llgc", 0xE390, azextloadi8, GR64, 1>; +def LLGH : UnaryRXY<"llgh", 0xE391, azextloadi16, GR64, 2>; +def LLGF : UnaryRXY<"llgf", 0xE316, azextloadi32, GR64, 4>; +def LLGHRL : UnaryRILPC<"llghrl", 0xC46, aligned_azextloadi16, GR64>; +def LLGFRL : UnaryRILPC<"llgfrl", 0xC4E, aligned_azextloadi32, GR64>; //===----------------------------------------------------------------------===// // Truncations @@ -286,21 +507,31 @@ def LLGFRL : UnaryRILPC<"llgfrl", 0xC4E, aligned_zextloadi32, GR64>; // Truncations of 64-bit registers to 32-bit registers. def : Pat<(i32 (trunc GR64:$src)), - (EXTRACT_SUBREG GR64:$src, subreg_32bit)>; - -// Truncations of 32-bit registers to memory. -let isCodeGenOnly = 1 in { - defm STC32 : StoreRXPair<"stc", 0x42, 0xE372, truncstorei8, GR32>; - defm STH32 : StoreRXPair<"sth", 0x40, 0xE370, truncstorei16, GR32>; - def STHRL32 : StoreRILPC<"sthrl", 0xC47, aligned_truncstorei16, GR32>; -} + (EXTRACT_SUBREG GR64:$src, subreg_l32)>; + +// Truncations of 32-bit registers to 8-bit memory. STCMux expands to +// STC, STCY or STCH, depending on the choice of register. +def STCMux : StoreRXYPseudo<truncstorei8, GRX32, 1>, + Requires<[FeatureHighWord]>; +defm STC : StoreRXPair<"stc", 0x42, 0xE372, truncstorei8, GR32, 1>; +def STCH : StoreRXY<"stch", 0xE3C3, truncstorei8, GRH32, 1>, + Requires<[FeatureHighWord]>; + +// Truncations of 32-bit registers to 16-bit memory. STHMux expands to +// STH, STHY or STHH, depending on the choice of register. 
+def STHMux : StoreRXYPseudo<truncstorei16, GRX32, 1>, + Requires<[FeatureHighWord]>; +defm STH : StoreRXPair<"sth", 0x40, 0xE370, truncstorei16, GR32, 2>; +def STHH : StoreRXY<"sthh", 0xE3C7, truncstorei16, GRH32, 2>, + Requires<[FeatureHighWord]>; +def STHRL : StoreRILPC<"sthrl", 0xC47, aligned_truncstorei16, GR32>; // Truncations of 64-bit registers to memory. -defm STC : StoreRXPair<"stc", 0x42, 0xE372, truncstorei8, GR64>; -defm STH : StoreRXPair<"sth", 0x40, 0xE370, truncstorei16, GR64>; -def STHRL : StoreRILPC<"sthrl", 0xC47, aligned_truncstorei16, GR64>; -defm ST : StoreRXPair<"st", 0x50, 0xE350, truncstorei32, GR64>; -def STRL : StoreRILPC<"strl", 0xC4F, aligned_truncstorei32, GR64>; +defm : StoreGR64Pair<STC, STCY, truncstorei8>; +defm : StoreGR64Pair<STH, STHY, truncstorei16>; +def : StoreGR64PC<STHRL, aligned_truncstorei16>; +defm : StoreGR64Pair<ST, STY, truncstorei32>; +def : StoreGR64PC<STRL, aligned_truncstorei32>; //===----------------------------------------------------------------------===// // Multi-register moves @@ -318,50 +549,77 @@ def STMG : StoreMultipleRSY<"stmg", 0xEB24, GR64>; // Byte-swapping register moves. let neverHasSideEffects = 1 in { - def LRVR : UnaryRRE<"lrvr", 0xB91F, bswap, GR32, GR32>; - def LRVGR : UnaryRRE<"lrvgr", 0xB90F, bswap, GR64, GR64>; + def LRVR : UnaryRRE<"lrv", 0xB91F, bswap, GR32, GR32>; + def LRVGR : UnaryRRE<"lrvg", 0xB90F, bswap, GR64, GR64>; } -// Byte-swapping loads. -def LRV : UnaryRXY<"lrv", 0xE31E, loadu<bswap>, GR32>; -def LRVG : UnaryRXY<"lrvg", 0xE30F, loadu<bswap>, GR64>; +// Byte-swapping loads. Unlike normal loads, these instructions are +// allowed to access storage more than once. +def LRV : UnaryRXY<"lrv", 0xE31E, loadu<bswap, nonvolatile_load>, GR32, 4>; +def LRVG : UnaryRXY<"lrvg", 0xE30F, loadu<bswap, nonvolatile_load>, GR64, 8>; -// Byte-swapping stores. 
-def STRV : StoreRXY<"strv", 0xE33E, storeu<bswap>, GR32>; -def STRVG : StoreRXY<"strvg", 0xE32F, storeu<bswap>, GR64>; +// Likewise byte-swapping stores. +def STRV : StoreRXY<"strv", 0xE33E, storeu<bswap, nonvolatile_store>, GR32, 4>; +def STRVG : StoreRXY<"strvg", 0xE32F, storeu<bswap, nonvolatile_store>, + GR64, 8>; //===----------------------------------------------------------------------===// // Load address instructions //===----------------------------------------------------------------------===// // Load BDX-style addresses. -let neverHasSideEffects = 1, Function = "la" in { - let PairType = "12" in - def LA : InstRX<0x41, (outs GR64:$dst), (ins laaddr12pair:$src), - "la\t$dst, $src", - [(set GR64:$dst, laaddr12pair:$src)]>; - let PairType = "20" in - def LAY : InstRXY<0xE371, (outs GR64:$dst), (ins laaddr20pair:$src), - "lay\t$dst, $src", - [(set GR64:$dst, laaddr20pair:$src)]>; +let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isReMaterializable = 1, + DispKey = "la" in { + let DispSize = "12" in + def LA : InstRX<0x41, (outs GR64:$R1), (ins laaddr12pair:$XBD2), + "la\t$R1, $XBD2", + [(set GR64:$R1, laaddr12pair:$XBD2)]>; + let DispSize = "20" in + def LAY : InstRXY<0xE371, (outs GR64:$R1), (ins laaddr20pair:$XBD2), + "lay\t$R1, $XBD2", + [(set GR64:$R1, laaddr20pair:$XBD2)]>; } // Load a PC-relative address. There's no version of this instruction // with a 16-bit offset, so there's no relaxation. 
-let neverHasSideEffects = 1 in { - def LARL : InstRIL<0xC00, (outs GR64:$dst), (ins pcrel32:$src), - "larl\t$dst, $src", - [(set GR64:$dst, pcrel32:$src)]>; +let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isMoveImm = 1, + isReMaterializable = 1 in { + def LARL : InstRIL<0xC00, (outs GR64:$R1), (ins pcrel32:$I2), + "larl\t$R1, $I2", + [(set GR64:$R1, pcrel32:$I2)]>; } //===----------------------------------------------------------------------===// -// Negation +// Absolute and Negation //===----------------------------------------------------------------------===// -let Defs = [PSW] in { - def LCR : UnaryRR <"lcr", 0x13, ineg, GR32, GR32>; - def LCGR : UnaryRRE<"lcgr", 0xB903, ineg, GR64, GR64>; - def LCGFR : UnaryRRE<"lcgfr", 0xB913, null_frag, GR64, GR32>; +let Defs = [CC] in { + let CCValues = 0xF, CompareZeroCCMask = 0x8 in { + def LPR : UnaryRR <"lp", 0x10, z_iabs32, GR32, GR32>; + def LPGR : UnaryRRE<"lpg", 0xB900, z_iabs64, GR64, GR64>; + } + let CCValues = 0xE, CompareZeroCCMask = 0xE in + def LPGFR : UnaryRRE<"lpgf", 0xB910, null_frag, GR64, GR32>; +} +defm : SXU<z_iabs64, LPGFR>; + +let Defs = [CC] in { + let CCValues = 0xF, CompareZeroCCMask = 0x8 in { + def LNR : UnaryRR <"ln", 0x11, z_inegabs32, GR32, GR32>; + def LNGR : UnaryRRE<"lng", 0xB901, z_inegabs64, GR64, GR64>; + } + let CCValues = 0xE, CompareZeroCCMask = 0xE in + def LNGFR : UnaryRRE<"lngf", 0xB911, null_frag, GR64, GR32>; +} +defm : SXU<z_inegabs64, LNGFR>; + +let Defs = [CC] in { + let CCValues = 0xF, CompareZeroCCMask = 0x8 in { + def LCR : UnaryRR <"lc", 0x13, ineg, GR32, GR32>; + def LCGR : UnaryRRE<"lcg", 0xB903, ineg, GR64, GR64>; + } + let CCValues = 0xE, CompareZeroCCMask = 0xE in + def LCGFR : UnaryRRE<"lcgf", 0xB913, null_frag, GR64, GR32>; } defm : SXU<ineg, LCGFR>; @@ -370,69 +628,83 @@ defm : SXU<ineg, LCGFR>; //===----------------------------------------------------------------------===// let isCodeGenOnly = 1 in - defm IC32 : BinaryRXPair<"ic", 0x43, 0xE373, inserti8, 
GR32, zextloadi8>; -defm IC : BinaryRXPair<"ic", 0x43, 0xE373, inserti8, GR64, zextloadi8>; + defm IC32 : BinaryRXPair<"ic", 0x43, 0xE373, inserti8, GR32, azextloadi8, 1>; +defm IC : BinaryRXPair<"ic", 0x43, 0xE373, inserti8, GR64, azextloadi8, 1>; -defm : InsertMem<"inserti8", IC32, GR32, zextloadi8, bdxaddr12pair>; -defm : InsertMem<"inserti8", IC32Y, GR32, zextloadi8, bdxaddr20pair>; +defm : InsertMem<"inserti8", IC32, GR32, azextloadi8, bdxaddr12pair>; +defm : InsertMem<"inserti8", IC32Y, GR32, azextloadi8, bdxaddr20pair>; -defm : InsertMem<"inserti8", IC, GR64, zextloadi8, bdxaddr12pair>; -defm : InsertMem<"inserti8", ICY, GR64, zextloadi8, bdxaddr20pair>; +defm : InsertMem<"inserti8", IC, GR64, azextloadi8, bdxaddr12pair>; +defm : InsertMem<"inserti8", ICY, GR64, azextloadi8, bdxaddr20pair>; // Insertions of a 16-bit immediate, leaving other bits unaffected. // We don't have or_as_insert equivalents of these operations because // OI is available instead. -let isCodeGenOnly = 1 in { - def IILL32 : BinaryRI<"iill", 0xA53, insertll, GR32, imm32ll16>; - def IILH32 : BinaryRI<"iilh", 0xA52, insertlh, GR32, imm32lh16>; -} -def IILL : BinaryRI<"iill", 0xA53, insertll, GR64, imm64ll16>; -def IILH : BinaryRI<"iilh", 0xA52, insertlh, GR64, imm64lh16>; -def IIHL : BinaryRI<"iihl", 0xA51, inserthl, GR64, imm64hl16>; -def IIHH : BinaryRI<"iihh", 0xA50, inserthh, GR64, imm64hh16>; +// +// IIxMux expands to II[LH]x, depending on the choice of register. 
+def IILMux : BinaryRIPseudo<insertll, GRX32, imm32ll16>, + Requires<[FeatureHighWord]>; +def IIHMux : BinaryRIPseudo<insertlh, GRX32, imm32lh16>, + Requires<[FeatureHighWord]>; +def IILL : BinaryRI<"iill", 0xA53, insertll, GR32, imm32ll16>; +def IILH : BinaryRI<"iilh", 0xA52, insertlh, GR32, imm32lh16>; +def IIHL : BinaryRI<"iihl", 0xA51, insertll, GRH32, imm32ll16>; +def IIHH : BinaryRI<"iihh", 0xA50, insertlh, GRH32, imm32lh16>; +def IILL64 : BinaryAliasRI<insertll, GR64, imm64ll16>; +def IILH64 : BinaryAliasRI<insertlh, GR64, imm64lh16>; +def IIHL64 : BinaryAliasRI<inserthl, GR64, imm64hl16>; +def IIHH64 : BinaryAliasRI<inserthh, GR64, imm64hh16>; // ...likewise for 32-bit immediates. For GR32s this is a general // full-width move. (We use IILF rather than something like LLILF // for 32-bit moves because IILF leaves the upper 32 bits of the // GR64 unchanged.) -let isCodeGenOnly = 1 in { - def IILF32 : UnaryRIL<"iilf", 0xC09, bitconvert, GR32, uimm32>; +let isAsCheapAsAMove = 1, isMoveImm = 1, isReMaterializable = 1 in { + def IIFMux : UnaryRIPseudo<bitconvert, GRX32, uimm32>, + Requires<[FeatureHighWord]>; + def IILF : UnaryRIL<"iilf", 0xC09, bitconvert, GR32, uimm32>; + def IIHF : UnaryRIL<"iihf", 0xC08, bitconvert, GRH32, uimm32>; } -def IILF : BinaryRIL<"iilf", 0xC09, insertlf, GR64, imm64lf32>; -def IIHF : BinaryRIL<"iihf", 0xC08, inserthf, GR64, imm64hf32>; +def IILF64 : BinaryAliasRIL<insertlf, GR64, imm64lf32>; +def IIHF64 : BinaryAliasRIL<inserthf, GR64, imm64hf32>; // An alternative model of inserthf, with the first operand being // a zero-extended value. 
def : Pat<(or (zext32 GR32:$src), imm64hf32:$imm), - (IIHF (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_32bit), - imm64hf32:$imm)>; + (IIHF64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32), + imm64hf32:$imm)>; //===----------------------------------------------------------------------===// // Addition //===----------------------------------------------------------------------===// // Plain addition. -let Defs = [PSW] in { +let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in { // Addition of a register. let isCommutable = 1 in { - def AR : BinaryRR <"ar", 0x1A, add, GR32, GR32>; - def AGR : BinaryRRE<"agr", 0xB908, add, GR64, GR64>; + defm AR : BinaryRRAndK<"a", 0x1A, 0xB9F8, add, GR32, GR32>; + defm AGR : BinaryRREAndK<"ag", 0xB908, 0xB9E8, add, GR64, GR64>; } - def AGFR : BinaryRRE<"agfr", 0xB918, null_frag, GR64, GR32>; + def AGFR : BinaryRRE<"agf", 0xB918, null_frag, GR64, GR32>; // Addition of signed 16-bit immediates. - def AHI : BinaryRI<"ahi", 0xA7A, add, GR32, imm32sx16>; - def AGHI : BinaryRI<"aghi", 0xA7B, add, GR64, imm64sx16>; + defm AHIMux : BinaryRIAndKPseudo<"ahimux", add, GRX32, imm32sx16>; + defm AHI : BinaryRIAndK<"ahi", 0xA7A, 0xECD8, add, GR32, imm32sx16>; + defm AGHI : BinaryRIAndK<"aghi", 0xA7B, 0xECD9, add, GR64, imm64sx16>; // Addition of signed 32-bit immediates. + def AFIMux : BinaryRIPseudo<add, GRX32, simm32>, + Requires<[FeatureHighWord]>; def AFI : BinaryRIL<"afi", 0xC29, add, GR32, simm32>; + def AIH : BinaryRIL<"aih", 0xCC8, add, GRH32, simm32>, + Requires<[FeatureHighWord]>; def AGFI : BinaryRIL<"agfi", 0xC28, add, GR64, imm64sx32>; // Addition of memory. 
- defm AH : BinaryRXPair<"ah", 0x4A, 0xE37A, add, GR32, sextloadi16>; - defm A : BinaryRXPair<"a", 0x5A, 0xE35A, add, GR32, load>; - def AGF : BinaryRXY<"agf", 0xE318, add, GR64, sextloadi32>; - def AG : BinaryRXY<"ag", 0xE308, add, GR64, load>; + defm AH : BinaryRXPair<"ah", 0x4A, 0xE37A, add, GR32, asextloadi16, 2>; + defm A : BinaryRXPair<"a", 0x5A, 0xE35A, add, GR32, load, 4>; + def AGF : BinaryRXY<"agf", 0xE318, add, GR64, asextloadi32, 4>; + def AG : BinaryRXY<"ag", 0xE308, add, GR64, load, 8>; // Addition to memory. def ASI : BinarySIY<"asi", 0xEB6A, add, imm32sx8>; @@ -441,34 +713,40 @@ let Defs = [PSW] in { defm : SXB<add, GR64, AGFR>; // Addition producing a carry. -let Defs = [PSW] in { +let Defs = [CC] in { // Addition of a register. let isCommutable = 1 in { - def ALR : BinaryRR <"alr", 0x1E, addc, GR32, GR32>; - def ALGR : BinaryRRE<"algr", 0xB90A, addc, GR64, GR64>; + defm ALR : BinaryRRAndK<"al", 0x1E, 0xB9FA, addc, GR32, GR32>; + defm ALGR : BinaryRREAndK<"alg", 0xB90A, 0xB9EA, addc, GR64, GR64>; } - def ALGFR : BinaryRRE<"algfr", 0xB91A, null_frag, GR64, GR32>; + def ALGFR : BinaryRRE<"algf", 0xB91A, null_frag, GR64, GR32>; + + // Addition of signed 16-bit immediates. + def ALHSIK : BinaryRIE<"alhsik", 0xECDA, addc, GR32, imm32sx16>, + Requires<[FeatureDistinctOps]>; + def ALGHSIK : BinaryRIE<"alghsik", 0xECDB, addc, GR64, imm64sx16>, + Requires<[FeatureDistinctOps]>; // Addition of unsigned 32-bit immediates. def ALFI : BinaryRIL<"alfi", 0xC2B, addc, GR32, uimm32>; def ALGFI : BinaryRIL<"algfi", 0xC2A, addc, GR64, imm64zx32>; // Addition of memory. 
- defm AL : BinaryRXPair<"al", 0x5E, 0xE35E, addc, GR32, load>; - def ALGF : BinaryRXY<"algf", 0xE31A, addc, GR64, zextloadi32>; - def ALG : BinaryRXY<"alg", 0xE30A, addc, GR64, load>; + defm AL : BinaryRXPair<"al", 0x5E, 0xE35E, addc, GR32, load, 4>; + def ALGF : BinaryRXY<"algf", 0xE31A, addc, GR64, azextloadi32, 4>; + def ALG : BinaryRXY<"alg", 0xE30A, addc, GR64, load, 8>; } defm : ZXB<addc, GR64, ALGFR>; // Addition producing and using a carry. -let Defs = [PSW], Uses = [PSW] in { +let Defs = [CC], Uses = [CC] in { // Addition of a register. - def ALCR : BinaryRRE<"alcr", 0xB998, adde, GR32, GR32>; - def ALCGR : BinaryRRE<"alcgr", 0xB988, adde, GR64, GR64>; + def ALCR : BinaryRRE<"alc", 0xB998, adde, GR32, GR32>; + def ALCGR : BinaryRRE<"alcg", 0xB988, adde, GR64, GR64>; // Addition of memory. - def ALC : BinaryRXY<"alc", 0xE398, adde, GR32, load>; - def ALCG : BinaryRXY<"alcg", 0xE388, adde, GR64, load>; + def ALC : BinaryRXY<"alc", 0xE398, adde, GR32, load, 4>; + def ALCG : BinaryRXY<"alcg", 0xE388, adde, GR64, load, 8>; } //===----------------------------------------------------------------------===// @@ -477,25 +755,26 @@ let Defs = [PSW], Uses = [PSW] in { // Plain substraction. Although immediate forms exist, we use the // add-immediate instruction instead. -let Defs = [PSW] in { +let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in { // Subtraction of a register. - def SR : BinaryRR <"sr", 0x1B, sub, GR32, GR32>; - def SGFR : BinaryRRE<"sgfr", 0xB919, null_frag, GR64, GR32>; - def SGR : BinaryRRE<"sgr", 0xB909, sub, GR64, GR64>; + defm SR : BinaryRRAndK<"s", 0x1B, 0xB9F9, sub, GR32, GR32>; + def SGFR : BinaryRRE<"sgf", 0xB919, null_frag, GR64, GR32>; + defm SGR : BinaryRREAndK<"sg", 0xB909, 0xB9E9, sub, GR64, GR64>; // Subtraction of memory. 
- defm S : BinaryRXPair<"s", 0x5B, 0xE35B, sub, GR32, load>; - def SGF : BinaryRXY<"sgf", 0xE319, sub, GR64, sextloadi32>; - def SG : BinaryRXY<"sg", 0xE309, sub, GR64, load>; + defm SH : BinaryRXPair<"sh", 0x4B, 0xE37B, sub, GR32, asextloadi16, 2>; + defm S : BinaryRXPair<"s", 0x5B, 0xE35B, sub, GR32, load, 4>; + def SGF : BinaryRXY<"sgf", 0xE319, sub, GR64, asextloadi32, 4>; + def SG : BinaryRXY<"sg", 0xE309, sub, GR64, load, 8>; } defm : SXB<sub, GR64, SGFR>; // Subtraction producing a carry. -let Defs = [PSW] in { +let Defs = [CC] in { // Subtraction of a register. - def SLR : BinaryRR <"slr", 0x1F, subc, GR32, GR32>; - def SLGFR : BinaryRRE<"slgfr", 0xB91B, null_frag, GR64, GR32>; - def SLGR : BinaryRRE<"slgr", 0xB90B, subc, GR64, GR64>; + defm SLR : BinaryRRAndK<"sl", 0x1F, 0xB9FB, subc, GR32, GR32>; + def SLGFR : BinaryRRE<"slgf", 0xB91B, null_frag, GR64, GR32>; + defm SLGR : BinaryRREAndK<"slg", 0xB90B, 0xB9EB, subc, GR64, GR64>; // Subtraction of unsigned 32-bit immediates. These don't match // subc because we prefer addc for constants. @@ -503,56 +782,78 @@ let Defs = [PSW] in { def SLGFI : BinaryRIL<"slgfi", 0xC24, null_frag, GR64, imm64zx32>; // Subtraction of memory. - defm SL : BinaryRXPair<"sl", 0x5F, 0xE35F, subc, GR32, load>; - def SLGF : BinaryRXY<"slgf", 0xE31B, subc, GR64, zextloadi32>; - def SLG : BinaryRXY<"slg", 0xE30B, subc, GR64, load>; + defm SL : BinaryRXPair<"sl", 0x5F, 0xE35F, subc, GR32, load, 4>; + def SLGF : BinaryRXY<"slgf", 0xE31B, subc, GR64, azextloadi32, 4>; + def SLG : BinaryRXY<"slg", 0xE30B, subc, GR64, load, 8>; } defm : ZXB<subc, GR64, SLGFR>; // Subtraction producing and using a carry. -let Defs = [PSW], Uses = [PSW] in { +let Defs = [CC], Uses = [CC] in { // Subtraction of a register. 
- def SLBR : BinaryRRE<"slbr", 0xB999, sube, GR32, GR32>; - def SLGBR : BinaryRRE<"slbgr", 0xB989, sube, GR64, GR64>; + def SLBR : BinaryRRE<"slb", 0xB999, sube, GR32, GR32>; + def SLGBR : BinaryRRE<"slbg", 0xB989, sube, GR64, GR64>; // Subtraction of memory. - def SLB : BinaryRXY<"slb", 0xE399, sube, GR32, load>; - def SLBG : BinaryRXY<"slbg", 0xE389, sube, GR64, load>; + def SLB : BinaryRXY<"slb", 0xE399, sube, GR32, load, 4>; + def SLBG : BinaryRXY<"slbg", 0xE389, sube, GR64, load, 8>; } //===----------------------------------------------------------------------===// // AND //===----------------------------------------------------------------------===// -let Defs = [PSW] in { +let Defs = [CC] in { // ANDs of a register. - let isCommutable = 1 in { - def NR : BinaryRR <"nr", 0x14, and, GR32, GR32>; - def NGR : BinaryRRE<"ngr", 0xB980, and, GR64, GR64>; + let isCommutable = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in { + defm NR : BinaryRRAndK<"n", 0x14, 0xB9F4, and, GR32, GR32>; + defm NGR : BinaryRREAndK<"ng", 0xB980, 0xB9E4, and, GR64, GR64>; } - // ANDs of a 16-bit immediate, leaving other bits unaffected. - let isCodeGenOnly = 1 in { - def NILL32 : BinaryRI<"nill", 0xA57, and, GR32, imm32ll16c>; - def NILH32 : BinaryRI<"nilh", 0xA56, and, GR32, imm32lh16c>; + let isConvertibleToThreeAddress = 1 in { + // ANDs of a 16-bit immediate, leaving other bits unaffected. + // The CC result only reflects the 16-bit field, not the full register. + // + // NIxMux expands to NI[LH]x, depending on the choice of register. 
+ def NILMux : BinaryRIPseudo<and, GRX32, imm32ll16c>, + Requires<[FeatureHighWord]>; + def NIHMux : BinaryRIPseudo<and, GRX32, imm32lh16c>, + Requires<[FeatureHighWord]>; + def NILL : BinaryRI<"nill", 0xA57, and, GR32, imm32ll16c>; + def NILH : BinaryRI<"nilh", 0xA56, and, GR32, imm32lh16c>; + def NIHL : BinaryRI<"nihl", 0xA55, and, GRH32, imm32ll16c>; + def NIHH : BinaryRI<"nihh", 0xA54, and, GRH32, imm32lh16c>; + def NILL64 : BinaryAliasRI<and, GR64, imm64ll16c>; + def NILH64 : BinaryAliasRI<and, GR64, imm64lh16c>; + def NIHL64 : BinaryAliasRI<and, GR64, imm64hl16c>; + def NIHH64 : BinaryAliasRI<and, GR64, imm64hh16c>; + + // ANDs of a 32-bit immediate, leaving other bits unaffected. + // The CC result only reflects the 32-bit field, which means we can + // use it as a zero indicator for i32 operations but not otherwise. + let CCValues = 0xC, CompareZeroCCMask = 0x8 in { + // Expands to NILF or NIHF, depending on the choice of register. + def NIFMux : BinaryRIPseudo<and, GRX32, uimm32>, + Requires<[FeatureHighWord]>; + def NILF : BinaryRIL<"nilf", 0xC0B, and, GR32, uimm32>; + def NIHF : BinaryRIL<"nihf", 0xC0A, and, GRH32, uimm32>; + } + def NILF64 : BinaryAliasRIL<and, GR64, imm64lf32c>; + def NIHF64 : BinaryAliasRIL<and, GR64, imm64hf32c>; } - def NILL : BinaryRI<"nill", 0xA57, and, GR64, imm64ll16c>; - def NILH : BinaryRI<"nilh", 0xA56, and, GR64, imm64lh16c>; - def NIHL : BinaryRI<"nihl", 0xA55, and, GR64, imm64hl16c>; - def NIHH : BinaryRI<"nihh", 0xA54, and, GR64, imm64hh16c>; - - // ANDs of a 32-bit immediate, leaving other bits unaffected. - let isCodeGenOnly = 1 in - def NILF32 : BinaryRIL<"nilf", 0xC0B, and, GR32, uimm32>; - def NILF : BinaryRIL<"nilf", 0xC0B, and, GR64, imm64lf32c>; - def NIHF : BinaryRIL<"nihf", 0xC0A, and, GR64, imm64hf32c>; // ANDs of memory. 
- defm N : BinaryRXPair<"n", 0x54, 0xE354, and, GR32, load>; - def NG : BinaryRXY<"ng", 0xE380, and, GR64, load>; + let CCValues = 0xC, CompareZeroCCMask = 0x8 in { + defm N : BinaryRXPair<"n", 0x54, 0xE354, and, GR32, load, 4>; + def NG : BinaryRXY<"ng", 0xE380, and, GR64, load, 8>; + } // AND to memory defm NI : BinarySIPair<"ni", 0x94, 0xEB54, null_frag, uimm8>; + + // Block AND. + let mayLoad = 1, mayStore = 1 in + defm NC : MemorySS<"nc", 0xD4, z_nc, z_nc_loop>; } defm : RMWIByte<and, bdaddr12pair, NI>; defm : RMWIByte<and, bdaddr20pair, NIY>; @@ -561,35 +862,55 @@ defm : RMWIByte<and, bdaddr20pair, NIY>; // OR //===----------------------------------------------------------------------===// -let Defs = [PSW] in { +let Defs = [CC] in { // ORs of a register. - let isCommutable = 1 in { - def OR : BinaryRR <"or", 0x16, or, GR32, GR32>; - def OGR : BinaryRRE<"ogr", 0xB981, or, GR64, GR64>; + let isCommutable = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in { + defm OR : BinaryRRAndK<"o", 0x16, 0xB9F6, or, GR32, GR32>; + defm OGR : BinaryRREAndK<"og", 0xB981, 0xB9E6, or, GR64, GR64>; } // ORs of a 16-bit immediate, leaving other bits unaffected. - let isCodeGenOnly = 1 in { - def OILL32 : BinaryRI<"oill", 0xA5B, or, GR32, imm32ll16>; - def OILH32 : BinaryRI<"oilh", 0xA5A, or, GR32, imm32lh16>; - } - def OILL : BinaryRI<"oill", 0xA5B, or, GR64, imm64ll16>; - def OILH : BinaryRI<"oilh", 0xA5A, or, GR64, imm64lh16>; - def OIHL : BinaryRI<"oihl", 0xA59, or, GR64, imm64hl16>; - def OIHH : BinaryRI<"oihh", 0xA58, or, GR64, imm64hh16>; + // The CC result only reflects the 16-bit field, not the full register. + // + // OIxMux expands to OI[LH]x, depending on the choice of register. 
+ def OILMux : BinaryRIPseudo<or, GRX32, imm32ll16>, + Requires<[FeatureHighWord]>; + def OIHMux : BinaryRIPseudo<or, GRX32, imm32lh16>, + Requires<[FeatureHighWord]>; + def OILL : BinaryRI<"oill", 0xA5B, or, GR32, imm32ll16>; + def OILH : BinaryRI<"oilh", 0xA5A, or, GR32, imm32lh16>; + def OIHL : BinaryRI<"oihl", 0xA59, or, GRH32, imm32ll16>; + def OIHH : BinaryRI<"oihh", 0xA58, or, GRH32, imm32lh16>; + def OILL64 : BinaryAliasRI<or, GR64, imm64ll16>; + def OILH64 : BinaryAliasRI<or, GR64, imm64lh16>; + def OIHL64 : BinaryAliasRI<or, GR64, imm64hl16>; + def OIHH64 : BinaryAliasRI<or, GR64, imm64hh16>; // ORs of a 32-bit immediate, leaving other bits unaffected. - let isCodeGenOnly = 1 in - def OILF32 : BinaryRIL<"oilf", 0xC0D, or, GR32, uimm32>; - def OILF : BinaryRIL<"oilf", 0xC0D, or, GR64, imm64lf32>; - def OIHF : BinaryRIL<"oihf", 0xC0C, or, GR64, imm64hf32>; + // The CC result only reflects the 32-bit field, which means we can + // use it as a zero indicator for i32 operations but not otherwise. + let CCValues = 0xC, CompareZeroCCMask = 0x8 in { + // Expands to OILF or OIHF, depending on the choice of register. + def OIFMux : BinaryRIPseudo<or, GRX32, uimm32>, + Requires<[FeatureHighWord]>; + def OILF : BinaryRIL<"oilf", 0xC0D, or, GR32, uimm32>; + def OIHF : BinaryRIL<"oihf", 0xC0C, or, GRH32, uimm32>; + } + def OILF64 : BinaryAliasRIL<or, GR64, imm64lf32>; + def OIHF64 : BinaryAliasRIL<or, GR64, imm64hf32>; // ORs of memory. - defm O : BinaryRXPair<"o", 0x56, 0xE356, or, GR32, load>; - def OG : BinaryRXY<"og", 0xE381, or, GR64, load>; + let CCValues = 0xC, CompareZeroCCMask = 0x8 in { + defm O : BinaryRXPair<"o", 0x56, 0xE356, or, GR32, load, 4>; + def OG : BinaryRXY<"og", 0xE381, or, GR64, load, 8>; + } // OR to memory defm OI : BinarySIPair<"oi", 0x96, 0xEB56, null_frag, uimm8>; + + // Block OR. 
+ let mayLoad = 1, mayStore = 1 in + defm OC : MemorySS<"oc", 0xD6, z_oc, z_oc_loop>; } defm : RMWIByte<or, bdaddr12pair, OI>; defm : RMWIByte<or, bdaddr20pair, OIY>; @@ -598,25 +919,38 @@ defm : RMWIByte<or, bdaddr20pair, OIY>; // XOR //===----------------------------------------------------------------------===// -let Defs = [PSW] in { +let Defs = [CC] in { // XORs of a register. - let isCommutable = 1 in { - def XR : BinaryRR <"xr", 0x17, xor, GR32, GR32>; - def XGR : BinaryRRE<"xgr", 0xB982, xor, GR64, GR64>; + let isCommutable = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in { + defm XR : BinaryRRAndK<"x", 0x17, 0xB9F7, xor, GR32, GR32>; + defm XGR : BinaryRREAndK<"xg", 0xB982, 0xB9E7, xor, GR64, GR64>; } // XORs of a 32-bit immediate, leaving other bits unaffected. - let isCodeGenOnly = 1 in - def XILF32 : BinaryRIL<"xilf", 0xC07, xor, GR32, uimm32>; - def XILF : BinaryRIL<"xilf", 0xC07, xor, GR64, imm64lf32>; - def XIHF : BinaryRIL<"xihf", 0xC06, xor, GR64, imm64hf32>; + // The CC result only reflects the 32-bit field, which means we can + // use it as a zero indicator for i32 operations but not otherwise. + let CCValues = 0xC, CompareZeroCCMask = 0x8 in { + // Expands to XILF or XIHF, depending on the choice of register. + def XIFMux : BinaryRIPseudo<xor, GRX32, uimm32>, + Requires<[FeatureHighWord]>; + def XILF : BinaryRIL<"xilf", 0xC07, xor, GR32, uimm32>; + def XIHF : BinaryRIL<"xihf", 0xC06, xor, GRH32, uimm32>; + } + def XILF64 : BinaryAliasRIL<xor, GR64, imm64lf32>; + def XIHF64 : BinaryAliasRIL<xor, GR64, imm64hf32>; // XORs of memory. - defm X : BinaryRXPair<"x",0x57, 0xE357, xor, GR32, load>; - def XG : BinaryRXY<"xg", 0xE382, xor, GR64, load>; + let CCValues = 0xC, CompareZeroCCMask = 0x8 in { + defm X : BinaryRXPair<"x",0x57, 0xE357, xor, GR32, load, 4>; + def XG : BinaryRXY<"xg", 0xE382, xor, GR64, load, 8>; + } // XOR to memory defm XI : BinarySIPair<"xi", 0x97, 0xEB57, null_frag, uimm8>; + + // Block XOR. 
+ let mayLoad = 1, mayStore = 1 in + defm XC : MemorySS<"xc", 0xD7, z_xc, z_xc_loop>; } defm : RMWIByte<xor, bdaddr12pair, XI>; defm : RMWIByte<xor, bdaddr20pair, XIY>; @@ -627,10 +961,10 @@ defm : RMWIByte<xor, bdaddr20pair, XIY>; // Multiplication of a register. let isCommutable = 1 in { - def MSR : BinaryRRE<"msr", 0xB252, mul, GR32, GR32>; - def MSGR : BinaryRRE<"msgr", 0xB90C, mul, GR64, GR64>; + def MSR : BinaryRRE<"ms", 0xB252, mul, GR32, GR32>; + def MSGR : BinaryRRE<"msg", 0xB90C, mul, GR64, GR64>; } -def MSGFR : BinaryRRE<"msgfr", 0xB91C, null_frag, GR64, GR32>; +def MSGFR : BinaryRRE<"msgf", 0xB91C, null_frag, GR64, GR32>; defm : SXB<mul, GR64, MSGFR>; // Multiplication of a signed 16-bit immediate. @@ -642,33 +976,32 @@ def MSFI : BinaryRIL<"msfi", 0xC21, mul, GR32, simm32>; def MSGFI : BinaryRIL<"msgfi", 0xC20, mul, GR64, imm64sx32>; // Multiplication of memory. -defm MH : BinaryRXPair<"mh", 0x4C, 0xE37C, mul, GR32, sextloadi16>; -defm MS : BinaryRXPair<"ms", 0x71, 0xE351, mul, GR32, load>; -def MSGF : BinaryRXY<"msgf", 0xE31C, mul, GR64, sextloadi32>; -def MSG : BinaryRXY<"msg", 0xE30C, mul, GR64, load>; +defm MH : BinaryRXPair<"mh", 0x4C, 0xE37C, mul, GR32, asextloadi16, 2>; +defm MS : BinaryRXPair<"ms", 0x71, 0xE351, mul, GR32, load, 4>; +def MSGF : BinaryRXY<"msgf", 0xE31C, mul, GR64, asextloadi32, 4>; +def MSG : BinaryRXY<"msg", 0xE30C, mul, GR64, load, 8>; // Multiplication of a register, producing two results. -def MLGR : BinaryRRE<"mlgr", 0xB986, z_umul_lohi64, GR128, GR64>; +def MLGR : BinaryRRE<"mlg", 0xB986, z_umul_lohi64, GR128, GR64>; // Multiplication of memory, producing two results. 
-def MLG : BinaryRXY<"mlg", 0xE386, z_umul_lohi64, GR128, load>; +def MLG : BinaryRXY<"mlg", 0xE386, z_umul_lohi64, GR128, load, 8>; //===----------------------------------------------------------------------===// // Division and remainder //===----------------------------------------------------------------------===// // Division and remainder, from registers. -def DSGFR : BinaryRRE<"dsgfr", 0xB91D, null_frag, GR128, GR32>; -def DSGR : BinaryRRE<"dsgr", 0xB90D, z_sdivrem64, GR128, GR64>; -def DLR : BinaryRRE<"dlr", 0xB997, z_udivrem32, GR128, GR32>; -def DLGR : BinaryRRE<"dlgr", 0xB987, z_udivrem64, GR128, GR64>; -defm : SXB<z_sdivrem64, GR128, DSGFR>; +def DSGFR : BinaryRRE<"dsgf", 0xB91D, z_sdivrem32, GR128, GR32>; +def DSGR : BinaryRRE<"dsg", 0xB90D, z_sdivrem64, GR128, GR64>; +def DLR : BinaryRRE<"dl", 0xB997, z_udivrem32, GR128, GR32>; +def DLGR : BinaryRRE<"dlg", 0xB987, z_udivrem64, GR128, GR64>; // Division and remainder, from memory. -def DSGF : BinaryRXY<"dsgf", 0xE31D, z_sdivrem64, GR128, sextloadi32>; -def DSG : BinaryRXY<"dsg", 0xE30D, z_sdivrem64, GR128, load>; -def DL : BinaryRXY<"dl", 0xE397, z_udivrem32, GR128, load>; -def DLG : BinaryRXY<"dlg", 0xE387, z_udivrem64, GR128, load>; +def DSGF : BinaryRXY<"dsgf", 0xE31D, z_sdivrem32, GR128, load, 4>; +def DSG : BinaryRXY<"dsg", 0xE30D, z_sdivrem64, GR128, load, 8>; +def DL : BinaryRXY<"dl", 0xE397, z_udivrem32, GR128, load, 4>; +def DLG : BinaryRXY<"dlg", 0xE387, z_udivrem64, GR128, load, 8>; //===----------------------------------------------------------------------===// // Shifts @@ -676,111 +1009,188 @@ def DLG : BinaryRXY<"dlg", 0xE387, z_udivrem64, GR128, load>; // Shift left. let neverHasSideEffects = 1 in { - def SLL : ShiftRS <"sll", 0x89, shl, GR32, shift12only>; - def SLLG : ShiftRSY<"sllg", 0xEB0D, shl, GR64, shift20only>; + defm SLL : ShiftRSAndK<"sll", 0x89, 0xEBDF, shl, GR32>; + def SLLG : ShiftRSY<"sllg", 0xEB0D, shl, GR64>; } // Logical shift right. 
let neverHasSideEffects = 1 in { - def SRL : ShiftRS <"srl", 0x88, srl, GR32, shift12only>; - def SRLG : ShiftRSY<"srlg", 0xEB0C, srl, GR64, shift20only>; + defm SRL : ShiftRSAndK<"srl", 0x88, 0xEBDE, srl, GR32>; + def SRLG : ShiftRSY<"srlg", 0xEB0C, srl, GR64>; } // Arithmetic shift right. -let Defs = [PSW] in { - def SRA : ShiftRS <"sra", 0x8A, sra, GR32, shift12only>; - def SRAG : ShiftRSY<"srag", 0xEB0A, sra, GR64, shift20only>; +let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in { + defm SRA : ShiftRSAndK<"sra", 0x8A, 0xEBDC, sra, GR32>; + def SRAG : ShiftRSY<"srag", 0xEB0A, sra, GR64>; } // Rotate left. let neverHasSideEffects = 1 in { - def RLL : ShiftRSY<"rll", 0xEB1D, rotl, GR32, shift20only>; - def RLLG : ShiftRSY<"rllg", 0xEB1C, rotl, GR64, shift20only>; + def RLL : ShiftRSY<"rll", 0xEB1D, rotl, GR32>; + def RLLG : ShiftRSY<"rllg", 0xEB1C, rotl, GR64>; } // Rotate second operand left and inserted selected bits into first operand. // These can act like 32-bit operands provided that the constant start and -// end bits (operands 2 and 3) are in the range [32, 64) -let Defs = [PSW] in { +// end bits (operands 2 and 3) are in the range [32, 64). +let Defs = [CC] in { let isCodeGenOnly = 1 in - def RISBG32 : RotateSelectRIEf<"risbg", 0xEC55, GR32, GR32>; - def RISBG : RotateSelectRIEf<"risbg", 0xEC55, GR64, GR64>; + def RISBG32 : RotateSelectRIEf<"risbg", 0xEC55, GR32, GR32>; + let CCValues = 0xE, CompareZeroCCMask = 0xE in + def RISBG : RotateSelectRIEf<"risbg", 0xEC55, GR64, GR64>; +} + +// Forms of RISBG that only affect one word of the destination register. +// They do not set CC. 
+def RISBMux : RotateSelectRIEfPseudo<GRX32, GRX32>, Requires<[FeatureHighWord]>; +def RISBLL : RotateSelectAliasRIEf<GR32, GR32>, Requires<[FeatureHighWord]>; +def RISBLH : RotateSelectAliasRIEf<GR32, GRH32>, Requires<[FeatureHighWord]>; +def RISBHL : RotateSelectAliasRIEf<GRH32, GR32>, Requires<[FeatureHighWord]>; +def RISBHH : RotateSelectAliasRIEf<GRH32, GRH32>, Requires<[FeatureHighWord]>; +def RISBLG : RotateSelectRIEf<"risblg", 0xEC51, GR32, GR64>, + Requires<[FeatureHighWord]>; +def RISBHG : RotateSelectRIEf<"risbhg", 0xEC5D, GRH32, GR64>, + Requires<[FeatureHighWord]>; + +// Rotate second operand left and perform a logical operation with selected +// bits of the first operand. The CC result only describes the selected bits, +// so isn't useful for a full comparison against zero. +let Defs = [CC] in { + def RNSBG : RotateSelectRIEf<"rnsbg", 0xEC54, GR64, GR64>; + def ROSBG : RotateSelectRIEf<"rosbg", 0xEC56, GR64, GR64>; + def RXSBG : RotateSelectRIEf<"rxsbg", 0xEC57, GR64, GR64>; } //===----------------------------------------------------------------------===// // Comparison //===----------------------------------------------------------------------===// -// Signed comparisons. -let Defs = [PSW] in { +// Signed comparisons. We put these before the unsigned comparisons because +// some of the signed forms have COMPARE AND BRANCH equivalents whereas none +// of the unsigned forms do. +let Defs = [CC], CCValues = 0xE in { // Comparison with a register. - def CR : CompareRR <"cr", 0x19, z_cmp, GR32, GR32>; - def CGFR : CompareRRE<"cgfr", 0xB930, null_frag, GR64, GR32>; - def CGR : CompareRRE<"cgr", 0xB920, z_cmp, GR64, GR64>; + def CR : CompareRR <"c", 0x19, z_scmp, GR32, GR32>; + def CGFR : CompareRRE<"cgf", 0xB930, null_frag, GR64, GR32>; + def CGR : CompareRRE<"cg", 0xB920, z_scmp, GR64, GR64>; // Comparison with a signed 16-bit immediate. 
- def CHI : CompareRI<"chi", 0xA7E, z_cmp, GR32, imm32sx16>; - def CGHI : CompareRI<"cghi", 0xA7F, z_cmp, GR64, imm64sx16>; - - // Comparison with a signed 32-bit immediate. - def CFI : CompareRIL<"cfi", 0xC2D, z_cmp, GR32, simm32>; - def CGFI : CompareRIL<"cgfi", 0xC2C, z_cmp, GR64, imm64sx32>; + def CHI : CompareRI<"chi", 0xA7E, z_scmp, GR32, imm32sx16>; + def CGHI : CompareRI<"cghi", 0xA7F, z_scmp, GR64, imm64sx16>; + + // Comparison with a signed 32-bit immediate. CFIMux expands to CFI or CIH, + // depending on the choice of register. + def CFIMux : CompareRIPseudo<z_scmp, GRX32, simm32>, + Requires<[FeatureHighWord]>; + def CFI : CompareRIL<"cfi", 0xC2D, z_scmp, GR32, simm32>; + def CIH : CompareRIL<"cih", 0xCCD, z_scmp, GRH32, simm32>, + Requires<[FeatureHighWord]>; + def CGFI : CompareRIL<"cgfi", 0xC2C, z_scmp, GR64, imm64sx32>; // Comparison with memory. - defm CH : CompareRXPair<"ch", 0x49, 0xE379, z_cmp, GR32, sextloadi16>; - defm C : CompareRXPair<"c", 0x59, 0xE359, z_cmp, GR32, load>; - def CGH : CompareRXY<"cgh", 0xE334, z_cmp, GR64, sextloadi16>; - def CGF : CompareRXY<"cgf", 0xE330, z_cmp, GR64, sextloadi32>; - def CG : CompareRXY<"cg", 0xE320, z_cmp, GR64, load>; - def CHRL : CompareRILPC<"chrl", 0xC65, z_cmp, GR32, aligned_sextloadi16>; - def CRL : CompareRILPC<"crl", 0xC6D, z_cmp, GR32, aligned_load>; - def CGHRL : CompareRILPC<"cghrl", 0xC64, z_cmp, GR64, aligned_sextloadi16>; - def CGFRL : CompareRILPC<"cgfrl", 0xC6C, z_cmp, GR64, aligned_sextloadi32>; - def CGRL : CompareRILPC<"cgrl", 0xC68, z_cmp, GR64, aligned_load>; + defm CH : CompareRXPair<"ch", 0x49, 0xE379, z_scmp, GR32, asextloadi16, 2>; + def CMux : CompareRXYPseudo<z_scmp, GRX32, load, 4>, + Requires<[FeatureHighWord]>; + defm C : CompareRXPair<"c", 0x59, 0xE359, z_scmp, GR32, load, 4>; + def CHF : CompareRXY<"chf", 0xE3CD, z_scmp, GRH32, load, 4>, + Requires<[FeatureHighWord]>; + def CGH : CompareRXY<"cgh", 0xE334, z_scmp, GR64, asextloadi16, 2>; + def CGF : CompareRXY<"cgf", 0xE330, 
z_scmp, GR64, asextloadi32, 4>; + def CG : CompareRXY<"cg", 0xE320, z_scmp, GR64, load, 8>; + def CHRL : CompareRILPC<"chrl", 0xC65, z_scmp, GR32, aligned_asextloadi16>; + def CRL : CompareRILPC<"crl", 0xC6D, z_scmp, GR32, aligned_load>; + def CGHRL : CompareRILPC<"cghrl", 0xC64, z_scmp, GR64, aligned_asextloadi16>; + def CGFRL : CompareRILPC<"cgfrl", 0xC6C, z_scmp, GR64, aligned_asextloadi32>; + def CGRL : CompareRILPC<"cgrl", 0xC68, z_scmp, GR64, aligned_load>; // Comparison between memory and a signed 16-bit immediate. - def CHHSI : CompareSIL<"chhsi", 0xE554, z_cmp, sextloadi16, imm32sx16>; - def CHSI : CompareSIL<"chsi", 0xE55C, z_cmp, load, imm32sx16>; - def CGHSI : CompareSIL<"cghsi", 0xE558, z_cmp, load, imm64sx16>; + def CHHSI : CompareSIL<"chhsi", 0xE554, z_scmp, asextloadi16, imm32sx16>; + def CHSI : CompareSIL<"chsi", 0xE55C, z_scmp, load, imm32sx16>; + def CGHSI : CompareSIL<"cghsi", 0xE558, z_scmp, load, imm64sx16>; } -defm : SXB<z_cmp, GR64, CGFR>; +defm : SXB<z_scmp, GR64, CGFR>; // Unsigned comparisons. -let Defs = [PSW] in { +let Defs = [CC], CCValues = 0xE, IsLogical = 1 in { // Comparison with a register. - def CLR : CompareRR <"clr", 0x15, z_ucmp, GR32, GR32>; - def CLGFR : CompareRRE<"clgfr", 0xB931, null_frag, GR64, GR32>; - def CLGR : CompareRRE<"clgr", 0xB921, z_ucmp, GR64, GR64>; - - // Comparison with a signed 32-bit immediate. + def CLR : CompareRR <"cl", 0x15, z_ucmp, GR32, GR32>; + def CLGFR : CompareRRE<"clgf", 0xB931, null_frag, GR64, GR32>; + def CLGR : CompareRRE<"clg", 0xB921, z_ucmp, GR64, GR64>; + + // Comparison with an unsigned 32-bit immediate. CLFIMux expands to CLFI + // or CLIH, depending on the choice of register. 
+ def CLFIMux : CompareRIPseudo<z_ucmp, GRX32, uimm32>, + Requires<[FeatureHighWord]>; def CLFI : CompareRIL<"clfi", 0xC2F, z_ucmp, GR32, uimm32>; + def CLIH : CompareRIL<"clih", 0xCCF, z_ucmp, GR32, uimm32>, + Requires<[FeatureHighWord]>; def CLGFI : CompareRIL<"clgfi", 0xC2E, z_ucmp, GR64, imm64zx32>; // Comparison with memory. - defm CL : CompareRXPair<"cl", 0x55, 0xE355, z_ucmp, GR32, load>; - def CLGF : CompareRXY<"clgf", 0xE331, z_ucmp, GR64, zextloadi32>; - def CLG : CompareRXY<"clg", 0xE321, z_ucmp, GR64, load>; + def CLMux : CompareRXYPseudo<z_ucmp, GRX32, load, 4>, + Requires<[FeatureHighWord]>; + defm CL : CompareRXPair<"cl", 0x55, 0xE355, z_ucmp, GR32, load, 4>; + def CLHF : CompareRXY<"clhf", 0xE3CF, z_ucmp, GRH32, load, 4>, + Requires<[FeatureHighWord]>; + def CLGF : CompareRXY<"clgf", 0xE331, z_ucmp, GR64, azextloadi32, 4>; + def CLG : CompareRXY<"clg", 0xE321, z_ucmp, GR64, load, 8>; def CLHRL : CompareRILPC<"clhrl", 0xC67, z_ucmp, GR32, - aligned_zextloadi16>; + aligned_azextloadi16>; def CLRL : CompareRILPC<"clrl", 0xC6F, z_ucmp, GR32, aligned_load>; def CLGHRL : CompareRILPC<"clghrl", 0xC66, z_ucmp, GR64, - aligned_zextloadi16>; + aligned_azextloadi16>; def CLGFRL : CompareRILPC<"clgfrl", 0xC6E, z_ucmp, GR64, - aligned_zextloadi32>; + aligned_azextloadi32>; def CLGRL : CompareRILPC<"clgrl", 0xC6A, z_ucmp, GR64, aligned_load>; // Comparison between memory and an unsigned 8-bit immediate. - defm CLI : CompareSIPair<"cli", 0x95, 0xEB55, z_ucmp, zextloadi8, imm32zx8>; + defm CLI : CompareSIPair<"cli", 0x95, 0xEB55, z_ucmp, azextloadi8, imm32zx8>; // Comparison between memory and an unsigned 16-bit immediate. 
- def CLHHSI : CompareSIL<"clhhsi", 0xE555, z_ucmp, zextloadi16, imm32zx16>; - def CLFHSI : CompareSIL<"clfhsi", 0xE55D, z_ucmp, load, imm32zx16>; - def CLGHSI : CompareSIL<"clghsi", 0xE559, z_ucmp, load, imm64zx16>; + def CLHHSI : CompareSIL<"clhhsi", 0xE555, z_ucmp, azextloadi16, imm32zx16>; + def CLFHSI : CompareSIL<"clfhsi", 0xE55D, z_ucmp, load, imm32zx16>; + def CLGHSI : CompareSIL<"clghsi", 0xE559, z_ucmp, load, imm64zx16>; } defm : ZXB<z_ucmp, GR64, CLGFR>; +// Memory-to-memory comparison. +let mayLoad = 1, Defs = [CC] in + defm CLC : MemorySS<"clc", 0xD5, z_clc, z_clc_loop>; + +// String comparison. +let mayLoad = 1, Defs = [CC], Uses = [R0L] in + defm CLST : StringRRE<"clst", 0xB25D, z_strcmp>; + +// Test under mask. +let Defs = [CC] in { + // TMxMux expands to TM[LH]x, depending on the choice of register. + def TMLMux : CompareRIPseudo<z_tm_reg, GRX32, imm32ll16>, + Requires<[FeatureHighWord]>; + def TMHMux : CompareRIPseudo<z_tm_reg, GRX32, imm32lh16>, + Requires<[FeatureHighWord]>; + def TMLL : CompareRI<"tmll", 0xA71, z_tm_reg, GR32, imm32ll16>; + def TMLH : CompareRI<"tmlh", 0xA70, z_tm_reg, GR32, imm32lh16>; + def TMHL : CompareRI<"tmhl", 0xA73, z_tm_reg, GRH32, imm32ll16>; + def TMHH : CompareRI<"tmhh", 0xA72, z_tm_reg, GRH32, imm32lh16>; + + def TMLL64 : CompareAliasRI<z_tm_reg, GR64, imm64ll16>; + def TMLH64 : CompareAliasRI<z_tm_reg, GR64, imm64lh16>; + def TMHL64 : CompareAliasRI<z_tm_reg, GR64, imm64hl16>; + def TMHH64 : CompareAliasRI<z_tm_reg, GR64, imm64hh16>; + + defm TM : CompareSIPair<"tm", 0x91, 0xEB51, z_tm_mem, anyextloadi8, imm32zx8>; +} + +//===----------------------------------------------------------------------===// +// Prefetch +//===----------------------------------------------------------------------===// + +def PFD : PrefetchRXY<"pfd", 0xE336, z_prefetch>; +def PFDRL : PrefetchRILPC<"pfdrl", 0xC62, z_prefetch>; + //===----------------------------------------------------------------------===// // Atomic operations 
//===----------------------------------------------------------------------===// @@ -805,60 +1215,60 @@ def ATOMIC_LOAD_SGR : AtomicLoadBinaryReg64<atomic_load_sub_64>; def ATOMIC_LOADW_NR : AtomicLoadWBinaryReg<z_atomic_loadw_and>; def ATOMIC_LOADW_NILH : AtomicLoadWBinaryImm<z_atomic_loadw_and, imm32lh16c>; def ATOMIC_LOAD_NR : AtomicLoadBinaryReg32<atomic_load_and_32>; -def ATOMIC_LOAD_NILL32 : AtomicLoadBinaryImm32<atomic_load_and_32, imm32ll16c>; -def ATOMIC_LOAD_NILH32 : AtomicLoadBinaryImm32<atomic_load_and_32, imm32lh16c>; -def ATOMIC_LOAD_NILF32 : AtomicLoadBinaryImm32<atomic_load_and_32, uimm32>; +def ATOMIC_LOAD_NILL : AtomicLoadBinaryImm32<atomic_load_and_32, imm32ll16c>; +def ATOMIC_LOAD_NILH : AtomicLoadBinaryImm32<atomic_load_and_32, imm32lh16c>; +def ATOMIC_LOAD_NILF : AtomicLoadBinaryImm32<atomic_load_and_32, uimm32>; def ATOMIC_LOAD_NGR : AtomicLoadBinaryReg64<atomic_load_and_64>; -def ATOMIC_LOAD_NILL : AtomicLoadBinaryImm64<atomic_load_and_64, imm64ll16c>; -def ATOMIC_LOAD_NILH : AtomicLoadBinaryImm64<atomic_load_and_64, imm64lh16c>; -def ATOMIC_LOAD_NIHL : AtomicLoadBinaryImm64<atomic_load_and_64, imm64hl16c>; -def ATOMIC_LOAD_NIHH : AtomicLoadBinaryImm64<atomic_load_and_64, imm64hh16c>; -def ATOMIC_LOAD_NILF : AtomicLoadBinaryImm64<atomic_load_and_64, imm64lf32c>; -def ATOMIC_LOAD_NIHF : AtomicLoadBinaryImm64<atomic_load_and_64, imm64hf32c>; +def ATOMIC_LOAD_NILL64 : AtomicLoadBinaryImm64<atomic_load_and_64, imm64ll16c>; +def ATOMIC_LOAD_NILH64 : AtomicLoadBinaryImm64<atomic_load_and_64, imm64lh16c>; +def ATOMIC_LOAD_NIHL64 : AtomicLoadBinaryImm64<atomic_load_and_64, imm64hl16c>; +def ATOMIC_LOAD_NIHH64 : AtomicLoadBinaryImm64<atomic_load_and_64, imm64hh16c>; +def ATOMIC_LOAD_NILF64 : AtomicLoadBinaryImm64<atomic_load_and_64, imm64lf32c>; +def ATOMIC_LOAD_NIHF64 : AtomicLoadBinaryImm64<atomic_load_and_64, imm64hf32c>; def ATOMIC_LOADW_OR : AtomicLoadWBinaryReg<z_atomic_loadw_or>; def ATOMIC_LOADW_OILH : AtomicLoadWBinaryImm<z_atomic_loadw_or, 
imm32lh16>; def ATOMIC_LOAD_OR : AtomicLoadBinaryReg32<atomic_load_or_32>; -def ATOMIC_LOAD_OILL32 : AtomicLoadBinaryImm32<atomic_load_or_32, imm32ll16>; -def ATOMIC_LOAD_OILH32 : AtomicLoadBinaryImm32<atomic_load_or_32, imm32lh16>; -def ATOMIC_LOAD_OILF32 : AtomicLoadBinaryImm32<atomic_load_or_32, uimm32>; +def ATOMIC_LOAD_OILL : AtomicLoadBinaryImm32<atomic_load_or_32, imm32ll16>; +def ATOMIC_LOAD_OILH : AtomicLoadBinaryImm32<atomic_load_or_32, imm32lh16>; +def ATOMIC_LOAD_OILF : AtomicLoadBinaryImm32<atomic_load_or_32, uimm32>; def ATOMIC_LOAD_OGR : AtomicLoadBinaryReg64<atomic_load_or_64>; -def ATOMIC_LOAD_OILL : AtomicLoadBinaryImm64<atomic_load_or_64, imm64ll16>; -def ATOMIC_LOAD_OILH : AtomicLoadBinaryImm64<atomic_load_or_64, imm64lh16>; -def ATOMIC_LOAD_OIHL : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hl16>; -def ATOMIC_LOAD_OIHH : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hh16>; -def ATOMIC_LOAD_OILF : AtomicLoadBinaryImm64<atomic_load_or_64, imm64lf32>; -def ATOMIC_LOAD_OIHF : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hf32>; +def ATOMIC_LOAD_OILL64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64ll16>; +def ATOMIC_LOAD_OILH64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64lh16>; +def ATOMIC_LOAD_OIHL64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hl16>; +def ATOMIC_LOAD_OIHH64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hh16>; +def ATOMIC_LOAD_OILF64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64lf32>; +def ATOMIC_LOAD_OIHF64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hf32>; def ATOMIC_LOADW_XR : AtomicLoadWBinaryReg<z_atomic_loadw_xor>; def ATOMIC_LOADW_XILF : AtomicLoadWBinaryImm<z_atomic_loadw_xor, uimm32>; def ATOMIC_LOAD_XR : AtomicLoadBinaryReg32<atomic_load_xor_32>; -def ATOMIC_LOAD_XILF32 : AtomicLoadBinaryImm32<atomic_load_xor_32, uimm32>; +def ATOMIC_LOAD_XILF : AtomicLoadBinaryImm32<atomic_load_xor_32, uimm32>; def ATOMIC_LOAD_XGR : AtomicLoadBinaryReg64<atomic_load_xor_64>; -def ATOMIC_LOAD_XILF : 
AtomicLoadBinaryImm64<atomic_load_xor_64, imm64lf32>; -def ATOMIC_LOAD_XIHF : AtomicLoadBinaryImm64<atomic_load_xor_64, imm64hf32>; +def ATOMIC_LOAD_XILF64 : AtomicLoadBinaryImm64<atomic_load_xor_64, imm64lf32>; +def ATOMIC_LOAD_XIHF64 : AtomicLoadBinaryImm64<atomic_load_xor_64, imm64hf32>; def ATOMIC_LOADW_NRi : AtomicLoadWBinaryReg<z_atomic_loadw_nand>; def ATOMIC_LOADW_NILHi : AtomicLoadWBinaryImm<z_atomic_loadw_nand, imm32lh16c>; def ATOMIC_LOAD_NRi : AtomicLoadBinaryReg32<atomic_load_nand_32>; -def ATOMIC_LOAD_NILL32i : AtomicLoadBinaryImm32<atomic_load_nand_32, +def ATOMIC_LOAD_NILLi : AtomicLoadBinaryImm32<atomic_load_nand_32, imm32ll16c>; -def ATOMIC_LOAD_NILH32i : AtomicLoadBinaryImm32<atomic_load_nand_32, +def ATOMIC_LOAD_NILHi : AtomicLoadBinaryImm32<atomic_load_nand_32, imm32lh16c>; -def ATOMIC_LOAD_NILF32i : AtomicLoadBinaryImm32<atomic_load_nand_32, uimm32>; +def ATOMIC_LOAD_NILFi : AtomicLoadBinaryImm32<atomic_load_nand_32, uimm32>; def ATOMIC_LOAD_NGRi : AtomicLoadBinaryReg64<atomic_load_nand_64>; -def ATOMIC_LOAD_NILLi : AtomicLoadBinaryImm64<atomic_load_nand_64, +def ATOMIC_LOAD_NILL64i : AtomicLoadBinaryImm64<atomic_load_nand_64, imm64ll16c>; -def ATOMIC_LOAD_NILHi : AtomicLoadBinaryImm64<atomic_load_nand_64, +def ATOMIC_LOAD_NILH64i : AtomicLoadBinaryImm64<atomic_load_nand_64, imm64lh16c>; -def ATOMIC_LOAD_NIHLi : AtomicLoadBinaryImm64<atomic_load_nand_64, +def ATOMIC_LOAD_NIHL64i : AtomicLoadBinaryImm64<atomic_load_nand_64, imm64hl16c>; -def ATOMIC_LOAD_NIHHi : AtomicLoadBinaryImm64<atomic_load_nand_64, +def ATOMIC_LOAD_NIHH64i : AtomicLoadBinaryImm64<atomic_load_nand_64, imm64hh16c>; -def ATOMIC_LOAD_NILFi : AtomicLoadBinaryImm64<atomic_load_nand_64, +def ATOMIC_LOAD_NILF64i : AtomicLoadBinaryImm64<atomic_load_nand_64, imm64lf32c>; -def ATOMIC_LOAD_NIHFi : AtomicLoadBinaryImm64<atomic_load_nand_64, +def ATOMIC_LOAD_NIHF64i : AtomicLoadBinaryImm64<atomic_load_nand_64, imm64hf32c>; def ATOMIC_LOADW_MIN : AtomicLoadWBinaryReg<z_atomic_loadw_min>; 
@@ -885,13 +1295,13 @@ def ATOMIC_CMP_SWAPW (z_atomic_cmp_swapw bdaddr20only:$addr, GR32:$cmp, GR32:$swap, ADDR32:$bitshift, ADDR32:$negbitshift, uimm32:$bitsize))]> { - let Defs = [PSW]; + let Defs = [CC]; let mayLoad = 1; let mayStore = 1; let usesCustomInserter = 1; } -let Defs = [PSW] in { +let Defs = [CC] in { defm CS : CmpSwapRSPair<"cs", 0xBA, 0xEB14, atomic_cmp_swap_32, GR32>; def CSG : CmpSwapRSY<"csg", 0xEB30, atomic_cmp_swap_64, GR64>; } @@ -900,34 +1310,30 @@ let Defs = [PSW] in { // Miscellaneous Instructions. //===----------------------------------------------------------------------===// +// Extract CC into bits 29 and 28 of a register. +let Uses = [CC] in + def IPM : InherentRRE<"ipm", 0xB222, GR32, (z_ipm)>; + // Read a 32-bit access register into a GR32. As with all GR32 operations, // the upper 32 bits of the enclosing GR64 remain unchanged, which is useful // when a 64-bit address is stored in a pair of access registers. -def EAR : InstRRE<0xB24F, (outs GR32:$dst), (ins access_reg:$src), - "ear\t$dst, $src", - [(set GR32:$dst, (z_extract_access access_reg:$src))]>; +def EAR : InstRRE<0xB24F, (outs GR32:$R1), (ins access_reg:$R2), + "ear\t$R1, $R2", + [(set GR32:$R1, (z_extract_access access_reg:$R2))]>; // Find leftmost one, AKA count leading zeros. The instruction actually // returns a pair of GR64s, the first giving the number of leading zeros // and the second giving a copy of the source with the leftmost one bit // cleared. We only use the first result here. -let Defs = [PSW] in { - def FLOGR : UnaryRRE<"flogr", 0xB983, null_frag, GR128, GR64>; +let Defs = [CC] in { + def FLOGR : UnaryRRE<"flog", 0xB983, null_frag, GR128, GR64>; } def : Pat<(ctlz GR64:$src), - (EXTRACT_SUBREG (FLOGR GR64:$src), subreg_high)>; + (EXTRACT_SUBREG (FLOGR GR64:$src), subreg_h64)>; // Use subregs to populate the "don't care" bits in a 32-bit to 64-bit anyext. 
def : Pat<(i64 (anyext GR32:$src)), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_32bit)>; - -// There are no 32-bit equivalents of LLILL and LLILH, so use a full -// 64-bit move followed by a subreg. This preserves the invariant that -// all GR32 operations only modify the low 32 bits. -def : Pat<(i32 imm32ll16:$src), - (EXTRACT_SUBREG (LLILL (LL16 imm:$src)), subreg_32bit)>; -def : Pat<(i32 imm32lh16:$src), - (EXTRACT_SUBREG (LLILH (LH16 imm:$src)), subreg_32bit)>; + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32)>; // Extend GR32s and GR64s to GR128s. let usesCustomInserter = 1 in { @@ -936,6 +1342,10 @@ let usesCustomInserter = 1 in { def ZEXT128_64 : Pseudo<(outs GR128:$dst), (ins GR64:$src), []>; } +// Search a block of memory for a character. +let mayLoad = 1, Defs = [CC], Uses = [R0L] in + defm SRST : StringRRE<"srst", 0xb25e, z_search_string>; + //===----------------------------------------------------------------------===// // Peepholes. //===----------------------------------------------------------------------===// @@ -944,12 +1354,40 @@ let usesCustomInserter = 1 in { defm : ZXB<add, GR64, ALGFR>; def : Pat<(add GR64:$src1, imm64zx32:$src2), (ALGFI GR64:$src1, imm64zx32:$src2)>; -def : Pat<(add GR64:$src1, (zextloadi32 bdxaddr20only:$addr)), +def : Pat<(add GR64:$src1, (azextloadi32 bdxaddr20only:$addr)), (ALGF GR64:$src1, bdxaddr20only:$addr)>; // Use SL* for GR64 subtractions of unsigned 32-bit values. defm : ZXB<sub, GR64, SLGFR>; def : Pat<(add GR64:$src1, imm64zx32n:$src2), (SLGFI GR64:$src1, imm64zx32n:$src2)>; -def : Pat<(sub GR64:$src1, (zextloadi32 bdxaddr20only:$addr)), +def : Pat<(sub GR64:$src1, (azextloadi32 bdxaddr20only:$addr)), (SLGF GR64:$src1, bdxaddr20only:$addr)>; + +// Optimize sign-extended 1/0 selects to -1/0 selects. This is important +// for vector legalization. 
+def : Pat<(sra (shl (i32 (z_select_ccmask 1, 0, uimm8zx4:$valid, uimm8zx4:$cc)), + (i32 31)), + (i32 31)), + (Select32 (LHI -1), (LHI 0), uimm8zx4:$valid, uimm8zx4:$cc)>; +def : Pat<(sra (shl (i64 (anyext (i32 (z_select_ccmask 1, 0, uimm8zx4:$valid, + uimm8zx4:$cc)))), + (i32 63)), + (i32 63)), + (Select64 (LGHI -1), (LGHI 0), uimm8zx4:$valid, uimm8zx4:$cc)>; + +// Peepholes for turning scalar operations into block operations. +defm : BlockLoadStore<anyextloadi8, i32, MVCSequence, NCSequence, OCSequence, + XCSequence, 1>; +defm : BlockLoadStore<anyextloadi16, i32, MVCSequence, NCSequence, OCSequence, + XCSequence, 2>; +defm : BlockLoadStore<load, i32, MVCSequence, NCSequence, OCSequence, + XCSequence, 4>; +defm : BlockLoadStore<anyextloadi8, i64, MVCSequence, NCSequence, + OCSequence, XCSequence, 1>; +defm : BlockLoadStore<anyextloadi16, i64, MVCSequence, NCSequence, OCSequence, + XCSequence, 2>; +defm : BlockLoadStore<anyextloadi32, i64, MVCSequence, NCSequence, OCSequence, + XCSequence, 4>; +defm : BlockLoadStore<load, i64, MVCSequence, NCSequence, OCSequence, + XCSequence, 8>; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp new file mode 100644 index 0000000..ba027d4 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp @@ -0,0 +1,462 @@ +//===-- SystemZLongBranch.cpp - Branch lengthening for SystemZ ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass makes sure that all branches are in range. There are several ways +// in which this could be done. One aggressive approach is to assume that all +// branches are in range and successively replace those that turn out not +// to be in range with a longer form (branch relaxation). 
A simple +// implementation is to continually walk through the function relaxing +// branches until no more changes are needed and a fixed point is reached. +// However, in the pathological worst case, this implementation is +// quadratic in the number of blocks; relaxing branch N can make branch N-1 +// go out of range, which in turn can make branch N-2 go out of range, +// and so on. +// +// An alternative approach is to assume that all branches must be +// converted to their long forms, then reinstate the short forms of +// branches that, even under this pessimistic assumption, turn out to be +// in range (branch shortening). This too can be implemented as a function +// walk that is repeated until a fixed point is reached. In general, +// the result of shortening is not as good as that of relaxation, and +// shortening is also quadratic in the worst case; shortening branch N +// can bring branch N-1 in range of the short form, which in turn can do +// the same for branch N-2, and so on. The main advantage of shortening +// is that each walk through the function produces valid code, so it is +// possible to stop at any point after the first walk. The quadraticness +// could therefore be handled with a maximum pass count, although the +// question then becomes: what maximum count should be used? +// +// On SystemZ, long branches are only needed for functions bigger than 64k, +// which are relatively rare to begin with, and the long branch sequences +// are actually relatively cheap. It therefore doesn't seem worth spending +// much compilation time on the problem. Instead, the approach we take is: +// +// (1) Work out the address that each block would have if no branches +// need relaxing. Exit the pass early if all branches are in range +// according to this assumption. +// +// (2) Work out the address that each block would have if all branches +// need relaxing. 
+// +// (3) Walk through the block calculating the final address of each instruction +// and relaxing those that need to be relaxed. For backward branches, +// this check uses the final address of the target block, as calculated +// earlier in the walk. For forward branches, this check uses the +// address of the target block that was calculated in (2). Both checks +// give a conservatively-correct range. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "systemz-long-branch" + +#include "SystemZTargetMachine.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" + +using namespace llvm; + +STATISTIC(LongBranches, "Number of long branches."); + +namespace { + // Represents positional information about a basic block. + struct MBBInfo { + // The address that we currently assume the block has. + uint64_t Address; + + // The size of the block in bytes, excluding terminators. + // This value never changes. + uint64_t Size; + + // The minimum alignment of the block, as a log2 value. + // This value never changes. + unsigned Alignment; + + // The number of terminators in this block. This value never changes. + unsigned NumTerminators; + + MBBInfo() + : Address(0), Size(0), Alignment(0), NumTerminators(0) {} + }; + + // Represents the state of a block terminator. + struct TerminatorInfo { + // If this terminator is a relaxable branch, this points to the branch + // instruction, otherwise it is null. + MachineInstr *Branch; + + // The address that we currently assume the terminator has. + uint64_t Address; + + // The current size of the terminator in bytes. 
+ uint64_t Size; + + // If Branch is nonnull, this is the number of the target block, + // otherwise it is unused. + unsigned TargetBlock; + + // If Branch is nonnull, this is the length of the longest relaxed form, + // otherwise it is zero. + unsigned ExtraRelaxSize; + + TerminatorInfo() : Branch(0), Size(0), TargetBlock(0), ExtraRelaxSize(0) {} + }; + + // Used to keep track of the current position while iterating over the blocks. + struct BlockPosition { + // The address that we assume this position has. + uint64_t Address; + + // The number of low bits in Address that are known to be the same + // as the runtime address. + unsigned KnownBits; + + BlockPosition(unsigned InitialAlignment) + : Address(0), KnownBits(InitialAlignment) {} + }; + + class SystemZLongBranch : public MachineFunctionPass { + public: + static char ID; + SystemZLongBranch(const SystemZTargetMachine &tm) + : MachineFunctionPass(ID), TII(0) {} + + virtual const char *getPassName() const { + return "SystemZ Long Branch"; + } + + bool runOnMachineFunction(MachineFunction &F); + + private: + void skipNonTerminators(BlockPosition &Position, MBBInfo &Block); + void skipTerminator(BlockPosition &Position, TerminatorInfo &Terminator, + bool AssumeRelaxed); + TerminatorInfo describeTerminator(MachineInstr *MI); + uint64_t initMBBInfo(); + bool mustRelaxBranch(const TerminatorInfo &Terminator, uint64_t Address); + bool mustRelaxABranch(); + void setWorstCaseAddresses(); + void splitBranchOnCount(MachineInstr *MI, unsigned AddOpcode); + void splitCompareBranch(MachineInstr *MI, unsigned CompareOpcode); + void relaxBranch(TerminatorInfo &Terminator); + void relaxBranches(); + + const SystemZInstrInfo *TII; + MachineFunction *MF; + SmallVector<MBBInfo, 16> MBBs; + SmallVector<TerminatorInfo, 16> Terminators; + }; + + char SystemZLongBranch::ID = 0; + + const uint64_t MaxBackwardRange = 0x10000; + const uint64_t MaxForwardRange = 0xfffe; +} // end of anonymous namespace + +FunctionPass 
*llvm::createSystemZLongBranchPass(SystemZTargetMachine &TM) { + return new SystemZLongBranch(TM); +} + +// Position describes the state immediately before Block. Update Block +// accordingly and move Position to the end of the block's non-terminator +// instructions. +void SystemZLongBranch::skipNonTerminators(BlockPosition &Position, + MBBInfo &Block) { + if (Block.Alignment > Position.KnownBits) { + // When calculating the address of Block, we need to conservatively + // assume that Block had the worst possible misalignment. + Position.Address += ((uint64_t(1) << Block.Alignment) - + (uint64_t(1) << Position.KnownBits)); + Position.KnownBits = Block.Alignment; + } + + // Align the addresses. + uint64_t AlignMask = (uint64_t(1) << Block.Alignment) - 1; + Position.Address = (Position.Address + AlignMask) & ~AlignMask; + + // Record the block's position. + Block.Address = Position.Address; + + // Move past the non-terminators in the block. + Position.Address += Block.Size; +} + +// Position describes the state immediately before Terminator. +// Update Terminator accordingly and move Position past it. +// Assume that Terminator will be relaxed if AssumeRelaxed. +void SystemZLongBranch::skipTerminator(BlockPosition &Position, + TerminatorInfo &Terminator, + bool AssumeRelaxed) { + Terminator.Address = Position.Address; + Position.Address += Terminator.Size; + if (AssumeRelaxed) + Position.Address += Terminator.ExtraRelaxSize; +} + +// Return a description of terminator instruction MI. +TerminatorInfo SystemZLongBranch::describeTerminator(MachineInstr *MI) { + TerminatorInfo Terminator; + Terminator.Size = TII->getInstSizeInBytes(MI); + if (MI->isConditionalBranch() || MI->isUnconditionalBranch()) { + switch (MI->getOpcode()) { + case SystemZ::J: + // Relaxes to JG, which is 2 bytes longer. + Terminator.ExtraRelaxSize = 2; + break; + case SystemZ::BRC: + // Relaxes to BRCL, which is 2 bytes longer. 
+ Terminator.ExtraRelaxSize = 2; + break; + case SystemZ::BRCT: + case SystemZ::BRCTG: + // Relaxes to A(G)HI and BRCL, which is 6 bytes longer. + Terminator.ExtraRelaxSize = 6; + break; + case SystemZ::CRJ: + case SystemZ::CLRJ: + // Relaxes to a C(L)R/BRCL sequence, which is 2 bytes longer. + Terminator.ExtraRelaxSize = 2; + break; + case SystemZ::CGRJ: + case SystemZ::CLGRJ: + // Relaxes to a C(L)GR/BRCL sequence, which is 4 bytes longer. + Terminator.ExtraRelaxSize = 4; + break; + case SystemZ::CIJ: + case SystemZ::CGIJ: + // Relaxes to a C(G)HI/BRCL sequence, which is 4 bytes longer. + Terminator.ExtraRelaxSize = 4; + break; + case SystemZ::CLIJ: + case SystemZ::CLGIJ: + // Relaxes to a CL(G)FI/BRCL sequence, which is 6 bytes longer. + Terminator.ExtraRelaxSize = 6; + break; + default: + llvm_unreachable("Unrecognized branch instruction"); + } + Terminator.Branch = MI; + Terminator.TargetBlock = + TII->getBranchInfo(MI).Target->getMBB()->getNumber(); + } + return Terminator; +} + +// Fill MBBs and Terminators, setting the addresses on the assumption +// that no branches need relaxation. Return the size of the function under +// this assumption. +uint64_t SystemZLongBranch::initMBBInfo() { + MF->RenumberBlocks(); + unsigned NumBlocks = MF->size(); + + MBBs.clear(); + MBBs.resize(NumBlocks); + + Terminators.clear(); + Terminators.reserve(NumBlocks); + + BlockPosition Position(MF->getAlignment()); + for (unsigned I = 0; I < NumBlocks; ++I) { + MachineBasicBlock *MBB = MF->getBlockNumbered(I); + MBBInfo &Block = MBBs[I]; + + // Record the alignment, for quick access. + Block.Alignment = MBB->getAlignment(); + + // Calculate the size of the fixed part of the block. + MachineBasicBlock::iterator MI = MBB->begin(); + MachineBasicBlock::iterator End = MBB->end(); + while (MI != End && !MI->isTerminator()) { + Block.Size += TII->getInstSizeInBytes(MI); + ++MI; + } + skipNonTerminators(Position, Block); + + // Add the terminators. 
+ while (MI != End) { + if (!MI->isDebugValue()) { + assert(MI->isTerminator() && "Terminator followed by non-terminator"); + Terminators.push_back(describeTerminator(MI)); + skipTerminator(Position, Terminators.back(), false); + ++Block.NumTerminators; + } + ++MI; + } + } + + return Position.Address; +} + +// Return true if, under current assumptions, Terminator would need to be +// relaxed if it were placed at address Address. +bool SystemZLongBranch::mustRelaxBranch(const TerminatorInfo &Terminator, + uint64_t Address) { + if (!Terminator.Branch) + return false; + + const MBBInfo &Target = MBBs[Terminator.TargetBlock]; + if (Address >= Target.Address) { + if (Address - Target.Address <= MaxBackwardRange) + return false; + } else { + if (Target.Address - Address <= MaxForwardRange) + return false; + } + + return true; +} + +// Return true if, under current assumptions, any terminator needs +// to be relaxed. +bool SystemZLongBranch::mustRelaxABranch() { + for (SmallVectorImpl<TerminatorInfo>::iterator TI = Terminators.begin(), + TE = Terminators.end(); TI != TE; ++TI) + if (mustRelaxBranch(*TI, TI->Address)) + return true; + return false; +} + +// Set the address of each block on the assumption that all branches +// must be long. +void SystemZLongBranch::setWorstCaseAddresses() { + SmallVector<TerminatorInfo, 16>::iterator TI = Terminators.begin(); + BlockPosition Position(MF->getAlignment()); + for (SmallVectorImpl<MBBInfo>::iterator BI = MBBs.begin(), BE = MBBs.end(); + BI != BE; ++BI) { + skipNonTerminators(Position, *BI); + for (unsigned BTI = 0, BTE = BI->NumTerminators; BTI != BTE; ++BTI) { + skipTerminator(Position, *TI, true); + ++TI; + } + } +} + +// Split BRANCH ON COUNT MI into the addition given by AddOpcode followed +// by a BRCL on the result. 
+void SystemZLongBranch::splitBranchOnCount(MachineInstr *MI, + unsigned AddOpcode) { + MachineBasicBlock *MBB = MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + BuildMI(*MBB, MI, DL, TII->get(AddOpcode)) + .addOperand(MI->getOperand(0)) + .addOperand(MI->getOperand(1)) + .addImm(-1); + MachineInstr *BRCL = BuildMI(*MBB, MI, DL, TII->get(SystemZ::BRCL)) + .addImm(SystemZ::CCMASK_ICMP) + .addImm(SystemZ::CCMASK_CMP_NE) + .addOperand(MI->getOperand(2)); + // The implicit use of CC is a killing use. + BRCL->addRegisterKilled(SystemZ::CC, &TII->getRegisterInfo()); + MI->eraseFromParent(); +} + +// Split MI into the comparison given by CompareOpcode followed +// a BRCL on the result. +void SystemZLongBranch::splitCompareBranch(MachineInstr *MI, + unsigned CompareOpcode) { + MachineBasicBlock *MBB = MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + BuildMI(*MBB, MI, DL, TII->get(CompareOpcode)) + .addOperand(MI->getOperand(0)) + .addOperand(MI->getOperand(1)); + MachineInstr *BRCL = BuildMI(*MBB, MI, DL, TII->get(SystemZ::BRCL)) + .addImm(SystemZ::CCMASK_ICMP) + .addOperand(MI->getOperand(2)) + .addOperand(MI->getOperand(3)); + // The implicit use of CC is a killing use. + BRCL->addRegisterKilled(SystemZ::CC, &TII->getRegisterInfo()); + MI->eraseFromParent(); +} + +// Relax the branch described by Terminator. 
+void SystemZLongBranch::relaxBranch(TerminatorInfo &Terminator) { + MachineInstr *Branch = Terminator.Branch; + switch (Branch->getOpcode()) { + case SystemZ::J: + Branch->setDesc(TII->get(SystemZ::JG)); + break; + case SystemZ::BRC: + Branch->setDesc(TII->get(SystemZ::BRCL)); + break; + case SystemZ::BRCT: + splitBranchOnCount(Branch, SystemZ::AHI); + break; + case SystemZ::BRCTG: + splitBranchOnCount(Branch, SystemZ::AGHI); + break; + case SystemZ::CRJ: + splitCompareBranch(Branch, SystemZ::CR); + break; + case SystemZ::CGRJ: + splitCompareBranch(Branch, SystemZ::CGR); + break; + case SystemZ::CIJ: + splitCompareBranch(Branch, SystemZ::CHI); + break; + case SystemZ::CGIJ: + splitCompareBranch(Branch, SystemZ::CGHI); + break; + case SystemZ::CLRJ: + splitCompareBranch(Branch, SystemZ::CLR); + break; + case SystemZ::CLGRJ: + splitCompareBranch(Branch, SystemZ::CLGR); + break; + case SystemZ::CLIJ: + splitCompareBranch(Branch, SystemZ::CLFI); + break; + case SystemZ::CLGIJ: + splitCompareBranch(Branch, SystemZ::CLGFI); + break; + default: + llvm_unreachable("Unrecognized branch"); + } + + Terminator.Size += Terminator.ExtraRelaxSize; + Terminator.ExtraRelaxSize = 0; + Terminator.Branch = 0; + + ++LongBranches; +} + +// Run a shortening pass and relax any branches that need to be relaxed. 
+void SystemZLongBranch::relaxBranches() { + SmallVector<TerminatorInfo, 16>::iterator TI = Terminators.begin(); + BlockPosition Position(MF->getAlignment()); + for (SmallVectorImpl<MBBInfo>::iterator BI = MBBs.begin(), BE = MBBs.end(); + BI != BE; ++BI) { + skipNonTerminators(Position, *BI); + for (unsigned BTI = 0, BTE = BI->NumTerminators; BTI != BTE; ++BTI) { + assert(Position.Address <= TI->Address && + "Addresses shouldn't go forwards"); + if (mustRelaxBranch(*TI, Position.Address)) + relaxBranch(*TI); + skipTerminator(Position, *TI, false); + ++TI; + } + } +} + +bool SystemZLongBranch::runOnMachineFunction(MachineFunction &F) { + TII = static_cast<const SystemZInstrInfo *>(F.getTarget().getInstrInfo()); + MF = &F; + uint64_t Size = initMBBInfo(); + if (Size <= MaxForwardRange || !mustRelaxABranch()) + return false; + + setWorstCaseAddresses(); + relaxBranches(); + return true; +} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.cpp index 5d83321..ff9a6c0 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.cpp @@ -15,20 +15,6 @@ using namespace llvm; -// Where relaxable pairs of reloc-generating instructions exist, -// we tend to use the longest form by default, since that produces -// correct assembly in cases where no relaxation is performed. -// If Opcode is one such instruction, return the opcode for the -// shortest possible form instead, otherwise return Opcode itself. -static unsigned getShortenedInstr(unsigned Opcode) { - switch (Opcode) { - case SystemZ::BRCL: return SystemZ::BRC; - case SystemZ::JG: return SystemZ::J; - case SystemZ::BRASL: return SystemZ::BRAS; - } - return Opcode; -} - // Return the VK_* enumeration for MachineOperand target flags Flags. 
static MCSymbolRefExpr::VariantKind getVariantKind(unsigned Flags) { switch (Flags & SystemZII::MO_SYMBOL_MODIFIER) { @@ -40,77 +26,75 @@ static MCSymbolRefExpr::VariantKind getVariantKind(unsigned Flags) { llvm_unreachable("Unrecognised MO_ACCESS_MODEL"); } -SystemZMCInstLower::SystemZMCInstLower(Mangler *mang, MCContext &ctx, +SystemZMCInstLower::SystemZMCInstLower(MCContext &ctx, SystemZAsmPrinter &asmprinter) - : Mang(mang), Ctx(ctx), AsmPrinter(asmprinter) {} + : Ctx(ctx), AsmPrinter(asmprinter) {} -MCOperand SystemZMCInstLower::lowerSymbolOperand(const MachineOperand &MO, - const MCSymbol *Symbol, - int64_t Offset) const { - MCSymbolRefExpr::VariantKind Kind = getVariantKind(MO.getTargetFlags()); - const MCExpr *Expr = MCSymbolRefExpr::Create(Symbol, Kind, Ctx); - if (Offset) { - const MCExpr *OffsetExpr = MCConstantExpr::Create(Offset, Ctx); - Expr = MCBinaryExpr::CreateAdd(Expr, OffsetExpr, Ctx); +const MCExpr * +SystemZMCInstLower::getExpr(const MachineOperand &MO, + MCSymbolRefExpr::VariantKind Kind) const { + const MCSymbol *Symbol; + bool HasOffset = true; + switch (MO.getType()) { + case MachineOperand::MO_MachineBasicBlock: + Symbol = MO.getMBB()->getSymbol(); + HasOffset = false; + break; + + case MachineOperand::MO_GlobalAddress: + Symbol = AsmPrinter.getSymbol(MO.getGlobal()); + break; + + case MachineOperand::MO_ExternalSymbol: + Symbol = AsmPrinter.GetExternalSymbolSymbol(MO.getSymbolName()); + break; + + case MachineOperand::MO_JumpTableIndex: + Symbol = AsmPrinter.GetJTISymbol(MO.getIndex()); + HasOffset = false; + break; + + case MachineOperand::MO_ConstantPoolIndex: + Symbol = AsmPrinter.GetCPISymbol(MO.getIndex()); + break; + + case MachineOperand::MO_BlockAddress: + Symbol = AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()); + break; + + default: + llvm_unreachable("unknown operand type"); } - return MCOperand::CreateExpr(Expr); + const MCExpr *Expr = MCSymbolRefExpr::Create(Symbol, Kind, Ctx); + if (HasOffset) + if (int64_t Offset = 
MO.getOffset()) { + const MCExpr *OffsetExpr = MCConstantExpr::Create(Offset, Ctx); + Expr = MCBinaryExpr::CreateAdd(Expr, OffsetExpr, Ctx); + } + return Expr; } MCOperand SystemZMCInstLower::lowerOperand(const MachineOperand &MO) const { switch (MO.getType()) { - default: - llvm_unreachable("unknown operand type"); - case MachineOperand::MO_Register: - // Ignore all implicit register operands. - if (MO.isImplicit()) - return MCOperand(); return MCOperand::CreateReg(MO.getReg()); case MachineOperand::MO_Immediate: return MCOperand::CreateImm(MO.getImm()); - case MachineOperand::MO_MachineBasicBlock: - return lowerSymbolOperand(MO, MO.getMBB()->getSymbol(), - /* MO has no offset field */0); - - case MachineOperand::MO_GlobalAddress: - return lowerSymbolOperand(MO, Mang->getSymbol(MO.getGlobal()), - MO.getOffset()); - - case MachineOperand::MO_ExternalSymbol: { - StringRef Name = MO.getSymbolName(); - return lowerSymbolOperand(MO, AsmPrinter.GetExternalSymbolSymbol(Name), - MO.getOffset()); - } - - case MachineOperand::MO_JumpTableIndex: - return lowerSymbolOperand(MO, AsmPrinter.GetJTISymbol(MO.getIndex()), - /* MO has no offset field */0); - - case MachineOperand::MO_ConstantPoolIndex: - return lowerSymbolOperand(MO, AsmPrinter.GetCPISymbol(MO.getIndex()), - MO.getOffset()); - - case MachineOperand::MO_BlockAddress: { - const BlockAddress *BA = MO.getBlockAddress(); - return lowerSymbolOperand(MO, AsmPrinter.GetBlockAddressSymbol(BA), - MO.getOffset()); + default: { + MCSymbolRefExpr::VariantKind Kind = getVariantKind(MO.getTargetFlags()); + return MCOperand::CreateExpr(getExpr(MO, Kind)); } } } void SystemZMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { - unsigned Opcode = MI->getOpcode(); - // When emitting binary code, start with the shortest form of an instruction - // and then relax it where necessary. 
- if (!AsmPrinter.OutStreamer.hasRawTextSupport()) - Opcode = getShortenedInstr(Opcode); - OutMI.setOpcode(Opcode); + OutMI.setOpcode(MI->getOpcode()); for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) { const MachineOperand &MO = MI->getOperand(I); - MCOperand MCOp = lowerOperand(MO); - if (MCOp.isValid()) - OutMI.addOperand(MCOp); + // Ignore all implicit register operands. + if (!MO.isReg() || !MO.isImplicit()) + OutMI.addOperand(lowerOperand(MO)); } } diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.h b/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.h index afa72f3..f6d5ac8 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.h +++ b/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.h @@ -10,37 +10,34 @@ #ifndef LLVM_SYSTEMZMCINSTLOWER_H #define LLVM_SYSTEMZMCINSTLOWER_H +#include "llvm/MC/MCExpr.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/Compiler.h" namespace llvm { -class MCContext; class MCInst; class MCOperand; -class MCSymbol; class MachineInstr; class MachineOperand; class Mangler; class SystemZAsmPrinter; class LLVM_LIBRARY_VISIBILITY SystemZMCInstLower { - Mangler *Mang; MCContext &Ctx; SystemZAsmPrinter &AsmPrinter; public: - SystemZMCInstLower(Mangler *mang, MCContext &ctx, - SystemZAsmPrinter &asmPrinter); + SystemZMCInstLower(MCContext &ctx, SystemZAsmPrinter &asmPrinter); // Lower MachineInstr MI to MCInst OutMI. void lower(const MachineInstr *MI, MCInst &OutMI) const; - // Return an MCOperand for MO. Return an empty operand if MO is implicit. + // Return an MCOperand for MO. MCOperand lowerOperand(const MachineOperand& MO) const; - // Return an MCOperand for MO, given that it equals Symbol + Offset. - MCOperand lowerSymbolOperand(const MachineOperand &MO, - const MCSymbol *Symbol, int64_t Offset) const; + // Return an MCExpr for symbolic operand MO with variant kind Kind. 
+ const MCExpr *getExpr(const MachineOperand &MO, + MCSymbolRefExpr::VariantKind Kind) const; }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp new file mode 100644 index 0000000..00572d0 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp @@ -0,0 +1,17 @@ +//== SystemZMachineFuctionInfo.cpp - SystemZ machine function info-*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "SystemZMachineFunctionInfo.h" + +using namespace llvm; + + +// pin vtable to this file +void SystemZMachineFunctionInfo::anchor() {} + diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h index 1dc05a7e..845291f 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h +++ b/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h @@ -15,7 +15,7 @@ namespace llvm { class SystemZMachineFunctionInfo : public MachineFunctionInfo { - unsigned SavedGPRFrameSize; + virtual void anchor(); unsigned LowSavedGPR; unsigned HighSavedGPR; unsigned VarArgsFirstGPR; @@ -26,14 +26,8 @@ class SystemZMachineFunctionInfo : public MachineFunctionInfo { public: explicit SystemZMachineFunctionInfo(MachineFunction &MF) - : SavedGPRFrameSize(0), LowSavedGPR(0), HighSavedGPR(0), VarArgsFirstGPR(0), - VarArgsFirstFPR(0), VarArgsFrameIndex(0), RegSaveFrameIndex(0), - ManipulatesSP(false) {} - - // Get and set the number of bytes allocated by generic code to store - // call-saved GPRs. 
- unsigned getSavedGPRFrameSize() const { return SavedGPRFrameSize; } - void setSavedGPRFrameSize(unsigned bytes) { SavedGPRFrameSize = bytes; } + : LowSavedGPR(0), HighSavedGPR(0), VarArgsFirstGPR(0), VarArgsFirstFPR(0), + VarArgsFrameIndex(0), RegSaveFrameIndex(0), ManipulatesSP(false) {} // Get and set the first call-saved GPR that should be saved and restored // by this function. This is 0 if no GPRs need to be saved or restored. diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td b/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td index 0abc3f7..3ad146c 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td @@ -24,57 +24,93 @@ class ImmediateAsmOperand<string name> class Immediate<ValueType vt, code pred, SDNodeXForm xform, string asmop> : PatLeaf<(vt imm), pred, xform>, Operand<vt> { let PrintMethod = "print"##asmop##"Operand"; + let DecoderMethod = "decode"##asmop##"Operand"; let ParserMatchClass = !cast<AsmOperandClass>(asmop); } +// Constructs an asm operand for a PC-relative address. SIZE says how +// many bits there are. +class PCRelAsmOperand<string size> : ImmediateAsmOperand<"PCRel"##size> { + let PredicateMethod = "isImm"; + let ParserMethod = "parsePCRel"##size; +} + +// Constructs an operand for a PC-relative address with address type VT. +// ASMOP is the associated asm operand. +class PCRelOperand<ValueType vt, AsmOperandClass asmop> : Operand<vt> { + let PrintMethod = "printPCRelOperand"; + let ParserMatchClass = asmop; +} + // Constructs both a DAG pattern and instruction operand for a PC-relative -// address with address size VT. SELF is the name of the operand. -class PCRelAddress<ValueType vt, string self> - : ComplexPattern<vt, 1, "selectPCRelAddress", [z_pcrel_wrapper]>, - Operand<vt> { +// address with address size VT. SELF is the name of the operand and +// ASMOP is the associated asm operand. 
+class PCRelAddress<ValueType vt, string self, AsmOperandClass asmop> + : ComplexPattern<vt, 1, "selectPCRelAddress", + [z_pcrel_wrapper, z_pcrel_offset]>, + PCRelOperand<vt, asmop> { let MIOperandInfo = (ops !cast<Operand>(self)); } // Constructs an AsmOperandClass for addressing mode FORMAT, treating the // registers as having BITSIZE bits and displacements as having DISPSIZE bits. -class AddressAsmOperand<string format, string bitsize, string dispsize> +// LENGTH is "LenN" for addresses with an N-bit length field, otherwise it +// is "". +class AddressAsmOperand<string format, string bitsize, string dispsize, + string length = ""> : AsmOperandClass { - let Name = format##bitsize##"Disp"##dispsize; + let Name = format##bitsize##"Disp"##dispsize##length; let ParserMethod = "parse"##format##bitsize; let RenderMethod = "add"##format##"Operands"; } // Constructs both a DAG pattern and instruction operand for an addressing mode. -// The mode is selected by custom code in selectTYPE...SUFFIX(). The address -// registers have BITSIZE bits and displacements have DISPSIZE bits. NUMOPS is -// the number of operands that make up an address and OPERANDS lists the types -// of those operands using (ops ...). FORMAT is the type of addressing mode, -// which needs to match the names used in AddressAsmOperand. -class AddressingMode<string type, string bitsize, string dispsize, - string suffix, int numops, string format, dag operands> +// FORMAT, BITSIZE, DISPSIZE and LENGTH are the parameters to an associated +// AddressAsmOperand. OPERANDS is a list of NUMOPS individual operands +// (base register, displacement, etc.). SELTYPE is the type of the memory +// operand for selection purposes; sometimes we want different selection +// choices for the same underlying addressing mode. SUFFIX is similarly +// a suffix appended to the displacement for selection purposes; +// e.g. 
we want to reject small 20-bit displacements if a 12-bit form +// also exists, but we want to accept them otherwise. +class AddressingMode<string seltype, string bitsize, string dispsize, + string suffix, string length, int numops, string format, + dag operands> : ComplexPattern<!cast<ValueType>("i"##bitsize), numops, - "select"##type##dispsize##suffix, + "select"##seltype##dispsize##suffix##length, [add, sub, or, frameindex, z_adjdynalloc]>, Operand<!cast<ValueType>("i"##bitsize)> { let PrintMethod = "print"##format##"Operand"; + let EncoderMethod = "get"##format##dispsize##length##"Encoding"; + let DecoderMethod = + "decode"##format##bitsize##"Disp"##dispsize##length##"Operand"; let MIOperandInfo = operands; let ParserMatchClass = - !cast<AddressAsmOperand>(format##bitsize##"Disp"##dispsize); + !cast<AddressAsmOperand>(format##bitsize##"Disp"##dispsize##length); } // An addressing mode with a base and displacement but no index. class BDMode<string type, string bitsize, string dispsize, string suffix> - : AddressingMode<type, bitsize, dispsize, suffix, 2, "BDAddr", + : AddressingMode<type, bitsize, dispsize, suffix, "", 2, "BDAddr", (ops !cast<RegisterOperand>("ADDR"##bitsize), !cast<Immediate>("disp"##dispsize##"imm"##bitsize))>; // An addressing mode with a base, displacement and index. class BDXMode<string type, string bitsize, string dispsize, string suffix> - : AddressingMode<type, bitsize, dispsize, suffix, 3, "BDXAddr", + : AddressingMode<type, bitsize, dispsize, suffix, "", 3, "BDXAddr", (ops !cast<RegisterOperand>("ADDR"##bitsize), !cast<Immediate>("disp"##dispsize##"imm"##bitsize), !cast<RegisterOperand>("ADDR"##bitsize))>; +// A BDMode paired with an immediate length operand of LENSIZE bits. 
+class BDLMode<string type, string bitsize, string dispsize, string suffix, + string lensize> + : AddressingMode<type, bitsize, dispsize, suffix, "Len"##lensize, 3, + "BDLAddr", + (ops !cast<RegisterOperand>("ADDR"##bitsize), + !cast<Immediate>("disp"##dispsize##"imm"##bitsize), + !cast<Immediate>("imm"##bitsize))>; + //===----------------------------------------------------------------------===// // Extracting immediate operands from nodes // These all create MVT::i64 nodes to ensure the value is not sign-extended @@ -298,6 +334,10 @@ def imm64sx8 : Immediate<i64, [{ return isInt<8>(N->getSExtValue()); }], SIMM8, "S8Imm">; +def imm64zx8 : Immediate<i64, [{ + return isUInt<8>(N->getSExtValue()); +}], UIMM8, "U8Imm">; + def imm64sx16 : Immediate<i64, [{ return isInt<16>(N->getSExtValue()); }], SIMM16, "S16Imm">; @@ -318,7 +358,7 @@ def imm64zx32n : Immediate<i64, [{ return isUInt<32>(-N->getSExtValue()); }], NEGIMM32, "U32Imm">; -def imm64 : ImmLeaf<i64, [{}]>; +def imm64 : ImmLeaf<i64, [{}]>, Operand<i64>; //===----------------------------------------------------------------------===// // Floating-point immediates @@ -334,30 +374,26 @@ def fpimmneg0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(-0.0); }]>; // Symbolic address operands //===----------------------------------------------------------------------===// +// PC-relative asm operands. +def PCRel16 : PCRelAsmOperand<"16">; +def PCRel32 : PCRelAsmOperand<"32">; + // PC-relative offsets of a basic block. The offset is sign-extended // and multiplied by 2. -def brtarget16 : Operand<OtherVT> { +def brtarget16 : PCRelOperand<OtherVT, PCRel16> { let EncoderMethod = "getPC16DBLEncoding"; + let DecoderMethod = "decodePC16DBLOperand"; } -def brtarget32 : Operand<OtherVT> { +def brtarget32 : PCRelOperand<OtherVT, PCRel32> { let EncoderMethod = "getPC32DBLEncoding"; + let DecoderMethod = "decodePC32DBLOperand"; } // A PC-relative offset of a global value. The offset is sign-extended // and multiplied by 2. 
-def pcrel32 : PCRelAddress<i64, "pcrel32"> { +def pcrel32 : PCRelAddress<i64, "pcrel32", PCRel32> { let EncoderMethod = "getPC32DBLEncoding"; -} - -// A PC-relative offset of a global value when the value is used as a -// call target. The offset is sign-extended and multiplied by 2. -def pcrel16call : PCRelAddress<i64, "pcrel16call"> { - let PrintMethod = "printCallOperand"; - let EncoderMethod = "getPLT16DBLEncoding"; -} -def pcrel32call : PCRelAddress<i64, "pcrel32call"> { - let PrintMethod = "printCallOperand"; - let EncoderMethod = "getPLT32DBLEncoding"; + let DecoderMethod = "decodePC32DBLOperand"; } //===----------------------------------------------------------------------===// @@ -372,22 +408,25 @@ def disp12imm64 : Operand<i64>; def disp20imm32 : Operand<i32>; def disp20imm64 : Operand<i64>; -def BDAddr32Disp12 : AddressAsmOperand<"BDAddr", "32", "12">; -def BDAddr32Disp20 : AddressAsmOperand<"BDAddr", "32", "20">; -def BDAddr64Disp12 : AddressAsmOperand<"BDAddr", "64", "12">; -def BDAddr64Disp20 : AddressAsmOperand<"BDAddr", "64", "20">; -def BDXAddr64Disp12 : AddressAsmOperand<"BDXAddr", "64", "12">; -def BDXAddr64Disp20 : AddressAsmOperand<"BDXAddr", "64", "20">; +def BDAddr32Disp12 : AddressAsmOperand<"BDAddr", "32", "12">; +def BDAddr32Disp20 : AddressAsmOperand<"BDAddr", "32", "20">; +def BDAddr64Disp12 : AddressAsmOperand<"BDAddr", "64", "12">; +def BDAddr64Disp20 : AddressAsmOperand<"BDAddr", "64", "20">; +def BDXAddr64Disp12 : AddressAsmOperand<"BDXAddr", "64", "12">; +def BDXAddr64Disp20 : AddressAsmOperand<"BDXAddr", "64", "20">; +def BDLAddr64Disp12Len8 : AddressAsmOperand<"BDLAddr", "64", "12", "Len8">; // DAG patterns and operands for addressing modes. 
Each mode has -// the form <type><range><group> where: +// the form <type><range><group>[<len>] where: // // <type> is one of: // shift : base + displacement (32-bit) // bdaddr : base + displacement +// mviaddr : like bdaddr, but reject cases with a natural index // bdxaddr : base + displacement + index // laaddr : like bdxaddr, but used for Load Address operations // dynalloc : base + displacement + index + ADJDYNALLOC +// bdladdr : base + displacement with a length field // // <range> is one of: // 12 : the displacement is an unsigned 12-bit value @@ -398,20 +437,28 @@ def BDXAddr64Disp20 : AddressAsmOperand<"BDXAddr", "64", "20">; // range value (12 or 20) // only : used when there is no equivalent instruction with the opposite // range value -def shift12only : BDMode <"BDAddr", "32", "12", "Only">; -def shift20only : BDMode <"BDAddr", "32", "20", "Only">; -def bdaddr12only : BDMode <"BDAddr", "64", "12", "Only">; -def bdaddr12pair : BDMode <"BDAddr", "64", "12", "Pair">; -def bdaddr20only : BDMode <"BDAddr", "64", "20", "Only">; -def bdaddr20pair : BDMode <"BDAddr", "64", "20", "Pair">; -def bdxaddr12only : BDXMode<"BDXAddr", "64", "12", "Only">; -def bdxaddr12pair : BDXMode<"BDXAddr", "64", "12", "Pair">; -def bdxaddr20only : BDXMode<"BDXAddr", "64", "20", "Only">; -def bdxaddr20only128 : BDXMode<"BDXAddr", "64", "20", "Only128">; -def bdxaddr20pair : BDXMode<"BDXAddr", "64", "20", "Pair">; -def dynalloc12only : BDXMode<"DynAlloc", "64", "12", "Only">; -def laaddr12pair : BDXMode<"LAAddr", "64", "12", "Pair">; -def laaddr20pair : BDXMode<"LAAddr", "64", "20", "Pair">; +// +// <len> is one of: +// +// <empty> : there is no length field +// len8 : the length field is 8 bits, with a range of [1, 0x100]. 
+def shift12only : BDMode <"BDAddr", "32", "12", "Only">; +def shift20only : BDMode <"BDAddr", "32", "20", "Only">; +def bdaddr12only : BDMode <"BDAddr", "64", "12", "Only">; +def bdaddr12pair : BDMode <"BDAddr", "64", "12", "Pair">; +def bdaddr20only : BDMode <"BDAddr", "64", "20", "Only">; +def bdaddr20pair : BDMode <"BDAddr", "64", "20", "Pair">; +def mviaddr12pair : BDMode <"MVIAddr", "64", "12", "Pair">; +def mviaddr20pair : BDMode <"MVIAddr", "64", "20", "Pair">; +def bdxaddr12only : BDXMode<"BDXAddr", "64", "12", "Only">; +def bdxaddr12pair : BDXMode<"BDXAddr", "64", "12", "Pair">; +def bdxaddr20only : BDXMode<"BDXAddr", "64", "20", "Only">; +def bdxaddr20only128 : BDXMode<"BDXAddr", "64", "20", "Only128">; +def bdxaddr20pair : BDXMode<"BDXAddr", "64", "20", "Pair">; +def dynalloc12only : BDXMode<"DynAlloc", "64", "12", "Only">; +def laaddr12pair : BDXMode<"LAAddr", "64", "12", "Pair">; +def laaddr20pair : BDXMode<"LAAddr", "64", "20", "Pair">; +def bdladdr12onlylen8 : BDLMode<"BDLAddr", "64", "12", "Only", "8">; //===----------------------------------------------------------------------===// // Miscellaneous diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td b/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td index 8c4df56..31cabaa 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td @@ -15,16 +15,25 @@ def SDT_CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i64>, SDTCisVT<1, i64>]>; def SDT_ZCall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>; def SDT_ZCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>; -def SDT_ZBRCCMask : SDTypeProfile<0, 2, +def SDT_ZICmp : SDTypeProfile<0, 3, + [SDTCisSameAs<0, 1>, + SDTCisVT<2, i32>]>; +def SDT_ZBRCCMask : SDTypeProfile<0, 3, [SDTCisVT<0, i8>, - SDTCisVT<1, OtherVT>]>; -def SDT_ZSelectCCMask : SDTypeProfile<1, 3, + SDTCisVT<1, i8>, + SDTCisVT<2, OtherVT>]>; +def SDT_ZSelectCCMask : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, - 
SDTCisVT<3, i8>]>; + SDTCisVT<3, i8>, + SDTCisVT<4, i8>]>; def SDT_ZWrapPtr : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>; +def SDT_ZWrapOffset : SDTypeProfile<1, 2, + [SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisPtrTy<0>]>; def SDT_ZAdjDynAlloc : SDTypeProfile<1, 0, [SDTCisVT<0, i64>]>; def SDT_ZExtractAccess : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, @@ -52,6 +61,24 @@ def SDT_ZAtomicCmpSwapW : SDTypeProfile<1, 6, SDTCisVT<4, i32>, SDTCisVT<5, i32>, SDTCisVT<6, i32>]>; +def SDT_ZMemMemLength : SDTypeProfile<0, 3, + [SDTCisPtrTy<0>, + SDTCisPtrTy<1>, + SDTCisVT<2, i64>]>; +def SDT_ZMemMemLoop : SDTypeProfile<0, 4, + [SDTCisPtrTy<0>, + SDTCisPtrTy<1>, + SDTCisVT<2, i64>, + SDTCisVT<3, i64>]>; +def SDT_ZString : SDTypeProfile<1, 3, + [SDTCisPtrTy<0>, + SDTCisPtrTy<1>, + SDTCisPtrTy<2>, + SDTCisVT<3, i32>]>; +def SDT_ZI32Intrinsic : SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>; +def SDT_ZPrefetch : SDTypeProfile<0, 2, + [SDTCisVT<0, i8>, + SDTCisPtrTy<1>]>; //===----------------------------------------------------------------------===// // Node definitions @@ -70,9 +97,15 @@ def z_retflag : SDNode<"SystemZISD::RET_FLAG", SDTNone, def z_call : SDNode<"SystemZISD::CALL", SDT_ZCall, [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, SDNPVariadic]>; +def z_sibcall : SDNode<"SystemZISD::SIBCALL", SDT_ZCall, + [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, + SDNPVariadic]>; def z_pcrel_wrapper : SDNode<"SystemZISD::PCREL_WRAPPER", SDT_ZWrapPtr, []>; -def z_cmp : SDNode<"SystemZISD::CMP", SDT_ZCmp, [SDNPOutGlue]>; -def z_ucmp : SDNode<"SystemZISD::UCMP", SDT_ZCmp, [SDNPOutGlue]>; +def z_pcrel_offset : SDNode<"SystemZISD::PCREL_OFFSET", + SDT_ZWrapOffset, []>; +def z_icmp : SDNode<"SystemZISD::ICMP", SDT_ZICmp, [SDNPOutGlue]>; +def z_fcmp : SDNode<"SystemZISD::FCMP", SDT_ZCmp, [SDNPOutGlue]>; +def z_tm : SDNode<"SystemZISD::TM", SDT_ZICmp, [SDNPOutGlue]>; def z_br_ccmask : SDNode<"SystemZISD::BR_CCMASK", SDT_ZBRCCMask, [SDNPHasChain, SDNPInGlue]>; def z_select_ccmask : 
SDNode<"SystemZISD::SELECT_CCMASK", SDT_ZSelectCCMask, @@ -81,6 +114,7 @@ def z_adjdynalloc : SDNode<"SystemZISD::ADJDYNALLOC", SDT_ZAdjDynAlloc>; def z_extract_access : SDNode<"SystemZISD::EXTRACT_ACCESS", SDT_ZExtractAccess>; def z_umul_lohi64 : SDNode<"SystemZISD::UMUL_LOHI64", SDT_ZGR128Binary64>; +def z_sdivrem32 : SDNode<"SystemZISD::SDIVREM32", SDT_ZGR128Binary32>; def z_sdivrem64 : SDNode<"SystemZISD::SDIVREM64", SDT_ZGR128Binary64>; def z_udivrem32 : SDNode<"SystemZISD::UDIVREM32", SDT_ZGR128Binary32>; def z_udivrem64 : SDNode<"SystemZISD::UDIVREM64", SDT_ZGR128Binary64>; @@ -102,10 +136,56 @@ def z_atomic_loadw_umin : AtomicWOp<"ATOMIC_LOADW_UMIN">; def z_atomic_loadw_umax : AtomicWOp<"ATOMIC_LOADW_UMAX">; def z_atomic_cmp_swapw : AtomicWOp<"ATOMIC_CMP_SWAPW", SDT_ZAtomicCmpSwapW>; +def z_mvc : SDNode<"SystemZISD::MVC", SDT_ZMemMemLength, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; +def z_mvc_loop : SDNode<"SystemZISD::MVC_LOOP", SDT_ZMemMemLoop, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; +def z_nc : SDNode<"SystemZISD::NC", SDT_ZMemMemLength, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; +def z_nc_loop : SDNode<"SystemZISD::NC_LOOP", SDT_ZMemMemLoop, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; +def z_oc : SDNode<"SystemZISD::OC", SDT_ZMemMemLength, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; +def z_oc_loop : SDNode<"SystemZISD::OC_LOOP", SDT_ZMemMemLoop, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; +def z_xc : SDNode<"SystemZISD::XC", SDT_ZMemMemLength, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; +def z_xc_loop : SDNode<"SystemZISD::XC_LOOP", SDT_ZMemMemLoop, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; +def z_clc : SDNode<"SystemZISD::CLC", SDT_ZMemMemLength, + [SDNPHasChain, SDNPOutGlue, SDNPMayLoad]>; +def z_clc_loop : SDNode<"SystemZISD::CLC_LOOP", SDT_ZMemMemLoop, + [SDNPHasChain, SDNPOutGlue, SDNPMayLoad]>; +def z_strcmp : SDNode<"SystemZISD::STRCMP", SDT_ZString, + [SDNPHasChain, SDNPOutGlue, SDNPMayLoad]>; +def z_stpcpy : 
SDNode<"SystemZISD::STPCPY", SDT_ZString, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; +def z_search_string : SDNode<"SystemZISD::SEARCH_STRING", SDT_ZString, + [SDNPHasChain, SDNPOutGlue, SDNPMayLoad]>; +def z_ipm : SDNode<"SystemZISD::IPM", SDT_ZI32Intrinsic, + [SDNPInGlue]>; +def z_prefetch : SDNode<"SystemZISD::PREFETCH", SDT_ZPrefetch, + [SDNPHasChain, SDNPMayLoad, SDNPMayStore, + SDNPMemOperand]>; + //===----------------------------------------------------------------------===// // Pattern fragments //===----------------------------------------------------------------------===// +// Signed and unsigned comparisons. +def z_scmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, imm), [{ + unsigned Type = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); + return Type != SystemZICMP::UnsignedOnly; +}]>; +def z_ucmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, imm), [{ + unsigned Type = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); + return Type != SystemZICMP::SignedOnly; +}]>; + +// Register- and memory-based TEST UNDER MASK. +def z_tm_reg : PatFrag<(ops node:$a, node:$b), (z_tm node:$a, node:$b, imm)>; +def z_tm_mem : PatFrag<(ops node:$a, node:$b), (z_tm node:$a, node:$b, 0)>; + // Register sign-extend operations. Sub-32-bit values are represented as i32s. def sext8 : PatFrag<(ops node:$src), (sext_inreg node:$src, i8)>; def sext16 : PatFrag<(ops node:$src), (sext_inreg node:$src, i16)>; @@ -120,17 +200,61 @@ def zext32 : PatFrag<(ops node:$src), (zext (i32 node:$src))>; def loadf32 : PatFrag<(ops node:$src), (f32 (load node:$src))>; def loadf64 : PatFrag<(ops node:$src), (f64 (load node:$src))>; +// Extending loads in which the extension type can be signed. 
+def asextload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{ + unsigned Type = cast<LoadSDNode>(N)->getExtensionType(); + return Type == ISD::EXTLOAD || Type == ISD::SEXTLOAD; +}]>; +def asextloadi8 : PatFrag<(ops node:$ptr), (asextload node:$ptr), [{ + return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8; +}]>; +def asextloadi16 : PatFrag<(ops node:$ptr), (asextload node:$ptr), [{ + return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16; +}]>; +def asextloadi32 : PatFrag<(ops node:$ptr), (asextload node:$ptr), [{ + return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32; +}]>; + +// Extending loads in which the extension type can be unsigned. +def azextload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{ + unsigned Type = cast<LoadSDNode>(N)->getExtensionType(); + return Type == ISD::EXTLOAD || Type == ISD::ZEXTLOAD; +}]>; +def azextloadi8 : PatFrag<(ops node:$ptr), (azextload node:$ptr), [{ + return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8; +}]>; +def azextloadi16 : PatFrag<(ops node:$ptr), (azextload node:$ptr), [{ + return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16; +}]>; +def azextloadi32 : PatFrag<(ops node:$ptr), (azextload node:$ptr), [{ + return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32; +}]>; + +// Extending loads in which the extension type doesn't matter. +def anyextload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{ + return cast<LoadSDNode>(N)->getExtensionType() != ISD::NON_EXTLOAD; +}]>; +def anyextloadi8 : PatFrag<(ops node:$ptr), (anyextload node:$ptr), [{ + return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8; +}]>; +def anyextloadi16 : PatFrag<(ops node:$ptr), (anyextload node:$ptr), [{ + return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16; +}]>; +def anyextloadi32 : PatFrag<(ops node:$ptr), (anyextload node:$ptr), [{ + return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32; +}]>; + // Aligned loads. 
class AlignedLoad<SDPatternOperator load> : PatFrag<(ops node:$addr), (load node:$addr), [{ LoadSDNode *Load = cast<LoadSDNode>(N); return Load->getAlignment() >= Load->getMemoryVT().getStoreSize(); }]>; -def aligned_load : AlignedLoad<load>; -def aligned_sextloadi16 : AlignedLoad<sextloadi16>; -def aligned_sextloadi32 : AlignedLoad<sextloadi32>; -def aligned_zextloadi16 : AlignedLoad<zextloadi16>; -def aligned_zextloadi32 : AlignedLoad<zextloadi32>; +def aligned_load : AlignedLoad<load>; +def aligned_asextloadi16 : AlignedLoad<asextloadi16>; +def aligned_asextloadi32 : AlignedLoad<asextloadi32>; +def aligned_azextloadi16 : AlignedLoad<azextloadi16>; +def aligned_azextloadi32 : AlignedLoad<azextloadi32>; // Aligned stores. class AlignedStore<SDPatternOperator store> @@ -142,6 +266,54 @@ def aligned_store : AlignedStore<store>; def aligned_truncstorei16 : AlignedStore<truncstorei16>; def aligned_truncstorei32 : AlignedStore<truncstorei32>; +// Non-volatile loads. Used for instructions that might access the storage +// location multiple times. +class NonvolatileLoad<SDPatternOperator load> + : PatFrag<(ops node:$addr), (load node:$addr), [{ + LoadSDNode *Load = cast<LoadSDNode>(N); + return !Load->isVolatile(); +}]>; +def nonvolatile_load : NonvolatileLoad<load>; +def nonvolatile_anyextloadi8 : NonvolatileLoad<anyextloadi8>; +def nonvolatile_anyextloadi16 : NonvolatileLoad<anyextloadi16>; +def nonvolatile_anyextloadi32 : NonvolatileLoad<anyextloadi32>; + +// Non-volatile stores. 
+class NonvolatileStore<SDPatternOperator store> + : PatFrag<(ops node:$src, node:$addr), (store node:$src, node:$addr), [{ + StoreSDNode *Store = cast<StoreSDNode>(N); + return !Store->isVolatile(); +}]>; +def nonvolatile_store : NonvolatileStore<store>; +def nonvolatile_truncstorei8 : NonvolatileStore<truncstorei8>; +def nonvolatile_truncstorei16 : NonvolatileStore<truncstorei16>; +def nonvolatile_truncstorei32 : NonvolatileStore<truncstorei32>; + +// A store of a load that can be implemented using MVC. +def mvc_store : PatFrag<(ops node:$value, node:$addr), + (unindexedstore node:$value, node:$addr), + [{ return storeLoadCanUseMVC(N); }]>; + +// Binary read-modify-write operations on memory in which the other +// operand is also memory and for which block operations like NC can +// be used. There are two patterns for each operator, depending on +// which operand contains the "other" load. +multiclass block_op<SDPatternOperator operator> { + def "1" : PatFrag<(ops node:$value, node:$addr), + (unindexedstore (operator node:$value, + (unindexedload node:$addr)), + node:$addr), + [{ return storeLoadCanUseBlockBinary(N, 0); }]>; + def "2" : PatFrag<(ops node:$value, node:$addr), + (unindexedstore (operator (unindexedload node:$addr), + node:$value), + node:$addr), + [{ return storeLoadCanUseBlockBinary(N, 1); }]>; +} +defm block_and : block_op<and>; +defm block_or : block_op<or>; +defm block_xor : block_op<xor>; + // Insertions. def inserti8 : PatFrag<(ops node:$src1, node:$src2), (or (and node:$src1, -256), node:$src2)>; @@ -174,6 +346,16 @@ def or_as_revinserti8 : PatFrag<(ops node:$src1, node:$src2), APInt::getLowBitsSet(BitWidth, 8)); }]>; +// Integer absolute, matching the canonical form generated by DAGCombiner. 
+def z_iabs32 : PatFrag<(ops node:$src), + (xor (add node:$src, (sra node:$src, (i32 31))), + (sra node:$src, (i32 31)))>; +def z_iabs64 : PatFrag<(ops node:$src), + (xor (add node:$src, (sra node:$src, (i32 63))), + (sra node:$src, (i32 63)))>; +def z_inegabs32 : PatFrag<(ops node:$src), (ineg (z_iabs32 node:$src))>; +def z_inegabs64 : PatFrag<(ops node:$src), (ineg (z_iabs64 node:$src))>; + // Fused multiply-add and multiply-subtract, but with the order of the // operands matching SystemZ's MA and MS instructions. def z_fma : PatFrag<(ops node:$src1, node:$src2, node:$src3), @@ -186,11 +368,11 @@ def fnabs : PatFrag<(ops node:$ptr), (fneg (fabs node:$ptr))>; // Create a unary operator that loads from memory and then performs // the given operation on it. -class loadu<SDPatternOperator operator> +class loadu<SDPatternOperator operator, SDPatternOperator load = load> : PatFrag<(ops node:$addr), (operator (load node:$addr))>; // Create a store operator that performs the given unary operation // on the value before storing it. 
-class storeu<SDPatternOperator operator> +class storeu<SDPatternOperator operator, SDPatternOperator store = store> : PatFrag<(ops node:$value, node:$addr), (store (operator node:$value), node:$addr)>; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZPatterns.td b/contrib/llvm/lib/Target/SystemZ/SystemZPatterns.td index 3689f74..7706351 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZPatterns.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZPatterns.td @@ -13,7 +13,7 @@ multiclass SXU<SDPatternOperator operator, Instruction insn> { def : Pat<(operator (sext (i32 GR32:$src))), (insn GR32:$src)>; def : Pat<(operator (sext_inreg GR64:$src, i32)), - (insn (EXTRACT_SUBREG GR64:$src, subreg_32bit))>; + (insn (EXTRACT_SUBREG GR64:$src, subreg_l32))>; } // Record that INSN performs a 64-bit version of binary operator OPERATOR @@ -24,7 +24,7 @@ multiclass SXB<SDPatternOperator operator, RegisterOperand cls, def : Pat<(operator cls:$src1, (sext GR32:$src2)), (insn cls:$src1, GR32:$src2)>; def : Pat<(operator cls:$src1, (sext_inreg GR64:$src2, i32)), - (insn cls:$src1, (EXTRACT_SUBREG GR64:$src2, subreg_32bit))>; + (insn cls:$src1, (EXTRACT_SUBREG GR64:$src2, subreg_l32))>; } // Like SXB, but for zero extension. @@ -33,7 +33,7 @@ multiclass ZXB<SDPatternOperator operator, RegisterOperand cls, def : Pat<(operator cls:$src1, (zext GR32:$src2)), (insn cls:$src1, GR32:$src2)>; def : Pat<(operator cls:$src1, (and GR64:$src2, 0xffffffff)), - (insn cls:$src1, (EXTRACT_SUBREG GR64:$src2, subreg_32bit))>; + (insn cls:$src1, (EXTRACT_SUBREG GR64:$src2, subreg_l32))>; } // Record that INSN performs a binary read-modify-write operation, @@ -50,12 +50,8 @@ class RMWI<SDPatternOperator load, SDPatternOperator operator, // memory location. IMM is the type of the second operand. 
multiclass RMWIByte<SDPatternOperator operator, AddressingMode mode, Instruction insn> { - def : RMWI<zextloadi8, operator, truncstorei8, mode, imm32, insn>; - def : RMWI<zextloadi8, operator, truncstorei8, mode, imm64, insn>; - def : RMWI<sextloadi8, operator, truncstorei8, mode, imm32, insn>; - def : RMWI<sextloadi8, operator, truncstorei8, mode, imm64, insn>; - def : RMWI<extloadi8, operator, truncstorei8, mode, imm32, insn>; - def : RMWI<extloadi8, operator, truncstorei8, mode, imm64, insn>; + def : RMWI<anyextloadi8, operator, truncstorei8, mode, imm32, insn>; + def : RMWI<anyextloadi8, operator, truncstorei8, mode, imm64, insn>; } // Record that INSN performs insertion TYPE into a register of class CLS. @@ -69,3 +65,88 @@ multiclass InsertMem<string type, Instruction insn, RegisterOperand cls, (load mode:$src2), cls:$src1), (insn cls:$src1, mode:$src2)>; } + +// INSN stores the low 32 bits of a GPR to a memory with addressing mode MODE. +// Record that it is equivalent to using OPERATOR to store a GR64. +class StoreGR64<Instruction insn, SDPatternOperator operator, + AddressingMode mode> + : Pat<(operator GR64:$R1, mode:$XBD2), + (insn (EXTRACT_SUBREG GR64:$R1, subreg_l32), mode:$XBD2)>; + +// INSN and INSNY are an RX/RXY pair of instructions that store the low +// 32 bits of a GPR to memory. Record that they are equivalent to using +// OPERATOR to store a GR64. +multiclass StoreGR64Pair<Instruction insn, Instruction insny, + SDPatternOperator operator> { + def : StoreGR64<insn, operator, bdxaddr12pair>; + def : StoreGR64<insny, operator, bdxaddr20pair>; +} + +// INSN stores the low 32 bits of a GPR using PC-relative addressing. +// Record that it is equivalent to using OPERATOR to store a GR64. +class StoreGR64PC<Instruction insn, SDPatternOperator operator> + : Pat<(operator GR64:$R1, pcrel32:$XBD2), + (insn (EXTRACT_SUBREG GR64:$R1, subreg_l32), pcrel32:$XBD2)> { + // We want PC-relative addresses to be tried ahead of BD and BDX addresses. 
+ // However, BDXs have two extra operands and are therefore 6 units more + // complex. + let AddedComplexity = 7; +} + +// INSN and INSNINV conditionally store the low 32 bits of a GPR to memory, +// with INSN storing when the condition is true and INSNINV storing when the +// condition is false. Record that they are equivalent to a LOAD/select/STORE +// sequence for GR64s. +multiclass CondStores64<Instruction insn, Instruction insninv, + SDPatternOperator store, SDPatternOperator load, + AddressingMode mode> { + def : Pat<(store (z_select_ccmask GR64:$new, (load mode:$addr), + uimm8zx4:$valid, uimm8zx4:$cc), + mode:$addr), + (insn (EXTRACT_SUBREG GR64:$new, subreg_l32), mode:$addr, + uimm8zx4:$valid, uimm8zx4:$cc)>; + def : Pat<(store (z_select_ccmask (load mode:$addr), GR64:$new, + uimm8zx4:$valid, uimm8zx4:$cc), + mode:$addr), + (insninv (EXTRACT_SUBREG GR64:$new, subreg_l32), mode:$addr, + uimm8zx4:$valid, uimm8zx4:$cc)>; +} + +// Try to use MVC instruction INSN for a load of type LOAD followed by a store +// of the same size. VT is the type of the intermediate (legalized) value and +// LENGTH is the number of bytes loaded by LOAD. +multiclass MVCLoadStore<SDPatternOperator load, ValueType vt, Instruction insn, + bits<5> length> { + def : Pat<(mvc_store (vt (load bdaddr12only:$src)), bdaddr12only:$dest), + (insn bdaddr12only:$dest, bdaddr12only:$src, length)>; +} + +// Use NC-like instruction INSN for block_op operation OPERATOR. +// The other operand is a load of type LOAD, which accesses LENGTH bytes. +// VT is the intermediate legalized type in which the binary operation +// is actually done. +multiclass BinaryLoadStore<SDPatternOperator operator, SDPatternOperator load, + ValueType vt, Instruction insn, bits<5> length> { + def : Pat<(operator (vt (load bdaddr12only:$src)), bdaddr12only:$dest), + (insn bdaddr12only:$dest, bdaddr12only:$src, length)>; +} + +// A convenient way of generating all block peepholes for a particular +// LOAD/VT/LENGTH combination. 
+multiclass BlockLoadStore<SDPatternOperator load, ValueType vt, + Instruction mvc, Instruction nc, Instruction oc, + Instruction xc, bits<5> length> { + defm : MVCLoadStore<load, vt, mvc, length>; + defm : BinaryLoadStore<block_and1, load, vt, nc, length>; + defm : BinaryLoadStore<block_and2, load, vt, nc, length>; + defm : BinaryLoadStore<block_or1, load, vt, oc, length>; + defm : BinaryLoadStore<block_or2, load, vt, oc, length>; + defm : BinaryLoadStore<block_xor1, load, vt, xc, length>; + defm : BinaryLoadStore<block_xor2, load, vt, xc, length>; +} + +// Record that INSN is a LOAD AND TEST that can be used to compare +// registers in CLS against zero. The instruction has separate R1 and R2 +// operands, but they must be the same when the instruction is used like this. +class CompareZeroFP<Instruction insn, RegisterOperand cls> + : Pat<(z_fcmp cls:$reg, (fpimm0)), (insn cls:$reg, cls:$reg)>; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZProcessors.td b/contrib/llvm/lib/Target/SystemZ/SystemZProcessors.td new file mode 100644 index 0000000..f241fb0 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZProcessors.td @@ -0,0 +1,46 @@ +//===-- SystemZ.td - SystemZ processors and features ---------*- tblgen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Processor and feature definitions. 
+// +//===----------------------------------------------------------------------===// + +class SystemZFeature<string extname, string intname, string desc> + : Predicate<"Subtarget.has"##intname##"()">, + AssemblerPredicate<"Feature"##intname, extname>, + SubtargetFeature<extname, "Has"##intname, "true", desc>; + +def FeatureDistinctOps : SystemZFeature< + "distinct-ops", "DistinctOps", + "Assume that the distinct-operands facility is installed" +>; + +def FeatureLoadStoreOnCond : SystemZFeature< + "load-store-on-cond", "LoadStoreOnCond", + "Assume that the load/store-on-condition facility is installed" +>; + +def FeatureHighWord : SystemZFeature< + "high-word", "HighWord", + "Assume that the high-word facility is installed" +>; + +def FeatureFPExtension : SystemZFeature< + "fp-extension", "FPExtension", + "Assume that the floating-point extension facility is installed" +>; + +def : Processor<"generic", NoItineraries, []>; +def : Processor<"z10", NoItineraries, []>; +def : Processor<"z196", NoItineraries, + [FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord, + FeatureFPExtension]>; +def : Processor<"zEC12", NoItineraries, + [FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord, + FeatureFPExtension]>; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp index a0ae7ed..b61ae88 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -17,9 +17,8 @@ using namespace llvm; -SystemZRegisterInfo::SystemZRegisterInfo(SystemZTargetMachine &tm, - const SystemZInstrInfo &tii) - : SystemZGenRegisterInfo(SystemZ::R14D), TM(tm), TII(tii) {} +SystemZRegisterInfo::SystemZRegisterInfo(SystemZTargetMachine &tm) + : SystemZGenRegisterInfo(SystemZ::R14D), TM(tm) {} const uint16_t* SystemZRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { @@ -43,41 +42,19 @@ SystemZRegisterInfo::getReservedRegs(const 
MachineFunction &MF) const { if (TFI->hasFP(MF)) { // R11D is the frame pointer. Reserve all aliases. Reserved.set(SystemZ::R11D); - Reserved.set(SystemZ::R11W); + Reserved.set(SystemZ::R11L); + Reserved.set(SystemZ::R11H); Reserved.set(SystemZ::R10Q); } // R15D is the stack pointer. Reserve all aliases. Reserved.set(SystemZ::R15D); - Reserved.set(SystemZ::R15W); + Reserved.set(SystemZ::R15L); + Reserved.set(SystemZ::R15H); Reserved.set(SystemZ::R14Q); return Reserved; } -bool -SystemZRegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB, - MachineBasicBlock::iterator SaveMBBI, - MachineBasicBlock::iterator &UseMBBI, - const TargetRegisterClass *RC, - unsigned Reg) const { - MachineFunction &MF = *MBB.getParent(); - const SystemZFrameLowering *TFI = - static_cast<const SystemZFrameLowering *>(TM.getFrameLowering()); - unsigned Base = getFrameRegister(MF); - uint64_t Offset = TFI->getEmergencySpillSlotOffset(MF); - DebugLoc DL; - - unsigned LoadOpcode, StoreOpcode; - TII.getLoadStoreOpcodes(RC, LoadOpcode, StoreOpcode); - - // The offset must always be in range of a 12-bit unsigned displacement. 
- BuildMI(MBB, SaveMBBI, DL, TII.get(StoreOpcode)) - .addReg(Reg, RegState::Kill).addReg(Base).addImm(Offset).addReg(0); - BuildMI(MBB, UseMBBI, DL, TII.get(LoadOpcode), Reg) - .addReg(Base).addImm(Offset).addReg(0); - return true; -} - void SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, @@ -86,6 +63,8 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, MachineBasicBlock &MBB = *MI->getParent(); MachineFunction &MF = *MBB.getParent(); + const SystemZInstrInfo &TII = + *static_cast<const SystemZInstrInfo*>(TM.getInstrInfo()); const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); DebugLoc DL = MI->getDebugLoc(); diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h index 91a70de..13f45fa 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h +++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h @@ -22,10 +22,10 @@ namespace SystemZ { // Return the subreg to use for referring to the even and odd registers // in a GR128 pair. Is32Bit says whether we want a GR32 or GR64. inline unsigned even128(bool Is32bit) { - return Is32bit ? subreg_32bit : subreg_high; + return Is32bit ? subreg_hl32 : subreg_h64; } inline unsigned odd128(bool Is32bit) { - return Is32bit ? subreg_low32 : subreg_low; + return Is32bit ? subreg_l32 : subreg_l64; } } @@ -35,10 +35,9 @@ class SystemZInstrInfo; struct SystemZRegisterInfo : public SystemZGenRegisterInfo { private: SystemZTargetMachine &TM; - const SystemZInstrInfo &TII; public: - SystemZRegisterInfo(SystemZTargetMachine &tm, const SystemZInstrInfo &tii); + SystemZRegisterInfo(SystemZTargetMachine &tm); // Override TargetRegisterInfo.h. 
virtual bool requiresRegisterScavenging(const MachineFunction &MF) const @@ -49,15 +48,14 @@ public: LLVM_OVERRIDE { return true; } + virtual bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const + LLVM_OVERRIDE { + return true; + } virtual const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const LLVM_OVERRIDE; virtual BitVector getReservedRegs(const MachineFunction &MF) const LLVM_OVERRIDE; - virtual bool saveScavengerRegister(MachineBasicBlock &MBB, - MachineBasicBlock::iterator SaveMBBI, - MachineBasicBlock::iterator &UseMBBI, - const TargetRegisterClass *RC, - unsigned Reg) const LLVM_OVERRIDE; virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, RegScavenger *RS) const LLVM_OVERRIDE; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td index bd1b563..93d7c83 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td @@ -21,10 +21,12 @@ class SystemZRegWithSubregs<string n, list<Register> subregs> } let Namespace = "SystemZ" in { -def subreg_32bit : SubRegIndex; // could also be known as "subreg_high32" -def subreg_high : SubRegIndex; -def subreg_low : SubRegIndex; -def subreg_low32 : SubRegIndex<[subreg_low, subreg_32bit]>; +def subreg_l32 : SubRegIndex<32, 0>; // Also acts as subreg_ll32. +def subreg_h32 : SubRegIndex<32, 32>; // Also acts as subreg_lh32. +def subreg_l64 : SubRegIndex<64, 0>; +def subreg_h64 : SubRegIndex<64, 64>; +def subreg_hh32 : ComposedSubRegIndex<subreg_h64, subreg_h32>; +def subreg_hl32 : ComposedSubRegIndex<subreg_h64, subreg_l32>; } // Define a register class that contains values of type TYPE and an @@ -54,36 +56,49 @@ class GPR32<bits<16> num, string n> : SystemZReg<n> { } // One of the 16 64-bit general-purpose registers. 
-class GPR64<bits<16> num, string n, GPR32 low> - : SystemZRegWithSubregs<n, [low]> { +class GPR64<bits<16> num, string n, GPR32 low, GPR32 high> + : SystemZRegWithSubregs<n, [low, high]> { let HWEncoding = num; - let SubRegIndices = [subreg_32bit]; + let SubRegIndices = [subreg_l32, subreg_h32]; } // 8 even-odd pairs of GPR64s. -class GPR128<bits<16> num, string n, GPR64 high, GPR64 low> - : SystemZRegWithSubregs<n, [high, low]> { +class GPR128<bits<16> num, string n, GPR64 low, GPR64 high> + : SystemZRegWithSubregs<n, [low, high]> { let HWEncoding = num; - let SubRegIndices = [subreg_high, subreg_low]; + let SubRegIndices = [subreg_l64, subreg_h64]; } // General-purpose registers foreach I = 0-15 in { - def R#I#W : GPR32<I, "r"#I>; - def R#I#D : GPR64<I, "r"#I, !cast<GPR32>("R"#I#"W")>, DwarfRegNum<[I]>; + def R#I#L : GPR32<I, "r"#I>; + def R#I#H : GPR32<I, "r"#I>; + def R#I#D : GPR64<I, "r"#I, !cast<GPR32>("R"#I#"L"), !cast<GPR32>("R"#I#"H")>, + DwarfRegNum<[I]>; } foreach I = [0, 2, 4, 6, 8, 10, 12, 14] in { - def R#I#Q : GPR128<I, "r"#I, !cast<GPR64>("R"#I#"D"), - !cast<GPR64>("R"#!add(I, 1)#"D")>; + def R#I#Q : GPR128<I, "r"#I, !cast<GPR64>("R"#!add(I, 1)#"D"), + !cast<GPR64>("R"#I#"D")>; } /// Allocate the callee-saved R6-R13 backwards. That way they can be saved /// together with R14 and R15 in one prolog instruction. -defm GR32 : SystemZRegClass<"GR32", i32, 32, (add (sequence "R%uW", 0, 5), - (sequence "R%uW", 15, 6))>; -defm GR64 : SystemZRegClass<"GR64", i64, 64, (add (sequence "R%uD", 0, 5), - (sequence "R%uD", 15, 6))>; +defm GR32 : SystemZRegClass<"GR32", i32, 32, (add (sequence "R%uL", 0, 5), + (sequence "R%uL", 15, 6))>; +defm GRH32 : SystemZRegClass<"GRH32", i32, 32, (add (sequence "R%uH", 0, 5), + (sequence "R%uH", 15, 6))>; +defm GR64 : SystemZRegClass<"GR64", i64, 64, (add (sequence "R%uD", 0, 5), + (sequence "R%uD", 15, 6))>; + +// Combine the low and high GR32s into a single class. 
This can only be +// used for virtual registers if the high-word facility is available. +defm GRX32 : SystemZRegClass<"GRX32", i32, 32, + (add (sequence "R%uL", 0, 5), + (sequence "R%uH", 0, 5), + R15L, R15H, R14L, R14H, R13L, R13H, + R12L, R12H, R11L, R11H, R10L, R10H, + R9L, R9H, R8L, R8H, R7L, R7H, R6L, R6H)>; // The architecture doesn't really have any i128 support, so model the // register pairs as untyped instead. @@ -93,7 +108,7 @@ defm GR128 : SystemZRegClass<"GR128", untyped, 128, (add R0Q, R2Q, R4Q, // Base and index registers. Everything except R0, which in an address // context evaluates as 0. -defm ADDR32 : SystemZRegClass<"ADDR32", i32, 32, (sub GR32Bit, R0W)>; +defm ADDR32 : SystemZRegClass<"ADDR32", i32, 32, (sub GR32Bit, R0L)>; defm ADDR64 : SystemZRegClass<"ADDR64", i64, 64, (sub GR64Bit, R0D)>; // Not used directly, but needs to exist for ADDR32 and ADDR64 subregs @@ -113,14 +128,14 @@ class FPR32<bits<16> num, string n> : SystemZReg<n> { class FPR64<bits<16> num, string n, FPR32 low> : SystemZRegWithSubregs<n, [low]> { let HWEncoding = num; - let SubRegIndices = [subreg_32bit]; + let SubRegIndices = [subreg_h32]; } // 8 pairs of FPR64s, with a one-register gap inbetween. 
-class FPR128<bits<16> num, string n, FPR64 high, FPR64 low> - : SystemZRegWithSubregs<n, [high, low]> { +class FPR128<bits<16> num, string n, FPR64 low, FPR64 high> + : SystemZRegWithSubregs<n, [low, high]> { let HWEncoding = num; - let SubRegIndices = [subreg_high, subreg_low]; + let SubRegIndices = [subreg_l64, subreg_h64]; } // Floating-point registers @@ -131,8 +146,8 @@ foreach I = 0-15 in { } foreach I = [0, 1, 4, 5, 8, 9, 12, 13] in { - def F#I#Q : FPR128<I, "f"#I, !cast<FPR64>("F"#I#"D"), - !cast<FPR64>("F"#!add(I, 2)#"D")>; + def F#I#Q : FPR128<I, "f"#I, !cast<FPR64>("F"#!add(I, 2)#"D"), + !cast<FPR64>("F"#I#"D")>; } // There's no store-multiple instruction for FPRs, so we're not fussy @@ -146,5 +161,7 @@ defm FP128 : SystemZRegClass<"FP128", f128, 128, (add F0Q, F1Q, F4Q, F5Q, // Other registers //===----------------------------------------------------------------------===// -// Status register -def PSW : SystemZReg<"psw">; +// The 2-bit condition code field of the PSW. Every register named in an +// inline asm needs a class associated with it. +def CC : SystemZReg<"cc">; +def CCRegs : RegisterClass<"SystemZ", [i32], 32, (add CC)>; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp new file mode 100644 index 0000000..c7ebb5d --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp @@ -0,0 +1,293 @@ +//===-- SystemZSelectionDAGInfo.cpp - SystemZ SelectionDAG Info -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SystemZSelectionDAGInfo class. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "systemz-selectiondag-info" +#include "SystemZTargetMachine.h" +#include "llvm/CodeGen/SelectionDAG.h" + +using namespace llvm; + +SystemZSelectionDAGInfo:: +SystemZSelectionDAGInfo(const SystemZTargetMachine &TM) + : TargetSelectionDAGInfo(TM) { +} + +SystemZSelectionDAGInfo::~SystemZSelectionDAGInfo() { +} + +// Decide whether it is best to use a loop or straight-line code for +// a block operation of Size bytes with source address Src and destination +// address Dest. Sequence is the opcode to use for straight-line code +// (such as MVC) and Loop is the opcode to use for loops (such as MVC_LOOP). +// Return the chain for the completed operation. +static SDValue emitMemMem(SelectionDAG &DAG, SDLoc DL, unsigned Sequence, + unsigned Loop, SDValue Chain, SDValue Dst, + SDValue Src, uint64_t Size) { + EVT PtrVT = Src.getValueType(); + // The heuristic we use is to prefer loops for anything that would + // require 7 or more MVCs. With these kinds of sizes there isn't + // much to choose between straight-line code and looping code, + // since the time will be dominated by the MVCs themselves. + // However, the loop has 4 or 5 instructions (depending on whether + // the base addresses can be proved equal), so there doesn't seem + // much point using a loop for 5 * 256 bytes or fewer. Anything in + // the range (5 * 256, 6 * 256) will need another instruction after + // the loop, so it doesn't seem worth using a loop then either. + // The next value up, 6 * 256, can be implemented in the same + // number of straight-line MVCs as 6 * 256 - 1. 
+ if (Size > 6 * 256) + return DAG.getNode(Loop, DL, MVT::Other, Chain, Dst, Src, + DAG.getConstant(Size, PtrVT), + DAG.getConstant(Size / 256, PtrVT)); + return DAG.getNode(Sequence, DL, MVT::Other, Chain, Dst, Src, + DAG.getConstant(Size, PtrVT)); +} + +SDValue SystemZSelectionDAGInfo:: +EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Dst, SDValue Src, SDValue Size, unsigned Align, + bool IsVolatile, bool AlwaysInline, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const { + if (IsVolatile) + return SDValue(); + + if (ConstantSDNode *CSize = dyn_cast<ConstantSDNode>(Size)) + return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP, + Chain, Dst, Src, CSize->getZExtValue()); + return SDValue(); +} + +// Handle a memset of 1, 2, 4 or 8 bytes with the operands given by +// Chain, Dst, ByteVal and Size. These cases are expected to use +// MVI, MVHHI, MVHI and MVGHI respectively. +static SDValue memsetStore(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Dst, uint64_t ByteVal, uint64_t Size, + unsigned Align, + MachinePointerInfo DstPtrInfo) { + uint64_t StoreVal = ByteVal; + for (unsigned I = 1; I < Size; ++I) + StoreVal |= ByteVal << (I * 8); + return DAG.getStore(Chain, DL, + DAG.getConstant(StoreVal, MVT::getIntegerVT(Size * 8)), + Dst, DstPtrInfo, false, false, Align); +} + +SDValue SystemZSelectionDAGInfo:: +EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Dst, SDValue Byte, SDValue Size, + unsigned Align, bool IsVolatile, + MachinePointerInfo DstPtrInfo) const { + EVT PtrVT = Dst.getValueType(); + + if (IsVolatile) + return SDValue(); + + if (ConstantSDNode *CSize = dyn_cast<ConstantSDNode>(Size)) { + uint64_t Bytes = CSize->getZExtValue(); + if (Bytes == 0) + return SDValue(); + if (ConstantSDNode *CByte = dyn_cast<ConstantSDNode>(Byte)) { + // Handle cases that can be done using at most two of + // MVI, MVHI, MVHHI and MVGHI. 
The latter two can only be + // used if ByteVal is all zeros or all ones; in other cases, + // we can move at most 2 halfwords. + uint64_t ByteVal = CByte->getZExtValue(); + if (ByteVal == 0 || ByteVal == 255 ? + Bytes <= 16 && CountPopulation_64(Bytes) <= 2 : + Bytes <= 4) { + unsigned Size1 = Bytes == 16 ? 8 : 1 << findLastSet(Bytes); + unsigned Size2 = Bytes - Size1; + SDValue Chain1 = memsetStore(DAG, DL, Chain, Dst, ByteVal, Size1, + Align, DstPtrInfo); + if (Size2 == 0) + return Chain1; + Dst = DAG.getNode(ISD::ADD, DL, PtrVT, Dst, + DAG.getConstant(Size1, PtrVT)); + DstPtrInfo = DstPtrInfo.getWithOffset(Size1); + SDValue Chain2 = memsetStore(DAG, DL, Chain, Dst, ByteVal, Size2, + std::min(Align, Size1), DstPtrInfo); + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain1, Chain2); + } + } else { + // Handle one and two bytes using STC. + if (Bytes <= 2) { + SDValue Chain1 = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, + false, false, Align); + if (Bytes == 1) + return Chain1; + SDValue Dst2 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst, + DAG.getConstant(1, PtrVT)); + SDValue Chain2 = DAG.getStore(Chain, DL, Byte, Dst2, + DstPtrInfo.getWithOffset(1), + false, false, 1); + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain1, Chain2); + } + } + assert(Bytes >= 2 && "Should have dealt with 0- and 1-byte cases already"); + + // Handle the special case of a memset of 0, which can use XC. + ConstantSDNode *CByte = dyn_cast<ConstantSDNode>(Byte); + if (CByte && CByte->getZExtValue() == 0) + return emitMemMem(DAG, DL, SystemZISD::XC, SystemZISD::XC_LOOP, + Chain, Dst, Dst, Bytes); + + // Copy the byte to the first location and then use MVC to copy + // it to the rest. 
+ Chain = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, + false, false, Align); + SDValue DstPlus1 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst, + DAG.getConstant(1, PtrVT)); + return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP, + Chain, DstPlus1, Dst, Bytes - 1); + } + return SDValue(); +} + +// Use CLC to compare [Src1, Src1 + Size) with [Src2, Src2 + Size), +// deciding whether to use a loop or straight-line code. +static SDValue emitCLC(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Src1, SDValue Src2, uint64_t Size) { + SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); + EVT PtrVT = Src1.getValueType(); + // A two-CLC sequence is a clear win over a loop, not least because it + // needs only one branch. A three-CLC sequence needs the same number + // of branches as a loop (i.e. 2), but is shorter. That brings us to + // lengths greater than 768 bytes. It seems relatively likely that + // a difference will be found within the first 768 bytes, so we just + // optimize for the smallest number of branch instructions, in order + // to avoid polluting the prediction buffer too much. A loop only ever + // needs 2 branches, whereas a straight-line sequence would need 3 or more. + if (Size > 3 * 256) + return DAG.getNode(SystemZISD::CLC_LOOP, DL, VTs, Chain, Src1, Src2, + DAG.getConstant(Size, PtrVT), + DAG.getConstant(Size / 256, PtrVT)); + return DAG.getNode(SystemZISD::CLC, DL, VTs, Chain, Src1, Src2, + DAG.getConstant(Size, PtrVT)); +} + +// Convert the current CC value into an integer that is 0 if CC == 0, +// less than zero if CC == 1 and greater than zero if CC >= 2. +// The sequence starts with IPM, which puts CC into bits 29 and 28 +// of an integer and clears bits 30 and 31. 
+static SDValue addIPMSequence(SDLoc DL, SDValue Glue, SelectionDAG &DAG) { + SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue); + SDValue SRL = DAG.getNode(ISD::SRL, DL, MVT::i32, IPM, + DAG.getConstant(SystemZ::IPM_CC, MVT::i32)); + SDValue ROTL = DAG.getNode(ISD::ROTL, DL, MVT::i32, SRL, + DAG.getConstant(31, MVT::i32)); + return ROTL; +} + +std::pair<SDValue, SDValue> SystemZSelectionDAGInfo:: +EmitTargetCodeForMemcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Src1, SDValue Src2, SDValue Size, + MachinePointerInfo Op1PtrInfo, + MachinePointerInfo Op2PtrInfo) const { + if (ConstantSDNode *CSize = dyn_cast<ConstantSDNode>(Size)) { + uint64_t Bytes = CSize->getZExtValue(); + assert(Bytes > 0 && "Caller should have handled 0-size case"); + Chain = emitCLC(DAG, DL, Chain, Src1, Src2, Bytes); + SDValue Glue = Chain.getValue(1); + return std::make_pair(addIPMSequence(DL, Glue, DAG), Chain); + } + return std::make_pair(SDValue(), SDValue()); +} + +std::pair<SDValue, SDValue> SystemZSelectionDAGInfo:: +EmitTargetCodeForMemchr(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Src, SDValue Char, SDValue Length, + MachinePointerInfo SrcPtrInfo) const { + // Use SRST to find the character. End is its address on success. + EVT PtrVT = Src.getValueType(); + SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other, MVT::Glue); + Length = DAG.getZExtOrTrunc(Length, DL, PtrVT); + Char = DAG.getZExtOrTrunc(Char, DL, MVT::i32); + Char = DAG.getNode(ISD::AND, DL, MVT::i32, Char, + DAG.getConstant(255, MVT::i32)); + SDValue Limit = DAG.getNode(ISD::ADD, DL, PtrVT, Src, Length); + SDValue End = DAG.getNode(SystemZISD::SEARCH_STRING, DL, VTs, Chain, + Limit, Src, Char); + Chain = End.getValue(1); + SDValue Glue = End.getValue(2); + + // Now select between End and null, depending on whether the character + // was found. 
+ SmallVector<SDValue, 5> Ops; + Ops.push_back(End); + Ops.push_back(DAG.getConstant(0, PtrVT)); + Ops.push_back(DAG.getConstant(SystemZ::CCMASK_SRST, MVT::i32)); + Ops.push_back(DAG.getConstant(SystemZ::CCMASK_SRST_FOUND, MVT::i32)); + Ops.push_back(Glue); + VTs = DAG.getVTList(PtrVT, MVT::Glue); + End = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, &Ops[0], Ops.size()); + return std::make_pair(End, Chain); +} + +std::pair<SDValue, SDValue> SystemZSelectionDAGInfo:: +EmitTargetCodeForStrcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Dest, SDValue Src, + MachinePointerInfo DestPtrInfo, + MachinePointerInfo SrcPtrInfo, bool isStpcpy) const { + SDVTList VTs = DAG.getVTList(Dest.getValueType(), MVT::Other); + SDValue EndDest = DAG.getNode(SystemZISD::STPCPY, DL, VTs, Chain, Dest, Src, + DAG.getConstant(0, MVT::i32)); + return std::make_pair(isStpcpy ? EndDest : Dest, EndDest.getValue(1)); +} + +std::pair<SDValue, SDValue> SystemZSelectionDAGInfo:: +EmitTargetCodeForStrcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Src1, SDValue Src2, + MachinePointerInfo Op1PtrInfo, + MachinePointerInfo Op2PtrInfo) const { + SDVTList VTs = DAG.getVTList(Src1.getValueType(), MVT::Other, MVT::Glue); + SDValue Unused = DAG.getNode(SystemZISD::STRCMP, DL, VTs, Chain, Src1, Src2, + DAG.getConstant(0, MVT::i32)); + Chain = Unused.getValue(1); + SDValue Glue = Chain.getValue(2); + return std::make_pair(addIPMSequence(DL, Glue, DAG), Chain); +} + +// Search from Src for a null character, stopping once Src reaches Limit. +// Return a pair of values, the first being the number of nonnull characters +// and the second being the out chain. +// +// This can be used for strlen by setting Limit to 0. 
+static std::pair<SDValue, SDValue> getBoundedStrlen(SelectionDAG &DAG, SDLoc DL, + SDValue Chain, SDValue Src, + SDValue Limit) { + EVT PtrVT = Src.getValueType(); + SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other, MVT::Glue); + SDValue End = DAG.getNode(SystemZISD::SEARCH_STRING, DL, VTs, Chain, + Limit, Src, DAG.getConstant(0, MVT::i32)); + Chain = End.getValue(1); + SDValue Len = DAG.getNode(ISD::SUB, DL, PtrVT, End, Src); + return std::make_pair(Len, Chain); +} + +std::pair<SDValue, SDValue> SystemZSelectionDAGInfo:: +EmitTargetCodeForStrlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Src, MachinePointerInfo SrcPtrInfo) const { + EVT PtrVT = Src.getValueType(); + return getBoundedStrlen(DAG, DL, Chain, Src, DAG.getConstant(0, PtrVT)); +} + +std::pair<SDValue, SDValue> SystemZSelectionDAGInfo:: +EmitTargetCodeForStrnlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Src, SDValue MaxLength, + MachinePointerInfo SrcPtrInfo) const { + EVT PtrVT = Src.getValueType(); + MaxLength = DAG.getZExtOrTrunc(MaxLength, DL, PtrVT); + SDValue Limit = DAG.getNode(ISD::ADD, DL, PtrVT, Src, MaxLength); + return getBoundedStrlen(DAG, DL, Chain, Src, Limit); +} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h new file mode 100644 index 0000000..281d1e2 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h @@ -0,0 +1,80 @@ +//===-- SystemZSelectionDAGInfo.h - SystemZ SelectionDAG Info ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the SystemZ subclass for TargetSelectionDAGInfo. 
+// +//===----------------------------------------------------------------------===// + +#ifndef SYSTEMZSELECTIONDAGINFO_H +#define SYSTEMZSELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class SystemZTargetMachine; + +class SystemZSelectionDAGInfo : public TargetSelectionDAGInfo { +public: + explicit SystemZSelectionDAGInfo(const SystemZTargetMachine &TM); + ~SystemZSelectionDAGInfo(); + + virtual + SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, + bool IsVolatile, bool AlwaysInline, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const + LLVM_OVERRIDE; + + virtual SDValue + EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, + SDValue Chain, SDValue Dst, SDValue Byte, + SDValue Size, unsigned Align, bool IsVolatile, + MachinePointerInfo DstPtrInfo) const LLVM_OVERRIDE; + + virtual std::pair<SDValue, SDValue> + EmitTargetCodeForMemcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Src1, SDValue Src2, SDValue Size, + MachinePointerInfo Op1PtrInfo, + MachinePointerInfo Op2PtrInfo) const LLVM_OVERRIDE; + + virtual std::pair<SDValue, SDValue> + EmitTargetCodeForMemchr(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Src, SDValue Char, SDValue Length, + MachinePointerInfo SrcPtrInfo) const LLVM_OVERRIDE; + + virtual std::pair<SDValue, SDValue> + EmitTargetCodeForStrcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Dest, SDValue Src, + MachinePointerInfo DestPtrInfo, + MachinePointerInfo SrcPtrInfo, + bool isStpcpy) const LLVM_OVERRIDE; + + virtual std::pair<SDValue, SDValue> + EmitTargetCodeForStrcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Src1, SDValue Src2, + MachinePointerInfo Op1PtrInfo, + MachinePointerInfo Op2PtrInfo) const LLVM_OVERRIDE; + + virtual std::pair<SDValue, SDValue> + EmitTargetCodeForStrlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Src, MachinePointerInfo 
SrcPtrInfo) const + LLVM_OVERRIDE; + + virtual std::pair<SDValue, SDValue> + EmitTargetCodeForStrnlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Src, SDValue MaxLength, + MachinePointerInfo SrcPtrInfo) const LLVM_OVERRIDE; +}; + +} + +#endif diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp new file mode 100644 index 0000000..537a545 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp @@ -0,0 +1,163 @@ +//===-- SystemZShortenInst.cpp - Instruction-shortening pass --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass tries to replace instructions with shorter forms. For example, +// IILF can be replaced with LLILL or LLILH if the constant fits and if the +// other 32 bits of the GR64 destination are not live. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "systemz-shorten-inst" + +#include "SystemZTargetMachine.h" +#include "llvm/CodeGen/MachineFunctionPass.h" + +using namespace llvm; + +namespace { + class SystemZShortenInst : public MachineFunctionPass { + public: + static char ID; + SystemZShortenInst(const SystemZTargetMachine &tm); + + virtual const char *getPassName() const { + return "SystemZ Instruction Shortening"; + } + + bool processBlock(MachineBasicBlock *MBB); + bool runOnMachineFunction(MachineFunction &F); + + private: + bool shortenIIF(MachineInstr &MI, unsigned *GPRMap, unsigned LiveOther, + unsigned LLIxL, unsigned LLIxH); + + const SystemZInstrInfo *TII; + + // LowGPRs[I] has bit N set if LLVM register I includes the low + // word of GPR N. HighGPRs is the same for the high word. 
+ unsigned LowGPRs[SystemZ::NUM_TARGET_REGS]; + unsigned HighGPRs[SystemZ::NUM_TARGET_REGS]; + }; + + char SystemZShortenInst::ID = 0; +} // end of anonymous namespace + +FunctionPass *llvm::createSystemZShortenInstPass(SystemZTargetMachine &TM) { + return new SystemZShortenInst(TM); +} + +SystemZShortenInst::SystemZShortenInst(const SystemZTargetMachine &tm) + : MachineFunctionPass(ID), TII(0), LowGPRs(), HighGPRs() { + // Set up LowGPRs and HighGPRs. + for (unsigned I = 0; I < 16; ++I) { + LowGPRs[SystemZMC::GR32Regs[I]] |= 1 << I; + LowGPRs[SystemZMC::GR64Regs[I]] |= 1 << I; + HighGPRs[SystemZMC::GRH32Regs[I]] |= 1 << I; + HighGPRs[SystemZMC::GR64Regs[I]] |= 1 << I; + if (unsigned GR128 = SystemZMC::GR128Regs[I]) { + LowGPRs[GR128] |= 3 << I; + HighGPRs[GR128] |= 3 << I; + } + } +} + +// MI loads one word of a GPR using an IIxF instruction and LLIxL and LLIxH +// are the halfword immediate loads for the same word. Try to use one of them +// instead of IIxF. If MI loads the high word, GPRMap[X] is the set of high +// words referenced by LLVM register X while LiveOther is the mask of low +// words that are currently live, and vice versa. +bool SystemZShortenInst::shortenIIF(MachineInstr &MI, unsigned *GPRMap, + unsigned LiveOther, unsigned LLIxL, + unsigned LLIxH) { + unsigned Reg = MI.getOperand(0).getReg(); + assert(Reg < SystemZ::NUM_TARGET_REGS && "Invalid register number"); + unsigned GPRs = GPRMap[Reg]; + assert(GPRs != 0 && "Register must be a GPR"); + if (GPRs & LiveOther) + return false; + + uint64_t Imm = MI.getOperand(1).getImm(); + if (SystemZ::isImmLL(Imm)) { + MI.setDesc(TII->get(LLIxL)); + MI.getOperand(0).setReg(SystemZMC::getRegAsGR64(Reg)); + return true; + } + if (SystemZ::isImmLH(Imm)) { + MI.setDesc(TII->get(LLIxH)); + MI.getOperand(0).setReg(SystemZMC::getRegAsGR64(Reg)); + MI.getOperand(1).setImm(Imm >> 16); + return true; + } + return false; +} + +// Process all instructions in MBB. Return true if something changed. 
+bool SystemZShortenInst::processBlock(MachineBasicBlock *MBB) { + bool Changed = false; + + // Work out which words are live on exit from the block. + unsigned LiveLow = 0; + unsigned LiveHigh = 0; + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) { + for (MachineBasicBlock::livein_iterator LI = (*SI)->livein_begin(), + LE = (*SI)->livein_end(); LI != LE; ++LI) { + unsigned Reg = *LI; + assert(Reg < SystemZ::NUM_TARGET_REGS && "Invalid register number"); + LiveLow |= LowGPRs[Reg]; + LiveHigh |= HighGPRs[Reg]; + } + } + + // Iterate backwards through the block looking for instructions to change. + for (MachineBasicBlock::reverse_iterator MBBI = MBB->rbegin(), + MBBE = MBB->rend(); MBBI != MBBE; ++MBBI) { + MachineInstr &MI = *MBBI; + unsigned Opcode = MI.getOpcode(); + if (Opcode == SystemZ::IILF) + Changed |= shortenIIF(MI, LowGPRs, LiveHigh, SystemZ::LLILL, + SystemZ::LLILH); + else if (Opcode == SystemZ::IIHF) + Changed |= shortenIIF(MI, HighGPRs, LiveLow, SystemZ::LLIHL, + SystemZ::LLIHH); + unsigned UsedLow = 0; + unsigned UsedHigh = 0; + for (MachineInstr::mop_iterator MOI = MI.operands_begin(), + MOE = MI.operands_end(); MOI != MOE; ++MOI) { + MachineOperand &MO = *MOI; + if (MO.isReg()) { + if (unsigned Reg = MO.getReg()) { + assert(Reg < SystemZ::NUM_TARGET_REGS && "Invalid register number"); + if (MO.isDef()) { + LiveLow &= ~LowGPRs[Reg]; + LiveHigh &= ~HighGPRs[Reg]; + } else if (!MO.isUndef()) { + UsedLow |= LowGPRs[Reg]; + UsedHigh |= HighGPRs[Reg]; + } + } + } + } + LiveLow |= UsedLow; + LiveHigh |= UsedHigh; + } + + return Changed; +} + +bool SystemZShortenInst::runOnMachineFunction(MachineFunction &F) { + TII = static_cast<const SystemZInstrInfo *>(F.getTarget().getInstrInfo()); + + bool Changed = false; + for (MachineFunction::iterator MFI = F.begin(), MFE = F.end(); + MFI != MFE; ++MFI) + Changed |= processBlock(MFI); + + return Changed; +} diff --git 
a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp index cfd3324..3971d5e 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp @@ -9,6 +9,8 @@ #include "SystemZSubtarget.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/Support/Host.h" +#include "MCTargetDesc/SystemZMCTargetDesc.h" #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR @@ -16,13 +18,22 @@ using namespace llvm; +// Pin the vtable to this file. +void SystemZSubtarget::anchor() {} + SystemZSubtarget::SystemZSubtarget(const std::string &TT, const std::string &CPU, const std::string &FS) - : SystemZGenSubtargetInfo(TT, CPU, FS), TargetTriple(TT) { + : SystemZGenSubtargetInfo(TT, CPU, FS), HasDistinctOps(false), + HasLoadStoreOnCond(false), HasHighWord(false), HasFPExtension(false), + TargetTriple(TT) { std::string CPUName = CPU; if (CPUName.empty()) - CPUName = "z10"; + CPUName = "generic"; +#if defined(__linux__) && defined(__s390x__) + if (CPUName == "generic") + CPUName = sys::getHostCPUName(); +#endif // Parse features string. ParseSubtargetFeatures(CPUName, FS); diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h index 8d4d450..5817491 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h +++ b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h @@ -26,6 +26,13 @@ class GlobalValue; class StringRef; class SystemZSubtarget : public SystemZGenSubtargetInfo { + virtual void anchor(); +protected: + bool HasDistinctOps; + bool HasLoadStoreOnCond; + bool HasHighWord; + bool HasFPExtension; + private: Triple TargetTriple; @@ -33,9 +40,24 @@ public: SystemZSubtarget(const std::string &TT, const std::string &CPU, const std::string &FS); + // This is important for reducing register pressure in vector code. 
+ virtual bool useAA() const LLVM_OVERRIDE { return true; } + // Automatically generated by tblgen. void ParseSubtargetFeatures(StringRef CPU, StringRef FS); + // Return true if the target has the distinct-operands facility. + bool hasDistinctOps() const { return HasDistinctOps; } + + // Return true if the target has the load/store-on-condition facility. + bool hasLoadStoreOnCond() const { return HasLoadStoreOnCond; } + + // Return true if the target has the high-word facility. + bool hasHighWord() const { return HasHighWord; } + + // Return true if the target has the floating-point extension facility. + bool hasFPExtension() const { return HasFPExtension; } + // Return true if GV can be accessed using LARL for reloc model RM // and code model CM. bool isPC32DBLSymbol(const GlobalValue *GV, Reloc::Model RM, diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp index 8c4c456..dee92e9 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -10,6 +10,7 @@ #include "SystemZTargetMachine.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Transforms/Scalar.h" using namespace llvm; @@ -33,6 +34,7 @@ SystemZTargetMachine::SystemZTargetMachine(const Target &T, StringRef TT, "-f32:32-f64:64-f128:64-a0:8:16-n32:64"), InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering(*this, Subtarget) { + initAsmInfo(); } namespace { @@ -46,15 +48,61 @@ public: return getTM<SystemZTargetMachine>(); } - virtual bool addInstSelector(); + virtual void addIRPasses() LLVM_OVERRIDE; + virtual bool addInstSelector() LLVM_OVERRIDE; + virtual bool addPreSched2() LLVM_OVERRIDE; + virtual bool addPreEmitPass() LLVM_OVERRIDE; }; } // end anonymous namespace +void SystemZPassConfig::addIRPasses() { + TargetPassConfig::addIRPasses(); + addPass(createPartiallyInlineLibCallsPass()); +} + bool 
SystemZPassConfig::addInstSelector() { addPass(createSystemZISelDag(getSystemZTargetMachine(), getOptLevel())); return false; } +bool SystemZPassConfig::addPreSched2() { + if (getSystemZTargetMachine().getSubtargetImpl()->hasLoadStoreOnCond()) + addPass(&IfConverterID); + return true; +} + +bool SystemZPassConfig::addPreEmitPass() { + // We eliminate comparisons here rather than earlier because some + // transformations can change the set of available CC values and we + // generally want those transformations to have priority. This is + // especially true in the commonest case where the result of the comparison + // is used by a single in-range branch instruction, since we will then + // be able to fuse the compare and the branch instead. + // + // For example, two-address NILF can sometimes be converted into + // three-address RISBLG. NILF produces a CC value that indicates whether + // the low word is zero, but RISBLG does not modify CC at all. On the + // other hand, 64-bit ANDs like NILL can sometimes be converted to RISBG. + // The CC value produced by NILL isn't useful for our purposes, but the + // value produced by RISBG can be used for any comparison with zero + // (not just equality). So there are some transformations that lose + // CC values (while still being worthwhile) and others that happen to make + // the CC result more useful than it was originally. + // + // Another reason is that we only want to use BRANCH ON COUNT in cases + // where we know that the count register is not going to be spilled. + // + // Doing it so late makes it more likely that a register will be reused + // between the comparison and the branch, but it isn't clear whether + // preventing that would be a win or not. 
+ if (getOptLevel() != CodeGenOpt::None) + addPass(createSystemZElimComparePass(getSystemZTargetMachine())); + if (getOptLevel() != CodeGenOpt::None) + addPass(createSystemZShortenInstPass(getSystemZTargetMachine())); + addPass(createSystemZLongBranchPass(getSystemZTargetMachine())); + return true; +} + TargetPassConfig *SystemZTargetMachine::createPassConfig(PassManagerBase &PM) { return new SystemZPassConfig(this, PM); } diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h index 98614e7..a99a98e 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h +++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h @@ -20,10 +20,10 @@ #include "SystemZInstrInfo.h" #include "SystemZRegisterInfo.h" #include "SystemZSubtarget.h" +#include "SystemZSelectionDAGInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetSelectionDAGInfo.h" namespace llvm { @@ -32,7 +32,7 @@ class SystemZTargetMachine : public LLVMTargetMachine { const DataLayout DL; SystemZInstrInfo InstrInfo; SystemZTargetLowering TLInfo; - TargetSelectionDAGInfo TSInfo; + SystemZSelectionDAGInfo TSInfo; SystemZFrameLowering FrameLowering; public: |