Diffstat (limited to 'contrib/llvm/lib/Target/Hexagon')
102 files changed, 14425 insertions, 9517 deletions
diff --git a/contrib/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/contrib/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp index 496efbf..becc086 100644 --- a/contrib/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp +++ b/contrib/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp @@ -10,39 +10,53 @@ #define DEBUG_TYPE "mcasmparser" #include "Hexagon.h" -#include "HexagonRegisterInfo.h" #include "HexagonTargetStreamer.h" -#include "MCTargetDesc/HexagonBaseInfo.h" -#include "MCTargetDesc/HexagonMCAsmInfo.h" #include "MCTargetDesc/HexagonMCChecker.h" #include "MCTargetDesc/HexagonMCELFStreamer.h" #include "MCTargetDesc/HexagonMCExpr.h" -#include "MCTargetDesc/HexagonMCShuffler.h" +#include "MCTargetDesc/HexagonMCInstrInfo.h" #include "MCTargetDesc/HexagonMCTargetDesc.h" #include "MCTargetDesc/HexagonShuffler.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCELFStreamer.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCParser/MCAsmParserExtension.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" #include "llvm/MC/MCParser/MCTargetAsmParser.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCValue.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ELF.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/SourceMgr.h" -#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/SMLoc.h" +#include "llvm/Support/TargetRegistry.h" +#include <algorithm> +#include <cassert> +#include <cctype> +#include <cstddef> +#include <cstdint> +#include <memory> +#include <string> +#include <utility> using namespace llvm; @@ -65,8 +79,8 @@ static cl::opt<bool> ErrorNoncontigiousRegister("merror-noncontigious-register", cl::desc("Error for register names that aren't contigious"), cl::init(false)); - namespace { + struct HexagonOperand; class HexagonAsmParser : public MCTargetAsmParser { @@ -93,9 +107,7 @@ class HexagonAsmParser : public MCTargetAsmParser { bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); } bool ParseDirectiveFalign(unsigned Size, SMLoc L); - virtual bool ParseRegister(unsigned &RegNo, - SMLoc &StartLoc, - SMLoc &EndLoc) override; + bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; bool ParseDirectiveSubsection(SMLoc L); bool ParseDirectiveValue(unsigned Size, SMLoc L); bool ParseDirectiveComm(bool IsLocal, SMLoc L); @@ -114,7 +126,7 @@ class HexagonAsmParser : public MCTargetAsmParser { uint64_t &ErrorInfo, bool MatchingInlineAsm) override; unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, unsigned Kind) override; - void OutOfRange(SMLoc IDLoc, long long Val, long long Max); + bool OutOfRange(SMLoc IDLoc, long long Val, long long Max); int processInstruction(MCInst &Inst, OperandVector const &Operands, SMLoc IDLoc); @@ -141,14 +153,14 @@ public: MCII (MII), 
MCB(HexagonMCInstrInfo::createBundle()), InBrackets(false) { setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits())); - MCAsmParserExtension::Initialize(_Parser); + MCAsmParserExtension::Initialize(_Parser); - Assembler = nullptr; - // FIXME: need better way to detect AsmStreamer (upstream removed getKind()) - if (!Parser.getStreamer().hasRawTextSupport()) { - MCELFStreamer *MES = static_cast<MCELFStreamer *>(&Parser.getStreamer()); - Assembler = &MES->getAssembler(); - } + Assembler = nullptr; + // FIXME: need better way to detect AsmStreamer (upstream removed getKind()) + if (!Parser.getStreamer().hasRawTextSupport()) { + MCELFStreamer *MES = static_cast<MCELFStreamer *>(&Parser.getStreamer()); + Assembler = &MES->getAssembler(); + } } bool splitIdentifier(OperandVector &Operands); @@ -157,15 +169,17 @@ public: bool implicitExpressionLocation(OperandVector &Operands); bool parseExpressionOrOperand(OperandVector &Operands); bool parseExpression(MCExpr const *& Expr); - virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, - SMLoc NameLoc, OperandVector &Operands) override + + bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + SMLoc NameLoc, OperandVector &Operands) override { llvm_unreachable("Unimplemented"); } - virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, - AsmToken ID, OperandVector &Operands) override; - virtual bool ParseDirective(AsmToken DirectiveID) override; + bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, AsmToken ID, + OperandVector &Operands) override; + + bool ParseDirective(AsmToken DirectiveID) override; }; /// HexagonOperand - Instances of this class represent a parsed Hexagon machine @@ -219,12 +233,12 @@ public: } /// getStartLoc - Get the location of the first token of this operand. - SMLoc getStartLoc() const { return StartLoc; } + SMLoc getStartLoc() const override { return StartLoc; } /// getEndLoc - Get the location of the last token of this operand. - SMLoc getEndLoc() const { return EndLoc; } + SMLoc getEndLoc() const override { return EndLoc; } - unsigned getReg() const { + unsigned getReg() const override { assert(Kind == Register && "Invalid access!"); return Reg.RegNum; } @@ -234,10 +248,10 @@ public: return Imm.Val; } - bool isToken() const { return Kind == Token; } - bool isImm() const { return Kind == Immediate; } - bool isMem() const { llvm_unreachable("No isMem"); } - bool isReg() const { return Kind == Register; } + bool isToken() const override { return Kind == Token; } + bool isImm() const override { return Kind == Immediate; } + bool isMem() const override { llvm_unreachable("No isMem"); } + bool isReg() const override { return Kind == Register; } bool CheckImmRange(int immBits, int zeroBits, bool isSigned, bool isRelocatable, bool Extendable) const { @@ -259,11 +273,11 @@ public: if (bits == 64) return true; if (Res >= 0) - return ((uint64_t)Res < (uint64_t)(1ULL << bits)) ? true : false; + return ((uint64_t)Res < (uint64_t)(1ULL << bits)); else { const int64_t high_bit_set = 1ULL << 63; const uint64_t mask = (high_bit_set >> (63 - bits)); - return (((uint64_t)Res & mask) == mask) ? 
true : false; + return (((uint64_t)Res & mask) == mask); } } } else if (myMCExpr->getKind() == MCExpr::SymbolRef && isRelocatable) @@ -276,55 +290,60 @@ public: } bool isf32Ext() const { return false; } - bool iss32Imm() const { return CheckImmRange(32, 0, true, true, false); } + bool iss32_0Imm() const { return CheckImmRange(32, 0, true, true, false); } bool iss23_2Imm() const { return CheckImmRange(23, 2, true, true, false); } - bool iss8Imm() const { return CheckImmRange(8, 0, true, false, false); } - bool iss8Imm64() const { return CheckImmRange(8, 0, true, true, false); } - bool iss7Imm() const { return CheckImmRange(7, 0, true, false, false); } - bool iss6Imm() const { return CheckImmRange(6, 0, true, false, false); } - bool iss4Imm() const { return CheckImmRange(4, 0, true, false, false); } + bool iss8_0Imm() const { return CheckImmRange(8, 0, true, false, false); } + bool iss8_0Imm64() const { return CheckImmRange(8, 0, true, true, false); } + bool iss7_0Imm() const { return CheckImmRange(7, 0, true, false, false); } + bool iss6_0Imm() const { return CheckImmRange(6, 0, true, false, false); } bool iss4_0Imm() const { return CheckImmRange(4, 0, true, false, false); } bool iss4_1Imm() const { return CheckImmRange(4, 1, true, false, false); } bool iss4_2Imm() const { return CheckImmRange(4, 2, true, false, false); } bool iss4_3Imm() const { return CheckImmRange(4, 3, true, false, false); } bool iss4_6Imm() const { return CheckImmRange(4, 0, true, false, false); } bool iss3_6Imm() const { return CheckImmRange(3, 0, true, false, false); } - bool iss3Imm() const { return CheckImmRange(3, 0, true, false, false); } + bool iss3_0Imm() const { return CheckImmRange(3, 0, true, false, false); } - bool isu64Imm() const { return CheckImmRange(64, 0, false, true, true); } - bool isu32Imm() const { return CheckImmRange(32, 0, false, true, false); } + bool isu64_0Imm() const { return CheckImmRange(64, 0, false, true, true); } + bool isu32_0Imm() const { return CheckImmRange(32, 0, false, true, false); } bool isu26_6Imm() const { return CheckImmRange(26, 6, false, true, false); } - bool isu16Imm() const { return CheckImmRange(16, 0, false, true, false); } bool isu16_0Imm() const { return CheckImmRange(16, 0, false, true, false); } bool isu16_1Imm() const { return CheckImmRange(16, 1, false, true, false); } bool isu16_2Imm() const { return CheckImmRange(16, 2, false, true, false); } bool isu16_3Imm() const { return CheckImmRange(16, 3, false, true, false); } bool isu11_3Imm() const { return CheckImmRange(11, 3, false, false, false); } - bool isu6_0Imm() const { return CheckImmRange(6, 0, false, false, false); } bool isu6_1Imm() const { return CheckImmRange(6, 1, false, false, false); } bool isu6_2Imm() const { return CheckImmRange(6, 2, false, false, false); } bool isu6_3Imm() const { return CheckImmRange(6, 3, false, false, false); } - bool isu10Imm() const { return CheckImmRange(10, 0, false, false, false); } - bool isu9Imm() const { return CheckImmRange(9, 0, false, false, false); } - bool isu8Imm() const { return CheckImmRange(8, 0, false, false, false); } - bool isu7Imm() const { return CheckImmRange(7, 0, false, false, false); } - bool isu6Imm() const { return CheckImmRange(6, 0, false, false, false); } - bool isu5Imm() const { return CheckImmRange(5, 0, false, false, false); } - bool isu4Imm() const { return CheckImmRange(4, 0, false, false, false); } - bool isu3Imm() const { return CheckImmRange(3, 0, false, false, false); } - bool isu2Imm() const { return CheckImmRange(2, 0, false, false, 
false); } - bool isu1Imm() const { return CheckImmRange(1, 0, false, false, false); } - - bool ism6Imm() const { return CheckImmRange(6, 0, false, false, false); } - bool isn8Imm() const { return CheckImmRange(8, 0, false, false, false); } - - bool iss16Ext() const { return CheckImmRange(16 + 26, 0, true, true, true); } - bool iss12Ext() const { return CheckImmRange(12 + 26, 0, true, true, true); } - bool iss10Ext() const { return CheckImmRange(10 + 26, 0, true, true, true); } - bool iss9Ext() const { return CheckImmRange(9 + 26, 0, true, true, true); } - bool iss8Ext() const { return CheckImmRange(8 + 26, 0, true, true, true); } - bool iss7Ext() const { return CheckImmRange(7 + 26, 0, true, true, true); } - bool iss6Ext() const { return CheckImmRange(6 + 26, 0, true, true, true); } + bool isu10_0Imm() const { return CheckImmRange(10, 0, false, false, false); } + bool isu9_0Imm() const { return CheckImmRange(9, 0, false, false, false); } + bool isu8_0Imm() const { return CheckImmRange(8, 0, false, false, false); } + bool isu7_0Imm() const { return CheckImmRange(7, 0, false, false, false); } + bool isu6_0Imm() const { return CheckImmRange(6, 0, false, false, false); } + bool isu5_0Imm() const { return CheckImmRange(5, 0, false, false, false); } + bool isu4_0Imm() const { return CheckImmRange(4, 0, false, false, false); } + bool isu3_0Imm() const { return CheckImmRange(3, 0, false, false, false); } + bool isu2_0Imm() const { return CheckImmRange(2, 0, false, false, false); } + bool isu1_0Imm() const { return CheckImmRange(1, 0, false, false, false); } + + bool ism6_0Imm() const { return CheckImmRange(6, 0, false, false, false); } + bool isn8_0Imm() const { return CheckImmRange(8, 0, false, false, false); } + bool isn1Const() const { + if (!isImm()) + return false; + int64_t Value; + if (!getImm()->evaluateAsAbsolute(Value)) + return false; + return Value == -1; + } + + bool iss16_0Ext() const { return CheckImmRange(16 + 26, 0, true, true, true); } + bool iss12_0Ext() const { return CheckImmRange(12 + 26, 0, true, true, true); } + bool iss10_0Ext() const { return CheckImmRange(10 + 26, 0, true, true, true); } + bool iss9_0Ext() const { return CheckImmRange(9 + 26, 0, true, true, true); } + bool iss8_0Ext() const { return CheckImmRange(8 + 26, 0, true, true, true); } + bool iss7_0Ext() const { return CheckImmRange(7 + 26, 0, true, true, true); } + bool iss6_0Ext() const { return CheckImmRange(6 + 26, 0, true, true, true); } bool iss11_0Ext() const { return CheckImmRange(11 + 26, 0, true, true, true); } @@ -338,16 +357,15 @@ public: return CheckImmRange(11 + 26, 3, true, true, true); } - bool isu6Ext() const { return CheckImmRange(6 + 26, 0, false, true, true); } - bool isu7Ext() const { return CheckImmRange(7 + 26, 0, false, true, true); } - bool isu8Ext() const { return CheckImmRange(8 + 26, 0, false, true, true); } - bool isu9Ext() const { return CheckImmRange(9 + 26, 0, false, true, true); } - bool isu10Ext() const { return CheckImmRange(10 + 26, 0, false, true, true); } + bool isu7_0Ext() const { return CheckImmRange(7 + 26, 0, false, true, true); } + bool isu8_0Ext() const { return CheckImmRange(8 + 26, 0, false, true, true); } + bool isu9_0Ext() const { return CheckImmRange(9 + 26, 0, false, true, true); } + bool isu10_0Ext() const { return CheckImmRange(10 + 26, 0, false, true, true); } bool isu6_0Ext() const { return CheckImmRange(6 + 26, 0, false, true, true); } bool isu6_1Ext() const { return CheckImmRange(6 + 26, 1, false, true, true); } bool isu6_2Ext() const { return 
CheckImmRange(6 + 26, 2, false, true, true); } bool isu6_3Ext() const { return CheckImmRange(6 + 26, 3, false, true, true); } - bool isu32MustExt() const { return isImm(); } + bool isu32_0MustExt() const { return isImm(); } void addRegOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); @@ -378,22 +396,19 @@ public: addImmOperands(Inst, N); } - void adds32ImmOperands(MCInst &Inst, unsigned N) const { + void adds32_0ImmOperands(MCInst &Inst, unsigned N) const { addSignedImmOperands(Inst, N); } void adds23_2ImmOperands(MCInst &Inst, unsigned N) const { addSignedImmOperands(Inst, N); } - void adds8ImmOperands(MCInst &Inst, unsigned N) const { - addSignedImmOperands(Inst, N); - } - void adds8Imm64Operands(MCInst &Inst, unsigned N) const { + void adds8_0ImmOperands(MCInst &Inst, unsigned N) const { addSignedImmOperands(Inst, N); } - void adds6ImmOperands(MCInst &Inst, unsigned N) const { + void adds8_0Imm64Operands(MCInst &Inst, unsigned N) const { addSignedImmOperands(Inst, N); } - void adds4ImmOperands(MCInst &Inst, unsigned N) const { + void adds6_0ImmOperands(MCInst &Inst, unsigned N) const { addSignedImmOperands(Inst, N); } void adds4_0ImmOperands(MCInst &Inst, unsigned N) const { @@ -408,22 +423,19 @@ public: void adds4_3ImmOperands(MCInst &Inst, unsigned N) const { addSignedImmOperands(Inst, N); } - void adds3ImmOperands(MCInst &Inst, unsigned N) const { + void adds3_0ImmOperands(MCInst &Inst, unsigned N) const { addSignedImmOperands(Inst, N); } - void addu64ImmOperands(MCInst &Inst, unsigned N) const { + void addu64_0ImmOperands(MCInst &Inst, unsigned N) const { addImmOperands(Inst, N); } - void addu32ImmOperands(MCInst &Inst, unsigned N) const { + void addu32_0ImmOperands(MCInst &Inst, unsigned N) const { addImmOperands(Inst, N); } void addu26_6ImmOperands(MCInst &Inst, unsigned N) const { addImmOperands(Inst, N); } - void addu16ImmOperands(MCInst &Inst, unsigned N) const { - addImmOperands(Inst, N); - } void addu16_0ImmOperands(MCInst &Inst, unsigned N) const { addImmOperands(Inst, N); } @@ -439,19 +451,16 @@ public: void addu11_3ImmOperands(MCInst &Inst, unsigned N) const { addImmOperands(Inst, N); } - void addu10ImmOperands(MCInst &Inst, unsigned N) const { + void addu10_0ImmOperands(MCInst &Inst, unsigned N) const { addImmOperands(Inst, N); } - void addu9ImmOperands(MCInst &Inst, unsigned N) const { + void addu9_0ImmOperands(MCInst &Inst, unsigned N) const { addImmOperands(Inst, N); } - void addu8ImmOperands(MCInst &Inst, unsigned N) const { + void addu8_0ImmOperands(MCInst &Inst, unsigned N) const { addImmOperands(Inst, N); } - void addu7ImmOperands(MCInst &Inst, unsigned N) const { - addImmOperands(Inst, N); - } - void addu6ImmOperands(MCInst &Inst, unsigned N) const { + void addu7_0ImmOperands(MCInst &Inst, unsigned N) const { addImmOperands(Inst, N); } void addu6_0ImmOperands(MCInst &Inst, unsigned N) const { @@ -466,45 +475,45 @@ public: void addu6_3ImmOperands(MCInst &Inst, unsigned N) const { addImmOperands(Inst, N); } - void addu5ImmOperands(MCInst &Inst, unsigned N) const { + void addu5_0ImmOperands(MCInst &Inst, unsigned N) const { addImmOperands(Inst, N); } - void addu4ImmOperands(MCInst &Inst, unsigned N) const { + void addu4_0ImmOperands(MCInst &Inst, unsigned N) const { addImmOperands(Inst, N); } - void addu3ImmOperands(MCInst &Inst, unsigned N) const { + void addu3_0ImmOperands(MCInst &Inst, unsigned N) const { addImmOperands(Inst, N); } - void addu2ImmOperands(MCInst &Inst, unsigned N) const { + void addu2_0ImmOperands(MCInst 
&Inst, unsigned N) const { addImmOperands(Inst, N); } - void addu1ImmOperands(MCInst &Inst, unsigned N) const { + void addu1_0ImmOperands(MCInst &Inst, unsigned N) const { addImmOperands(Inst, N); } - void addm6ImmOperands(MCInst &Inst, unsigned N) const { + void addm6_0ImmOperands(MCInst &Inst, unsigned N) const { addImmOperands(Inst, N); } - void addn8ImmOperands(MCInst &Inst, unsigned N) const { + void addn8_0ImmOperands(MCInst &Inst, unsigned N) const { addImmOperands(Inst, N); } - void adds16ExtOperands(MCInst &Inst, unsigned N) const { + void adds16_0ExtOperands(MCInst &Inst, unsigned N) const { addSignedImmOperands(Inst, N); } - void adds12ExtOperands(MCInst &Inst, unsigned N) const { + void adds12_0ExtOperands(MCInst &Inst, unsigned N) const { addSignedImmOperands(Inst, N); } - void adds10ExtOperands(MCInst &Inst, unsigned N) const { + void adds10_0ExtOperands(MCInst &Inst, unsigned N) const { addSignedImmOperands(Inst, N); } - void adds9ExtOperands(MCInst &Inst, unsigned N) const { + void adds9_0ExtOperands(MCInst &Inst, unsigned N) const { addSignedImmOperands(Inst, N); } - void adds8ExtOperands(MCInst &Inst, unsigned N) const { + void adds8_0ExtOperands(MCInst &Inst, unsigned N) const { addSignedImmOperands(Inst, N); } - void adds6ExtOperands(MCInst &Inst, unsigned N) const { + void adds6_0ExtOperands(MCInst &Inst, unsigned N) const { addSignedImmOperands(Inst, N); } void adds11_0ExtOperands(MCInst &Inst, unsigned N) const { @@ -519,20 +528,20 @@ public: void adds11_3ExtOperands(MCInst &Inst, unsigned N) const { addSignedImmOperands(Inst, N); } - - void addu6ExtOperands(MCInst &Inst, unsigned N) const { + void addn1ConstOperands(MCInst &Inst, unsigned N) const { addImmOperands(Inst, N); } - void addu7ExtOperands(MCInst &Inst, unsigned N) const { + + void addu7_0ExtOperands(MCInst &Inst, unsigned N) const { addImmOperands(Inst, N); } - void addu8ExtOperands(MCInst &Inst, unsigned N) const { + void addu8_0ExtOperands(MCInst &Inst, unsigned N) const { addImmOperands(Inst, N); } - void addu9ExtOperands(MCInst &Inst, unsigned N) const { + void addu9_0ExtOperands(MCInst &Inst, unsigned N) const { addImmOperands(Inst, N); } - void addu10ExtOperands(MCInst &Inst, unsigned N) const { + void addu10_0ExtOperands(MCInst &Inst, unsigned N) const { addImmOperands(Inst, N); } void addu6_0ExtOperands(MCInst &Inst, unsigned N) const { @@ -547,7 +556,7 @@ public: void addu6_3ExtOperands(MCInst &Inst, unsigned N) const { addImmOperands(Inst, N); } - void addu32MustExtOperands(MCInst &Inst, unsigned N) const { + void addu32_0MustExtOperands(MCInst &Inst, unsigned N) const { addImmOperands(Inst, N); } @@ -570,7 +579,7 @@ public: return StringRef(Tok.Data, Tok.Length); } - virtual void print(raw_ostream &OS) const; + void print(raw_ostream &OS) const override; static std::unique_ptr<HexagonOperand> CreateToken(StringRef Str, SMLoc S) { HexagonOperand *Op = new HexagonOperand(Token); @@ -600,7 +609,7 @@ public: } }; -} // end anonymous namespace. 
+} // end anonymous namespace void HexagonOperand::print(raw_ostream &OS) const { switch (Kind) { @@ -630,67 +639,70 @@ bool HexagonAsmParser::finishBundle(SMLoc IDLoc, MCStreamer &Out) { getContext(), MCB, &Check); - while (Check.getNextErrInfo() == true) { + while (Check.getNextErrInfo()) { unsigned Reg = Check.getErrRegister(); Twine R(RI->getName(Reg)); uint64_t Err = Check.getError(); if (Err != HexagonMCErrInfo::CHECK_SUCCESS) { if (HexagonMCErrInfo::CHECK_ERROR_BRANCHES & Err) - Error(IDLoc, - "unconditional branch cannot precede another branch in packet"); + return Error( + IDLoc, + "unconditional branch cannot precede another branch in packet"); if (HexagonMCErrInfo::CHECK_ERROR_NEWP & Err || HexagonMCErrInfo::CHECK_ERROR_NEWV & Err) - Error(IDLoc, "register `" + R + - "' used with `.new' " - "but not validly modified in the same packet"); + return Error(IDLoc, "register `" + R + + "' used with `.new' " + "but not validly modified in the same packet"); if (HexagonMCErrInfo::CHECK_ERROR_REGISTERS & Err) - Error(IDLoc, "register `" + R + "' modified more than once"); + return Error(IDLoc, "register `" + R + "' modified more than once"); if (HexagonMCErrInfo::CHECK_ERROR_READONLY & Err) - Error(IDLoc, "cannot write to read-only register `" + R + "'"); + return Error(IDLoc, "cannot write to read-only register `" + R + "'"); if (HexagonMCErrInfo::CHECK_ERROR_LOOP & Err) - Error(IDLoc, "loop-setup and some branch instructions " - "cannot be in the same packet"); + return Error(IDLoc, "loop-setup and some branch instructions " + "cannot be in the same packet"); if (HexagonMCErrInfo::CHECK_ERROR_ENDLOOP & Err) { Twine N(HexagonMCInstrInfo::isInnerLoop(MCB) ? '0' : '1'); - Error(IDLoc, "packet marked with `:endloop" + N + "' " + + return Error(IDLoc, + "packet marked with `:endloop" + N + "' " + "cannot contain instructions that modify register " + "`" + R + "'"); } if (HexagonMCErrInfo::CHECK_ERROR_SOLO & Err) - Error(IDLoc, - "instruction cannot appear in packet with other instructions"); + return Error( + IDLoc, + "instruction cannot appear in packet with other instructions"); if (HexagonMCErrInfo::CHECK_ERROR_NOSLOTS & Err) - Error(IDLoc, "too many slots used in packet"); + return Error(IDLoc, "too many slots used in packet"); if (Err & HexagonMCErrInfo::CHECK_ERROR_SHUFFLE) { uint64_t Erm = Check.getShuffleError(); if (HexagonShuffler::SHUFFLE_ERROR_INVALID == Erm) - Error(IDLoc, "invalid instruction packet"); + return Error(IDLoc, "invalid instruction packet"); else if (HexagonShuffler::SHUFFLE_ERROR_STORES == Erm) - Error(IDLoc, "invalid instruction packet: too many stores"); + return Error(IDLoc, "invalid instruction packet: too many stores"); else if (HexagonShuffler::SHUFFLE_ERROR_LOADS == Erm) - Error(IDLoc, "invalid instruction packet: too many loads"); + return Error(IDLoc, "invalid instruction packet: too many loads"); else if (HexagonShuffler::SHUFFLE_ERROR_BRANCHES == Erm) - Error(IDLoc, "too many branches in packet"); + return Error(IDLoc, "too many branches in packet"); else if (HexagonShuffler::SHUFFLE_ERROR_NOSLOTS == Erm) - Error(IDLoc, "invalid instruction packet: out of slots"); + return Error(IDLoc, "invalid instruction packet: out of slots"); else if (HexagonShuffler::SHUFFLE_ERROR_SLOTS == Erm) - Error(IDLoc, "invalid instruction packet: slot error"); + return Error(IDLoc, "invalid instruction packet: slot error"); else if (HexagonShuffler::SHUFFLE_ERROR_ERRATA2 == Erm) - Error(IDLoc, "v60 packet violation"); + return Error(IDLoc, "v60 packet violation"); else if 
(HexagonShuffler::SHUFFLE_ERROR_STORE_LOAD_CONFLICT == Erm) - Error(IDLoc, "slot 0 instruction does not allow slot 1 store"); + return Error(IDLoc, "slot 0 instruction does not allow slot 1 store"); else - Error(IDLoc, "unknown error in instruction packet"); + return Error(IDLoc, "unknown error in instruction packet"); } } @@ -878,7 +890,7 @@ bool HexagonAsmParser::ParseDirective(AsmToken DirectiveID) { return true; } bool HexagonAsmParser::ParseDirectiveSubsection(SMLoc L) { - const MCExpr *Subsection = 0; + const MCExpr *Subsection = nullptr; int64_t Res; assert((getLexer().isNot(AsmToken::EndOfStatement)) && @@ -908,13 +920,13 @@ bool HexagonAsmParser::ParseDirectiveFalign(unsigned Size, SMLoc L) { int64_t MaxBytesToFill = 15; - // if there is an arguement + // if there is an argument if (getLexer().isNot(AsmToken::EndOfStatement)) { const MCExpr *Value; SMLoc ExprLoc = L; // Make sure we have a number (false is returned if expression is a number) - if (getParser().parseExpression(Value) == false) { + if (!getParser().parseExpression(Value)) { // Make sure this is a number that is in range const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value); uint64_t IntValue = MCE->getValue(); @@ -936,8 +948,7 @@ bool HexagonAsmParser::ParseDirectiveFalign(unsigned Size, SMLoc L) { /// ::= .word [ expression (, expression)* ] bool HexagonAsmParser::ParseDirectiveValue(unsigned Size, SMLoc L) { if (getLexer().isNot(AsmToken::EndOfStatement)) { - - for (;;) { + while (true) { const MCExpr *Value; SMLoc ExprLoc = L; if (getParser().parseExpression(Value)) @@ -1062,15 +1073,15 @@ bool HexagonAsmParser::RegisterMatchesArch(unsigned MatchNum) const { /// Force static initialization. extern "C" void LLVMInitializeHexagonAsmParser() { - RegisterMCAsmParser<HexagonAsmParser> X(TheHexagonTarget); + RegisterMCAsmParser<HexagonAsmParser> X(getTheHexagonTarget()); } #define GET_MATCHER_IMPLEMENTATION #define GET_REGISTER_MATCHER #include "HexagonGenAsmMatcher.inc" -namespace { -bool previousEqual(OperandVector &Operands, size_t Index, StringRef String) { +static bool previousEqual(OperandVector &Operands, size_t Index, + StringRef String) { if (Index >= Operands.size()) return false; MCParsedAsmOperand &Operand = *Operands[Operands.size() - Index - 1]; @@ -1078,14 +1089,14 @@ bool previousEqual(OperandVector &Operands, size_t Index, StringRef String) { return false; return static_cast<HexagonOperand &>(Operand).getToken().equals_lower(String); } -bool previousIsLoop(OperandVector &Operands, size_t Index) { + +static bool previousIsLoop(OperandVector &Operands, size_t Index) { return previousEqual(Operands, Index, "loop0") || previousEqual(Operands, Index, "loop1") || previousEqual(Operands, Index, "sp1loop0") || previousEqual(Operands, Index, "sp2loop0") || previousEqual(Operands, Index, "sp3loop0"); } -} bool HexagonAsmParser::splitIdentifier(OperandVector &Operands) { AsmToken const &Token = getParser().getTok(); @@ -1174,8 +1185,7 @@ bool HexagonAsmParser::isLabel(AsmToken &Token) { StringRef Raw (String.data(), Third.getString().data() - String.data() + Third.getString().size()); std::string Collapsed = Raw; - Collapsed.erase(std::remove_if(Collapsed.begin(), Collapsed.end(), isspace), - Collapsed.end()); + Collapsed.erase(llvm::remove_if(Collapsed, isspace), Collapsed.end()); StringRef Whole = Collapsed; std::pair<StringRef, StringRef> DotSplit = Whole.split('.'); if (!matchRegister(DotSplit.first.lower())) @@ -1219,8 +1229,7 @@ bool HexagonAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 
SMLoc &En NeededWorkaround = NeededWorkaround || (Again && !(Contigious && Type)); } std::string Collapsed = RawString; - Collapsed.erase(std::remove_if(Collapsed.begin(), Collapsed.end(), isspace), - Collapsed.end()); + Collapsed.erase(llvm::remove_if(Collapsed, isspace), Collapsed.end()); StringRef FullString = Collapsed; std::pair<StringRef, StringRef> DotSplit = FullString.split('.'); unsigned DotReg = matchRegister(DotSplit.first.lower()); @@ -1277,7 +1286,7 @@ bool HexagonAsmParser::implicitExpressionLocation(OperandVector &Operands) { } bool HexagonAsmParser::parseExpression(MCExpr const *& Expr) { - llvm::SmallVector<AsmToken, 4> Tokens; + SmallVector<AsmToken, 4> Tokens; MCAsmLexer &Lexer = getLexer(); bool Done = false; static char const * Comma = ","; @@ -1456,9 +1465,8 @@ bool HexagonAsmParser::ParseInstruction(ParseInstructionInfo &Info, return parseInstruction(Operands); } -namespace { -MCInst makeCombineInst(int opCode, MCOperand &Rdd, - MCOperand &MO1, MCOperand &MO2) { +static MCInst makeCombineInst(int opCode, MCOperand &Rdd, + MCOperand &MO1, MCOperand &MO2) { MCInst TmpInst; TmpInst.setOpcode(opCode); TmpInst.addOperand(Rdd); @@ -1467,7 +1475,6 @@ MCInst makeCombineInst(int opCode, MCOperand &Rdd, return TmpInst; } -} // Define this matcher function after the auto-generated include so we // have the match class enum definitions. @@ -1488,12 +1495,6 @@ unsigned HexagonAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, ? Match_Success : Match_InvalidOperand; } - case MCK__MINUS_1: { - int64_t Value; - return Op->isImm() && Op->Imm.Val->evaluateAsAbsolute(Value) && Value == -1 - ? Match_Success - : Match_InvalidOperand; - } } if (Op->Kind == HexagonOperand::Token && Kind != InvalidMatchClass) { StringRef myStringRef = StringRef(Op->Tok.Data, Op->Tok.Length); @@ -1510,7 +1511,8 @@ unsigned HexagonAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, return Match_InvalidOperand; } -void HexagonAsmParser::OutOfRange(SMLoc IDLoc, long long Val, long long Max) { +// FIXME: Calls to OutOfRange shoudl propagate failure up to parseStatement. +bool HexagonAsmParser::OutOfRange(SMLoc IDLoc, long long Val, long long Max) { std::string errStr; raw_string_ostream ES(errStr); ES << "value " << Val << "(" << format_hex(Val, 0) << ") out of range: "; @@ -1518,7 +1520,7 @@ void HexagonAsmParser::OutOfRange(SMLoc IDLoc, long long Val, long long Max) { ES << "0-" << Max; else ES << Max << "-" << (-Max - 1); - Error(IDLoc, ES.str().c_str()); + return Parser.printError(IDLoc, ES.str()); } int HexagonAsmParser::processInstruction(MCInst &Inst, @@ -1599,11 +1601,11 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, case Hexagon::A2_tfrp: { MCOperand &MO = Inst.getOperand(1); unsigned int RegPairNum = RI->getEncodingValue(MO.getReg()); - std::string R1 = r + llvm::utostr(RegPairNum + 1); + std::string R1 = r + utostr(RegPairNum + 1); StringRef Reg1(R1); MO.setReg(matchRegister(Reg1)); // Add a new operand for the second register in the pair. 
- std::string R2 = r + llvm::utostr(RegPairNum); + std::string R2 = r + utostr(RegPairNum); StringRef Reg2(R2); Inst.addOperand(MCOperand::createReg(matchRegister(Reg2))); Inst.setOpcode(Hexagon::A2_combinew); @@ -1614,11 +1616,11 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, case Hexagon::A2_tfrpf: { MCOperand &MO = Inst.getOperand(2); unsigned int RegPairNum = RI->getEncodingValue(MO.getReg()); - std::string R1 = r + llvm::utostr(RegPairNum + 1); + std::string R1 = r + utostr(RegPairNum + 1); StringRef Reg1(R1); MO.setReg(matchRegister(Reg1)); // Add a new operand for the second register in the pair. - std::string R2 = r + llvm::utostr(RegPairNum); + std::string R2 = r + utostr(RegPairNum); StringRef Reg2(R2); Inst.addOperand(MCOperand::createReg(matchRegister(Reg2))); Inst.setOpcode((Inst.getOpcode() == Hexagon::A2_tfrpt) @@ -1630,11 +1632,11 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, case Hexagon::A2_tfrpfnew: { MCOperand &MO = Inst.getOperand(2); unsigned int RegPairNum = RI->getEncodingValue(MO.getReg()); - std::string R1 = r + llvm::utostr(RegPairNum + 1); + std::string R1 = r + utostr(RegPairNum + 1); StringRef Reg1(R1); MO.setReg(matchRegister(Reg1)); // Add a new operand for the second register in the pair. - std::string R2 = r + llvm::utostr(RegPairNum); + std::string R2 = r + utostr(RegPairNum); StringRef Reg2(R2); Inst.addOperand(MCOperand::createReg(matchRegister(Reg2))); Inst.setOpcode((Inst.getOpcode() == Hexagon::A2_tfrptnew) @@ -1644,13 +1646,13 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, } // Translate a "$Vdd = $Vss" to "$Vdd = vcombine($Vs, $Vt)" - case Hexagon::HEXAGON_V6_vassignpair: { + case Hexagon::V6_vassignp: { MCOperand &MO = Inst.getOperand(1); unsigned int RegPairNum = RI->getEncodingValue(MO.getReg()); - std::string R1 = v + llvm::utostr(RegPairNum + 1); + std::string R1 = v + utostr(RegPairNum + 1); MO.setReg(MatchRegisterName(R1)); // Add a new operand for the second register in the pair. 
- std::string R2 = v + llvm::utostr(RegPairNum); + std::string R2 = v + utostr(RegPairNum); Inst.addOperand(MCOperand::createReg(MatchRegisterName(R2))); Inst.setOpcode(Hexagon::V6_vcombine); break; @@ -1658,14 +1660,9 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, // Translate a "$Rx = CONST32(#imm)" to "$Rx = memw(gp+#LABEL) " case Hexagon::CONST32: - case Hexagon::CONST32_Float_Real: - case Hexagon::CONST32_Int_Real: - case Hexagon::FCONST32_nsdata: is32bit = true; // Translate a "$Rx:y = CONST64(#imm)" to "$Rx:y = memd(gp+#LABEL) " - case Hexagon::CONST64_Float_Real: - case Hexagon::CONST64_Int_Real: - + case Hexagon::CONST64: // FIXME: need better way to detect AsmStreamer (upstream removed getKind()) if (!Parser.getStreamer().hasRawTextSupport()) { MCELFStreamer *MES = static_cast<MCELFStreamer *>(&Parser.getStreamer()); @@ -1725,8 +1722,8 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, getStreamer().EmitIntValue(Value, byteSize); } } else if (MO_1.isExpr()) { - const char *StringStart = 0; - const char *StringEnd = 0; + const char *StringStart = nullptr; + const char *StringEnd = nullptr; if (*Operands[4]->getStartLoc().getPointer() == '#') { StringStart = Operands[5]->getStartLoc().getPointer(); StringEnd = Operands[6]->getStartLoc().getPointer(); @@ -1832,10 +1829,9 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, break; } - case Hexagon::S2_tableidxb_goodsyntax: { + case Hexagon::S2_tableidxb_goodsyntax: Inst.setOpcode(Hexagon::S2_tableidxb); break; - } case Hexagon::S2_tableidxh_goodsyntax: { MCInst TmpInst; @@ -1894,10 +1890,9 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, break; } - case Hexagon::M2_mpyui: { + case Hexagon::M2_mpyui: Inst.setOpcode(Hexagon::M2_mpyi); break; - } case Hexagon::M2_mpysmi: { MCInst TmpInst; MCOperand &Rd = Inst.getOperand(0); @@ -1970,11 +1965,11 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, if (Value == 0) { // convert to $Rdd = combine ($Rs[0], $Rs[1]) MCInst TmpInst; unsigned int RegPairNum = RI->getEncodingValue(Rss.getReg()); - std::string R1 = r + llvm::utostr(RegPairNum + 1); + std::string R1 = r + utostr(RegPairNum + 1); StringRef Reg1(R1); Rss.setReg(matchRegister(Reg1)); // Add a new operand for the second register in the pair. 
- std::string R2 = r + llvm::utostr(RegPairNum); + std::string R2 = r + utostr(RegPairNum); StringRef Reg2(R2); TmpInst.setOpcode(Hexagon::A2_combinew); TmpInst.addOperand(Rdd); @@ -1996,14 +1991,12 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, unsigned int RegNum = RI->getEncodingValue(Rs.getReg()); if (RegNum & 1) { // Odd mapped to raw:hi, regpair is rodd:odd-1, like r3:2 Inst.setOpcode(Hexagon::A4_boundscheck_hi); - std::string Name = - r + llvm::utostr(RegNum) + Colon + llvm::utostr(RegNum - 1); + std::string Name = r + utostr(RegNum) + Colon + utostr(RegNum - 1); StringRef RegPair = Name; Rs.setReg(matchRegister(RegPair)); } else { // raw:lo Inst.setOpcode(Hexagon::A4_boundscheck_lo); - std::string Name = - r + llvm::utostr(RegNum + 1) + Colon + llvm::utostr(RegNum); + std::string Name = r + utostr(RegNum + 1) + Colon + utostr(RegNum); StringRef RegPair = Name; Rs.setReg(matchRegister(RegPair)); } @@ -2015,14 +2008,12 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, unsigned int RegNum = RI->getEncodingValue(Rs.getReg()); if (RegNum & 1) { // Odd mapped to raw:hi Inst.setOpcode(Hexagon::A2_addsph); - std::string Name = - r + llvm::utostr(RegNum) + Colon + llvm::utostr(RegNum - 1); + std::string Name = r + utostr(RegNum) + Colon + utostr(RegNum - 1); StringRef RegPair = Name; Rs.setReg(matchRegister(RegPair)); } else { // Even mapped raw:lo Inst.setOpcode(Hexagon::A2_addspl); - std::string Name = - r + llvm::utostr(RegNum + 1) + Colon + llvm::utostr(RegNum); + std::string Name = r + utostr(RegNum + 1) + Colon + utostr(RegNum); StringRef RegPair = Name; Rs.setReg(matchRegister(RegPair)); } @@ -2034,14 +2025,12 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, unsigned int RegNum = RI->getEncodingValue(Rt.getReg()); if (RegNum & 1) { // Odd mapped to sat:raw:hi Inst.setOpcode(Hexagon::M2_vrcmpys_s1_h); - std::string Name = - r + llvm::utostr(RegNum) + Colon + llvm::utostr(RegNum - 1); + std::string Name = r + utostr(RegNum) + Colon + utostr(RegNum - 1); StringRef RegPair = Name; Rt.setReg(matchRegister(RegPair)); } else { // Even mapped sat:raw:lo Inst.setOpcode(Hexagon::M2_vrcmpys_s1_l); - std::string Name = - r + llvm::utostr(RegNum + 1) + Colon + llvm::utostr(RegNum); + std::string Name = r + utostr(RegNum + 1) + Colon + utostr(RegNum); StringRef RegPair = Name; Rt.setReg(matchRegister(RegPair)); } @@ -2056,14 +2045,12 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, unsigned int RegNum = RI->getEncodingValue(Rt.getReg()); if (RegNum & 1) { // Odd mapped to sat:raw:hi TmpInst.setOpcode(Hexagon::M2_vrcmpys_acc_s1_h); - std::string Name = - r + llvm::utostr(RegNum) + Colon + llvm::utostr(RegNum - 1); + std::string Name = r + utostr(RegNum) + Colon + utostr(RegNum - 1); StringRef RegPair = Name; Rt.setReg(matchRegister(RegPair)); } else { // Even mapped sat:raw:lo TmpInst.setOpcode(Hexagon::M2_vrcmpys_acc_s1_l); - std::string Name = - r + llvm::utostr(RegNum + 1) + Colon + llvm::utostr(RegNum); + std::string Name = r + utostr(RegNum + 1) + Colon + utostr(RegNum); StringRef RegPair = Name; Rt.setReg(matchRegister(RegPair)); } @@ -2081,14 +2068,12 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, unsigned int RegNum = RI->getEncodingValue(Rt.getReg()); if (RegNum & 1) { // Odd mapped to rnd:sat:raw:hi Inst.setOpcode(Hexagon::M2_vrcmpys_s1rp_h); - std::string Name = - r + llvm::utostr(RegNum) + Colon + llvm::utostr(RegNum - 1); + std::string Name = r + utostr(RegNum) + Colon + utostr(RegNum - 1); StringRef RegPair = Name; 
Rt.setReg(matchRegister(RegPair)); } else { // Even mapped rnd:sat:raw:lo Inst.setOpcode(Hexagon::M2_vrcmpys_s1rp_l); - std::string Name = - r + llvm::utostr(RegNum + 1) + Colon + llvm::utostr(RegNum); + std::string Name = r + utostr(RegNum + 1) + Colon + utostr(RegNum); StringRef RegPair = Name; Rt.setReg(matchRegister(RegPair)); } @@ -2124,11 +2109,11 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, if (Value == 0) { MCInst TmpInst; unsigned int RegPairNum = RI->getEncodingValue(Rss.getReg()); - std::string R1 = r + llvm::utostr(RegPairNum + 1); + std::string R1 = r + utostr(RegPairNum + 1); StringRef Reg1(R1); Rss.setReg(matchRegister(Reg1)); // Add a new operand for the second register in the pair. - std::string R2 = r + llvm::utostr(RegPairNum); + std::string R2 = r + utostr(RegPairNum); StringRef Reg2(R2); TmpInst.setOpcode(Hexagon::A2_combinew); TmpInst.addOperand(Rdd); @@ -2162,7 +2147,6 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, return Match_Success; } - unsigned HexagonAsmParser::matchRegister(StringRef Name) { if (unsigned Reg = MatchRegisterName(Name)) return Reg; diff --git a/contrib/llvm/lib/Target/Hexagon/BitTracker.cpp b/contrib/llvm/lib/Target/Hexagon/BitTracker.cpp index d052a83..963fb99 100644 --- a/contrib/llvm/lib/Target/Hexagon/BitTracker.cpp +++ b/contrib/llvm/lib/Target/Hexagon/BitTracker.cpp @@ -53,28 +53,36 @@ // // The code below is intended to be fully target-independent. +#include "BitTracker.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/BitVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/Constants.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" - -#include "BitTracker.h" +#include <iterator> +#include <cassert> +#include <cstdint> using namespace llvm; typedef BitTracker BT; namespace { + // Local trickery to pretty print a register (without the whole "%vreg" // business). struct printv { printv(unsigned r) : R(r) {} + unsigned R; }; + raw_ostream &operator<< (raw_ostream &OS, const printv &PV) { if (PV.R) OS << 'v' << TargetRegisterInfo::virtReg2Index(PV.R); @@ -82,9 +90,11 @@ namespace { OS << 's'; return OS; } -} + +} // end anonymous namespace namespace llvm { + raw_ostream &operator<<(raw_ostream &OS, const BT::BitValue &BV) { switch (BV.Type) { case BT::BitValue::Top: @@ -167,6 +177,12 @@ namespace llvm { return OS; } + +} // end namespace llvm + +void BitTracker::print_cells(raw_ostream &OS) const { + for (CellMapType::iterator I = Map.begin(), E = Map.end(); I != E; ++I) + dbgs() << PrintReg(I->first, &ME.TRI) << " -> " << I->second << "\n"; } BitTracker::BitTracker(const MachineEvaluator &E, MachineFunction &F) @@ -176,7 +192,6 @@ BitTracker::~BitTracker() { delete ⤅ } - // If we were allowed to update a cell for a part of a register, the meet // operation would need to be parametrized by the register number and the // exact part of the register, so that the computer BitRefs correspond to @@ -195,7 +210,6 @@ bool BT::RegisterCell::meet(const RegisterCell &RC, unsigned SelfR) { return Changed; } - // Insert the entire cell RC into the current cell at position given by M. 
BT::RegisterCell &BT::RegisterCell::insert(const BT::RegisterCell &RC, const BitMask &M) { @@ -218,7 +232,6 @@ BT::RegisterCell &BT::RegisterCell::insert(const BT::RegisterCell &RC, return *this; } - BT::RegisterCell BT::RegisterCell::extract(const BitMask &M) const { uint16_t B = M.first(), E = M.last(), W = width(); assert(B < W && E < W); @@ -237,7 +250,6 @@ BT::RegisterCell BT::RegisterCell::extract(const BitMask &M) const { return RC; } - BT::RegisterCell &BT::RegisterCell::rol(uint16_t Sh) { // Rotate left (i.e. towards increasing bit indices). // Swap the two parts: [0..W-Sh-1] [W-Sh..W-1] @@ -259,7 +271,6 @@ BT::RegisterCell &BT::RegisterCell::rol(uint16_t Sh) { return *this; } - BT::RegisterCell &BT::RegisterCell::fill(uint16_t B, uint16_t E, const BitValue &V) { assert(B <= E); @@ -268,7 +279,6 @@ BT::RegisterCell &BT::RegisterCell::fill(uint16_t B, uint16_t E, return *this; } - BT::RegisterCell &BT::RegisterCell::cat(const RegisterCell &RC) { // Append the cell given as the argument to the "this" cell. // Bit 0 of RC becomes bit W of the result, where W is this->width(). @@ -279,7 +289,6 @@ BT::RegisterCell &BT::RegisterCell::cat(const RegisterCell &RC) { return *this; } - uint16_t BT::RegisterCell::ct(bool B) const { uint16_t W = width(); uint16_t C = 0; @@ -289,7 +298,6 @@ uint16_t BT::RegisterCell::ct(bool B) const { return C; } - uint16_t BT::RegisterCell::cl(bool B) const { uint16_t W = width(); uint16_t C = 0; @@ -299,7 +307,6 @@ uint16_t BT::RegisterCell::cl(bool B) const { return C; } - bool BT::RegisterCell::operator== (const RegisterCell &RC) const { uint16_t W = Bits.size(); if (RC.Bits.size() != W) @@ -310,7 +317,6 @@ bool BT::RegisterCell::operator== (const RegisterCell &RC) const { return true; } - uint16_t BT::MachineEvaluator::getRegBitWidth(const RegisterRef &RR) const { // The general problem is with finding a register class that corresponds // to a given reference reg:sub. There can be several such classes, and @@ -336,7 +342,6 @@ uint16_t BT::MachineEvaluator::getRegBitWidth(const RegisterRef &RR) const { return BW; } - BT::RegisterCell BT::MachineEvaluator::getCell(const RegisterRef &RR, const CellMapType &M) const { uint16_t BW = getRegBitWidth(RR); @@ -364,7 +369,6 @@ BT::RegisterCell BT::MachineEvaluator::getCell(const RegisterRef &RR, return RegisterCell::top(BW); } - void BT::MachineEvaluator::putCell(const RegisterRef &RR, RegisterCell RC, CellMapType &M) const { // While updating the cell map can be done in a meaningful way for @@ -382,7 +386,6 @@ void BT::MachineEvaluator::putCell(const RegisterRef &RR, RegisterCell RC, M[RR.Reg] = RC; } - // Check if the cell represents a compile-time integer value. bool BT::MachineEvaluator::isInt(const RegisterCell &A) const { uint16_t W = A.width(); @@ -392,7 +395,6 @@ bool BT::MachineEvaluator::isInt(const RegisterCell &A) const { return true; } - // Convert a cell to the integer value. The result must fit in uint64_t. uint64_t BT::MachineEvaluator::toInt(const RegisterCell &A) const { assert(isInt(A)); @@ -405,7 +407,6 @@ uint64_t BT::MachineEvaluator::toInt(const RegisterCell &A) const { return Val; } - // Evaluator helper functions. These implement some common operation on // register cells that can be used to implement target-specific instructions // in a target-specific evaluator. 
@@ -420,7 +421,6 @@ BT::RegisterCell BT::MachineEvaluator::eIMM(int64_t V, uint16_t W) const { return Res; } - BT::RegisterCell BT::MachineEvaluator::eIMM(const ConstantInt *CI) const { const APInt &A = CI->getValue(); uint16_t BW = A.getBitWidth(); @@ -431,7 +431,6 @@ BT::RegisterCell BT::MachineEvaluator::eIMM(const ConstantInt *CI) const { return Res; } - BT::RegisterCell BT::MachineEvaluator::eADD(const RegisterCell &A1, const RegisterCell &A2) const { uint16_t W = A1.width(); @@ -465,7 +464,6 @@ BT::RegisterCell BT::MachineEvaluator::eADD(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eSUB(const RegisterCell &A1, const RegisterCell &A2) const { uint16_t W = A1.width(); @@ -499,29 +497,26 @@ BT::RegisterCell BT::MachineEvaluator::eSUB(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eMLS(const RegisterCell &A1, const RegisterCell &A2) const { uint16_t W = A1.width() + A2.width(); - uint16_t Z = A1.ct(0) + A2.ct(0); + uint16_t Z = A1.ct(false) + A2.ct(false); RegisterCell Res(W); Res.fill(0, Z, BitValue::Zero); Res.fill(Z, W, BitValue::self()); return Res; } - BT::RegisterCell BT::MachineEvaluator::eMLU(const RegisterCell &A1, const RegisterCell &A2) const { uint16_t W = A1.width() + A2.width(); - uint16_t Z = A1.ct(0) + A2.ct(0); + uint16_t Z = A1.ct(false) + A2.ct(false); RegisterCell Res(W); Res.fill(0, Z, BitValue::Zero); Res.fill(Z, W, BitValue::self()); return Res; } - BT::RegisterCell BT::MachineEvaluator::eASL(const RegisterCell &A1, uint16_t Sh) const { assert(Sh <= A1.width()); @@ -531,7 +526,6 @@ BT::RegisterCell BT::MachineEvaluator::eASL(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eLSR(const RegisterCell &A1, uint16_t Sh) const { uint16_t W = A1.width(); @@ -542,7 +536,6 @@ BT::RegisterCell BT::MachineEvaluator::eLSR(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eASR(const RegisterCell &A1, uint16_t Sh) const { uint16_t W = A1.width(); @@ -554,7 +547,6 @@ BT::RegisterCell BT::MachineEvaluator::eASR(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eAND(const RegisterCell &A1, const RegisterCell &A2) const { uint16_t W = A1.width(); @@ -577,7 +569,6 @@ BT::RegisterCell BT::MachineEvaluator::eAND(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eORL(const RegisterCell &A1, const RegisterCell &A2) const { uint16_t W = A1.width(); @@ -600,7 +591,6 @@ BT::RegisterCell BT::MachineEvaluator::eORL(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eXOR(const RegisterCell &A1, const RegisterCell &A2) const { uint16_t W = A1.width(); @@ -621,7 +611,6 @@ BT::RegisterCell BT::MachineEvaluator::eXOR(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eNOT(const RegisterCell &A1) const { uint16_t W = A1.width(); RegisterCell Res(W); @@ -637,7 +626,6 @@ BT::RegisterCell BT::MachineEvaluator::eNOT(const RegisterCell &A1) const { return Res; } - BT::RegisterCell BT::MachineEvaluator::eSET(const RegisterCell &A1, uint16_t BitN) const { assert(BitN < A1.width()); @@ -646,7 +634,6 @@ BT::RegisterCell BT::MachineEvaluator::eSET(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eCLR(const RegisterCell &A1, uint16_t BitN) const { assert(BitN < A1.width()); @@ -655,7 +642,6 @@ BT::RegisterCell BT::MachineEvaluator::eCLR(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eCLB(const RegisterCell &A1, 
bool B, uint16_t W) const { uint16_t C = A1.cl(B), AW = A1.width(); @@ -666,7 +652,6 @@ BT::RegisterCell BT::MachineEvaluator::eCLB(const RegisterCell &A1, bool B, return RegisterCell::self(0, W); } - BT::RegisterCell BT::MachineEvaluator::eCTB(const RegisterCell &A1, bool B, uint16_t W) const { uint16_t C = A1.ct(B), AW = A1.width(); @@ -677,7 +662,6 @@ BT::RegisterCell BT::MachineEvaluator::eCTB(const RegisterCell &A1, bool B, return RegisterCell::self(0, W); } - BT::RegisterCell BT::MachineEvaluator::eSXT(const RegisterCell &A1, uint16_t FromN) const { uint16_t W = A1.width(); @@ -689,7 +673,6 @@ BT::RegisterCell BT::MachineEvaluator::eSXT(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eZXT(const RegisterCell &A1, uint16_t FromN) const { uint16_t W = A1.width(); @@ -699,7 +682,6 @@ BT::RegisterCell BT::MachineEvaluator::eZXT(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eXTR(const RegisterCell &A1, uint16_t B, uint16_t E) const { uint16_t W = A1.width(); @@ -712,7 +694,6 @@ BT::RegisterCell BT::MachineEvaluator::eXTR(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eINS(const RegisterCell &A1, const RegisterCell &A2, uint16_t AtN) const { uint16_t W1 = A1.width(), W2 = A2.width(); @@ -725,7 +706,6 @@ BT::RegisterCell BT::MachineEvaluator::eINS(const RegisterCell &A1, return Res; } - BT::BitMask BT::MachineEvaluator::mask(unsigned Reg, unsigned Sub) const { assert(Sub == 0 && "Generic BitTracker::mask called for Sub != 0"); uint16_t W = getRegBitWidth(Reg); @@ -779,7 +759,6 @@ bool BT::MachineEvaluator::evaluate(const MachineInstr &MI, return true; } - // Main W-Z implementation. void BT::visitPHI(const MachineInstr &PI) { @@ -971,7 +950,6 @@ void BT::visitBranchesFrom(const MachineInstr &BI) { } } - void BT::visitUsesOf(unsigned Reg) { if (Trace) dbgs() << "visiting uses of " << PrintReg(Reg, &ME.TRI) << "\n"; @@ -991,17 +969,14 @@ void BT::visitUsesOf(unsigned Reg) { } } - BT::RegisterCell BT::get(RegisterRef RR) const { return ME.getCell(RR, Map); } - void BT::put(RegisterRef RR, const RegisterCell &RC) { ME.putCell(RR, RC, Map); } - // Replace all references to bits from OldRR with the corresponding bits // in NewRR. void BT::subst(RegisterRef OldRR, RegisterRef NewRR) { @@ -1027,7 +1002,6 @@ void BT::subst(RegisterRef OldRR, RegisterRef NewRR) { } } - // Check if the block has been "executed" during propagation. (If not, the // block is dead, but it may still appear to be reachable.) bool BT::reached(const MachineBasicBlock *B) const { @@ -1041,6 +1015,18 @@ bool BT::reached(const MachineBasicBlock *B) const { return false; } +// Visit an individual instruction. This could be a newly added instruction, +// or one that has been modified by an optimization. +void BT::visit(const MachineInstr &MI) { + assert(!MI.isBranch() && "Only non-branches are allowed"); + InstrExec.insert(&MI); + visitNonBranch(MI); + // The call to visitNonBranch could propagate the changes until a branch + // is actually visited. This could result in adding CFG edges to the flow + // queue. Since the queue won't be processed, clear it. 
+ while (!FlowQ.empty()) + FlowQ.pop(); +} void BT::reset() { EdgeExec.clear(); @@ -1048,7 +1034,6 @@ void BT::reset() { Map.clear(); } - void BT::run() { reset(); assert(FlowQ.empty()); @@ -1118,10 +1103,6 @@ void BT::run() { } } // while (!FlowQ->empty()) - if (Trace) { - dbgs() << "Cells after propagation:\n"; - for (CellMapType::iterator I = Map.begin(), E = Map.end(); I != E; ++I) - dbgs() << PrintReg(I->first, &ME.TRI) << " -> " << I->second << "\n"; - } + if (Trace) + print_cells(dbgs() << "Cells after propagation:\n"); } - diff --git a/contrib/llvm/lib/Target/Hexagon/BitTracker.h b/contrib/llvm/lib/Target/Hexagon/BitTracker.h index 5b925fe..48c5f22 100644 --- a/contrib/llvm/lib/Target/Hexagon/BitTracker.h +++ b/contrib/llvm/lib/Target/Hexagon/BitTracker.h @@ -1,4 +1,4 @@ -//===--- BitTracker.h -----------------------------------------------------===// +//===--- BitTracker.h -------------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,24 +7,27 @@ // //===----------------------------------------------------------------------===// -#ifndef BITTRACKER_H -#define BITTRACKER_H +#ifndef LLVM_LIB_TARGET_HEXAGON_BITTRACKER_H +#define LLVM_LIB_TARGET_HEXAGON_BITTRACKER_H #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineFunction.h" - +#include "llvm/CodeGen/MachineOperand.h" +#include <cassert> +#include <cstdint> #include <map> #include <queue> #include <set> +#include <utility> namespace llvm { - class ConstantInt; - class MachineRegisterInfo; - class MachineBasicBlock; - class MachineInstr; - class MachineOperand; - class raw_ostream; + +class ConstantInt; +class MachineRegisterInfo; +class MachineBasicBlock; +class MachineInstr; +class raw_ostream; struct BitTracker { struct BitRef; @@ -49,6 +52,9 @@ struct BitTracker { void put(RegisterRef RR, const RegisterCell &RC); void subst(RegisterRef OldRR, RegisterRef NewRR); bool reached(const MachineBasicBlock *B) const; + void visit(const MachineInstr &MI); + + void print_cells(raw_ostream &OS) const; private: void visitPHI(const MachineInstr &PI); @@ -73,19 +79,19 @@ private: CellMapType ⤅ }; - // Abstraction of a reference to bit at position Pos from a register Reg. struct BitTracker::BitRef { BitRef(unsigned R = 0, uint16_t P = 0) : Reg(R), Pos(P) {} + bool operator== (const BitRef &BR) const { // If Reg is 0, disregard Pos. return Reg == BR.Reg && (Reg == 0 || Pos == BR.Pos); } + unsigned Reg; uint16_t Pos; }; - // Abstraction of a register reference in MachineOperand. It contains the // register number and the subregister index. struct BitTracker::RegisterRef { @@ -93,10 +99,10 @@ struct BitTracker::RegisterRef { : Reg(R), Sub(S) {} RegisterRef(const MachineOperand &MO) : Reg(MO.getReg()), Sub(MO.getSubReg()) {} + unsigned Reg, Sub; }; - // Value that a single bit can take. This is outside of the context of // any register, it is more of an abstraction of the two-element set of // possible bit values. One extension here is the "Ref" type, which @@ -155,6 +161,7 @@ struct BitTracker::BitValue { bool operator!= (const BitValue &V) const { return !operator==(V); } + bool is(unsigned T) const { assert(T == 0 || T == 1); return T == 0 ? 
Type == Zero @@ -206,6 +213,7 @@ struct BitTracker::BitValue { bool num() const { return Type == Zero || Type == One; } + operator bool() const { assert(Type == Zero || Type == One); return Type == One; @@ -214,7 +222,6 @@ struct BitTracker::BitValue { friend raw_ostream &operator<<(raw_ostream &OS, const BitValue &BV); }; - // This operation must be idempotent, i.e. ref(ref(V)) == ref(V). inline BitTracker::BitValue BitTracker::BitValue::ref(const BitValue &V) { @@ -225,26 +232,26 @@ BitTracker::BitValue::ref(const BitValue &V) { return self(); } - inline BitTracker::BitValue BitTracker::BitValue::self(const BitRef &Self) { return BitValue(Self.Reg, Self.Pos); } - // A sequence of bits starting from index B up to and including index E. // If E < B, the mask represents two sections: [0..E] and [B..W) where // W is the width of the register. struct BitTracker::BitMask { - BitMask() : B(0), E(0) {} + BitMask() = default; BitMask(uint16_t b, uint16_t e) : B(b), E(e) {} + uint16_t first() const { return B; } uint16_t last() const { return E; } + private: - uint16_t B, E; + uint16_t B = 0; + uint16_t E = 0; }; - // Representation of a register: a list of BitValues. struct BitTracker::RegisterCell { RegisterCell(uint16_t Width = DefaultBitN) : Bits(Width) {} @@ -252,6 +259,7 @@ struct BitTracker::RegisterCell { uint16_t width() const { return Bits.size(); } + const BitValue &operator[](uint16_t BitN) const { assert(BitN < Bits.size()); return Bits[BitN]; @@ -294,12 +302,10 @@ private: friend raw_ostream &operator<<(raw_ostream &OS, const RegisterCell &RC); }; - inline bool BitTracker::has(unsigned Reg) const { return Map.find(Reg) != Map.end(); } - inline const BitTracker::RegisterCell& BitTracker::lookup(unsigned Reg) const { CellMapType::const_iterator F = Map.find(Reg); @@ -307,7 +313,6 @@ BitTracker::lookup(unsigned Reg) const { return F->second; } - inline BitTracker::RegisterCell BitTracker::RegisterCell::self(unsigned Reg, uint16_t Width) { RegisterCell RC(Width); @@ -316,7 +321,6 @@ BitTracker::RegisterCell::self(unsigned Reg, uint16_t Width) { return RC; } - inline BitTracker::RegisterCell BitTracker::RegisterCell::top(uint16_t Width) { RegisterCell RC(Width); @@ -325,7 +329,6 @@ BitTracker::RegisterCell::top(uint16_t Width) { return RC; } - inline BitTracker::RegisterCell BitTracker::RegisterCell::ref(const RegisterCell &C) { uint16_t W = C.width(); @@ -342,12 +345,13 @@ BitTracker::RegisterCell::ref(const RegisterCell &C) { struct BitTracker::MachineEvaluator { MachineEvaluator(const TargetRegisterInfo &T, MachineRegisterInfo &M) : TRI(T), MRI(M) {} - virtual ~MachineEvaluator() {} + virtual ~MachineEvaluator() = default; uint16_t getRegBitWidth(const RegisterRef &RR) const; RegisterCell getCell(const RegisterRef &RR, const CellMapType &M) const; void putCell(const RegisterRef &RR, RegisterCell RC, CellMapType &M) const; + // A result of any operation should use refs to the source cells, not // the cells directly. This function is a convenience wrapper to quickly // generate a ref for a cell corresponding to a register reference. 
@@ -432,4 +436,4 @@ struct BitTracker::MachineEvaluator { } // end namespace llvm -#endif +#endif // LLVM_LIB_TARGET_HEXAGON_BITTRACKER_H diff --git a/contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp index 7bc08ec..c05fbc1 100644 --- a/contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp +++ b/contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp @@ -14,22 +14,23 @@ #include "MCTargetDesc/HexagonMCChecker.h" #include "MCTargetDesc/HexagonMCTargetDesc.h" #include "MCTargetDesc/HexagonMCInstrInfo.h" -#include "MCTargetDesc/HexagonInstPrinter.h" -#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" -#include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/LEB128.h" -#include "llvm/Support/MemoryObject.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <memory> using namespace llvm; using namespace Hexagon; @@ -37,11 +38,13 @@ using namespace Hexagon; typedef MCDisassembler::DecodeStatus DecodeStatus; namespace { + /// \brief Hexagon disassembler for all Hexagon platforms. class HexagonDisassembler : public MCDisassembler { public: std::unique_ptr<MCInstrInfo const> const MCII; std::unique_ptr<MCInst *> CurrentBundle; + HexagonDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, MCInstrInfo const *MCII) : MCDisassembler(STI, Ctx), MCII(MCII), CurrentBundle(new MCInst *) {} @@ -58,7 +61,8 @@ public: void adjustExtendedInstructions(MCInst &MCI, MCInst const &MCB) const; void addSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) const; }; -} + +} // end anonymous namespace // Forward declare these because the auto-generated code will reference them. // Definitions are further down. 
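The CurrentBundle member above exists because Hexagon instructions are decoded a packet (bundle) at a time: getInstruction keeps pulling 32-bit words until the packet ends. A standalone sketch of the packet-boundary rule, assuming the usual Hexagon parse-bit encoding in bits 15:14 of each word (packetLength is an illustrative helper, not code from the patch):

#include <cstddef>
#include <cstdint>

// Count how many 32-bit words belong to the current packet.
// Parse bits 15:14: 0b11 marks the last word of a packet, and
// 0b00 marks a duplex, which is also always the last word.
size_t packetLength(const uint32_t *Words, size_t N) {
  for (size_t i = 0; i < N; ++i) {
    uint32_t Parse = (Words[i] >> 14) & 0x3;
    if (Parse == 0x3 || Parse == 0x0)
      return i + 1;
  }
  return 0; // input ended before the packet did
}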
@@ -105,9 +109,9 @@ static unsigned getRegFromSubinstEncoding(unsigned encoded_reg); static DecodeStatus unsignedImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, const void *Decoder); -static DecodeStatus s16ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, +static DecodeStatus s16_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, const void *Decoder); -static DecodeStatus s12ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, +static DecodeStatus s12_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, const void *Decoder); static DecodeStatus s11_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, const void *Decoder); @@ -117,9 +121,9 @@ static DecodeStatus s11_2ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, const void *Decoder); static DecodeStatus s11_3ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, const void *Decoder); -static DecodeStatus s10ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, +static DecodeStatus s10_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, const void *Decoder); -static DecodeStatus s8ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, +static DecodeStatus s8_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, const void *Decoder); static DecodeStatus s6_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, const void *Decoder); @@ -147,7 +151,7 @@ static MCDisassembler *createHexagonDisassembler(const Target &T, } extern "C" void LLVMInitializeHexagonDisassembler() { - TargetRegistry::RegisterMCDisassembler(TheHexagonTarget, + TargetRegistry::RegisterMCDisassembler(getTheHexagonTarget(), createHexagonDisassembler); } @@ -162,7 +166,7 @@ DecodeStatus HexagonDisassembler::getInstruction(MCInst &MI, uint64_t &Size, *CurrentBundle = &MI; MI = HexagonMCInstrInfo::createBundle(); - while (Result == Success && Complete == false) { + while (Result == Success && !Complete) { if (Bytes.size() < HEXAGON_INSTR_SIZE) return MCDisassembler::Fail; MCInst *Inst = new (getContext()) MCInst; @@ -179,14 +183,13 @@ DecodeStatus HexagonDisassembler::getInstruction(MCInst &MI, uint64_t &Size, return MCDisassembler::Success; } -namespace { -HexagonDisassembler const &disassembler(void const *Decoder) { +static HexagonDisassembler const &disassembler(void const *Decoder) { return *static_cast<HexagonDisassembler const *>(Decoder); } -MCContext &contextFromDecoder(void const *Decoder) { + +static MCContext &contextFromDecoder(void const *Decoder) { return disassembler(Decoder).getContext(); } -} DecodeStatus HexagonDisassembler::getSingleInstruction( MCInst &MI, MCInst &MCB, ArrayRef<uint8_t> Bytes, uint64_t Address, @@ -329,8 +332,7 @@ DecodeStatus HexagonDisassembler::getSingleInstruction( // follow the duplex model, so the register values in the MCInst are // incorrect. If the instruction is a compound, loop through the // operands and change registers appropriately. 
- if (llvm::HexagonMCInstrInfo::getType(*MCII, MI) == - HexagonII::TypeCOMPOUND) { + if (HexagonMCInstrInfo::getType(*MCII, MI) == HexagonII::TypeCOMPOUND) { for (MCInst::iterator i = MI.begin(), last = MI.end(); i < last; ++i) { if (i->isReg()) { unsigned reg = i->getReg() - Hexagon::R0; @@ -341,6 +343,37 @@ DecodeStatus HexagonDisassembler::getSingleInstruction( } } + switch(MI.getOpcode()) { + case Hexagon::J4_cmpeqn1_f_jumpnv_nt: + case Hexagon::J4_cmpeqn1_f_jumpnv_t: + case Hexagon::J4_cmpeqn1_fp0_jump_nt: + case Hexagon::J4_cmpeqn1_fp0_jump_t: + case Hexagon::J4_cmpeqn1_fp1_jump_nt: + case Hexagon::J4_cmpeqn1_fp1_jump_t: + case Hexagon::J4_cmpeqn1_t_jumpnv_nt: + case Hexagon::J4_cmpeqn1_t_jumpnv_t: + case Hexagon::J4_cmpeqn1_tp0_jump_nt: + case Hexagon::J4_cmpeqn1_tp0_jump_t: + case Hexagon::J4_cmpeqn1_tp1_jump_nt: + case Hexagon::J4_cmpeqn1_tp1_jump_t: + case Hexagon::J4_cmpgtn1_f_jumpnv_nt: + case Hexagon::J4_cmpgtn1_f_jumpnv_t: + case Hexagon::J4_cmpgtn1_fp0_jump_nt: + case Hexagon::J4_cmpgtn1_fp0_jump_t: + case Hexagon::J4_cmpgtn1_fp1_jump_nt: + case Hexagon::J4_cmpgtn1_fp1_jump_t: + case Hexagon::J4_cmpgtn1_t_jumpnv_nt: + case Hexagon::J4_cmpgtn1_t_jumpnv_t: + case Hexagon::J4_cmpgtn1_tp0_jump_nt: + case Hexagon::J4_cmpgtn1_tp0_jump_t: + case Hexagon::J4_cmpgtn1_tp1_jump_nt: + case Hexagon::J4_cmpgtn1_tp1_jump_t: + MI.insert(MI.begin() + 1, MCOperand::createExpr(MCConstantExpr::create(-1, getContext()))); + break; + default: + break; + } + if (HexagonMCInstrInfo::isNewValue(*MCII, MI)) { unsigned OpIndex = HexagonMCInstrInfo::getNewValueOp(*MCII, MI); MCOperand &MCO = MI.getOperand(OpIndex); @@ -417,46 +450,46 @@ void HexagonDisassembler::adjustExtendedInstructions(MCInst &MCI, // GP relative instruction in the absence of the corresponding immediate // extender. 
switch (MCI.getOpcode()) { - case Hexagon::S2_storerbabs: + case Hexagon::PS_storerbabs: opcode = Hexagon::S2_storerbgp; break; - case Hexagon::S2_storerhabs: + case Hexagon::PS_storerhabs: opcode = Hexagon::S2_storerhgp; break; - case Hexagon::S2_storerfabs: + case Hexagon::PS_storerfabs: opcode = Hexagon::S2_storerfgp; break; - case Hexagon::S2_storeriabs: + case Hexagon::PS_storeriabs: opcode = Hexagon::S2_storerigp; break; - case Hexagon::S2_storerbnewabs: + case Hexagon::PS_storerbnewabs: opcode = Hexagon::S2_storerbnewgp; break; - case Hexagon::S2_storerhnewabs: + case Hexagon::PS_storerhnewabs: opcode = Hexagon::S2_storerhnewgp; break; - case Hexagon::S2_storerinewabs: + case Hexagon::PS_storerinewabs: opcode = Hexagon::S2_storerinewgp; break; - case Hexagon::S2_storerdabs: + case Hexagon::PS_storerdabs: opcode = Hexagon::S2_storerdgp; break; - case Hexagon::L4_loadrb_abs: + case Hexagon::PS_loadrbabs: opcode = Hexagon::L2_loadrbgp; break; - case Hexagon::L4_loadrub_abs: + case Hexagon::PS_loadrubabs: opcode = Hexagon::L2_loadrubgp; break; - case Hexagon::L4_loadrh_abs: + case Hexagon::PS_loadrhabs: opcode = Hexagon::L2_loadrhgp; break; - case Hexagon::L4_loadruh_abs: + case Hexagon::PS_loadruhabs: opcode = Hexagon::L2_loadruhgp; break; - case Hexagon::L4_loadri_abs: + case Hexagon::PS_loadriabs: opcode = Hexagon::L2_loadrigp; break; - case Hexagon::L4_loadrd_abs: + case Hexagon::PS_loadrdabs: opcode = Hexagon::L2_loadrdgp; break; default: @@ -466,10 +499,6 @@ void HexagonDisassembler::adjustExtendedInstructions(MCInst &MCI, } } -namespace llvm { -extern const MCInstrDesc HexagonInsts[]; -} - static DecodeStatus DecodeRegisterClass(MCInst &Inst, unsigned RegNo, ArrayRef<MCPhysReg> Table) { if (RegNo < Table.size()) { @@ -621,11 +650,8 @@ static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo, return MCDisassembler::Success; } -namespace { -uint32_t fullValue(MCInstrInfo const &MCII, - MCInst &MCB, - MCInst &MI, - int64_t Value) { +static uint32_t fullValue(MCInstrInfo const &MCII, MCInst &MCB, MCInst &MI, + int64_t Value) { MCInst const *Extender = HexagonMCInstrInfo::extenderForIndex( MCB, HexagonMCInstrInfo::bundleSize(MCB)); if(!Extender || MI.size() != HexagonMCInstrInfo::getExtendableOp(MCII, MI)) @@ -639,8 +665,9 @@ uint32_t fullValue(MCInstrInfo const &MCII, uint32_t Operand = Upper26 | Lower6; return Operand; } + template <size_t T> -void signedDecoder(MCInst &MI, unsigned tmp, const void *Decoder) { +static void signedDecoder(MCInst &MI, unsigned tmp, const void *Decoder) { HexagonDisassembler const &Disassembler = disassembler(Decoder); int64_t FullValue = fullValue(*Disassembler.MCII, **Disassembler.CurrentBundle, @@ -649,7 +676,6 @@ void signedDecoder(MCInst &MI, unsigned tmp, const void *Decoder) { HexagonMCInstrInfo::addConstant(MI, Extended, Disassembler.getContext()); } -} static DecodeStatus unsignedImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/, @@ -663,13 +689,13 @@ static DecodeStatus unsignedImmDecoder(MCInst &MI, unsigned tmp, return MCDisassembler::Success; } -static DecodeStatus s16ImmDecoder(MCInst &MI, unsigned tmp, +static DecodeStatus s16_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/, const void *Decoder) { signedDecoder<16>(MI, tmp, Decoder); return MCDisassembler::Success; } -static DecodeStatus s12ImmDecoder(MCInst &MI, unsigned tmp, +static DecodeStatus s12_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/, const void *Decoder) { signedDecoder<12>(MI, tmp, Decoder); return MCDisassembler::Success; 
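The renamed sN_0 immediate decoders above all funnel through signedDecoder<N> and fullValue(): when a constant-extender (immext) word precedes the instruction, the extender supplies the upper 26 bits of the immediate (the Upper26 in fullValue) and the instruction's own field contributes only its low 6 bits. A standalone sketch of that combination (combineExtended and its parameters are illustrative, not names from the patch):

#include <cstdint>

// Splice a 26-bit extender payload with the low 6 bits of the
// extended instruction's immediate field to form the full value.
uint32_t combineExtended(uint32_t Ext26, uint32_t InsnImm) {
  uint32_t Upper26 = Ext26 << 6;     // extender payload occupies bits 31:6
  uint32_t Lower6  = InsnImm & 0x3f; // instruction keeps bits 5:0
  return Upper26 | Lower6;
}

// For signed operands the result is then sign-extended to the stated
// width, as signedDecoder<16>, signedDecoder<12>, etc. do above.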
@@ -699,13 +725,13 @@ static DecodeStatus s11_3ImmDecoder(MCInst &MI, unsigned tmp, return MCDisassembler::Success; } -static DecodeStatus s10ImmDecoder(MCInst &MI, unsigned tmp, +static DecodeStatus s10_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/, const void *Decoder) { signedDecoder<10>(MI, tmp, Decoder); return MCDisassembler::Success; } -static DecodeStatus s8ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/, +static DecodeStatus s8_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/, const void *Decoder) { signedDecoder<8>(MI, tmp, Decoder); return MCDisassembler::Success; @@ -811,25 +837,24 @@ static const unsigned int StoreConditionalOpcodeData[][2] = { // HexagonII::INST_ICLASS_LD // HexagonII::INST_ICLASS_LD_ST_2 -static unsigned int LoadStoreOpcodeData[][2] = {{L4_loadrd_abs, 0x49c00000}, - {L4_loadri_abs, 0x49800000}, - {L4_loadruh_abs, 0x49600000}, - {L4_loadrh_abs, 0x49400000}, - {L4_loadrub_abs, 0x49200000}, - {L4_loadrb_abs, 0x49000000}, - {S2_storerdabs, 0x48c00000}, - {S2_storerinewabs, 0x48a01000}, - {S2_storerhnewabs, 0x48a00800}, - {S2_storerbnewabs, 0x48a00000}, - {S2_storeriabs, 0x48800000}, - {S2_storerfabs, 0x48600000}, - {S2_storerhabs, 0x48400000}, - {S2_storerbabs, 0x48000000}}; +static unsigned int LoadStoreOpcodeData[][2] = {{PS_loadrdabs, 0x49c00000}, + {PS_loadriabs, 0x49800000}, + {PS_loadruhabs, 0x49600000}, + {PS_loadrhabs, 0x49400000}, + {PS_loadrubabs, 0x49200000}, + {PS_loadrbabs, 0x49000000}, + {PS_storerdabs, 0x48c00000}, + {PS_storerinewabs, 0x48a01000}, + {PS_storerhnewabs, 0x48a00800}, + {PS_storerbnewabs, 0x48a00000}, + {PS_storeriabs, 0x48800000}, + {PS_storerfabs, 0x48600000}, + {PS_storerhabs, 0x48400000}, + {PS_storerbabs, 0x48000000}}; static const size_t NumCondS = array_lengthof(StoreConditionalOpcodeData); static const size_t NumLS = array_lengthof(LoadStoreOpcodeData); static DecodeStatus decodeSpecial(MCInst &MI, uint32_t insn) { - unsigned MachineOpcode = 0; unsigned LLVMOpcode = 0; @@ -868,19 +893,18 @@ static DecodeStatus decodeSpecial(MCInst &MI, uint32_t insn) { case Hexagon::S4_pstorerdf_abs: case Hexagon::S4_pstorerdt_abs: case Hexagon::S4_pstorerdfnew_abs: - case Hexagon::S4_pstorerdtnew_abs: { + case Hexagon::S4_pstorerdtnew_abs: // op: Pv Value = insn & UINT64_C(3); - DecodePredRegsRegisterClass(MI, Value, 0, 0); + DecodePredRegsRegisterClass(MI, Value, 0, nullptr); // op: u6 Value = (insn >> 12) & UINT64_C(48); Value |= (insn >> 3) & UINT64_C(15); MI.addOperand(MCOperand::createImm(Value)); // op: Rtt Value = (insn >> 8) & UINT64_C(31); - DecodeDoubleRegsRegisterClass(MI, Value, 0, 0); + DecodeDoubleRegsRegisterClass(MI, Value, 0, nullptr); break; - } case Hexagon::S4_pstorerbnewf_abs: case Hexagon::S4_pstorerbnewt_abs: @@ -893,19 +917,18 @@ static DecodeStatus decodeSpecial(MCInst &MI, uint32_t insn) { case Hexagon::S4_pstorerinewf_abs: case Hexagon::S4_pstorerinewt_abs: case Hexagon::S4_pstorerinewfnew_abs: - case Hexagon::S4_pstorerinewtnew_abs: { + case Hexagon::S4_pstorerinewtnew_abs: // op: Pv Value = insn & UINT64_C(3); - DecodePredRegsRegisterClass(MI, Value, 0, 0); + DecodePredRegsRegisterClass(MI, Value, 0, nullptr); // op: u6 Value = (insn >> 12) & UINT64_C(48); Value |= (insn >> 3) & UINT64_C(15); MI.addOperand(MCOperand::createImm(Value)); // op: Nt Value = (insn >> 8) & UINT64_C(7); - DecodeIntRegsRegisterClass(MI, Value, 0, 0); + DecodeIntRegsRegisterClass(MI, Value, 0, nullptr); break; - } case Hexagon::S4_pstorerbf_abs: case Hexagon::S4_pstorerbt_abs: @@ -918,36 +941,34 @@ static 
DecodeStatus decodeSpecial(MCInst &MI, uint32_t insn) { case Hexagon::S4_pstorerif_abs: case Hexagon::S4_pstorerit_abs: case Hexagon::S4_pstorerifnew_abs: - case Hexagon::S4_pstoreritnew_abs: { + case Hexagon::S4_pstoreritnew_abs: // op: Pv Value = insn & UINT64_C(3); - DecodePredRegsRegisterClass(MI, Value, 0, 0); + DecodePredRegsRegisterClass(MI, Value, 0, nullptr); // op: u6 Value = (insn >> 12) & UINT64_C(48); Value |= (insn >> 3) & UINT64_C(15); MI.addOperand(MCOperand::createImm(Value)); // op: Rt Value = (insn >> 8) & UINT64_C(31); - DecodeIntRegsRegisterClass(MI, Value, 0, 0); + DecodeIntRegsRegisterClass(MI, Value, 0, nullptr); break; - } case Hexagon::L4_ploadrdf_abs: case Hexagon::L4_ploadrdt_abs: case Hexagon::L4_ploadrdfnew_abs: - case Hexagon::L4_ploadrdtnew_abs: { + case Hexagon::L4_ploadrdtnew_abs: // op: Rdd Value = insn & UINT64_C(31); - DecodeDoubleRegsRegisterClass(MI, Value, 0, 0); + DecodeDoubleRegsRegisterClass(MI, Value, 0, nullptr); // op: Pt Value = ((insn >> 9) & UINT64_C(3)); - DecodePredRegsRegisterClass(MI, Value, 0, 0); + DecodePredRegsRegisterClass(MI, Value, 0, nullptr); // op: u6 Value = ((insn >> 15) & UINT64_C(62)); Value |= ((insn >> 8) & UINT64_C(1)); MI.addOperand(MCOperand::createImm(Value)); break; - } case Hexagon::L4_ploadrbf_abs: case Hexagon::L4_ploadrbt_abs: @@ -971,10 +992,10 @@ static DecodeStatus decodeSpecial(MCInst &MI, uint32_t insn) { case Hexagon::L4_ploadritnew_abs: // op: Rd Value = insn & UINT64_C(31); - DecodeIntRegsRegisterClass(MI, Value, 0, 0); + DecodeIntRegsRegisterClass(MI, Value, 0, nullptr); // op: Pt Value = (insn >> 9) & UINT64_C(3); - DecodePredRegsRegisterClass(MI, Value, 0, 0); + DecodePredRegsRegisterClass(MI, Value, 0, nullptr); // op: u6 Value = (insn >> 15) & UINT64_C(62); Value |= (insn >> 8) & UINT64_C(1); @@ -982,36 +1003,34 @@ static DecodeStatus decodeSpecial(MCInst &MI, uint32_t insn) { break; // op: g16_2 - case (Hexagon::L4_loadri_abs): + case (Hexagon::PS_loadriabs): ++shift; // op: g16_1 - case Hexagon::L4_loadrh_abs: - case Hexagon::L4_loadruh_abs: + case Hexagon::PS_loadrhabs: + case Hexagon::PS_loadruhabs: ++shift; // op: g16_0 - case Hexagon::L4_loadrb_abs: - case Hexagon::L4_loadrub_abs: { + case Hexagon::PS_loadrbabs: + case Hexagon::PS_loadrubabs: // op: Rd Value |= insn & UINT64_C(31); - DecodeIntRegsRegisterClass(MI, Value, 0, 0); + DecodeIntRegsRegisterClass(MI, Value, 0, nullptr); Value = (insn >> 11) & UINT64_C(49152); Value |= (insn >> 7) & UINT64_C(15872); Value |= (insn >> 5) & UINT64_C(511); MI.addOperand(MCOperand::createImm(Value << shift)); break; - } - case Hexagon::L4_loadrd_abs: { + case Hexagon::PS_loadrdabs: Value = insn & UINT64_C(31); - DecodeDoubleRegsRegisterClass(MI, Value, 0, 0); + DecodeDoubleRegsRegisterClass(MI, Value, 0, nullptr); Value = (insn >> 11) & UINT64_C(49152); Value |= (insn >> 7) & UINT64_C(15872); Value |= (insn >> 5) & UINT64_C(511); MI.addOperand(MCOperand::createImm(Value << 3)); break; - } - case Hexagon::S2_storerdabs: { + case Hexagon::PS_storerdabs: // op: g16_3 Value = (insn >> 11) & UINT64_C(49152); Value |= (insn >> 7) & UINT64_C(15872); @@ -1020,18 +1039,17 @@ static DecodeStatus decodeSpecial(MCInst &MI, uint32_t insn) { MI.addOperand(MCOperand::createImm(Value << 3)); // op: Rtt Value = (insn >> 8) & UINT64_C(31); - DecodeDoubleRegsRegisterClass(MI, Value, 0, 0); + DecodeDoubleRegsRegisterClass(MI, Value, 0, nullptr); break; - } // op: g16_2 - case Hexagon::S2_storerinewabs: + case Hexagon::PS_storerinewabs: ++shift; // op: g16_1 - case 
Hexagon::S2_storerhnewabs: + case Hexagon::PS_storerhnewabs: ++shift; // op: g16_0 - case Hexagon::S2_storerbnewabs: { + case Hexagon::PS_storerbnewabs: Value = (insn >> 11) & UINT64_C(49152); Value |= (insn >> 7) & UINT64_C(15872); Value |= (insn >> 5) & UINT64_C(256); @@ -1039,19 +1057,18 @@ static DecodeStatus decodeSpecial(MCInst &MI, uint32_t insn) { MI.addOperand(MCOperand::createImm(Value << shift)); // op: Nt Value = (insn >> 8) & UINT64_C(7); - DecodeIntRegsRegisterClass(MI, Value, 0, 0); + DecodeIntRegsRegisterClass(MI, Value, 0, nullptr); break; - } // op: g16_2 - case Hexagon::S2_storeriabs: + case Hexagon::PS_storeriabs: ++shift; // op: g16_1 - case Hexagon::S2_storerhabs: - case Hexagon::S2_storerfabs: + case Hexagon::PS_storerhabs: + case Hexagon::PS_storerfabs: ++shift; // op: g16_0 - case Hexagon::S2_storerbabs: { + case Hexagon::PS_storerbabs: Value = (insn >> 11) & UINT64_C(49152); Value |= (insn >> 7) & UINT64_C(15872); Value |= (insn >> 5) & UINT64_C(256); @@ -1059,10 +1076,9 @@ static DecodeStatus decodeSpecial(MCInst &MI, uint32_t insn) { MI.addOperand(MCOperand::createImm(Value << shift)); // op: Rt Value = (insn >> 8) & UINT64_C(31); - DecodeIntRegsRegisterClass(MI, Value, 0, 0); + DecodeIntRegsRegisterClass(MI, Value, 0, nullptr); break; } - } return MCDisassembler::Success; } return MCDisassembler::Fail; @@ -1070,7 +1086,6 @@ static DecodeStatus decodeSpecial(MCInst &MI, uint32_t insn) { static DecodeStatus decodeImmext(MCInst &MI, uint32_t insn, void const *Decoder) { - // Instruction Class for a constant a extender: bits 31:28 = 0x0000 if ((~insn & 0xf0000000) == 0xf0000000) { unsigned Value; @@ -1087,244 +1102,244 @@ static DecodeStatus decodeImmext(MCInst &MI, uint32_t insn, // These values are from HexagonGenMCCodeEmitter.inc and HexagonIsetDx.td enum subInstBinaryValues { - V4_SA1_addi_BITS = 0x0000, - V4_SA1_addi_MASK = 0x1800, - V4_SA1_addrx_BITS = 0x1800, - V4_SA1_addrx_MASK = 0x1f00, - V4_SA1_addsp_BITS = 0x0c00, - V4_SA1_addsp_MASK = 0x1c00, - V4_SA1_and1_BITS = 0x1200, - V4_SA1_and1_MASK = 0x1f00, - V4_SA1_clrf_BITS = 0x1a70, - V4_SA1_clrf_MASK = 0x1e70, - V4_SA1_clrfnew_BITS = 0x1a50, - V4_SA1_clrfnew_MASK = 0x1e70, - V4_SA1_clrt_BITS = 0x1a60, - V4_SA1_clrt_MASK = 0x1e70, - V4_SA1_clrtnew_BITS = 0x1a40, - V4_SA1_clrtnew_MASK = 0x1e70, - V4_SA1_cmpeqi_BITS = 0x1900, - V4_SA1_cmpeqi_MASK = 0x1f00, - V4_SA1_combine0i_BITS = 0x1c00, - V4_SA1_combine0i_MASK = 0x1d18, - V4_SA1_combine1i_BITS = 0x1c08, - V4_SA1_combine1i_MASK = 0x1d18, - V4_SA1_combine2i_BITS = 0x1c10, - V4_SA1_combine2i_MASK = 0x1d18, - V4_SA1_combine3i_BITS = 0x1c18, - V4_SA1_combine3i_MASK = 0x1d18, - V4_SA1_combinerz_BITS = 0x1d08, - V4_SA1_combinerz_MASK = 0x1d08, - V4_SA1_combinezr_BITS = 0x1d00, - V4_SA1_combinezr_MASK = 0x1d08, - V4_SA1_dec_BITS = 0x1300, - V4_SA1_dec_MASK = 0x1f00, - V4_SA1_inc_BITS = 0x1100, - V4_SA1_inc_MASK = 0x1f00, - V4_SA1_seti_BITS = 0x0800, - V4_SA1_seti_MASK = 0x1c00, - V4_SA1_setin1_BITS = 0x1a00, - V4_SA1_setin1_MASK = 0x1e40, - V4_SA1_sxtb_BITS = 0x1500, - V4_SA1_sxtb_MASK = 0x1f00, - V4_SA1_sxth_BITS = 0x1400, - V4_SA1_sxth_MASK = 0x1f00, - V4_SA1_tfr_BITS = 0x1000, - V4_SA1_tfr_MASK = 0x1f00, - V4_SA1_zxtb_BITS = 0x1700, - V4_SA1_zxtb_MASK = 0x1f00, - V4_SA1_zxth_BITS = 0x1600, - V4_SA1_zxth_MASK = 0x1f00, - V4_SL1_loadri_io_BITS = 0x0000, - V4_SL1_loadri_io_MASK = 0x1000, - V4_SL1_loadrub_io_BITS = 0x1000, - V4_SL1_loadrub_io_MASK = 0x1000, - V4_SL2_deallocframe_BITS = 0x1f00, - V4_SL2_deallocframe_MASK = 0x1fc0, - V4_SL2_jumpr31_BITS = 0x1fc0, - 
V4_SL2_jumpr31_MASK = 0x1fc4, - V4_SL2_jumpr31_f_BITS = 0x1fc5, - V4_SL2_jumpr31_f_MASK = 0x1fc7, - V4_SL2_jumpr31_fnew_BITS = 0x1fc7, - V4_SL2_jumpr31_fnew_MASK = 0x1fc7, - V4_SL2_jumpr31_t_BITS = 0x1fc4, - V4_SL2_jumpr31_t_MASK = 0x1fc7, - V4_SL2_jumpr31_tnew_BITS = 0x1fc6, - V4_SL2_jumpr31_tnew_MASK = 0x1fc7, - V4_SL2_loadrb_io_BITS = 0x1000, - V4_SL2_loadrb_io_MASK = 0x1800, - V4_SL2_loadrd_sp_BITS = 0x1e00, - V4_SL2_loadrd_sp_MASK = 0x1f00, - V4_SL2_loadrh_io_BITS = 0x0000, - V4_SL2_loadrh_io_MASK = 0x1800, - V4_SL2_loadri_sp_BITS = 0x1c00, - V4_SL2_loadri_sp_MASK = 0x1e00, - V4_SL2_loadruh_io_BITS = 0x0800, - V4_SL2_loadruh_io_MASK = 0x1800, - V4_SL2_return_BITS = 0x1f40, - V4_SL2_return_MASK = 0x1fc4, - V4_SL2_return_f_BITS = 0x1f45, - V4_SL2_return_f_MASK = 0x1fc7, - V4_SL2_return_fnew_BITS = 0x1f47, - V4_SL2_return_fnew_MASK = 0x1fc7, - V4_SL2_return_t_BITS = 0x1f44, - V4_SL2_return_t_MASK = 0x1fc7, - V4_SL2_return_tnew_BITS = 0x1f46, - V4_SL2_return_tnew_MASK = 0x1fc7, - V4_SS1_storeb_io_BITS = 0x1000, - V4_SS1_storeb_io_MASK = 0x1000, - V4_SS1_storew_io_BITS = 0x0000, - V4_SS1_storew_io_MASK = 0x1000, - V4_SS2_allocframe_BITS = 0x1c00, - V4_SS2_allocframe_MASK = 0x1e00, - V4_SS2_storebi0_BITS = 0x1200, - V4_SS2_storebi0_MASK = 0x1f00, - V4_SS2_storebi1_BITS = 0x1300, - V4_SS2_storebi1_MASK = 0x1f00, - V4_SS2_stored_sp_BITS = 0x0a00, - V4_SS2_stored_sp_MASK = 0x1e00, - V4_SS2_storeh_io_BITS = 0x0000, - V4_SS2_storeh_io_MASK = 0x1800, - V4_SS2_storew_sp_BITS = 0x0800, - V4_SS2_storew_sp_MASK = 0x1e00, - V4_SS2_storewi0_BITS = 0x1000, - V4_SS2_storewi0_MASK = 0x1f00, - V4_SS2_storewi1_BITS = 0x1100, - V4_SS2_storewi1_MASK = 0x1f00 + SA1_addi_BITS = 0x0000, + SA1_addi_MASK = 0x1800, + SA1_addrx_BITS = 0x1800, + SA1_addrx_MASK = 0x1f00, + SA1_addsp_BITS = 0x0c00, + SA1_addsp_MASK = 0x1c00, + SA1_and1_BITS = 0x1200, + SA1_and1_MASK = 0x1f00, + SA1_clrf_BITS = 0x1a70, + SA1_clrf_MASK = 0x1e70, + SA1_clrfnew_BITS = 0x1a50, + SA1_clrfnew_MASK = 0x1e70, + SA1_clrt_BITS = 0x1a60, + SA1_clrt_MASK = 0x1e70, + SA1_clrtnew_BITS = 0x1a40, + SA1_clrtnew_MASK = 0x1e70, + SA1_cmpeqi_BITS = 0x1900, + SA1_cmpeqi_MASK = 0x1f00, + SA1_combine0i_BITS = 0x1c00, + SA1_combine0i_MASK = 0x1d18, + SA1_combine1i_BITS = 0x1c08, + SA1_combine1i_MASK = 0x1d18, + SA1_combine2i_BITS = 0x1c10, + SA1_combine2i_MASK = 0x1d18, + SA1_combine3i_BITS = 0x1c18, + SA1_combine3i_MASK = 0x1d18, + SA1_combinerz_BITS = 0x1d08, + SA1_combinerz_MASK = 0x1d08, + SA1_combinezr_BITS = 0x1d00, + SA1_combinezr_MASK = 0x1d08, + SA1_dec_BITS = 0x1300, + SA1_dec_MASK = 0x1f00, + SA1_inc_BITS = 0x1100, + SA1_inc_MASK = 0x1f00, + SA1_seti_BITS = 0x0800, + SA1_seti_MASK = 0x1c00, + SA1_setin1_BITS = 0x1a00, + SA1_setin1_MASK = 0x1e40, + SA1_sxtb_BITS = 0x1500, + SA1_sxtb_MASK = 0x1f00, + SA1_sxth_BITS = 0x1400, + SA1_sxth_MASK = 0x1f00, + SA1_tfr_BITS = 0x1000, + SA1_tfr_MASK = 0x1f00, + SA1_zxtb_BITS = 0x1700, + SA1_zxtb_MASK = 0x1f00, + SA1_zxth_BITS = 0x1600, + SA1_zxth_MASK = 0x1f00, + SL1_loadri_io_BITS = 0x0000, + SL1_loadri_io_MASK = 0x1000, + SL1_loadrub_io_BITS = 0x1000, + SL1_loadrub_io_MASK = 0x1000, + SL2_deallocframe_BITS = 0x1f00, + SL2_deallocframe_MASK = 0x1fc0, + SL2_jumpr31_BITS = 0x1fc0, + SL2_jumpr31_MASK = 0x1fc4, + SL2_jumpr31_f_BITS = 0x1fc5, + SL2_jumpr31_f_MASK = 0x1fc7, + SL2_jumpr31_fnew_BITS = 0x1fc7, + SL2_jumpr31_fnew_MASK = 0x1fc7, + SL2_jumpr31_t_BITS = 0x1fc4, + SL2_jumpr31_t_MASK = 0x1fc7, + SL2_jumpr31_tnew_BITS = 0x1fc6, + SL2_jumpr31_tnew_MASK = 0x1fc7, + SL2_loadrb_io_BITS = 0x1000, + 
SL2_loadrb_io_MASK = 0x1800, + SL2_loadrd_sp_BITS = 0x1e00, + SL2_loadrd_sp_MASK = 0x1f00, + SL2_loadrh_io_BITS = 0x0000, + SL2_loadrh_io_MASK = 0x1800, + SL2_loadri_sp_BITS = 0x1c00, + SL2_loadri_sp_MASK = 0x1e00, + SL2_loadruh_io_BITS = 0x0800, + SL2_loadruh_io_MASK = 0x1800, + SL2_return_BITS = 0x1f40, + SL2_return_MASK = 0x1fc4, + SL2_return_f_BITS = 0x1f45, + SL2_return_f_MASK = 0x1fc7, + SL2_return_fnew_BITS = 0x1f47, + SL2_return_fnew_MASK = 0x1fc7, + SL2_return_t_BITS = 0x1f44, + SL2_return_t_MASK = 0x1fc7, + SL2_return_tnew_BITS = 0x1f46, + SL2_return_tnew_MASK = 0x1fc7, + SS1_storeb_io_BITS = 0x1000, + SS1_storeb_io_MASK = 0x1000, + SS1_storew_io_BITS = 0x0000, + SS1_storew_io_MASK = 0x1000, + SS2_allocframe_BITS = 0x1c00, + SS2_allocframe_MASK = 0x1e00, + SS2_storebi0_BITS = 0x1200, + SS2_storebi0_MASK = 0x1f00, + SS2_storebi1_BITS = 0x1300, + SS2_storebi1_MASK = 0x1f00, + SS2_stored_sp_BITS = 0x0a00, + SS2_stored_sp_MASK = 0x1e00, + SS2_storeh_io_BITS = 0x0000, + SS2_storeh_io_MASK = 0x1800, + SS2_storew_sp_BITS = 0x0800, + SS2_storew_sp_MASK = 0x1e00, + SS2_storewi0_BITS = 0x1000, + SS2_storewi0_MASK = 0x1f00, + SS2_storewi1_BITS = 0x1100, + SS2_storewi1_MASK = 0x1f00 }; static unsigned GetSubinstOpcode(unsigned IClass, unsigned inst, unsigned &op, raw_ostream &os) { switch (IClass) { case HexagonII::HSIG_L1: - if ((inst & V4_SL1_loadri_io_MASK) == V4_SL1_loadri_io_BITS) - op = Hexagon::V4_SL1_loadri_io; - else if ((inst & V4_SL1_loadrub_io_MASK) == V4_SL1_loadrub_io_BITS) - op = Hexagon::V4_SL1_loadrub_io; + if ((inst & SL1_loadri_io_MASK) == SL1_loadri_io_BITS) + op = Hexagon::SL1_loadri_io; + else if ((inst & SL1_loadrub_io_MASK) == SL1_loadrub_io_BITS) + op = Hexagon::SL1_loadrub_io; else { os << "<unknown subinstruction>"; return MCDisassembler::Fail; } break; case HexagonII::HSIG_L2: - if ((inst & V4_SL2_deallocframe_MASK) == V4_SL2_deallocframe_BITS) - op = Hexagon::V4_SL2_deallocframe; - else if ((inst & V4_SL2_jumpr31_MASK) == V4_SL2_jumpr31_BITS) - op = Hexagon::V4_SL2_jumpr31; - else if ((inst & V4_SL2_jumpr31_f_MASK) == V4_SL2_jumpr31_f_BITS) - op = Hexagon::V4_SL2_jumpr31_f; - else if ((inst & V4_SL2_jumpr31_fnew_MASK) == V4_SL2_jumpr31_fnew_BITS) - op = Hexagon::V4_SL2_jumpr31_fnew; - else if ((inst & V4_SL2_jumpr31_t_MASK) == V4_SL2_jumpr31_t_BITS) - op = Hexagon::V4_SL2_jumpr31_t; - else if ((inst & V4_SL2_jumpr31_tnew_MASK) == V4_SL2_jumpr31_tnew_BITS) - op = Hexagon::V4_SL2_jumpr31_tnew; - else if ((inst & V4_SL2_loadrb_io_MASK) == V4_SL2_loadrb_io_BITS) - op = Hexagon::V4_SL2_loadrb_io; - else if ((inst & V4_SL2_loadrd_sp_MASK) == V4_SL2_loadrd_sp_BITS) - op = Hexagon::V4_SL2_loadrd_sp; - else if ((inst & V4_SL2_loadrh_io_MASK) == V4_SL2_loadrh_io_BITS) - op = Hexagon::V4_SL2_loadrh_io; - else if ((inst & V4_SL2_loadri_sp_MASK) == V4_SL2_loadri_sp_BITS) - op = Hexagon::V4_SL2_loadri_sp; - else if ((inst & V4_SL2_loadruh_io_MASK) == V4_SL2_loadruh_io_BITS) - op = Hexagon::V4_SL2_loadruh_io; - else if ((inst & V4_SL2_return_MASK) == V4_SL2_return_BITS) - op = Hexagon::V4_SL2_return; - else if ((inst & V4_SL2_return_f_MASK) == V4_SL2_return_f_BITS) - op = Hexagon::V4_SL2_return_f; - else if ((inst & V4_SL2_return_fnew_MASK) == V4_SL2_return_fnew_BITS) - op = Hexagon::V4_SL2_return_fnew; - else if ((inst & V4_SL2_return_t_MASK) == V4_SL2_return_t_BITS) - op = Hexagon::V4_SL2_return_t; - else if ((inst & V4_SL2_return_tnew_MASK) == V4_SL2_return_tnew_BITS) - op = Hexagon::V4_SL2_return_tnew; + if ((inst & SL2_deallocframe_MASK) == SL2_deallocframe_BITS) + op = 
Hexagon::SL2_deallocframe; + else if ((inst & SL2_jumpr31_MASK) == SL2_jumpr31_BITS) + op = Hexagon::SL2_jumpr31; + else if ((inst & SL2_jumpr31_f_MASK) == SL2_jumpr31_f_BITS) + op = Hexagon::SL2_jumpr31_f; + else if ((inst & SL2_jumpr31_fnew_MASK) == SL2_jumpr31_fnew_BITS) + op = Hexagon::SL2_jumpr31_fnew; + else if ((inst & SL2_jumpr31_t_MASK) == SL2_jumpr31_t_BITS) + op = Hexagon::SL2_jumpr31_t; + else if ((inst & SL2_jumpr31_tnew_MASK) == SL2_jumpr31_tnew_BITS) + op = Hexagon::SL2_jumpr31_tnew; + else if ((inst & SL2_loadrb_io_MASK) == SL2_loadrb_io_BITS) + op = Hexagon::SL2_loadrb_io; + else if ((inst & SL2_loadrd_sp_MASK) == SL2_loadrd_sp_BITS) + op = Hexagon::SL2_loadrd_sp; + else if ((inst & SL2_loadrh_io_MASK) == SL2_loadrh_io_BITS) + op = Hexagon::SL2_loadrh_io; + else if ((inst & SL2_loadri_sp_MASK) == SL2_loadri_sp_BITS) + op = Hexagon::SL2_loadri_sp; + else if ((inst & SL2_loadruh_io_MASK) == SL2_loadruh_io_BITS) + op = Hexagon::SL2_loadruh_io; + else if ((inst & SL2_return_MASK) == SL2_return_BITS) + op = Hexagon::SL2_return; + else if ((inst & SL2_return_f_MASK) == SL2_return_f_BITS) + op = Hexagon::SL2_return_f; + else if ((inst & SL2_return_fnew_MASK) == SL2_return_fnew_BITS) + op = Hexagon::SL2_return_fnew; + else if ((inst & SL2_return_t_MASK) == SL2_return_t_BITS) + op = Hexagon::SL2_return_t; + else if ((inst & SL2_return_tnew_MASK) == SL2_return_tnew_BITS) + op = Hexagon::SL2_return_tnew; else { os << "<unknown subinstruction>"; return MCDisassembler::Fail; } break; case HexagonII::HSIG_A: - if ((inst & V4_SA1_addi_MASK) == V4_SA1_addi_BITS) - op = Hexagon::V4_SA1_addi; - else if ((inst & V4_SA1_addrx_MASK) == V4_SA1_addrx_BITS) - op = Hexagon::V4_SA1_addrx; - else if ((inst & V4_SA1_addsp_MASK) == V4_SA1_addsp_BITS) - op = Hexagon::V4_SA1_addsp; - else if ((inst & V4_SA1_and1_MASK) == V4_SA1_and1_BITS) - op = Hexagon::V4_SA1_and1; - else if ((inst & V4_SA1_clrf_MASK) == V4_SA1_clrf_BITS) - op = Hexagon::V4_SA1_clrf; - else if ((inst & V4_SA1_clrfnew_MASK) == V4_SA1_clrfnew_BITS) - op = Hexagon::V4_SA1_clrfnew; - else if ((inst & V4_SA1_clrt_MASK) == V4_SA1_clrt_BITS) - op = Hexagon::V4_SA1_clrt; - else if ((inst & V4_SA1_clrtnew_MASK) == V4_SA1_clrtnew_BITS) - op = Hexagon::V4_SA1_clrtnew; - else if ((inst & V4_SA1_cmpeqi_MASK) == V4_SA1_cmpeqi_BITS) - op = Hexagon::V4_SA1_cmpeqi; - else if ((inst & V4_SA1_combine0i_MASK) == V4_SA1_combine0i_BITS) - op = Hexagon::V4_SA1_combine0i; - else if ((inst & V4_SA1_combine1i_MASK) == V4_SA1_combine1i_BITS) - op = Hexagon::V4_SA1_combine1i; - else if ((inst & V4_SA1_combine2i_MASK) == V4_SA1_combine2i_BITS) - op = Hexagon::V4_SA1_combine2i; - else if ((inst & V4_SA1_combine3i_MASK) == V4_SA1_combine3i_BITS) - op = Hexagon::V4_SA1_combine3i; - else if ((inst & V4_SA1_combinerz_MASK) == V4_SA1_combinerz_BITS) - op = Hexagon::V4_SA1_combinerz; - else if ((inst & V4_SA1_combinezr_MASK) == V4_SA1_combinezr_BITS) - op = Hexagon::V4_SA1_combinezr; - else if ((inst & V4_SA1_dec_MASK) == V4_SA1_dec_BITS) - op = Hexagon::V4_SA1_dec; - else if ((inst & V4_SA1_inc_MASK) == V4_SA1_inc_BITS) - op = Hexagon::V4_SA1_inc; - else if ((inst & V4_SA1_seti_MASK) == V4_SA1_seti_BITS) - op = Hexagon::V4_SA1_seti; - else if ((inst & V4_SA1_setin1_MASK) == V4_SA1_setin1_BITS) - op = Hexagon::V4_SA1_setin1; - else if ((inst & V4_SA1_sxtb_MASK) == V4_SA1_sxtb_BITS) - op = Hexagon::V4_SA1_sxtb; - else if ((inst & V4_SA1_sxth_MASK) == V4_SA1_sxth_BITS) - op = Hexagon::V4_SA1_sxth; - else if ((inst & V4_SA1_tfr_MASK) == V4_SA1_tfr_BITS) - op = 
Hexagon::V4_SA1_tfr; - else if ((inst & V4_SA1_zxtb_MASK) == V4_SA1_zxtb_BITS) - op = Hexagon::V4_SA1_zxtb; - else if ((inst & V4_SA1_zxth_MASK) == V4_SA1_zxth_BITS) - op = Hexagon::V4_SA1_zxth; + if ((inst & SA1_addi_MASK) == SA1_addi_BITS) + op = Hexagon::SA1_addi; + else if ((inst & SA1_addrx_MASK) == SA1_addrx_BITS) + op = Hexagon::SA1_addrx; + else if ((inst & SA1_addsp_MASK) == SA1_addsp_BITS) + op = Hexagon::SA1_addsp; + else if ((inst & SA1_and1_MASK) == SA1_and1_BITS) + op = Hexagon::SA1_and1; + else if ((inst & SA1_clrf_MASK) == SA1_clrf_BITS) + op = Hexagon::SA1_clrf; + else if ((inst & SA1_clrfnew_MASK) == SA1_clrfnew_BITS) + op = Hexagon::SA1_clrfnew; + else if ((inst & SA1_clrt_MASK) == SA1_clrt_BITS) + op = Hexagon::SA1_clrt; + else if ((inst & SA1_clrtnew_MASK) == SA1_clrtnew_BITS) + op = Hexagon::SA1_clrtnew; + else if ((inst & SA1_cmpeqi_MASK) == SA1_cmpeqi_BITS) + op = Hexagon::SA1_cmpeqi; + else if ((inst & SA1_combine0i_MASK) == SA1_combine0i_BITS) + op = Hexagon::SA1_combine0i; + else if ((inst & SA1_combine1i_MASK) == SA1_combine1i_BITS) + op = Hexagon::SA1_combine1i; + else if ((inst & SA1_combine2i_MASK) == SA1_combine2i_BITS) + op = Hexagon::SA1_combine2i; + else if ((inst & SA1_combine3i_MASK) == SA1_combine3i_BITS) + op = Hexagon::SA1_combine3i; + else if ((inst & SA1_combinerz_MASK) == SA1_combinerz_BITS) + op = Hexagon::SA1_combinerz; + else if ((inst & SA1_combinezr_MASK) == SA1_combinezr_BITS) + op = Hexagon::SA1_combinezr; + else if ((inst & SA1_dec_MASK) == SA1_dec_BITS) + op = Hexagon::SA1_dec; + else if ((inst & SA1_inc_MASK) == SA1_inc_BITS) + op = Hexagon::SA1_inc; + else if ((inst & SA1_seti_MASK) == SA1_seti_BITS) + op = Hexagon::SA1_seti; + else if ((inst & SA1_setin1_MASK) == SA1_setin1_BITS) + op = Hexagon::SA1_setin1; + else if ((inst & SA1_sxtb_MASK) == SA1_sxtb_BITS) + op = Hexagon::SA1_sxtb; + else if ((inst & SA1_sxth_MASK) == SA1_sxth_BITS) + op = Hexagon::SA1_sxth; + else if ((inst & SA1_tfr_MASK) == SA1_tfr_BITS) + op = Hexagon::SA1_tfr; + else if ((inst & SA1_zxtb_MASK) == SA1_zxtb_BITS) + op = Hexagon::SA1_zxtb; + else if ((inst & SA1_zxth_MASK) == SA1_zxth_BITS) + op = Hexagon::SA1_zxth; else { os << "<unknown subinstruction>"; return MCDisassembler::Fail; } break; case HexagonII::HSIG_S1: - if ((inst & V4_SS1_storeb_io_MASK) == V4_SS1_storeb_io_BITS) - op = Hexagon::V4_SS1_storeb_io; - else if ((inst & V4_SS1_storew_io_MASK) == V4_SS1_storew_io_BITS) - op = Hexagon::V4_SS1_storew_io; + if ((inst & SS1_storeb_io_MASK) == SS1_storeb_io_BITS) + op = Hexagon::SS1_storeb_io; + else if ((inst & SS1_storew_io_MASK) == SS1_storew_io_BITS) + op = Hexagon::SS1_storew_io; else { os << "<unknown subinstruction>"; return MCDisassembler::Fail; } break; case HexagonII::HSIG_S2: - if ((inst & V4_SS2_allocframe_MASK) == V4_SS2_allocframe_BITS) - op = Hexagon::V4_SS2_allocframe; - else if ((inst & V4_SS2_storebi0_MASK) == V4_SS2_storebi0_BITS) - op = Hexagon::V4_SS2_storebi0; - else if ((inst & V4_SS2_storebi1_MASK) == V4_SS2_storebi1_BITS) - op = Hexagon::V4_SS2_storebi1; - else if ((inst & V4_SS2_stored_sp_MASK) == V4_SS2_stored_sp_BITS) - op = Hexagon::V4_SS2_stored_sp; - else if ((inst & V4_SS2_storeh_io_MASK) == V4_SS2_storeh_io_BITS) - op = Hexagon::V4_SS2_storeh_io; - else if ((inst & V4_SS2_storew_sp_MASK) == V4_SS2_storew_sp_BITS) - op = Hexagon::V4_SS2_storew_sp; - else if ((inst & V4_SS2_storewi0_MASK) == V4_SS2_storewi0_BITS) - op = Hexagon::V4_SS2_storewi0; - else if ((inst & V4_SS2_storewi1_MASK) == V4_SS2_storewi1_BITS) - op = 
Hexagon::V4_SS2_storewi1; + if ((inst & SS2_allocframe_MASK) == SS2_allocframe_BITS) + op = Hexagon::SS2_allocframe; + else if ((inst & SS2_storebi0_MASK) == SS2_storebi0_BITS) + op = Hexagon::SS2_storebi0; + else if ((inst & SS2_storebi1_MASK) == SS2_storebi1_BITS) + op = Hexagon::SS2_storebi1; + else if ((inst & SS2_stored_sp_MASK) == SS2_stored_sp_BITS) + op = Hexagon::SS2_stored_sp; + else if ((inst & SS2_storeh_io_MASK) == SS2_storeh_io_BITS) + op = Hexagon::SS2_storeh_io; + else if ((inst & SS2_storew_sp_MASK) == SS2_storew_sp_BITS) + op = Hexagon::SS2_storew_sp; + else if ((inst & SS2_storewi0_MASK) == SS2_storewi0_BITS) + op = Hexagon::SS2_storewi0; + else if ((inst & SS2_storewi1_MASK) == SS2_storewi1_BITS) + op = Hexagon::SS2_storewi1; else { os << "<unknown subinstruction>"; return MCDisassembler::Fail; @@ -1362,25 +1377,25 @@ void HexagonDisassembler::addSubinstOperands(MCInst *MI, unsigned opcode, int64_t operand; MCOperand Op; switch (opcode) { - case Hexagon::V4_SL2_deallocframe: - case Hexagon::V4_SL2_jumpr31: - case Hexagon::V4_SL2_jumpr31_f: - case Hexagon::V4_SL2_jumpr31_fnew: - case Hexagon::V4_SL2_jumpr31_t: - case Hexagon::V4_SL2_jumpr31_tnew: - case Hexagon::V4_SL2_return: - case Hexagon::V4_SL2_return_f: - case Hexagon::V4_SL2_return_fnew: - case Hexagon::V4_SL2_return_t: - case Hexagon::V4_SL2_return_tnew: + case Hexagon::SL2_deallocframe: + case Hexagon::SL2_jumpr31: + case Hexagon::SL2_jumpr31_f: + case Hexagon::SL2_jumpr31_fnew: + case Hexagon::SL2_jumpr31_t: + case Hexagon::SL2_jumpr31_tnew: + case Hexagon::SL2_return: + case Hexagon::SL2_return_f: + case Hexagon::SL2_return_fnew: + case Hexagon::SL2_return_t: + case Hexagon::SL2_return_tnew: // no operands for these instructions break; - case Hexagon::V4_SS2_allocframe: + case Hexagon::SS2_allocframe: // u 8-4{5_3} operand = ((inst & 0x1f0) >> 4) << 3; HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; - case Hexagon::V4_SL1_loadri_io: + case Hexagon::SL1_loadri_io: // Rd 3-0, Rs 7-4, u 11-8{4_2} operand = getRegFromSubinstEncoding(inst & 0xf); Op = MCOperand::createReg(operand); @@ -1391,7 +1406,7 @@ void HexagonDisassembler::addSubinstOperands(MCInst *MI, unsigned opcode, operand = (inst & 0xf00) >> 6; HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; - case Hexagon::V4_SL1_loadrub_io: + case Hexagon::SL1_loadrub_io: // Rd 3-0, Rs 7-4, u 11-8 operand = getRegFromSubinstEncoding(inst & 0xf); Op = MCOperand::createReg(operand); @@ -1402,7 +1417,7 @@ void HexagonDisassembler::addSubinstOperands(MCInst *MI, unsigned opcode, operand = (inst & 0xf00) >> 8; HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; - case Hexagon::V4_SL2_loadrb_io: + case Hexagon::SL2_loadrb_io: // Rd 3-0, Rs 7-4, u 10-8 operand = getRegFromSubinstEncoding(inst & 0xf); Op = MCOperand::createReg(operand); @@ -1413,8 +1428,8 @@ void HexagonDisassembler::addSubinstOperands(MCInst *MI, unsigned opcode, operand = (inst & 0x700) >> 8; HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; - case Hexagon::V4_SL2_loadrh_io: - case Hexagon::V4_SL2_loadruh_io: + case Hexagon::SL2_loadrh_io: + case Hexagon::SL2_loadruh_io: // Rd 3-0, Rs 7-4, u 10-8{3_1} operand = getRegFromSubinstEncoding(inst & 0xf); Op = MCOperand::createReg(operand); @@ -1425,7 +1440,7 @@ void HexagonDisassembler::addSubinstOperands(MCInst *MI, unsigned opcode, operand = ((inst & 0x700) >> 8) << 1; HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; - case Hexagon::V4_SL2_loadrd_sp: + case 
Hexagon::SL2_loadrd_sp: // Rdd 2-0, u 7-3{5_3} operand = getDRegFromSubinstEncoding(inst & 0x7); Op = MCOperand::createReg(operand); @@ -1433,7 +1448,7 @@ void HexagonDisassembler::addSubinstOperands(MCInst *MI, unsigned opcode, operand = ((inst & 0x0f8) >> 3) << 3; HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; - case Hexagon::V4_SL2_loadri_sp: + case Hexagon::SL2_loadri_sp: // Rd 3-0, u 8-4{5_2} operand = getRegFromSubinstEncoding(inst & 0xf); Op = MCOperand::createReg(operand); @@ -1441,7 +1456,7 @@ void HexagonDisassembler::addSubinstOperands(MCInst *MI, unsigned opcode, operand = ((inst & 0x1f0) >> 4) << 2; HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; - case Hexagon::V4_SA1_addi: + case Hexagon::SA1_addi: // Rx 3-0 (x2), s7 10-4 operand = getRegFromSubinstEncoding(inst & 0xf); Op = MCOperand::createReg(operand); @@ -1450,7 +1465,7 @@ void HexagonDisassembler::addSubinstOperands(MCInst *MI, unsigned opcode, operand = SignExtend64<7>((inst & 0x7f0) >> 4); HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; - case Hexagon::V4_SA1_addrx: + case Hexagon::SA1_addrx: // Rx 3-0 (x2), Rs 7-4 operand = getRegFromSubinstEncoding(inst & 0xf); Op = MCOperand::createReg(operand); @@ -1460,14 +1475,14 @@ void HexagonDisassembler::addSubinstOperands(MCInst *MI, unsigned opcode, Op = MCOperand::createReg(operand); MI->addOperand(Op); break; - case Hexagon::V4_SA1_and1: - case Hexagon::V4_SA1_dec: - case Hexagon::V4_SA1_inc: - case Hexagon::V4_SA1_sxtb: - case Hexagon::V4_SA1_sxth: - case Hexagon::V4_SA1_tfr: - case Hexagon::V4_SA1_zxtb: - case Hexagon::V4_SA1_zxth: + case Hexagon::SA1_and1: + case Hexagon::SA1_dec: + case Hexagon::SA1_inc: + case Hexagon::SA1_sxtb: + case Hexagon::SA1_sxth: + case Hexagon::SA1_tfr: + case Hexagon::SA1_zxtb: + case Hexagon::SA1_zxth: // Rd 3-0, Rs 7-4 operand = getRegFromSubinstEncoding(inst & 0xf); Op = MCOperand::createReg(operand); @@ -1476,7 +1491,7 @@ void HexagonDisassembler::addSubinstOperands(MCInst *MI, unsigned opcode, Op = MCOperand::createReg(operand); MI->addOperand(Op); break; - case Hexagon::V4_SA1_addsp: + case Hexagon::SA1_addsp: // Rd 3-0, u 9-4{6_2} operand = getRegFromSubinstEncoding(inst & 0xf); Op = MCOperand::createReg(operand); @@ -1484,7 +1499,7 @@ void HexagonDisassembler::addSubinstOperands(MCInst *MI, unsigned opcode, operand = ((inst & 0x3f0) >> 4) << 2; HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; - case Hexagon::V4_SA1_seti: + case Hexagon::SA1_seti: // Rd 3-0, u 9-4 operand = getRegFromSubinstEncoding(inst & 0xf); Op = MCOperand::createReg(operand); @@ -1492,17 +1507,20 @@ void HexagonDisassembler::addSubinstOperands(MCInst *MI, unsigned opcode, operand = (inst & 0x3f0) >> 4; HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; - case Hexagon::V4_SA1_clrf: - case Hexagon::V4_SA1_clrfnew: - case Hexagon::V4_SA1_clrt: - case Hexagon::V4_SA1_clrtnew: - case Hexagon::V4_SA1_setin1: + case Hexagon::SA1_clrf: + case Hexagon::SA1_clrfnew: + case Hexagon::SA1_clrt: + case Hexagon::SA1_clrtnew: + case Hexagon::SA1_setin1: // Rd 3-0 operand = getRegFromSubinstEncoding(inst & 0xf); Op = MCOperand::createReg(operand); MI->addOperand(Op); + if (opcode == Hexagon::SA1_setin1) + break; + MI->addOperand(MCOperand::createReg(Hexagon::P0)); break; - case Hexagon::V4_SA1_cmpeqi: + case Hexagon::SA1_cmpeqi: // Rs 7-4, u 1-0 operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4); Op = MCOperand::createReg(operand); @@ -1510,10 +1528,10 @@ void 
HexagonDisassembler::addSubinstOperands(MCInst *MI, unsigned opcode, operand = inst & 0x3; HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; - case Hexagon::V4_SA1_combine0i: - case Hexagon::V4_SA1_combine1i: - case Hexagon::V4_SA1_combine2i: - case Hexagon::V4_SA1_combine3i: + case Hexagon::SA1_combine0i: + case Hexagon::SA1_combine1i: + case Hexagon::SA1_combine2i: + case Hexagon::SA1_combine3i: // Rdd 2-0, u 6-5 operand = getDRegFromSubinstEncoding(inst & 0x7); Op = MCOperand::createReg(operand); @@ -1521,8 +1539,8 @@ void HexagonDisassembler::addSubinstOperands(MCInst *MI, unsigned opcode, operand = (inst & 0x060) >> 5; HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; - case Hexagon::V4_SA1_combinerz: - case Hexagon::V4_SA1_combinezr: + case Hexagon::SA1_combinerz: + case Hexagon::SA1_combinezr: // Rdd 2-0, Rs 7-4 operand = getDRegFromSubinstEncoding(inst & 0x7); Op = MCOperand::createReg(operand); @@ -1531,7 +1549,7 @@ void HexagonDisassembler::addSubinstOperands(MCInst *MI, unsigned opcode, Op = MCOperand::createReg(operand); MI->addOperand(Op); break; - case Hexagon::V4_SS1_storeb_io: + case Hexagon::SS1_storeb_io: // Rs 7-4, u 11-8, Rt 3-0 operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4); Op = MCOperand::createReg(operand); @@ -1542,7 +1560,7 @@ void HexagonDisassembler::addSubinstOperands(MCInst *MI, unsigned opcode, Op = MCOperand::createReg(operand); MI->addOperand(Op); break; - case Hexagon::V4_SS1_storew_io: + case Hexagon::SS1_storew_io: // Rs 7-4, u 11-8{4_2}, Rt 3-0 operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4); Op = MCOperand::createReg(operand); @@ -1553,8 +1571,8 @@ void HexagonDisassembler::addSubinstOperands(MCInst *MI, unsigned opcode, Op = MCOperand::createReg(operand); MI->addOperand(Op); break; - case Hexagon::V4_SS2_storebi0: - case Hexagon::V4_SS2_storebi1: + case Hexagon::SS2_storebi0: + case Hexagon::SS2_storebi1: // Rs 7-4, u 3-0 operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4); Op = MCOperand::createReg(operand); @@ -1562,8 +1580,8 @@ void HexagonDisassembler::addSubinstOperands(MCInst *MI, unsigned opcode, operand = inst & 0xf; HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; - case Hexagon::V4_SS2_storewi0: - case Hexagon::V4_SS2_storewi1: + case Hexagon::SS2_storewi0: + case Hexagon::SS2_storewi1: // Rs 7-4, u 3-0{4_2} operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4); Op = MCOperand::createReg(operand); @@ -1571,7 +1589,7 @@ void HexagonDisassembler::addSubinstOperands(MCInst *MI, unsigned opcode, operand = (inst & 0xf) << 2; HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; - case Hexagon::V4_SS2_stored_sp: + case Hexagon::SS2_stored_sp: // s 8-3{6_3}, Rtt 2-0 operand = SignExtend64<9>(((inst & 0x1f8) >> 3) << 3); HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); @@ -1579,7 +1597,7 @@ void HexagonDisassembler::addSubinstOperands(MCInst *MI, unsigned opcode, Op = MCOperand::createReg(operand); MI->addOperand(Op); break; - case Hexagon::V4_SS2_storeh_io: + case Hexagon::SS2_storeh_io: // Rs 7-4, u 10-8{3_1}, Rt 3-0 operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4); Op = MCOperand::createReg(operand); @@ -1590,7 +1608,7 @@ void HexagonDisassembler::addSubinstOperands(MCInst *MI, unsigned opcode, Op = MCOperand::createReg(operand); MI->addOperand(Op); break; - case Hexagon::V4_SS2_storew_sp: + case Hexagon::SS2_storew_sp: // u 8-4{5_2}, Rd 3-0 operand = ((inst & 0x1f0) >> 4) << 2; HexagonMCInstrInfo::addConstant(*MI, operand, 
getContext()); diff --git a/contrib/llvm/lib/Target/Hexagon/Hexagon.td b/contrib/llvm/lib/Target/Hexagon/Hexagon.td index aaa0f3e..0b2b463 100644 --- a/contrib/llvm/lib/Target/Hexagon/Hexagon.td +++ b/contrib/llvm/lib/Target/Hexagon/Hexagon.td @@ -27,11 +27,12 @@ def ArchV5: SubtargetFeature<"v5", "HexagonArchVersion", "V5", "Hexagon V5">; def ArchV55: SubtargetFeature<"v55", "HexagonArchVersion", "V55", "Hexagon V55">; def ArchV60: SubtargetFeature<"v60", "HexagonArchVersion", "V60", "Hexagon V60">; -// Hexagon ISA Extensions -def ExtensionHVX: SubtargetFeature<"hvx", "UseHVXOps", - "true", "Hexagon HVX instructions">; -def ExtensionHVXDbl: SubtargetFeature<"hvx-double", "UseHVXDblOps", - "true", "Hexagon HVX Double instructions">; +def FeatureHVX: SubtargetFeature<"hvx", "UseHVXOps", "true", + "Hexagon HVX instructions">; +def FeatureHVXDbl: SubtargetFeature<"hvx-double", "UseHVXDblOps", "true", + "Hexagon HVX Double instructions">; +def FeatureLongCalls: SubtargetFeature<"long-calls", "UseLongCalls", "true", + "Use constant-extended calls">; //===----------------------------------------------------------------------===// // Hexagon Instruction Predicate Definitions. @@ -45,10 +46,10 @@ def HasV60T : Predicate<"HST->hasV60TOps()">, def UseMEMOP : Predicate<"HST->useMemOps()">; def IEEERndNearV5T : Predicate<"HST->modeIEEERndNear()">; def UseHVXDbl : Predicate<"HST->useHVXDblOps()">, - AssemblerPredicate<"ExtensionHVXDbl">; + AssemblerPredicate<"FeatureHVXDbl">; def UseHVXSgl : Predicate<"HST->useHVXSglOps()">; def UseHVX : Predicate<"HST->useHVXSglOps() ||HST->useHVXDblOps()">, - AssemblerPredicate<"ExtensionHVX">; + AssemblerPredicate<"FeatureHVX">; //===----------------------------------------------------------------------===// // Classes used for relation maps. @@ -249,6 +250,7 @@ include "HexagonSchedule.td" include "HexagonRegisterInfo.td" include "HexagonCallingConv.td" include "HexagonInstrInfo.td" +include "HexagonPatterns.td" include "HexagonIntrinsics.td" include "HexagonIntrinsicsDerived.td" @@ -269,7 +271,7 @@ def : Proc<"hexagonv5", HexagonModelV4, def : Proc<"hexagonv55", HexagonModelV55, [ArchV4, ArchV5, ArchV55]>; def : Proc<"hexagonv60", HexagonModelV60, - [ArchV4, ArchV5, ArchV55, ArchV60, ExtensionHVX]>; + [ArchV4, ArchV5, ArchV55, ArchV60, FeatureHVX]>; //===----------------------------------------------------------------------===// // Declare the target which we are implementing @@ -282,7 +284,7 @@ def HexagonAsmParser : AsmParser { def HexagonAsmParserVariant : AsmParserVariant { int Variant = 0; - string TokenizingCharacters = "#()=:.<>!+*"; + string TokenizingCharacters = "#()=:.<>!+*-|^&"; } def Hexagon : Target { diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp index cd954a1..54db5ad 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp @@ -81,7 +81,7 @@ HexagonAsmPrinter::HexagonAsmPrinter(TargetMachine &TM, : AsmPrinter(TM, std::move(Streamer)), Subtarget(nullptr) {} void HexagonAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, - raw_ostream &O) { + raw_ostream &O) { const MachineOperand &MO = MI->getOperand(OpNo); switch (MO.getType()) { @@ -141,14 +141,22 @@ bool HexagonAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, // Hexagon never has a prefix. printOperand(MI, OpNo, OS); return false; - case 'L': // Write second word of DImode reference. 
- // Verify that this operand has two consecutive registers. - if (!MI->getOperand(OpNo).isReg() || - OpNo+1 == MI->getNumOperands() || - !MI->getOperand(OpNo+1).isReg()) + case 'L': + case 'H': { // The highest-numbered register of a pair. + const MachineOperand &MO = MI->getOperand(OpNo); + const MachineFunction &MF = *MI->getParent()->getParent(); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + if (!MO.isReg()) return true; - ++OpNo; // Return the high-part. - break; + unsigned RegNumber = MO.getReg(); + // This should be an assert in the frontend. + if (Hexagon::DoubleRegsRegClass.contains(RegNumber)) + RegNumber = TRI->getSubReg(RegNumber, ExtraCode[0] == 'L' ? + Hexagon::isub_lo : + Hexagon::isub_hi); + OS << HexagonInstPrinter::getRegisterName(RegNumber); + return false; + } case 'I': // Write 'i' if an integer constant, otherwise nothing. Used to print // addi vs add, etc. @@ -163,9 +171,9 @@ bool HexagonAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, } bool HexagonAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, - unsigned OpNo, unsigned AsmVariant, - const char *ExtraCode, - raw_ostream &O) { + unsigned OpNo, unsigned AsmVariant, + const char *ExtraCode, + raw_ostream &O) { if (ExtraCode && ExtraCode[0]) return true; // Unknown modifier. @@ -275,8 +283,7 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst, } // "$dst = CONST64(#$src1)", - case Hexagon::CONST64_Float_Real: - case Hexagon::CONST64_Int_Real: + case Hexagon::CONST64: if (!OutStreamer->hasRawTextSupport()) { const MCOperand &Imm = MappedInst.getOperand(1); MCSectionSubPair Current = OutStreamer->getCurrentSection(); @@ -295,9 +302,6 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst, } break; case Hexagon::CONST32: - case Hexagon::CONST32_Float_Real: - case Hexagon::CONST32_Int_Real: - case Hexagon::FCONST32_nsdata: if (!OutStreamer->hasRawTextSupport()) { MCOperand &Imm = MappedInst.getOperand(1); MCSectionSubPair Current = OutStreamer->getCurrentSection(); @@ -410,8 +414,8 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst, TmpInst.setOpcode(Hexagon::A2_combinew); TmpInst.addOperand(MappedInst.getOperand(0)); MCOperand &MO1 = MappedInst.getOperand(1); - unsigned High = RI->getSubReg(MO1.getReg(), Hexagon::subreg_hireg); - unsigned Low = RI->getSubReg(MO1.getReg(), Hexagon::subreg_loreg); + unsigned High = RI->getSubReg(MO1.getReg(), Hexagon::isub_hi); + unsigned Low = RI->getSubReg(MO1.getReg(), Hexagon::isub_lo); // Add a new operand for the second register in the pair. 
TmpInst.addOperand(MCOperand::createReg(High)); TmpInst.addOperand(MCOperand::createReg(Low)); @@ -458,21 +462,6 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst, MappedInst = TmpInst; return; } - case Hexagon::TFRI_f: - MappedInst.setOpcode(Hexagon::A2_tfrsi); - return; - case Hexagon::TFRI_cPt_f: - MappedInst.setOpcode(Hexagon::C2_cmoveit); - return; - case Hexagon::TFRI_cNotPt_f: - MappedInst.setOpcode(Hexagon::C2_cmoveif); - return; - case Hexagon::MUX_ri_f: - MappedInst.setOpcode(Hexagon::C2_muxri); - return; - case Hexagon::MUX_ir_f: - MappedInst.setOpcode(Hexagon::C2_muxir); - return; // Translate a "$Rdd = #imm" to "$Rdd = combine(#[-1,0], #imm)" case Hexagon::A2_tfrpi: { @@ -498,8 +487,8 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst, // Translate a "$Rdd = $Rss" to "$Rdd = combine($Rs, $Rt)" case Hexagon::A2_tfrp: { MCOperand &MO = MappedInst.getOperand(1); - unsigned High = RI->getSubReg(MO.getReg(), Hexagon::subreg_hireg); - unsigned Low = RI->getSubReg(MO.getReg(), Hexagon::subreg_loreg); + unsigned High = RI->getSubReg(MO.getReg(), Hexagon::isub_hi); + unsigned Low = RI->getSubReg(MO.getReg(), Hexagon::isub_lo); MO.setReg(High); // Add a new operand for the second register in the pair. MappedInst.addOperand(MCOperand::createReg(Low)); @@ -510,8 +499,8 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst, case Hexagon::A2_tfrpt: case Hexagon::A2_tfrpf: { MCOperand &MO = MappedInst.getOperand(2); - unsigned High = RI->getSubReg(MO.getReg(), Hexagon::subreg_hireg); - unsigned Low = RI->getSubReg(MO.getReg(), Hexagon::subreg_loreg); + unsigned High = RI->getSubReg(MO.getReg(), Hexagon::isub_hi); + unsigned Low = RI->getSubReg(MO.getReg(), Hexagon::isub_lo); MO.setReg(High); // Add a new operand for the second register in the pair. MappedInst.addOperand(MCOperand::createReg(Low)); @@ -523,8 +512,8 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst, case Hexagon::A2_tfrptnew: case Hexagon::A2_tfrpfnew: { MCOperand &MO = MappedInst.getOperand(2); - unsigned High = RI->getSubReg(MO.getReg(), Hexagon::subreg_hireg); - unsigned Low = RI->getSubReg(MO.getReg(), Hexagon::subreg_loreg); + unsigned High = RI->getSubReg(MO.getReg(), Hexagon::isub_hi); + unsigned Low = RI->getSubReg(MO.getReg(), Hexagon::isub_lo); MO.setReg(High); // Add a new operand for the second register in the pair. 
MappedInst.addOperand(MCOperand::createReg(Low)); @@ -561,8 +550,8 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst, Rt.setReg(getHexagonRegisterPair(Rt.getReg(), RI)); return; } - case Hexagon::HEXAGON_V6_vd0_pseudo: - case Hexagon::HEXAGON_V6_vd0_pseudo_128B: { + case Hexagon::V6_vd0: + case Hexagon::V6_vd0_128B: { MCInst TmpInst; assert (Inst.getOperand(0).isReg() && "Expected register and none was found"); @@ -611,5 +600,5 @@ void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) { } extern "C" void LLVMInitializeHexagonAsmPrinter() { - RegisterAsmPrinter<HexagonAsmPrinter> X(TheHexagonTarget); + RegisterAsmPrinter<HexagonAsmPrinter> X(getTheHexagonTarget()); } diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h index a78d97e..775da03 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h +++ b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h @@ -33,7 +33,7 @@ namespace llvm { return AsmPrinter::runOnMachineFunction(Fn); } - const char *getPassName() const override { + StringRef getPassName() const override { return "Hexagon Assembly Printer"; } diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp index c8b4a4c..fe7278f 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp @@ -11,29 +11,56 @@ #include "HexagonBitTracker.h" #include "HexagonTargetMachine.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <iterator> +#include <limits> +#include <utility> +#include <vector> using namespace llvm; +static cl::opt<bool> PreserveTiedOps("hexbit-keep-tied", cl::Hidden, + cl::init(true), cl::desc("Preserve subregisters in tied operands")); + namespace llvm { + void initializeHexagonBitSimplifyPass(PassRegistry& Registry); FunctionPass *createHexagonBitSimplify(); -} + +} // end namespace llvm namespace { + // Set of virtual registers, based on BitVector. 
struct RegisterSet : private BitVector { - RegisterSet() : BitVector() {} + RegisterSet() = default; explicit RegisterSet(unsigned s, bool t = false) : BitVector(s, t) {} - RegisterSet(const RegisterSet &RS) : BitVector(RS) {} + RegisterSet(const RegisterSet &RS) = default; using BitVector::clear; using BitVector::count; @@ -104,20 +131,23 @@ namespace { if (size() <= Idx) resize(std::max(Idx+1, 32U)); } + static inline unsigned v2x(unsigned v) { return TargetRegisterInfo::virtReg2Index(v); } + static inline unsigned x2v(unsigned x) { return TargetRegisterInfo::index2VirtReg(x); } }; - struct PrintRegSet { PrintRegSet(const RegisterSet &S, const TargetRegisterInfo *RI) : RS(S), TRI(RI) {} + friend raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P); + private: const RegisterSet &RS; const TargetRegisterInfo *TRI; @@ -132,27 +162,28 @@ namespace { OS << " }"; return OS; } -} - -namespace { class Transformation; class HexagonBitSimplify : public MachineFunctionPass { public: static char ID; - HexagonBitSimplify() : MachineFunctionPass(ID), MDT(0) { + + HexagonBitSimplify() : MachineFunctionPass(ID), MDT(nullptr) { initializeHexagonBitSimplifyPass(*PassRegistry::getPassRegistry()); } - virtual const char *getPassName() const { + + StringRef getPassName() const override { return "Hexagon bit simplification"; } - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<MachineDominatorTree>(); AU.addPreserved<MachineDominatorTree>(); MachineFunctionPass::getAnalysisUsage(AU); } - virtual bool runOnMachineFunction(MachineFunction &MF); + + bool runOnMachineFunction(MachineFunction &MF) override; static void getInstrDefs(const MachineInstr &MI, RegisterSet &Defs); static void getInstrUses(const MachineInstr &MI, RegisterSet &Uses); @@ -171,7 +202,8 @@ namespace { static bool replaceSubWithSub(unsigned OldR, unsigned OldSR, unsigned NewR, unsigned NewSR, MachineRegisterInfo &MRI); static bool parseRegSequence(const MachineInstr &I, - BitTracker::RegisterRef &SL, BitTracker::RegisterRef &SH); + BitTracker::RegisterRef &SL, BitTracker::RegisterRef &SH, + const MachineRegisterInfo &MRI); static bool getUsedBitsInStore(unsigned Opc, BitVector &Bits, uint16_t Begin); @@ -187,23 +219,27 @@ namespace { MachineDominatorTree *MDT; bool visitBlock(MachineBasicBlock &B, Transformation &T, RegisterSet &AVs); + static bool hasTiedUse(unsigned Reg, MachineRegisterInfo &MRI, + unsigned NewSub = Hexagon::NoSubRegister); }; char HexagonBitSimplify::ID = 0; typedef HexagonBitSimplify HBS; - // The purpose of this class is to provide a common facility to traverse // the function top-down or bottom-up via the dominator tree, and keep // track of the available registers. 
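A compile-only sketch of the traversal that comment describes, with stand-ins for the Machine* types: values defined in a block dominate everything in its dominator-tree subtree, so a top-down transformation runs before recursing into dominated blocks and a bottom-up one runs after.

#include <vector>

struct Block { int Id; std::vector<Block*> DomChildren; };

struct Transformation {
  bool TopDown;
  explicit Transformation(bool TD) : TopDown(TD) {}
  virtual ~Transformation() = default;
  virtual bool processBlock(Block &B, std::vector<int> &Avail) = 0;
};

static bool visitBlock(Block &B, Transformation &T, std::vector<int> Avail) {
  bool Changed = false;
  if (T.TopDown)                      // transform before the dominated blocks
    Changed |= T.processBlock(B, Avail);
  // Registers defined in B would be appended to Avail here: anything
  // defined in B is available in every block that B dominates.
  for (Block *C : B.DomChildren)
    Changed |= visitBlock(*C, T, Avail);
  if (!T.TopDown)                     // bottom-up: children first
    Changed |= T.processBlock(B, Avail);
  return Changed;
}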
class Transformation { public: bool TopDown; + Transformation(bool TD) : TopDown(TD) {} + virtual ~Transformation() = default; + virtual bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) = 0; - virtual ~Transformation() {} }; -} + +} // end anonymous namespace INITIALIZE_PASS_BEGIN(HexagonBitSimplify, "hexbit", "Hexagon bit simplification", false, false) @@ -211,7 +247,6 @@ INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_END(HexagonBitSimplify, "hexbit", "Hexagon bit simplification", false, false) - bool HexagonBitSimplify::visitBlock(MachineBasicBlock &B, Transformation &T, RegisterSet &AVs) { MachineDomTreeNode *N = MDT->getNode(&B); @@ -290,7 +325,6 @@ bool HexagonBitSimplify::isZero(const BitTracker::RegisterCell &RC, return true; } - bool HexagonBitSimplify::getConst(const BitTracker::RegisterCell &RC, uint16_t B, uint16_t W, uint64_t &U) { assert(B < RC.width() && B+W <= RC.width()); @@ -307,7 +341,6 @@ bool HexagonBitSimplify::getConst(const BitTracker::RegisterCell &RC, return true; } - bool HexagonBitSimplify::replaceReg(unsigned OldR, unsigned NewR, MachineRegisterInfo &MRI) { if (!TargetRegisterInfo::isVirtualRegister(OldR) || @@ -322,12 +355,13 @@ bool HexagonBitSimplify::replaceReg(unsigned OldR, unsigned NewR, return Begin != End; } - bool HexagonBitSimplify::replaceRegWithSub(unsigned OldR, unsigned NewR, unsigned NewSR, MachineRegisterInfo &MRI) { if (!TargetRegisterInfo::isVirtualRegister(OldR) || !TargetRegisterInfo::isVirtualRegister(NewR)) return false; + if (hasTiedUse(OldR, MRI, NewSR)) + return false; auto Begin = MRI.use_begin(OldR), End = MRI.use_end(); decltype(End) NextI; for (auto I = Begin; I != End; I = NextI) { @@ -338,12 +372,13 @@ bool HexagonBitSimplify::replaceRegWithSub(unsigned OldR, unsigned NewR, return Begin != End; } - bool HexagonBitSimplify::replaceSubWithSub(unsigned OldR, unsigned OldSR, unsigned NewR, unsigned NewSR, MachineRegisterInfo &MRI) { if (!TargetRegisterInfo::isVirtualRegister(OldR) || !TargetRegisterInfo::isVirtualRegister(NewR)) return false; + if (OldSR != NewSR && hasTiedUse(OldR, MRI, NewSR)) + return false; auto Begin = MRI.use_begin(OldR), End = MRI.use_end(); decltype(End) NextI; for (auto I = Begin; I != End; I = NextI) { @@ -356,47 +391,54 @@ bool HexagonBitSimplify::replaceSubWithSub(unsigned OldR, unsigned OldSR, return Begin != End; } - // For a register ref (pair Reg:Sub), set Begin to the position of the LSB // of Sub in Reg, and set Width to the size of Sub in bits. Return true, // if this succeeded, otherwise return false. bool HexagonBitSimplify::getSubregMask(const BitTracker::RegisterRef &RR, unsigned &Begin, unsigned &Width, MachineRegisterInfo &MRI) { const TargetRegisterClass *RC = MRI.getRegClass(RR.Reg); - if (RC == &Hexagon::IntRegsRegClass) { - assert(RR.Sub == 0); + if (RR.Sub == 0) { Begin = 0; - Width = 32; + Width = RC->getSize()*8; return true; } - if (RC == &Hexagon::DoubleRegsRegClass) { - if (RR.Sub == 0) { - Begin = 0; - Width = 64; - return true; - } - assert(RR.Sub == Hexagon::subreg_loreg || RR.Sub == Hexagon::subreg_hireg); - Width = 32; - Begin = (RR.Sub == Hexagon::subreg_loreg ? 
0 : 32); - return true; + + Begin = 0; + + switch (RC->getID()) { + case Hexagon::DoubleRegsRegClassID: + case Hexagon::VecDblRegsRegClassID: + case Hexagon::VecDblRegs128BRegClassID: + Width = RC->getSize()*8 / 2; + if (RR.Sub == Hexagon::isub_hi || RR.Sub == Hexagon::vsub_hi) + Begin = Width; + break; + default: + return false; } - return false; + return true; } // For a REG_SEQUENCE, set SL to the low subregister and SH to the high // subregister. bool HexagonBitSimplify::parseRegSequence(const MachineInstr &I, - BitTracker::RegisterRef &SL, BitTracker::RegisterRef &SH) { + BitTracker::RegisterRef &SL, BitTracker::RegisterRef &SH, + const MachineRegisterInfo &MRI) { assert(I.getOpcode() == TargetOpcode::REG_SEQUENCE); unsigned Sub1 = I.getOperand(2).getImm(), Sub2 = I.getOperand(4).getImm(); - assert(Sub1 != Sub2); - if (Sub1 == Hexagon::subreg_loreg && Sub2 == Hexagon::subreg_hireg) { + auto *DstRC = MRI.getRegClass(I.getOperand(0).getReg()); + auto &HRI = static_cast<const HexagonRegisterInfo&>( + *MRI.getTargetRegisterInfo()); + unsigned SubLo = HRI.getHexagonSubRegIndex(DstRC, Hexagon::ps_sub_lo); + unsigned SubHi = HRI.getHexagonSubRegIndex(DstRC, Hexagon::ps_sub_hi); + assert((Sub1 == SubLo && Sub2 == SubHi) || (Sub1 == SubHi && Sub2 == SubLo)); + if (Sub1 == SubLo && Sub2 == SubHi) { SL = I.getOperand(1); SH = I.getOperand(3); return true; } - if (Sub1 == Hexagon::subreg_hireg && Sub2 == Hexagon::subreg_loreg) { + if (Sub1 == SubHi && Sub2 == SubLo) { SH = I.getOperand(1); SL = I.getOperand(3); return true; @@ -404,7 +446,6 @@ bool HexagonBitSimplify::parseRegSequence(const MachineInstr &I, return false; } - // All stores (except 64-bit stores) take a 32-bit register as the source // of the value to be stored. If the instruction stores into a location // that is shorter than 32 bits, some bits of the source register are not @@ -562,7 +603,6 @@ bool HexagonBitSimplify::getUsedBitsInStore(unsigned Opc, BitVector &Bits, return false; } - // For an instruction with opcode Opc, calculate the set of bits that it // uses in a register in operand OpN. This only calculates the set of used // bits for cases where it does not depend on any operands (as is the case @@ -842,9 +882,8 @@ bool HexagonBitSimplify::getUsedBits(unsigned Opc, unsigned OpN, return false; } - // Calculate the register class that matches Reg:Sub. For example, if -// vreg1 is a double register, then vreg1:subreg_hireg would match "int" +// vreg1 is a double register, then vreg1:isub_hi would match the "int" // register class. 
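For the getSubregMask rewrite above, a runnable model of the (Begin, Width) answer for a 64-bit register pair; the enum below stands in for Hexagon's subregister indices.

#include <cassert>

// Stand-ins for Hexagon::NoSubRegister / isub_lo / isub_hi.
enum SubIdx { NoSub, SubLo, SubHi };

// Begin is the position of the subregister's LSB inside the pair,
// Width its size in bits.
static bool getSubregMask(SubIdx Sub, unsigned &Begin, unsigned &Width) {
  const unsigned PairBits = 64; // RC->getSize()*8 in the real code
  if (Sub == NoSub) {
    Begin = 0;
    Width = PairBits;
    return true;
  }
  Width = PairBits / 2;
  Begin = (Sub == SubHi) ? Width : 0;
  return true;
}

int main() {
  unsigned B, W;
  getSubregMask(SubHi, B, W);
  assert(B == 32 && W == 32); // vregN:isub_hi covers bits [32, 64)
  return 0;
}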
const TargetRegisterClass *HexagonBitSimplify::getFinalVRegClass( const BitTracker::RegisterRef &RR, MachineRegisterInfo &MRI) { @@ -853,26 +892,28 @@ const TargetRegisterClass *HexagonBitSimplify::getFinalVRegClass( auto *RC = MRI.getRegClass(RR.Reg); if (RR.Sub == 0) return RC; + auto &HRI = static_cast<const HexagonRegisterInfo&>( + *MRI.getTargetRegisterInfo()); - auto VerifySR = [] (unsigned Sub) -> void { - assert(Sub == Hexagon::subreg_hireg || Sub == Hexagon::subreg_loreg); + auto VerifySR = [&HRI] (const TargetRegisterClass *RC, unsigned Sub) -> void { + assert(Sub == HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_lo) || + Sub == HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_hi)); }; switch (RC->getID()) { case Hexagon::DoubleRegsRegClassID: - VerifySR(RR.Sub); + VerifySR(RC, RR.Sub); return &Hexagon::IntRegsRegClass; case Hexagon::VecDblRegsRegClassID: - VerifySR(RR.Sub); + VerifySR(RC, RR.Sub); return &Hexagon::VectorRegsRegClass; case Hexagon::VecDblRegs128BRegClassID: - VerifySR(RR.Sub); + VerifySR(RC, RR.Sub); return &Hexagon::VectorRegs128BRegClass; } return nullptr; } - // Check if RD could be replaced with RS at any possible use of RD. // For example a predicate register cannot be replaced with a integer // register, but a 64-bit register with a subregister can be replaced @@ -890,11 +931,18 @@ bool HexagonBitSimplify::isTransparentCopy(const BitTracker::RegisterRef &RD, return DRC == getFinalVRegClass(RS, MRI); } +bool HexagonBitSimplify::hasTiedUse(unsigned Reg, MachineRegisterInfo &MRI, + unsigned NewSub) { + if (!PreserveTiedOps) + return false; + return llvm::any_of(MRI.use_operands(Reg), + [NewSub] (const MachineOperand &Op) -> bool { + return Op.getSubReg() != NewSub && Op.isTied(); + }); +} -// -// Dead code elimination -// namespace { + class DeadCodeElimination { public: DeadCodeElimination(MachineFunction &mf, MachineDominatorTree &mdt) @@ -914,8 +962,8 @@ namespace { MachineDominatorTree &MDT; MachineRegisterInfo &MRI; }; -} +} // end anonymous namespace bool DeadCodeElimination::isDead(unsigned R) const { for (auto I = MRI.use_begin(R), E = MRI.use_end(); I != E; ++I) { @@ -933,7 +981,6 @@ bool DeadCodeElimination::isDead(unsigned R) const { return true; } - bool DeadCodeElimination::runOnNode(MachineDomTreeNode *N) { bool Changed = false; typedef GraphTraits<MachineDomTreeNode*> GTN; @@ -983,8 +1030,8 @@ bool DeadCodeElimination::runOnNode(MachineDomTreeNode *N) { return Changed; } +namespace { -// // Eliminate redundant instructions // // This transformation will identify instructions where the output register @@ -995,13 +1042,14 @@ bool DeadCodeElimination::runOnNode(MachineDomTreeNode *N) { // registers. // If the output matches an input, the instruction is replaced with COPY. // The copies will be removed by another transformation. -namespace { class RedundantInstrElimination : public Transformation { public: RedundantInstrElimination(BitTracker &bt, const HexagonInstrInfo &hii, MachineRegisterInfo &mri) : Transformation(true), HII(hii), MRI(mri), BT(bt) {} + bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override; + private: bool isLossyShiftLeft(const MachineInstr &MI, unsigned OpN, unsigned &LostB, unsigned &LostE); @@ -1016,8 +1064,8 @@ namespace { MachineRegisterInfo &MRI; BitTracker &BT; }; -} +} // end anonymous namespace // Check if the instruction is a lossy shift left, where the input being // shifted is the operand OpN of MI. 
If true, [LostB, LostE) is the range @@ -1025,6 +1073,7 @@ namespace { bool RedundantInstrElimination::isLossyShiftLeft(const MachineInstr &MI, unsigned OpN, unsigned &LostB, unsigned &LostE) { using namespace Hexagon; + unsigned Opc = MI.getOpcode(); unsigned ImN, RegN, Width; switch (Opc) { @@ -1078,13 +1127,13 @@ bool RedundantInstrElimination::isLossyShiftLeft(const MachineInstr &MI, return true; } - // Check if the instruction is a lossy shift right, where the input being // shifted is the operand OpN of MI. If true, [LostB, LostE) is the range // of bit indices that are lost. bool RedundantInstrElimination::isLossyShiftRight(const MachineInstr &MI, unsigned OpN, unsigned &LostB, unsigned &LostE) { using namespace Hexagon; + unsigned Opc = MI.getOpcode(); unsigned ImN, RegN; switch (Opc) { @@ -1141,7 +1190,6 @@ bool RedundantInstrElimination::isLossyShiftRight(const MachineInstr &MI, return true; } - // Calculate the bit vector that corresponds to the used bits of register Reg. // The vector Bits has the same size as the size of Reg in bits. If the cal- // culation fails (i.e. the used bits are unknown), it returns false. Other- // wise, it returns true. @@ -1178,7 +1226,6 @@ bool RedundantInstrElimination::computeUsedBits(unsigned Reg, BitVector &Bits) { return true; } - // Calculate the bits used by instruction MI in a register in operand OpN. // Return true/false if the calculation succeeds/fails. If it succeeds, set // used bits in Bits. This function does not reset any bits in Bits, so // subsequent calls over different instructions will result in the union // of the used bits in all these instructions. // The register in question may be used with a sub-register, whereas Bits // holds the bits for the entire register. To keep track of that, the // argument Begin indicates where in Bits is the lowest-significant bit // of the register used in operand OpN. For example, in instruction: -// vreg1 = S2_lsr_i_r vreg2:subreg_hireg, 10 +// vreg1 = S2_lsr_i_r vreg2:isub_hi, 10 // the operand 1 is a 32-bit register, which happens to be a subregister // of the 64-bit register vreg2, and that subregister starts at position 32. // In this case Begin=32, since Bits[32] would be the lowest-significant bit -// of vreg2:subreg_hireg. +// of vreg2:isub_hi. bool RedundantInstrElimination::computeUsedBits(const MachineInstr &MI, unsigned OpN, BitVector &Bits, uint16_t Begin) { unsigned Opc = MI.getOpcode(); @@ -1219,7 +1266,6 @@ bool RedundantInstrElimination::computeUsedBits(const MachineInstr &MI, return GotBits; } - // Calculates the used bits in RD ("defined register"), and checks if these // bits in RS ("used register") and RD are identical.
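A runnable model of the lossy-shift bookkeeping above: shifting a Width-bit register left by S discards the top S bits of the input, assuming 0 < S < Width.

#include <cassert>

// isLossyShiftLeft's answer, modelled standalone: the input bits that
// never reach the result are [Width-S, Width).
static void lostBitsShl(unsigned Width, unsigned S,
                        unsigned &LostB, unsigned &LostE) {
  LostB = Width - S;
  LostE = Width;
}

int main() {
  unsigned B, E;
  lostBitsShl(32, 5, B, E);    // r1 = asl(r0, #5)
  assert(B == 27 && E == 32);  // bits 27..31 of r0 are lost
  return 0;
}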
bool RedundantInstrElimination::usedBitsEqual(BitTracker::RegisterRef RD, @@ -1246,9 +1292,10 @@ bool RedundantInstrElimination::usedBitsEqual(BitTracker::RegisterRef RD, return true; } - bool RedundantInstrElimination::processBlock(MachineBasicBlock &B, const RegisterSet&) { + if (!BT.reached(&B)) + return false; bool Changed = false; for (auto I = B.begin(), E = B.end(), NextI = I; I != E; ++I) { @@ -1292,10 +1339,20 @@ bool RedundantInstrElimination::processBlock(MachineBasicBlock &B, const DebugLoc &DL = MI->getDebugLoc(); const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI); unsigned NewR = MRI.createVirtualRegister(FRC); - BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR) - .addReg(RS.Reg, 0, RS.Sub); + MachineInstr *CopyI = + BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR) + .addReg(RS.Reg, 0, RS.Sub); HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI); - BT.put(BitTracker::RegisterRef(NewR), SC); + // This pass can create copies between registers that don't have the + // exact same values. Updating the tracker has to involve updating + // all dependent cells. Example: + // vreg1 = inst vreg2 ; vreg1 != vreg2, but used bits are equal + // + // vreg3 = copy vreg2 ; <- inserted + // ... = vreg3 ; <- replaced from vreg2 + // Indirectly, we can create a "copy" between vreg1 and vreg2 even + // though their exact values do not match. + BT.visit(*CopyI); Changed = true; break; } @@ -1304,22 +1361,20 @@ bool RedundantInstrElimination::processBlock(MachineBasicBlock &B, return Changed; } +namespace { -// -// Const generation -// // Recognize instructions that produce constant values known at compile-time. // Replace them with register definitions that load these constants directly. -namespace { class ConstGeneration : public Transformation { public: ConstGeneration(BitTracker &bt, const HexagonInstrInfo &hii, MachineRegisterInfo &mri) : Transformation(true), HII(hii), MRI(mri), BT(bt) {} + bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override; + static bool isTfrConst(const MachineInstr &MI); + private: - bool isTfrConst(const MachineInstr &MI) const; - bool isConst(unsigned R, int64_t &V) const; unsigned genTfrConst(const TargetRegisterClass *RC, int64_t C, MachineBasicBlock &B, MachineBasicBlock::iterator At, DebugLoc &DL); @@ -1327,42 +1382,25 @@ namespace { MachineRegisterInfo &MRI; BitTracker &BT; }; -} -bool ConstGeneration::isConst(unsigned R, int64_t &C) const { - if (!BT.has(R)) - return false; - const BitTracker::RegisterCell &RC = BT.lookup(R); - int64_t T = 0; - for (unsigned i = RC.width(); i > 0; --i) { - const BitTracker::BitValue &V = RC[i-1]; - T <<= 1; - if (V.is(1)) - T |= 1; - else if (!V.is(0)) - return false; - } - C = T; - return true; -} +} // end anonymous namespace -bool ConstGeneration::isTfrConst(const MachineInstr &MI) const { +bool ConstGeneration::isTfrConst(const MachineInstr &MI) { unsigned Opc = MI.getOpcode(); switch (Opc) { case Hexagon::A2_combineii: case Hexagon::A4_combineii: case Hexagon::A2_tfrsi: case Hexagon::A2_tfrpi: - case Hexagon::TFR_PdTrue: - case Hexagon::TFR_PdFalse: - case Hexagon::CONST32_Int_Real: - case Hexagon::CONST64_Int_Real: + case Hexagon::PS_true: + case Hexagon::PS_false: + case Hexagon::CONST32: + case Hexagon::CONST64: return true; } return false; } - // Generate a transfer-immediate instruction that is appropriate for the // register class and the actual value being transferred. 
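The patch drops ConstGeneration::isConst in favor of the shared HBS::getConst. A standalone model of the bit-cell walk both perform, with -1 standing in for a bit the tracker cannot prove to be 0 or 1:

#include <cassert>
#include <cstdint>
#include <vector>

// Walk the tracked bits MSB-first; any bit that is not a known 0/1
// means the register is not a compile-time constant. Cell is LSB-first.
static bool getConst(const std::vector<int> &Cell, uint64_t &U) {
  uint64_t T = 0;
  for (unsigned I = Cell.size(); I > 0; --I) {
    int V = Cell[I - 1];
    if (V != 0 && V != 1)
      return false;
    T = (T << 1) | unsigned(V);
  }
  U = T;
  return true;
}

int main() {
  uint64_t U = 0;
  assert(getConst({1, 0, 1, 0}, U) && U == 0x5); // 0b0101
  assert(!getConst({1, -1, 0, 0}, U));           // bit 1 unknown
  return 0;
}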
unsigned ConstGeneration::genTfrConst(const TargetRegisterClass *RC, int64_t C, @@ -1391,7 +1429,7 @@ unsigned ConstGeneration::genTfrConst(const TargetRegisterClass *RC, int64_t C, return Reg; } - BuildMI(B, At, DL, HII.get(Hexagon::CONST64_Int_Real), Reg) + BuildMI(B, At, DL, HII.get(Hexagon::CONST64), Reg) .addImm(C); return Reg; } @@ -1399,9 +1437,9 @@ unsigned ConstGeneration::genTfrConst(const TargetRegisterClass *RC, int64_t C, if (RC == &Hexagon::PredRegsRegClass) { unsigned Opc; if (C == 0) - Opc = Hexagon::TFR_PdFalse; + Opc = Hexagon::PS_false; else if ((C & 0xFF) == 0xFF) - Opc = Hexagon::TFR_PdTrue; + Opc = Hexagon::PS_true; else return 0; BuildMI(B, At, DL, HII.get(Opc), Reg); @@ -1411,8 +1449,9 @@ unsigned ConstGeneration::genTfrConst(const TargetRegisterClass *RC, int64_t C, return 0; } - bool ConstGeneration::processBlock(MachineBasicBlock &B, const RegisterSet&) { + if (!BT.reached(&B)) + return false; bool Changed = false; RegisterSet Defs; @@ -1426,14 +1465,16 @@ bool ConstGeneration::processBlock(MachineBasicBlock &B, const RegisterSet&) { unsigned DR = Defs.find_first(); if (!TargetRegisterInfo::isVirtualRegister(DR)) continue; - int64_t C; - if (isConst(DR, C)) { + uint64_t U; + const BitTracker::RegisterCell &DRC = BT.lookup(DR); + if (HBS::getConst(DRC, 0, DRC.width(), U)) { + int64_t C = U; DebugLoc DL = I->getDebugLoc(); auto At = I->isPHI() ? B.getFirstNonPHI() : I; unsigned ImmReg = genTfrConst(MRI.getRegClass(DR), C, B, At, DL); if (ImmReg) { HBS::replaceReg(DR, ImmReg, MRI); - BT.put(ImmReg, BT.lookup(DR)); + BT.put(ImmReg, DRC); Changed = true; } } @@ -1441,48 +1482,49 @@ bool ConstGeneration::processBlock(MachineBasicBlock &B, const RegisterSet&) { return Changed; } +namespace { -// -// Copy generation -// // Identify pairs of available registers which hold identical values. // In such cases, only one of them needs to be calculated, the other one // will be defined as a copy of the first. -// -// Copy propagation -// -// Eliminate register copies RD = RS, by replacing the uses of RD with -// with uses of RS. -namespace { class CopyGeneration : public Transformation { public: CopyGeneration(BitTracker &bt, const HexagonInstrInfo &hii, - MachineRegisterInfo &mri) - : Transformation(true), HII(hii), MRI(mri), BT(bt) {} + const HexagonRegisterInfo &hri, MachineRegisterInfo &mri) + : Transformation(true), HII(hii), HRI(hri), MRI(mri), BT(bt) {} + bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override; + private: bool findMatch(const BitTracker::RegisterRef &Inp, BitTracker::RegisterRef &Out, const RegisterSet &AVs); const HexagonInstrInfo &HII; + const HexagonRegisterInfo &HRI; MachineRegisterInfo &MRI; BitTracker &BT; + RegisterSet Forbidden; }; +// Eliminate register copies RD = RS, by replacing the uses of RD with +// with uses of RS. class CopyPropagation : public Transformation { public: CopyPropagation(const HexagonRegisterInfo &hri, MachineRegisterInfo &mri) - : Transformation(false), MRI(mri) {} + : Transformation(false), HRI(hri), MRI(mri) {} + bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override; - static bool isCopyReg(unsigned Opc); + + static bool isCopyReg(unsigned Opc, bool NoConv); + private: bool propagateRegCopy(MachineInstr &MI); + const HexagonRegisterInfo &HRI; MachineRegisterInfo &MRI; }; -} - +} // end anonymous namespace /// Check if there is a register in AVs that is identical to Inp. If so, /// set Out to the found register. The output may be a pair Reg:Sub. 
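A sketch of genTfrConst's opcode choice using the renamed opcodes from this patch. The 64-bit path is simplified to the CONST64 fallback; the real code first tries shorter encodings (such as A2_tfrpi) for small values.

#include <cstdint>
#include <string>

enum class RegClass { Int32, Pair64, Pred };

// Which transfer-immediate would materialize constant C into a register
// of class RC; an empty string means no single-instruction form exists.
static std::string tfrConstOpcode(RegClass RC, int64_t C) {
  switch (RC) {
  case RegClass::Int32:
    return "A2_tfrsi";                  // $Rd = #imm
  case RegClass::Pair64:
    return "CONST64";                   // $Rdd = ##imm (simplified)
  case RegClass::Pred:
    if (C == 0)             return "PS_false";
    if ((C & 0xFF) == 0xFF) return "PS_true";
    return "";
  }
  return "";
}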
@@ -1491,17 +1533,20 @@ bool CopyGeneration::findMatch(const BitTracker::RegisterRef &Inp, if (!BT.has(Inp.Reg)) return false; const BitTracker::RegisterCell &InpRC = BT.lookup(Inp.Reg); + auto *FRC = HBS::getFinalVRegClass(Inp, MRI); unsigned B, W; if (!HBS::getSubregMask(Inp, B, W, MRI)) return false; for (unsigned R = AVs.find_first(); R; R = AVs.find_next(R)) { - if (!BT.has(R) || !HBS::isTransparentCopy(R, Inp, MRI)) + if (!BT.has(R) || Forbidden[R]) continue; const BitTracker::RegisterCell &RC = BT.lookup(R); unsigned RW = RC.width(); if (W == RW) { - if (MRI.getRegClass(Inp.Reg) != MRI.getRegClass(R)) + if (FRC != MRI.getRegClass(R)) + continue; + if (!HBS::isTransparentCopy(R, Inp, MRI)) continue; if (!HBS::isEqual(InpRC, B, RC, 0, W)) continue; @@ -1518,20 +1563,22 @@ bool CopyGeneration::findMatch(const BitTracker::RegisterRef &Inp, continue; if (HBS::isEqual(InpRC, B, RC, 0, W)) - Out.Sub = Hexagon::subreg_loreg; + Out.Sub = Hexagon::isub_lo; else if (HBS::isEqual(InpRC, B, RC, W, W)) - Out.Sub = Hexagon::subreg_hireg; + Out.Sub = Hexagon::isub_hi; else continue; Out.Reg = R; - return true; + if (HBS::isTransparentCopy(Out, Inp, MRI)) + return true; } return false; } - bool CopyGeneration::processBlock(MachineBasicBlock &B, const RegisterSet &AVs) { + if (!BT.reached(&B)) + return false; RegisterSet AVB(AVs); bool Changed = false; RegisterSet Defs; @@ -1543,44 +1590,74 @@ bool CopyGeneration::processBlock(MachineBasicBlock &B, HBS::getInstrDefs(*I, Defs); unsigned Opc = I->getOpcode(); - if (CopyPropagation::isCopyReg(Opc)) + if (CopyPropagation::isCopyReg(Opc, false) || + ConstGeneration::isTfrConst(*I)) continue; + DebugLoc DL = I->getDebugLoc(); + auto At = I->isPHI() ? B.getFirstNonPHI() : I; + for (unsigned R = Defs.find_first(); R; R = Defs.find_next(R)) { BitTracker::RegisterRef MR; - if (!findMatch(R, MR, AVB)) + auto *FRC = HBS::getFinalVRegClass(R, MRI); + + if (findMatch(R, MR, AVB)) { + unsigned NewR = MRI.createVirtualRegister(FRC); + BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR) + .addReg(MR.Reg, 0, MR.Sub); + BT.put(BitTracker::RegisterRef(NewR), BT.get(MR)); + HBS::replaceReg(R, NewR, MRI); + Forbidden.insert(R); continue; - DebugLoc DL = I->getDebugLoc(); - auto *FRC = HBS::getFinalVRegClass(MR, MRI); - unsigned NewR = MRI.createVirtualRegister(FRC); - auto At = I->isPHI() ? B.getFirstNonPHI() : I; - BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR) - .addReg(MR.Reg, 0, MR.Sub); - BT.put(BitTracker::RegisterRef(NewR), BT.get(MR)); + } + + if (FRC == &Hexagon::DoubleRegsRegClass || + FRC == &Hexagon::VecDblRegsRegClass || + FRC == &Hexagon::VecDblRegs128BRegClass) { + // Try to generate REG_SEQUENCE. 
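Returning to findMatch above: when a candidate register is twice as wide as the input, the input may still match one half of it, yielding an isub_lo or isub_hi reference. A runnable model over concrete values (the real pass compares tracked bit cells, not runtime values):

#include <cassert>
#include <cstdint>

enum HalfMatch { NoHalf, LoHalf, HiHalf };

// Does the 32-bit input match either half of a 64-bit available register?
static HalfMatch matchHalf64(uint32_t Inp, uint64_t Avail) {
  if (uint32_t(Avail) == Inp)       return LoHalf;   // bits [0, 32)
  if (uint32_t(Avail >> 32) == Inp) return HiHalf;   // bits [32, 64)
  return NoHalf;
}

int main() {
  assert(matchHalf64(0xdead, 0x0000dead00000000ULL) == HiHalf);
  return 0;
}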
+ unsigned SubLo = HRI.getHexagonSubRegIndex(FRC, Hexagon::ps_sub_lo); + unsigned SubHi = HRI.getHexagonSubRegIndex(FRC, Hexagon::ps_sub_hi); + BitTracker::RegisterRef TL = { R, SubLo }; + BitTracker::RegisterRef TH = { R, SubHi }; + BitTracker::RegisterRef ML, MH; + if (findMatch(TL, ML, AVB) && findMatch(TH, MH, AVB)) { + auto *FRC = HBS::getFinalVRegClass(R, MRI); + unsigned NewR = MRI.createVirtualRegister(FRC); + BuildMI(B, At, DL, HII.get(TargetOpcode::REG_SEQUENCE), NewR) + .addReg(ML.Reg, 0, ML.Sub) + .addImm(SubLo) + .addReg(MH.Reg, 0, MH.Sub) + .addImm(SubHi); + BT.put(BitTracker::RegisterRef(NewR), BT.get(R)); + HBS::replaceReg(R, NewR, MRI); + Forbidden.insert(R); + } + } } } return Changed; } - -bool CopyPropagation::isCopyReg(unsigned Opc) { +bool CopyPropagation::isCopyReg(unsigned Opc, bool NoConv) { switch (Opc) { case TargetOpcode::COPY: case TargetOpcode::REG_SEQUENCE: - case Hexagon::A2_tfr: - case Hexagon::A2_tfrp: - case Hexagon::A2_combinew: case Hexagon::A4_combineir: case Hexagon::A4_combineri: return true; + case Hexagon::A2_tfr: + case Hexagon::A2_tfrp: + case Hexagon::A2_combinew: + case Hexagon::V6_vcombine: + case Hexagon::V6_vcombine_128B: + return NoConv; default: break; } return false; } - bool CopyPropagation::propagateRegCopy(MachineInstr &MI) { bool Changed = false; unsigned Opc = MI.getOpcode(); @@ -1602,27 +1679,31 @@ bool CopyPropagation::propagateRegCopy(MachineInstr &MI) { } case TargetOpcode::REG_SEQUENCE: { BitTracker::RegisterRef SL, SH; - if (HBS::parseRegSequence(MI, SL, SH)) { - Changed = HBS::replaceSubWithSub(RD.Reg, Hexagon::subreg_loreg, - SL.Reg, SL.Sub, MRI); - Changed |= HBS::replaceSubWithSub(RD.Reg, Hexagon::subreg_hireg, - SH.Reg, SH.Sub, MRI); + if (HBS::parseRegSequence(MI, SL, SH, MRI)) { + const TargetRegisterClass *RC = MRI.getRegClass(RD.Reg); + unsigned SubLo = HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_lo); + unsigned SubHi = HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_hi); + Changed = HBS::replaceSubWithSub(RD.Reg, SubLo, SL.Reg, SL.Sub, MRI); + Changed |= HBS::replaceSubWithSub(RD.Reg, SubHi, SH.Reg, SH.Sub, MRI); } break; } - case Hexagon::A2_combinew: { + case Hexagon::A2_combinew: + case Hexagon::V6_vcombine: + case Hexagon::V6_vcombine_128B: { + const TargetRegisterClass *RC = MRI.getRegClass(RD.Reg); + unsigned SubLo = HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_lo); + unsigned SubHi = HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_hi); BitTracker::RegisterRef RH = MI.getOperand(1), RL = MI.getOperand(2); - Changed = HBS::replaceSubWithSub(RD.Reg, Hexagon::subreg_loreg, - RL.Reg, RL.Sub, MRI); - Changed |= HBS::replaceSubWithSub(RD.Reg, Hexagon::subreg_hireg, - RH.Reg, RH.Sub, MRI); + Changed = HBS::replaceSubWithSub(RD.Reg, SubLo, RL.Reg, RL.Sub, MRI); + Changed |= HBS::replaceSubWithSub(RD.Reg, SubHi, RH.Reg, RH.Sub, MRI); break; } case Hexagon::A4_combineir: case Hexagon::A4_combineri: { unsigned SrcX = (Opc == Hexagon::A4_combineir) ? 2 : 1; - unsigned Sub = (Opc == Hexagon::A4_combineir) ? Hexagon::subreg_loreg - : Hexagon::subreg_hireg; + unsigned Sub = (Opc == Hexagon::A4_combineir) ? 
Hexagon::isub_lo + : Hexagon::isub_hi; BitTracker::RegisterRef RS = MI.getOperand(SrcX); Changed = HBS::replaceSubWithSub(RD.Reg, Sub, RS.Reg, RS.Sub, MRI); break; @@ -1631,7 +1712,6 @@ bool CopyPropagation::propagateRegCopy(MachineInstr &MI) { return Changed; } - bool CopyPropagation::processBlock(MachineBasicBlock &B, const RegisterSet&) { std::vector<MachineInstr*> Instrs; for (auto I = B.rbegin(), E = B.rend(); I != E; ++I) @@ -1640,7 +1720,7 @@ bool CopyPropagation::processBlock(MachineBasicBlock &B, const RegisterSet&) { bool Changed = false; for (auto I : Instrs) { unsigned Opc = I->getOpcode(); - if (!CopyPropagation::isCopyReg(Opc)) + if (!CopyPropagation::isCopyReg(Opc, true)) continue; Changed |= propagateRegCopy(*I); } @@ -1648,20 +1728,20 @@ bool CopyPropagation::processBlock(MachineBasicBlock &B, const RegisterSet&) { return Changed; } +namespace { -// -// Bit simplification -// // Recognize patterns that can be simplified and replace them with the // simpler forms. // This is by no means complete -namespace { class BitSimplification : public Transformation { public: BitSimplification(BitTracker &bt, const HexagonInstrInfo &hii, - MachineRegisterInfo &mri) - : Transformation(true), HII(hii), MRI(mri), BT(bt) {} + const HexagonRegisterInfo &hri, MachineRegisterInfo &mri, + MachineFunction &mf) + : Transformation(true), HII(hii), HRI(hri), MRI(mri), MF(mf), BT(bt) {} + bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override; + private: struct RegHalf : public BitTracker::RegisterRef { bool Low; // Low/High halfword. @@ -1669,6 +1749,7 @@ namespace { bool matchHalf(unsigned SelfR, const BitTracker::RegisterCell &RC, unsigned B, RegHalf &RH); + bool validateReg(BitTracker::RegisterRef R, unsigned Opc, unsigned OpNum); bool matchPackhl(unsigned SelfR, const BitTracker::RegisterCell &RC, BitTracker::RegisterRef &Rs, BitTracker::RegisterRef &Rt); @@ -1688,11 +1769,13 @@ namespace { const BitTracker::RegisterCell &RC); const HexagonInstrInfo &HII; + const HexagonRegisterInfo &HRI; MachineRegisterInfo &MRI; + MachineFunction &MF; BitTracker &BT; }; -} +} // end anonymous namespace // Check if the bits [B..B+16) in register cell RC form a valid halfword, // i.e. [0..16), [16..32), etc. of some register. If so, return true and @@ -1746,19 +1829,19 @@ bool BitSimplification::matchHalf(unsigned SelfR, unsigned Sub = 0; switch (Pos) { case 0: - Sub = Hexagon::subreg_loreg; + Sub = Hexagon::isub_lo; Low = true; break; case 16: - Sub = Hexagon::subreg_loreg; + Sub = Hexagon::isub_lo; Low = false; break; case 32: - Sub = Hexagon::subreg_hireg; + Sub = Hexagon::isub_hi; Low = true; break; case 48: - Sub = Hexagon::subreg_hireg; + Sub = Hexagon::isub_hi; Low = false; break; default: @@ -1775,6 +1858,12 @@ bool BitSimplification::matchHalf(unsigned SelfR, return true; } +bool BitSimplification::validateReg(BitTracker::RegisterRef R, unsigned Opc, + unsigned OpNum) { + auto *OpRC = HII.getRegClass(HII.get(Opc), OpNum, &HRI, MF); + auto *RRC = HBS::getFinalVRegClass(R, MRI); + return OpRC->hasSubClassEq(RRC); +} // Check if RC matches the pattern of a S2_packhl. If so, return true and // set the inputs Rs and Rt. @@ -1799,7 +1888,6 @@ bool BitSimplification::matchPackhl(unsigned SelfR, return true; } - unsigned BitSimplification::getCombineOpcode(bool HLow, bool LLow) { return HLow ? LLow ? 
Hexagon::A2_combine_ll : Hexagon::A2_combine_lh @@ -1807,7 +1895,6 @@ unsigned BitSimplification::getCombineOpcode(bool HLow, bool LLow) { : Hexagon::A2_combine_hh; } - // If MI stores the upper halfword of a register (potentially obtained via // shifts or extracts), replace it with a storerf instruction. This could // cause the "extraction" code to become dead. @@ -1832,7 +1919,6 @@ bool BitSimplification::genStoreUpperHalf(MachineInstr *MI) { return true; } - // If MI stores a value known at compile-time, and the value is within a range // that avoids using constant-extenders, replace it with a store-immediate. bool BitSimplification::genStoreImmediate(MachineInstr *MI) { @@ -1901,7 +1987,6 @@ bool BitSimplification::genStoreImmediate(MachineInstr *MI) { return true; } - // If MI is equivalent to S2_packhl, generate the S2_packhl. MI could be the // last instruction in a sequence that results in something equivalent to // the pack-halfwords. The intent is to cause the entire sequence to become // dead. @@ -1914,6 +1999,9 @@ bool BitSimplification::genPackhl(MachineInstr *MI, BitTracker::RegisterRef Rs, Rt; if (!matchPackhl(RD.Reg, RC, Rs, Rt)) return false; + if (!validateReg(Rs, Hexagon::S2_packhl, 1) || + !validateReg(Rt, Hexagon::S2_packhl, 2)) + return false; MachineBasicBlock &B = *MI->getParent(); unsigned NewR = MRI.createVirtualRegister(&Hexagon::DoubleRegsRegClass); @@ -1928,7 +2016,6 @@ bool BitSimplification::genPackhl(MachineInstr *MI, return true; } - // If MI produces a halfword of the input in the low half of the output, // replace it with zero-extend or extractu. bool BitSimplification::genExtractHalf(MachineInstr *MI, @@ -1948,14 +2035,18 @@ bool BitSimplification::genExtractHalf(MachineInstr *MI, auto At = MI->isPHI() ? B.getFirstNonPHI() : MachineBasicBlock::iterator(MI); if (L.Low && Opc != Hexagon::A2_zxth) { - NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); - BuildMI(B, At, DL, HII.get(Hexagon::A2_zxth), NewR) - .addReg(L.Reg, 0, L.Sub); + if (validateReg(L, Hexagon::A2_zxth, 1)) { + NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); + BuildMI(B, At, DL, HII.get(Hexagon::A2_zxth), NewR) + .addReg(L.Reg, 0, L.Sub); + } } else if (!L.Low && Opc != Hexagon::S2_lsr_i_r) { - NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); - BuildMI(B, MI, DL, HII.get(Hexagon::S2_lsr_i_r), NewR) - .addReg(L.Reg, 0, L.Sub) - .addImm(16); + if (validateReg(L, Hexagon::S2_lsr_i_r, 1)) { + NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); + BuildMI(B, MI, DL, HII.get(Hexagon::S2_lsr_i_r), NewR) + .addReg(L.Reg, 0, L.Sub) + .addImm(16); + } } if (NewR == 0) return false; @@ -1964,7 +2055,6 @@ bool BitSimplification::genExtractHalf(MachineInstr *MI, return true; } - // If MI is equivalent to a combine(.L/.H, .L/.H) replace it with the // combine. bool BitSimplification::genCombineHalf(MachineInstr *MI, @@ -1981,6 +2071,8 @@ bool BitSimplification::genCombineHalf(MachineInstr *MI, unsigned COpc = getCombineOpcode(H.Low, L.Low); if (COpc == Opc) return false; + if (!validateReg(H, COpc, 1) || !validateReg(L, COpc, 2)) + return false; MachineBasicBlock &B = *MI->getParent(); DebugLoc DL = MI->getDebugLoc(); @@ -1995,7 +2087,6 @@ bool BitSimplification::genCombineHalf(MachineInstr *MI, return true; } - // If MI resets high bits of a register and keeps the lower ones, replace it // with zero-extend byte/half, and-immediate, or extractu, as appropriate.
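These gen* routines consume matchHalf's decoding of where a halfword sits within a 64-bit bit cell. A runnable model of that decoding:

#include <cassert>

struct Half { bool HiSub; bool LowHalf; };

// A halfword found at bit Pos of a 64-bit cell lives in isub_lo
// (Pos < 32) or isub_hi, and is the low halfword of that 32-bit
// subregister exactly when Pos is a multiple of 32.
static bool decodeHalfPos(unsigned Pos, Half &H) {
  if (Pos != 0 && Pos != 16 && Pos != 32 && Pos != 48)
    return false;                 // not halfword-aligned
  H.HiSub = (Pos >= 32);
  H.LowHalf = (Pos % 32 == 0);
  return true;
}

int main() {
  Half H;
  assert(decodeHalfPos(48, H) && H.HiSub && !H.LowHalf); // .H of isub_hi
  return 0;
}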
bool BitSimplification::genExtractLow(MachineInstr *MI, @@ -2039,6 +2130,8 @@ bool BitSimplification::genExtractLow(MachineInstr *MI, continue; if (BW < W || !HBS::isEqual(RC, 0, SC, BN, W)) continue; + if (!validateReg(RS, NewOpc, 1)) + continue; unsigned NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); auto At = MI->isPHI() ? B.getFirstNonPHI() @@ -2056,7 +2149,6 @@ bool BitSimplification::genExtractLow(MachineInstr *MI, return false; } - // Check for tstbit simplification opportunity, where the bit being checked // can be tracked back to another register. For example: // vreg2 = S2_lsr_i_r vreg1, 5 @@ -2086,19 +2178,19 @@ bool BitSimplification::simplifyTstbit(MachineInstr *MI, // Need to map V.RefI.Reg to a 32-bit register, i.e. if it is // a double register, need to use a subregister and adjust bit // number. - unsigned P = UINT_MAX; + unsigned P = std::numeric_limits<unsigned>::max(); BitTracker::RegisterRef RR(V.RefI.Reg, 0); if (TC == &Hexagon::DoubleRegsRegClass) { P = V.RefI.Pos; - RR.Sub = Hexagon::subreg_loreg; + RR.Sub = Hexagon::isub_lo; if (P >= 32) { P -= 32; - RR.Sub = Hexagon::subreg_hireg; + RR.Sub = Hexagon::isub_hi; } } else if (TC == &Hexagon::IntRegsRegClass) { P = V.RefI.Pos; } - if (P != UINT_MAX) { + if (P != std::numeric_limits<unsigned>::max()) { unsigned NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass); BuildMI(B, At, DL, HII.get(Hexagon::S2_tstbit_i), NewR) .addReg(RR.Reg, 0, RR.Sub) @@ -2109,7 +2201,7 @@ bool BitSimplification::simplifyTstbit(MachineInstr *MI, } } else if (V.is(0) || V.is(1)) { unsigned NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass); - unsigned NewOpc = V.is(0) ? Hexagon::TFR_PdFalse : Hexagon::TFR_PdTrue; + unsigned NewOpc = V.is(0) ? Hexagon::PS_false : Hexagon::PS_true; BuildMI(B, At, DL, HII.get(NewOpc), NewR); HBS::replaceReg(RD.Reg, NewR, MRI); return true; @@ -2118,9 +2210,10 @@ bool BitSimplification::simplifyTstbit(MachineInstr *MI, return false; } - bool BitSimplification::processBlock(MachineBasicBlock &B, const RegisterSet &AVs) { + if (!BT.reached(&B)) + return false; bool Changed = false; RegisterSet AVB = AVs; RegisterSet Defs; @@ -2175,7 +2268,6 @@ bool BitSimplification::processBlock(MachineBasicBlock &B, return Changed; } - bool HexagonBitSimplify::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(*MF.getFunction())) return false; @@ -2203,10 +2295,14 @@ bool HexagonBitSimplify::runOnMachineFunction(MachineFunction &MF) { RegisterSet ARE; // Available registers for RIE. RedundantInstrElimination RIE(BT, HII, MRI); - Changed |= visitBlock(Entry, RIE, ARE); + bool Ried = visitBlock(Entry, RIE, ARE); + if (Ried) { + Changed = true; + BT.run(); + } RegisterSet ACG; // Available registers for CG. - CopyGeneration CopyG(BT, HII, MRI); + CopyGeneration CopyG(BT, HII, HRI, MRI); Changed |= visitBlock(Entry, CopyG, ACG); RegisterSet ACP; // Available registers for CP. @@ -2217,7 +2313,7 @@ bool HexagonBitSimplify::runOnMachineFunction(MachineFunction &MF) { BT.run(); RegisterSet ABS; // Available registers for BS. - BitSimplification BitS(BT, HII, MRI); + BitSimplification BitS(BT, HII, HRI, MRI, MF); Changed |= visitBlock(Entry, BitS, ABS); Changed = DeadCodeElimination(MF, *MDT).run() || Changed; @@ -2231,7 +2327,6 @@ bool HexagonBitSimplify::runOnMachineFunction(MachineFunction &MF) { return Changed; } - // Recognize loops where the code at the end of the loop matches the code // before the entry of the loop, and the matching code is such that is can // be simplified. 
This pass relies on the bit simplification above and only @@ -2295,16 +2390,20 @@ bool HexagonBitSimplify::runOnMachineFunction(MachineFunction &MF) { // }:endloop0 namespace llvm { + FunctionPass *createHexagonLoopRescheduling(); void initializeHexagonLoopReschedulingPass(PassRegistry&); -} + +} // end namespace llvm namespace { + class HexagonLoopRescheduling : public MachineFunctionPass { public: static char ID; + HexagonLoopRescheduling() : MachineFunctionPass(ID), - HII(0), HRI(0), MRI(0), BTP(0) { + HII(nullptr), HRI(nullptr), MRI(nullptr), BTP(nullptr) { initializeHexagonLoopReschedulingPass(*PassRegistry::getPassRegistry()); } @@ -2329,8 +2428,8 @@ namespace { struct PhiInfo { PhiInfo(MachineInstr &P, MachineBasicBlock &B); unsigned DefR; - BitTracker::RegisterRef LR, PR; - MachineBasicBlock *LB, *PB; + BitTracker::RegisterRef LR, PR; // Loop Register, Preheader Register + MachineBasicBlock *LB, *PB; // Loop Block, Preheader Block }; static unsigned getDefReg(const MachineInstr *MI); @@ -2344,14 +2443,14 @@ namespace { MachineBasicBlock::iterator At, unsigned OldPhiR, unsigned NewPredR); bool processLoop(LoopCand &C); }; -} + +} // end anonymous namespace char HexagonLoopRescheduling::ID = 0; INITIALIZE_PASS(HexagonLoopRescheduling, "hexagon-loop-resched", "Hexagon Loop Rescheduling", false, false) - HexagonLoopRescheduling::PhiInfo::PhiInfo(MachineInstr &P, MachineBasicBlock &B) { DefR = HexagonLoopRescheduling::getDefReg(&P); @@ -2368,7 +2467,6 @@ HexagonLoopRescheduling::PhiInfo::PhiInfo(MachineInstr &P, } } - unsigned HexagonLoopRescheduling::getDefReg(const MachineInstr *MI) { RegisterSet Defs; HBS::getInstrDefs(*MI, Defs); @@ -2377,7 +2475,6 @@ unsigned HexagonLoopRescheduling::getDefReg(const MachineInstr *MI) { return Defs.find_first(); } - bool HexagonLoopRescheduling::isConst(unsigned Reg) const { if (!BTP->has(Reg)) return false; @@ -2390,7 +2487,6 @@ bool HexagonLoopRescheduling::isConst(unsigned Reg) const { return true; } - bool HexagonLoopRescheduling::isBitShuffle(const MachineInstr *MI, unsigned DefR) const { unsigned Opc = MI->getOpcode(); @@ -2421,7 +2517,6 @@ bool HexagonLoopRescheduling::isBitShuffle(const MachineInstr *MI, return false; } - bool HexagonLoopRescheduling::isStoreInput(const MachineInstr *MI, unsigned InpR) const { for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { @@ -2434,7 +2529,6 @@ bool HexagonLoopRescheduling::isStoreInput(const MachineInstr *MI, return false; } - bool HexagonLoopRescheduling::isShuffleOf(unsigned OutR, unsigned InpR) const { if (!BTP->has(OutR) || !BTP->has(InpR)) return false; @@ -2449,7 +2543,6 @@ bool HexagonLoopRescheduling::isShuffleOf(unsigned OutR, unsigned InpR) const { return true; } - bool HexagonLoopRescheduling::isSameShuffle(unsigned OutR1, unsigned InpR1, unsigned OutR2, unsigned &InpR2) const { if (!BTP->has(OutR1) || !BTP->has(InpR1) || !BTP->has(OutR2)) @@ -2481,7 +2574,6 @@ bool HexagonLoopRescheduling::isSameShuffle(unsigned OutR1, unsigned InpR1, return true; } - void HexagonLoopRescheduling::moveGroup(InstrGroup &G, MachineBasicBlock &LB, MachineBasicBlock &PB, MachineBasicBlock::iterator At, unsigned OldPhiR, unsigned NewPredR) { @@ -2521,7 +2613,6 @@ void HexagonLoopRescheduling::moveGroup(InstrGroup &G, MachineBasicBlock &LB, HBS::replaceReg(OldPhiR, RegMap[G.Out.Reg], *MRI); } - bool HexagonLoopRescheduling::processLoop(LoopCand &C) { DEBUG(dbgs() << "Processing loop in BB#" << C.LB->getNumber() << "\n"); std::vector<PhiInfo> Phis; @@ -2595,7 +2686,7 @@ bool 
HexagonLoopRescheduling::processLoop(LoopCand &C) { if (UseI->getOperand(Idx+1).getMBB() != C.LB) BadUse = true; } else { - auto F = std::find(ShufIns.begin(), ShufIns.end(), UseI); + auto F = find(ShufIns, UseI); if (F == ShufIns.end()) BadUse = true; } @@ -2661,7 +2752,7 @@ bool HexagonLoopRescheduling::processLoop(LoopCand &C) { auto LoopInpEq = [G] (const PhiInfo &P) -> bool { return G.Out.Reg == P.LR.Reg; }; - if (std::find_if(Phis.begin(), Phis.end(), LoopInpEq) == Phis.end()) + if (llvm::find_if(Phis, LoopInpEq) == Phis.end()) continue; G.Inp.Reg = Inputs.find_first(); @@ -2686,41 +2777,46 @@ bool HexagonLoopRescheduling::processLoop(LoopCand &C) { auto LoopInpEq = [G] (const PhiInfo &P) -> bool { return G.Out.Reg == P.LR.Reg; }; - auto F = std::find_if(Phis.begin(), Phis.end(), LoopInpEq); + auto F = llvm::find_if(Phis, LoopInpEq); if (F == Phis.end()) continue; - unsigned PredR = 0; - if (!isSameShuffle(G.Out.Reg, G.Inp.Reg, F->PR.Reg, PredR)) { - const MachineInstr *DefPredR = MRI->getVRegDef(F->PR.Reg); - unsigned Opc = DefPredR->getOpcode(); + unsigned PrehR = 0; + if (!isSameShuffle(G.Out.Reg, G.Inp.Reg, F->PR.Reg, PrehR)) { + const MachineInstr *DefPrehR = MRI->getVRegDef(F->PR.Reg); + unsigned Opc = DefPrehR->getOpcode(); if (Opc != Hexagon::A2_tfrsi && Opc != Hexagon::A2_tfrpi) continue; - if (!DefPredR->getOperand(1).isImm()) + if (!DefPrehR->getOperand(1).isImm()) continue; - if (DefPredR->getOperand(1).getImm() != 0) + if (DefPrehR->getOperand(1).getImm() != 0) continue; const TargetRegisterClass *RC = MRI->getRegClass(G.Inp.Reg); if (RC != MRI->getRegClass(F->PR.Reg)) { - PredR = MRI->createVirtualRegister(RC); + PrehR = MRI->createVirtualRegister(RC); unsigned TfrI = (RC == &Hexagon::IntRegsRegClass) ? Hexagon::A2_tfrsi : Hexagon::A2_tfrpi; auto T = C.PB->getFirstTerminator(); DebugLoc DL = (T != C.PB->end()) ? T->getDebugLoc() : DebugLoc(); - BuildMI(*C.PB, T, DL, HII->get(TfrI), PredR) + BuildMI(*C.PB, T, DL, HII->get(TfrI), PrehR) .addImm(0); } else { - PredR = F->PR.Reg; + PrehR = F->PR.Reg; } } - assert(MRI->getRegClass(PredR) == MRI->getRegClass(G.Inp.Reg)); - moveGroup(G, *F->LB, *F->PB, F->LB->getFirstNonPHI(), F->DefR, PredR); + // isSameShuffle could match with PrehR being of a wider class than + // G.Inp.Reg, for example if G shuffles the low 32 bits of its input, + // it would match for the input being a 32-bit register, and PrehR + // being a 64-bit register (where the low 32 bits match). This could + // be handled, but for now skip these cases. 
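processLoop's lookups, modelled standalone: the patch swaps std::find_if(v.begin(), v.end(), p) for LLVM's range-based find_if; the snippet keeps the std:: spelling so it compiles on its own.

#include <algorithm>
#include <vector>

struct PhiInfo { unsigned LoopReg; };

// Is there a PHI whose loop-carried register matches the group's output?
static bool hasPhiForReg(const std::vector<PhiInfo> &Phis, unsigned OutReg) {
  auto LoopInpEq = [OutReg](const PhiInfo &P) { return P.LoopReg == OutReg; };
  return std::find_if(Phis.begin(), Phis.end(), LoopInpEq) != Phis.end();
}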
+ if (MRI->getRegClass(PrehR) != MRI->getRegClass(G.Inp.Reg)) + continue; + moveGroup(G, *F->LB, *F->PB, F->LB->getFirstNonPHI(), F->DefR, PrehR); Changed = true; } return Changed; } - bool HexagonLoopRescheduling::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(*MF.getFunction())) return false; @@ -2783,4 +2879,3 @@ FunctionPass *llvm::createHexagonLoopRescheduling() { FunctionPass *llvm::createHexagonBitSimplify() { return new HexagonBitSimplify(); } - diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp index 78b57d2..436f88d 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp @@ -7,16 +7,30 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - #include "Hexagon.h" +#include "HexagonBitTracker.h" #include "HexagonInstrInfo.h" #include "HexagonRegisterInfo.h" #include "HexagonTargetMachine.h" -#include "HexagonBitTracker.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <cstdlib> +#include <utility> +#include <vector> using namespace llvm; @@ -26,7 +40,7 @@ HexagonEvaluator::HexagonEvaluator(const HexagonRegisterInfo &tri, MachineRegisterInfo &mri, const HexagonInstrInfo &tii, MachineFunction &mf) - : MachineEvaluator(tri, mri), MF(mf), MFI(*mf.getFrameInfo()), TII(tii) { + : MachineEvaluator(tri, mri), MF(mf), MFI(mf.getFrameInfo()), TII(tii) { // Populate the VRX map (VR to extension-type). // Go over all the formal parameters of the function. If a given parameter // P is sign- or zero-extended, locate the virtual register holding that @@ -60,13 +74,15 @@ HexagonEvaluator::HexagonEvaluator(const HexagonRegisterInfo &tri, // Module::AnyPointerSize. 
if (Width == 0 || Width > 64) break; + AttributeSet Attrs = F.getAttributes(); + if (Attrs.hasAttribute(AttrIdx, Attribute::ByVal)) + continue; InPhysReg = getNextPhysReg(InPhysReg, Width); if (!InPhysReg) break; InVirtReg = getVirtRegFor(InPhysReg); if (!InVirtReg) continue; - AttributeSet Attrs = F.getAttributes(); if (Attrs.hasAttribute(AttrIdx, Attribute::SExt)) VRX.insert(std::make_pair(InVirtReg, ExtType(ExtType::SExt, Width))); else if (Attrs.hasAttribute(AttrIdx, Attribute::ZExt)) @@ -74,20 +90,22 @@ HexagonEvaluator::HexagonEvaluator(const HexagonRegisterInfo &tri, } } - BT::BitMask HexagonEvaluator::mask(unsigned Reg, unsigned Sub) const { + using namespace Hexagon; + if (Sub == 0) return MachineEvaluator::mask(Reg, 0); - using namespace Hexagon; const TargetRegisterClass *RC = MRI.getRegClass(Reg); unsigned ID = RC->getID(); uint16_t RW = getRegBitWidth(RegisterRef(Reg, Sub)); + auto &HRI = static_cast<const HexagonRegisterInfo&>(TRI); + bool IsSubLo = (Sub == HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_lo)); switch (ID) { case DoubleRegsRegClassID: case VecDblRegsRegClassID: case VecDblRegs128BRegClassID: - return (Sub == subreg_loreg) ? BT::BitMask(0, RW-1) - : BT::BitMask(RW, 2*RW-1); + return IsSubLo ? BT::BitMask(0, RW-1) + : BT::BitMask(RW, 2*RW-1); default: break; } @@ -98,6 +116,7 @@ BT::BitMask HexagonEvaluator::mask(unsigned Reg, unsigned Sub) const { } namespace { + class RegisterRefs { std::vector<BT::RegisterRef> Vector; @@ -113,17 +132,21 @@ public: } size_t size() const { return Vector.size(); } + const BT::RegisterRef &operator[](unsigned n) const { // The main purpose of this operator is to assert with bad argument. assert(n < Vector.size()); return Vector[n]; } }; -} + +} // end anonymous namespace bool HexagonEvaluator::evaluate(const MachineInstr &MI, const CellMapType &Inputs, CellMapType &Outputs) const { + using namespace Hexagon; + unsigned NumDefs = 0; // Sanity verification: there should not be any defs with subregisters. @@ -138,8 +161,19 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI, if (NumDefs == 0) return false; - if (MI.mayLoad()) - return evaluateLoad(MI, Inputs, Outputs); + unsigned Opc = MI.getOpcode(); + + if (MI.mayLoad()) { + switch (Opc) { + // These instructions may be marked as mayLoad, but they are generating + // immediate values, so skip them. + case CONST32: + case CONST64: + break; + default: + return evaluateLoad(MI, Inputs, Outputs); + } + } // Check COPY instructions that copy formal parameters into virtual // registers. 
Such parameters can be sign- or zero-extended at the @@ -174,8 +208,6 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI, } RegisterRefs Reg(MI); - unsigned Opc = MI.getOpcode(); - using namespace Hexagon; #define op(i) MI.getOperand(i) #define rc(i) RegisterCell::ref(getCell(Reg[i], Inputs)) #define im(i) MI.getOperand(i).getImm() @@ -246,16 +278,13 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI, case A2_tfrsi: case A2_tfrpi: case CONST32: - case CONST32_Float_Real: - case CONST32_Int_Real: - case CONST64_Float_Real: - case CONST64_Int_Real: + case CONST64: return rr0(eIMM(im(1), W0), Outputs); - case TFR_PdFalse: + case PS_false: return rr0(RegisterCell(W0).fill(0, W0, BT::BitValue::Zero), Outputs); - case TFR_PdTrue: + case PS_true: return rr0(RegisterCell(W0).fill(0, W0, BT::BitValue::One), Outputs); - case TFR_FI: { + case PS_fi: { int FI = op(1).getIndex(); int Off = op(2).getImm(); unsigned A = MFI.getObjectAlignment(FI) + std::abs(Off); @@ -670,6 +699,8 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI, case A4_combineir: case A4_combineri: case A2_combinew: + case V6_vcombine: + case V6_vcombine_128B: assert(W0 % 2 == 0); return rr0(cop(2, W0/2).cat(cop(1, W0/2)), Outputs); case A2_combine_ll: @@ -766,10 +797,10 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI, case S2_cl0: case S2_cl0p: // Always produce a 32-bit result. - return rr0(eCLB(rc(1), 0/*bit*/, 32), Outputs); + return rr0(eCLB(rc(1), false/*bit*/, 32), Outputs); case S2_cl1: case S2_cl1p: - return rr0(eCLB(rc(1), 1/*bit*/, 32), Outputs); + return rr0(eCLB(rc(1), true/*bit*/, 32), Outputs); case S2_clb: case S2_clbp: { uint16_t W1 = getRegBitWidth(Reg[1]); @@ -781,10 +812,10 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI, } case S2_ct0: case S2_ct0p: - return rr0(eCTB(rc(1), 0/*bit*/, 32), Outputs); + return rr0(eCTB(rc(1), false/*bit*/, 32), Outputs); case S2_ct1: case S2_ct1p: - return rr0(eCTB(rc(1), 1/*bit*/, 32), Outputs); + return rr0(eCTB(rc(1), true/*bit*/, 32), Outputs); case S5_popcountp: // TODO break; @@ -884,17 +915,19 @@ bool HexagonEvaluator::evaluate(const MachineInstr &BI, const CellMapType &Inputs, BranchTargetList &Targets, bool &FallsThru) const { - // We need to evaluate one branch at a time. TII::AnalyzeBranch checks + // We need to evaluate one branch at a time. TII::analyzeBranch checks // all the branches in a basic block at once, so we cannot use it. unsigned Opc = BI.getOpcode(); bool SimpleBranch = false; bool Negated = false; switch (Opc) { case Hexagon::J2_jumpf: + case Hexagon::J2_jumpfpt: case Hexagon::J2_jumpfnew: case Hexagon::J2_jumpfnewpt: Negated = true; case Hexagon::J2_jumpt: + case Hexagon::J2_jumptpt: case Hexagon::J2_jumptnew: case Hexagon::J2_jumptnewpt: // Simple branch: if([!]Pn) jump ... 
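For the HexagonEvaluator::mask change earlier in this file, a runnable model of the lane split: the low subregister of a pair covers bits [0, RW) and the high one [RW, 2*RW), where RW is the subregister width (32 for an Rdd pair, more for the HVX double classes).

#include <cassert>
#include <utility>

// Returns the inclusive (first, last) bit positions the subregister
// occupies in the pair, mirroring BT::BitMask's two arguments.
static std::pair<unsigned, unsigned> subregMask(unsigned RW, bool IsSubLo) {
  return IsSubLo ? std::make_pair(0u, RW - 1)
                 : std::make_pair(RW, 2 * RW - 1);
}

int main() {
  auto M = subregMask(32, /*IsSubLo=*/false);
  assert(M.first == 32 && M.second == 63); // isub_hi of a 64-bit pair
  return 0;
}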
@@ -938,6 +971,8 @@ bool HexagonEvaluator::evaluate(const MachineInstr &BI, bool HexagonEvaluator::evaluateLoad(const MachineInstr &MI, const CellMapType &Inputs, CellMapType &Outputs) const { + using namespace Hexagon; + if (TII.isPredicated(MI)) return false; assert(MI.mayLoad() && "A load that mayn't?"); @@ -945,7 +980,6 @@ bool HexagonEvaluator::evaluateLoad(const MachineInstr &MI, uint16_t BitNum; bool SignEx; - using namespace Hexagon; switch (Opc) { default: @@ -986,7 +1020,7 @@ bool HexagonEvaluator::evaluateLoad(const MachineInstr &MI, case L2_loadrb_pci: case L2_loadrb_pcr: case L2_loadrb_pi: - case L4_loadrb_abs: + case PS_loadrbabs: case L4_loadrb_ap: case L4_loadrb_rr: case L4_loadrb_ur: @@ -1000,7 +1034,7 @@ bool HexagonEvaluator::evaluateLoad(const MachineInstr &MI, case L2_loadrub_pci: case L2_loadrub_pcr: case L2_loadrub_pi: - case L4_loadrub_abs: + case PS_loadrubabs: case L4_loadrub_ap: case L4_loadrub_rr: case L4_loadrub_ur: @@ -1014,7 +1048,7 @@ bool HexagonEvaluator::evaluateLoad(const MachineInstr &MI, case L2_loadrh_pci: case L2_loadrh_pcr: case L2_loadrh_pi: - case L4_loadrh_abs: + case PS_loadrhabs: case L4_loadrh_ap: case L4_loadrh_rr: case L4_loadrh_ur: @@ -1029,7 +1063,7 @@ bool HexagonEvaluator::evaluateLoad(const MachineInstr &MI, case L2_loadruh_pcr: case L2_loadruh_pi: case L4_loadruh_rr: - case L4_loadruh_abs: + case PS_loadruhabs: case L4_loadruh_ap: case L4_loadruh_ur: BitNum = 16; @@ -1043,7 +1077,7 @@ bool HexagonEvaluator::evaluateLoad(const MachineInstr &MI, case L2_loadri_pcr: case L2_loadri_pi: case L2_loadw_locked: - case L4_loadri_abs: + case PS_loadriabs: case L4_loadri_ap: case L4_loadri_rr: case L4_loadri_ur: @@ -1059,7 +1093,7 @@ bool HexagonEvaluator::evaluateLoad(const MachineInstr &MI, case L2_loadrd_pcr: case L2_loadrd_pi: case L4_loadd_locked: - case L4_loadrd_abs: + case PS_loadrdabs: case L4_loadrd_ap: case L4_loadrd_rr: case L4_loadrd_ur: @@ -1126,9 +1160,9 @@ bool HexagonEvaluator::evaluateFormalCopy(const MachineInstr &MI, return true; } - unsigned HexagonEvaluator::getNextPhysReg(unsigned PReg, unsigned Width) const { using namespace Hexagon; + bool Is64 = DoubleRegsRegClass.contains(PReg); assert(PReg == 0 || Is64 || IntRegsRegClass.contains(PReg)); @@ -1165,7 +1199,6 @@ unsigned HexagonEvaluator::getNextPhysReg(unsigned PReg, unsigned Width) const { return (Idx64+1 < Num64) ? 
Phys64[Idx64+1] : 0; } - unsigned HexagonEvaluator::getVirtRegFor(unsigned PReg) const { typedef MachineRegisterInfo::livein_iterator iterator; for (iterator I = MRI.livein_begin(), E = MRI.livein_end(); I != E; ++I) { diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.h b/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.h index 9e7b1db..2cbf65e 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.h +++ b/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.h @@ -1,4 +1,4 @@ -//===--- HexagonBitTracker.h ----------------------------------------------===// +//===--- HexagonBitTracker.h ------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,15 +7,17 @@ // //===----------------------------------------------------------------------===// -#ifndef HEXAGONBITTRACKER_H -#define HEXAGONBITTRACKER_H +#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONBITTRACKER_H +#define LLVM_LIB_TARGET_HEXAGON_HEXAGONBITTRACKER_H #include "BitTracker.h" #include "llvm/ADT/DenseMap.h" +#include <cstdint> namespace llvm { - class HexagonInstrInfo; - class HexagonRegisterInfo; + +class HexagonInstrInfo; +class HexagonRegisterInfo; struct HexagonEvaluator : public BitTracker::MachineEvaluator { typedef BitTracker::CellMapType CellMapType; @@ -49,10 +51,12 @@ private: // Type of formal parameter extension. struct ExtType { enum { SExt, ZExt }; - char Type; - uint16_t Width; - ExtType() : Type(0), Width(0) {} + + ExtType() = default; ExtType(char t, uint16_t w) : Type(t), Width(w) {} + + char Type = 0; + uint16_t Width = 0; }; // Map VR -> extension type. typedef DenseMap<unsigned, ExtType> RegExtMap; @@ -61,4 +65,4 @@ private: } // end namespace llvm -#endif +#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONBITTRACKER_H diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonBlockRanges.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonBlockRanges.cpp index 5c44029..adc213c 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonBlockRanges.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonBlockRanges.cpp @@ -12,17 +12,19 @@ #include "HexagonBlockRanges.h" #include "HexagonInstrInfo.h" #include "HexagonSubtarget.h" - #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Support/Compiler.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" - +#include <algorithm> +#include <cassert> +#include <iterator> #include <map> using namespace llvm; @@ -40,7 +42,6 @@ bool HexagonBlockRanges::IndexRange::overlaps(const IndexRange &A) const { return false; } - bool HexagonBlockRanges::IndexRange::contains(const IndexRange &A) const { if (start() <= A.start()) { // Treat "None" in the range end as equal to the range start. @@ -52,7 +53,6 @@ bool HexagonBlockRanges::IndexRange::contains(const IndexRange &A) const { return false; } - void HexagonBlockRanges::IndexRange::merge(const IndexRange &A) { // Allow merging adjacent ranges. 
assert(end() == A.start() || overlaps(A)); @@ -70,14 +70,12 @@ void HexagonBlockRanges::IndexRange::merge(const IndexRange &A) { Fixed = true; } - void HexagonBlockRanges::RangeList::include(const RangeList &RL) { for (auto &R : RL) - if (std::find(begin(), end(), R) == end()) + if (!is_contained(*this, R)) push_back(R); } - // Merge all overlapping ranges in the list, so that all that remains // is a list of disjoint ranges. void HexagonBlockRanges::RangeList::unionize(bool MergeAdjacent) { @@ -101,7 +99,6 @@ void HexagonBlockRanges::RangeList::unionize(bool MergeAdjacent) { } } - // Compute a range A-B and add it to the list. void HexagonBlockRanges::RangeList::addsub(const IndexRange &A, const IndexRange &B) { @@ -138,7 +135,6 @@ void HexagonBlockRanges::RangeList::addsub(const IndexRange &A, } } - // Subtract a given range from each element in the list. void HexagonBlockRanges::RangeList::subtract(const IndexRange &Range) { // Cannot assume that the list is unionized (i.e. contains only non- @@ -156,7 +152,6 @@ void HexagonBlockRanges::RangeList::subtract(const IndexRange &Range) { include(T); } - HexagonBlockRanges::InstrIndexMap::InstrIndexMap(MachineBasicBlock &B) : Block(B) { IndexType Idx = IndexType::First; @@ -171,13 +166,11 @@ HexagonBlockRanges::InstrIndexMap::InstrIndexMap(MachineBasicBlock &B) Last = B.empty() ? IndexType::None : unsigned(Idx)-1; } - MachineInstr *HexagonBlockRanges::InstrIndexMap::getInstr(IndexType Idx) const { auto F = Map.find(Idx); - return (F != Map.end()) ? F->second : 0; + return (F != Map.end()) ? F->second : nullptr; } - HexagonBlockRanges::IndexType HexagonBlockRanges::InstrIndexMap::getIndex( MachineInstr *MI) const { for (auto &I : Map) @@ -186,7 +179,6 @@ HexagonBlockRanges::IndexType HexagonBlockRanges::InstrIndexMap::getIndex( return IndexType::None; } - HexagonBlockRanges::IndexType HexagonBlockRanges::InstrIndexMap::getPrevIndex( IndexType Idx) const { assert (Idx != IndexType::None); @@ -199,7 +191,6 @@ HexagonBlockRanges::IndexType HexagonBlockRanges::InstrIndexMap::getPrevIndex( return unsigned(Idx)-1; } - HexagonBlockRanges::IndexType HexagonBlockRanges::InstrIndexMap::getNextIndex( IndexType Idx) const { assert (Idx != IndexType::None); @@ -210,7 +201,6 @@ HexagonBlockRanges::IndexType HexagonBlockRanges::InstrIndexMap::getNextIndex( return unsigned(Idx)+1; } - void HexagonBlockRanges::InstrIndexMap::replaceInstr(MachineInstr *OldMI, MachineInstr *NewMI) { for (auto &I : Map) { @@ -224,7 +214,6 @@ void HexagonBlockRanges::InstrIndexMap::replaceInstr(MachineInstr *OldMI, } } - HexagonBlockRanges::HexagonBlockRanges(MachineFunction &mf) : MF(mf), HST(mf.getSubtarget<HexagonSubtarget>()), TII(*HST.getInstrInfo()), TRI(*HST.getRegisterInfo()), @@ -239,17 +228,33 @@ HexagonBlockRanges::HexagonBlockRanges(MachineFunction &mf) } } - HexagonBlockRanges::RegisterSet HexagonBlockRanges::getLiveIns( - const MachineBasicBlock &B) { + const MachineBasicBlock &B, const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI) { RegisterSet LiveIns; - for (auto I : B.liveins()) - if (!Reserved[I.PhysReg]) - LiveIns.insert({I.PhysReg, 0}); + RegisterSet Tmp; + for (auto I : B.liveins()) { + if (I.LaneMask.all()) { + Tmp.insert({I.PhysReg,0}); + continue; + } + for (MCSubRegIndexIterator S(I.PhysReg, &TRI); S.isValid(); ++S) { + LaneBitmask M = TRI.getSubRegIndexLaneMask(S.getSubRegIndex()); + if ((M & I.LaneMask).any()) + Tmp.insert({S.getSubReg(), 0}); + } + } + + for (auto R : Tmp) { + if (!Reserved[R.Reg]) + LiveIns.insert(R); + for (auto S : 
expandToSubRegs(R, MRI, TRI)) + if (!Reserved[S.Reg]) + LiveIns.insert(S); + } return LiveIns; } - HexagonBlockRanges::RegisterSet HexagonBlockRanges::expandToSubRegs( RegisterRef R, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) { @@ -279,7 +284,6 @@ HexagonBlockRanges::RegisterSet HexagonBlockRanges::expandToSubRegs( return SRs; } - void HexagonBlockRanges::computeInitialLiveRanges(InstrIndexMap &IndexMap, RegToRangeMap &LiveMap) { std::map<RegisterRef,IndexType> LastDef, LastUse; @@ -287,9 +291,8 @@ void HexagonBlockRanges::computeInitialLiveRanges(InstrIndexMap &IndexMap, MachineBasicBlock &B = IndexMap.getBlock(); MachineRegisterInfo &MRI = B.getParent()->getRegInfo(); - for (auto R : getLiveIns(B)) - for (auto S : expandToSubRegs(R, MRI, TRI)) - LiveOnEntry.insert(S); + for (auto R : getLiveIns(B, MRI, TRI)) + LiveOnEntry.insert(R); for (auto R : LiveOnEntry) LastDef[R] = IndexType::Entry; @@ -340,9 +343,8 @@ void HexagonBlockRanges::computeInitialLiveRanges(InstrIndexMap &IndexMap, // Collect live-on-exit. RegisterSet LiveOnExit; for (auto *SB : B.successors()) - for (auto R : getLiveIns(*SB)) - for (auto S : expandToSubRegs(R, MRI, TRI)) - LiveOnExit.insert(S); + for (auto R : getLiveIns(*SB, MRI, TRI)) + LiveOnExit.insert(R); for (auto R : LiveOnExit) LastUse[R] = IndexType::Exit; @@ -363,18 +365,16 @@ void HexagonBlockRanges::computeInitialLiveRanges(InstrIndexMap &IndexMap, P.second.unionize(); } - HexagonBlockRanges::RegToRangeMap HexagonBlockRanges::computeLiveMap( InstrIndexMap &IndexMap) { RegToRangeMap LiveMap; - DEBUG(dbgs() << LLVM_FUNCTION_NAME << ": index map\n" << IndexMap << '\n'); + DEBUG(dbgs() << __func__ << ": index map\n" << IndexMap << '\n'); computeInitialLiveRanges(IndexMap, LiveMap); - DEBUG(dbgs() << LLVM_FUNCTION_NAME << ": live map\n" + DEBUG(dbgs() << __func__ << ": live map\n" << PrintRangeMap(LiveMap, TRI) << '\n'); return LiveMap; } - HexagonBlockRanges::RegToRangeMap HexagonBlockRanges::computeDeadMap( InstrIndexMap &IndexMap, RegToRangeMap &LiveMap) { RegToRangeMap DeadMap; @@ -432,7 +432,7 @@ HexagonBlockRanges::RegToRangeMap HexagonBlockRanges::computeDeadMap( if (TargetRegisterInfo::isVirtualRegister(P.first.Reg)) addDeadRanges(P.first); - DEBUG(dbgs() << LLVM_FUNCTION_NAME << ": dead map\n" + DEBUG(dbgs() << __func__ << ": dead map\n" << PrintRangeMap(DeadMap, TRI) << '\n'); return DeadMap; } diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonBlockRanges.h b/contrib/llvm/lib/Target/Hexagon/HexagonBlockRanges.h index 9c3f938..7174803 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonBlockRanges.h +++ b/contrib/llvm/lib/Target/Hexagon/HexagonBlockRanges.h @@ -1,4 +1,4 @@ -//===--- HexagonBlockRanges.h ---------------------------------------------===// +//===--- HexagonBlockRanges.h -----------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -11,23 +11,21 @@ #include "llvm/ADT/BitVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/MC/MCRegisterInfo.h" // For MCPhysReg. 
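The getLiveIns rewrite above filters partially live registers by lane mask: a live-in with a full mask is kept whole, otherwise only the subregisters whose lanes intersect the mask are kept. A stripped-down model of the overlap test, with a stand-in mask type (LaneMask here is hypothetical, mirroring llvm::LaneBitmask):

    #include <cstdint>

    // A subregister is treated as live-in iff the lanes it covers
    // intersect the block's live-in lane mask for the super-register.
    struct LaneMask {
      uint64_t Bits;
      bool all() const { return Bits == ~uint64_t(0); }
      bool any() const { return Bits != 0; }
      LaneMask operator&(LaneMask O) const { return LaneMask{Bits & O.Bits}; }
    };

    static bool subRegLiveIn(LaneMask SubRegLanes, LaneMask LiveIn) {
      return (SubRegLanes & LiveIn).any(); // mirrors (M & I.LaneMask).any()
    }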
+#include <cassert> #include <map> #include <set> #include <vector> +#include <utility> namespace llvm { - class Function; - class HexagonSubtarget; - class MachineBasicBlock; - class MachineFunction; - class MachineInstr; - class MCInstrDesc; - class raw_ostream; - class TargetInstrInfo; - class TargetRegisterClass; - class TargetRegisterInfo; - class Type; + +class HexagonSubtarget; +class MachineBasicBlock; +class MachineFunction; +class MachineInstr; +class raw_ostream; +class TargetInstrInfo; +class TargetRegisterInfo; struct HexagonBlockRanges { HexagonBlockRanges(MachineFunction &MF); @@ -50,10 +48,12 @@ struct HexagonBlockRanges { Exit = 2, First = 11 // 10th + 1st }; - static bool isInstr(IndexType X) { return X.Index >= First; } IndexType() : Index(None) {} IndexType(unsigned Idx) : Index(Idx) {} + + static bool isInstr(IndexType X) { return X.Index >= First; } + operator unsigned() const; bool operator== (unsigned x) const; bool operator== (IndexType Idx) const; @@ -76,21 +76,23 @@ struct HexagonBlockRanges { // register is dead. class IndexRange : public std::pair<IndexType,IndexType> { public: - IndexRange() : Fixed(false), TiedEnd(false) {} + IndexRange() = default; IndexRange(IndexType Start, IndexType End, bool F = false, bool T = false) : std::pair<IndexType,IndexType>(Start, End), Fixed(F), TiedEnd(T) {} + IndexType start() const { return first; } IndexType end() const { return second; } bool operator< (const IndexRange &A) const { return start() < A.start(); } + bool overlaps(const IndexRange &A) const; bool contains(const IndexRange &A) const; void merge(const IndexRange &A); - bool Fixed; // Can be renamed? "Fixed" means "no". - bool TiedEnd; // The end is not a use, but a dead def tied to a use. + bool Fixed = false; // Can be renamed? "Fixed" means "no". + bool TiedEnd = false; // The end is not a use, but a dead def tied to a use. 
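This header makes the same modernization as the ExtType change in HexagonBitTracker.h above: member defaults move out of constructor initializer lists into the class body. The pattern in isolation (type names hypothetical):

    // Before: the default constructor repeats every member's default value.
    struct RangeFlagsOld {
      RangeFlagsOld() : Fixed(false), TiedEnd(false) {}
      bool Fixed;
      bool TiedEnd;
    };

    // After: defaults live next to the members, and "= default" keeps the
    // constructor trivial while avoiding repetition as members are added.
    struct RangeFlagsNew {
      RangeFlagsNew() = default;
      bool Fixed = false;
      bool TiedEnd = false;
    };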
private: void setStart(const IndexType &S) { first = S; } @@ -107,6 +109,7 @@ struct HexagonBlockRanges { void add(const IndexRange &Range) { push_back(Range); } + void include(const RangeList &RL); void unionize(bool MergeAdjacent = false); void subtract(const IndexRange &Range); @@ -118,6 +121,7 @@ struct HexagonBlockRanges { class InstrIndexMap { public: InstrIndexMap(MachineBasicBlock &B); + MachineInstr *getInstr(IndexType Idx) const; IndexType getIndex(MachineInstr *MI) const; MachineBasicBlock &getBlock() const { return Block; } @@ -126,6 +130,7 @@ struct HexagonBlockRanges { void replaceInstr(MachineInstr *OldMI, MachineInstr *NewMI); friend raw_ostream &operator<< (raw_ostream &OS, const InstrIndexMap &Map); + IndexType First, Last; private: @@ -144,13 +149,15 @@ struct HexagonBlockRanges { : Map(M), TRI(I) {} friend raw_ostream &operator<< (raw_ostream &OS, const PrintRangeMap &P); + private: const RegToRangeMap ⤅ const TargetRegisterInfo &TRI; }; private: - RegisterSet getLiveIns(const MachineBasicBlock &B); + RegisterSet getLiveIns(const MachineBasicBlock &B, + const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI); void computeInitialLiveRanges(InstrIndexMap &IndexMap, RegToRangeMap &LiveMap); @@ -162,7 +169,6 @@ private: BitVector Reserved; }; - inline HexagonBlockRanges::IndexType::operator unsigned() const { assert(Index >= First); return Index; @@ -223,7 +229,6 @@ inline bool HexagonBlockRanges::IndexType::operator<= (IndexType Idx) const { return operator==(Idx) || operator<(Idx); } - raw_ostream &operator<< (raw_ostream &OS, HexagonBlockRanges::IndexType Idx); raw_ostream &operator<< (raw_ostream &OS, const HexagonBlockRanges::IndexRange &IR); @@ -234,6 +239,6 @@ raw_ostream &operator<< (raw_ostream &OS, raw_ostream &operator<< (raw_ostream &OS, const HexagonBlockRanges::PrintRangeMap &P); -} // namespace llvm +} // end namespace llvm -#endif +#endif // HEXAGON_BLOCK_RANGES_H diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonBranchRelaxation.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonBranchRelaxation.cpp index f042baf..84af4b1 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonBranchRelaxation.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonBranchRelaxation.cpp @@ -12,15 +12,23 @@ #include "Hexagon.h" #include "HexagonInstrInfo.h" #include "HexagonSubtarget.h" -#include "HexagonTargetMachine.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/PassSupport.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include <cassert> +#include <cstdint> +#include <cstdlib> +#include <iterator> using namespace llvm; @@ -30,21 +38,25 @@ static cl::opt<uint32_t> BranchRelaxSafetyBuffer("branch-relax-safety-buffer", cl::init(200), cl::Hidden, cl::ZeroOrMore, cl::desc("safety buffer size")); namespace llvm { + FunctionPass *createHexagonBranchRelaxation(); void initializeHexagonBranchRelaxationPass(PassRegistry&); -} + +} // end namespace llvm namespace { + struct HexagonBranchRelaxation : public MachineFunctionPass { public: static char ID; + HexagonBranchRelaxation() : MachineFunctionPass(ID) { initializeHexagonBranchRelaxationPass(*PassRegistry::getPassRegistry()); } bool 
runOnMachineFunction(MachineFunction &MF) override; - const char *getPassName() const override { + StringRef getPassName() const override { return "Hexagon Branch Relaxation"; } @@ -67,6 +79,7 @@ namespace { }; char HexagonBranchRelaxation::ID = 0; + } // end anonymous namespace INITIALIZE_PASS(HexagonBranchRelaxation, "hexagon-brelax", @@ -76,7 +89,6 @@ FunctionPass *llvm::createHexagonBranchRelaxation() { return new HexagonBranchRelaxation(); } - bool HexagonBranchRelaxation::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "****** Hexagon Branch Relaxation ******\n"); @@ -89,7 +101,6 @@ bool HexagonBranchRelaxation::runOnMachineFunction(MachineFunction &MF) { return Changed; } - void HexagonBranchRelaxation::computeOffset(MachineFunction &MF, DenseMap<MachineBasicBlock*, unsigned> &OffsetMap) { // offset of the current instruction from the start. @@ -104,11 +115,10 @@ void HexagonBranchRelaxation::computeOffset(MachineFunction &MF, } OffsetMap[&B] = InstOffset; for (auto &MI : B.instrs()) - InstOffset += HII->getSize(&MI); + InstOffset += HII->getSize(MI); } } - /// relaxBranches - For Hexagon, if the jump target/loop label is too far from /// the jump/loop instruction then, we need to make sure that we have constant /// extenders set for jumps and loops. @@ -124,7 +134,6 @@ bool HexagonBranchRelaxation::relaxBranches(MachineFunction &MF) { return reGenerateBranch(MF, BlockToInstOffset); } - /// Check if a given instruction is: /// - a jump to a distant target /// - that exceeds its immediate range @@ -144,7 +153,7 @@ bool HexagonBranchRelaxation::isJumpOutOfRange(MachineInstr &MI, // Number of instructions times typical instruction size. InstOffset += HII->nonDbgBBSize(&B) * HEXAGON_INSTR_SIZE; - MachineBasicBlock *TBB = NULL, *FBB = NULL; + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector<MachineOperand, 4> Cond; // Try to analyze this branch. @@ -152,13 +161,13 @@ bool HexagonBranchRelaxation::isJumpOutOfRange(MachineInstr &MI, // Could not analyze it. See if this is something we can recognize. // If it is a NVJ, it should always have its target in // a fixed location. - if (HII->isNewValueJump(&*FirstTerm)) - TBB = FirstTerm->getOperand(HII->getCExtOpNum(&*FirstTerm)).getMBB(); + if (HII->isNewValueJump(*FirstTerm)) + TBB = FirstTerm->getOperand(HII->getCExtOpNum(*FirstTerm)).getMBB(); } if (TBB && &MI == &*FirstTerm) { Distance = std::abs((long long)InstOffset - BlockToInstOffset[TBB]) + BranchRelaxSafetyBuffer; - return !HII->isJumpWithinBranchRange(&*FirstTerm, Distance); + return !HII->isJumpWithinBranchRange(*FirstTerm, Distance); } if (FBB) { // Look for second terminator. @@ -171,12 +180,11 @@ bool HexagonBranchRelaxation::isJumpOutOfRange(MachineInstr &MI, // Analyze the second branch in the BB. Distance = std::abs((long long)InstOffset - BlockToInstOffset[FBB]) + BranchRelaxSafetyBuffer; - return !HII->isJumpWithinBranchRange(&*SecondTerm, Distance); + return !HII->isJumpWithinBranchRange(*SecondTerm, Distance); } return false; } - bool HexagonBranchRelaxation::reGenerateBranch(MachineFunction &MF, DenseMap<MachineBasicBlock*, unsigned> &BlockToInstOffset) { bool Changed = false; @@ -186,16 +194,16 @@ bool HexagonBranchRelaxation::reGenerateBranch(MachineFunction &MF, if (!MI.isBranch() || !isJumpOutOfRange(MI, BlockToInstOffset)) continue; DEBUG(dbgs() << "Long distance jump. 
isExtendable(" - << HII->isExtendable(&MI) << ") isConstExtended(" - << HII->isConstExtended(&MI) << ") " << MI); + << HII->isExtendable(MI) << ") isConstExtended(" + << HII->isConstExtended(MI) << ") " << MI); // Since we have not merged HW loops relaxation into // this code (yet), soften our approach for the moment. - if (!HII->isExtendable(&MI) && !HII->isExtended(&MI)) { + if (!HII->isExtendable(MI) && !HII->isExtended(MI)) { DEBUG(dbgs() << "\tUnderimplemented relax branch instruction.\n"); } else { // Find which operand is expandable. - int ExtOpNum = HII->getCExtOpNum(&MI); + int ExtOpNum = HII->getCExtOpNum(MI); MachineOperand &MO = MI.getOperand(ExtOpNum); // This need to be something we understand. So far we assume all // branches have only MBB address as expandable field. diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp index 559bdfb..2f8fe6e 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp @@ -45,13 +45,11 @@ public: initializeHexagonCFGOptimizerPass(*PassRegistry::getPassRegistry()); } - const char *getPassName() const override { - return "Hexagon CFG Optimizer"; - } + StringRef getPassName() const override { return "Hexagon CFG Optimizer"; } bool runOnMachineFunction(MachineFunction &Fn) override; MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( - MachineFunctionProperties::Property::AllVRegsAllocated); + MachineFunctionProperties::Property::NoVRegs); } }; @@ -59,8 +57,18 @@ public: char HexagonCFGOptimizer::ID = 0; static bool IsConditionalBranch(int Opc) { - return (Opc == Hexagon::J2_jumpt) || (Opc == Hexagon::J2_jumpf) - || (Opc == Hexagon::J2_jumptnewpt) || (Opc == Hexagon::J2_jumpfnewpt); + switch (Opc) { + case Hexagon::J2_jumpt: + case Hexagon::J2_jumptpt: + case Hexagon::J2_jumpf: + case Hexagon::J2_jumpfpt: + case Hexagon::J2_jumptnew: + case Hexagon::J2_jumpfnew: + case Hexagon::J2_jumptnewpt: + case Hexagon::J2_jumpfnewpt: + return true; + } + return false; } diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp index b612b11..489da6b 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp @@ -9,30 +9,43 @@ #define DEBUG_TYPE "commgep" -#include "llvm/Pass.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/PostDominators.h" -#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" #include "llvm/IR/Verifier.h" +#include "llvm/Pass.h" #include "llvm/Support/Allocator.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" - +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <iterator> 
#include <map> #include <set> +#include <utility> #include <vector> -#include "HexagonTargetMachine.h" - using namespace llvm; static cl::opt<bool> OptSpeculate("commgep-speculate", cl::init(true), @@ -45,10 +58,13 @@ static cl::opt<bool> OptEnableConst("commgep-const", cl::init(true), cl::Hidden, cl::ZeroOrMore); namespace llvm { + void initializeHexagonCommonGEPPass(PassRegistry&); -} + +} // end namespace llvm namespace { + struct GepNode; typedef std::set<GepNode*> NodeSet; typedef std::map<GepNode*,Value*> NodeToValueMap; @@ -60,7 +76,7 @@ namespace { // Numbering map for gep nodes. Used to keep track of ordering for // gep nodes. struct NodeOrdering { - NodeOrdering() : LastNum(0) {} + NodeOrdering() = default; void insert(const GepNode *N) { Map.insert(std::make_pair(N, ++LastNum)); } void clear() { Map.clear(); } @@ -73,21 +89,21 @@ namespace { private: std::map<const GepNode *, unsigned> Map; - unsigned LastNum; + unsigned LastNum = 0; }; class HexagonCommonGEP : public FunctionPass { public: static char ID; + HexagonCommonGEP() : FunctionPass(ID) { initializeHexagonCommonGEPPass(*PassRegistry::getPassRegistry()); } - virtual bool runOnFunction(Function &F); - virtual const char *getPassName() const { - return "Hexagon Common GEP"; - } - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + bool runOnFunction(Function &F) override; + StringRef getPassName() const override { return "Hexagon Common GEP"; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<DominatorTreeWrapperPass>(); AU.addPreserved<DominatorTreeWrapperPass>(); AU.addRequired<PostDominatorTreeWrapperPass>(); @@ -140,8 +156,8 @@ namespace { PostDominatorTree *PDT; Function *Fn; }; -} +} // end anonymous namespace char HexagonCommonGEP::ID = 0; INITIALIZE_PASS_BEGIN(HexagonCommonGEP, "hcommgep", "Hexagon Common GEP", @@ -153,6 +169,7 @@ INITIALIZE_PASS_END(HexagonCommonGEP, "hcommgep", "Hexagon Common GEP", false, false) namespace { + struct GepNode { enum { None = 0, @@ -169,18 +186,20 @@ namespace { Value *Idx; Type *PTy; // Type of the pointer operand. - GepNode() : Flags(0), Parent(0), Idx(0), PTy(0) {} + GepNode() : Flags(0), Parent(nullptr), Idx(nullptr), PTy(nullptr) {} GepNode(const GepNode *N) : Flags(N->Flags), Idx(N->Idx), PTy(N->PTy) { if (Flags & Root) BaseVal = N->BaseVal; else Parent = N->Parent; } + friend raw_ostream &operator<< (raw_ostream &OS, const GepNode &GN); }; - Type *next_type(Type *Ty, Value *Idx) { + if (auto *PTy = dyn_cast<PointerType>(Ty)) + return PTy->getElementType(); // Advance the type. 
if (!Ty->isStructTy()) { Type *NexTy = cast<SequentialType>(Ty)->getElementType(); @@ -194,7 +213,6 @@ namespace { return NextTy; } - raw_ostream &operator<< (raw_ostream &OS, const GepNode &GN) { OS << "{ {"; bool Comma = false; @@ -241,7 +259,6 @@ namespace { return OS; } - template <typename NodeContainer> void dump_node_container(raw_ostream &OS, const NodeContainer &S) { typedef typename NodeContainer::const_iterator const_iterator; @@ -256,7 +273,6 @@ namespace { return OS; } - raw_ostream &operator<< (raw_ostream &OS, const NodeToUsesMap &M) LLVM_ATTRIBUTE_UNUSED; raw_ostream &operator<< (raw_ostream &OS, const NodeToUsesMap &M){ @@ -276,23 +292,22 @@ namespace { return OS; } - struct in_set { in_set(const NodeSet &S) : NS(S) {} bool operator() (GepNode *N) const { return NS.find(N) != NS.end(); } + private: const NodeSet &NS; }; -} +} // end anonymous namespace inline void *operator new(size_t, SpecificBumpPtrAllocator<GepNode> &A) { return A.Allocate(); } - void HexagonCommonGEP::getBlockTraversalOrder(BasicBlock *Root, ValueVect &Order) { // Compute block ordering for a typical DT-based traversal of the flow @@ -307,7 +322,6 @@ void HexagonCommonGEP::getBlockTraversalOrder(BasicBlock *Root, getBlockTraversalOrder((*I)->getBlock(), Order); } - bool HexagonCommonGEP::isHandledGepForm(GetElementPtrInst *GepI) { // No vector GEPs. if (!GepI->getType()->isPointerTy()) @@ -318,7 +332,6 @@ bool HexagonCommonGEP::isHandledGepForm(GetElementPtrInst *GepI) { return true; } - void HexagonCommonGEP::processGepInst(GetElementPtrInst *GepI, ValueToNodeMap &NM) { DEBUG(dbgs() << "Visiting GEP: " << *GepI << '\n'); @@ -384,7 +397,6 @@ void HexagonCommonGEP::processGepInst(GetElementPtrInst *GepI, NM.insert(std::make_pair(GepI, PN)); } - void HexagonCommonGEP::collect() { // Establish depth-first traversal order of the dominator tree. ValueVect BO; @@ -408,10 +420,8 @@ void HexagonCommonGEP::collect() { DEBUG(dbgs() << "Gep nodes after initial collection:\n" << Nodes); } - -namespace { - void invert_find_roots(const NodeVect &Nodes, NodeChildrenMap &NCM, - NodeVect &Roots) { +static void invert_find_roots(const NodeVect &Nodes, NodeChildrenMap &NCM, + NodeVect &Roots) { typedef NodeVect::const_iterator const_iterator; for (const_iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) { GepNode *N = *I; @@ -422,9 +432,10 @@ namespace { GepNode *PN = N->Parent; NCM[PN].push_back(N); } - } +} - void nodes_for_root(GepNode *Root, NodeChildrenMap &NCM, NodeSet &Nodes) { +static void nodes_for_root(GepNode *Root, NodeChildrenMap &NCM, + NodeSet &Nodes) { NodeVect Work; Work.push_back(Root); Nodes.insert(Root); @@ -439,41 +450,43 @@ namespace { Nodes.insert(CF->second.begin(), CF->second.end()); } } - } } - namespace { + typedef std::set<NodeSet> NodeSymRel; typedef std::pair<GepNode*,GepNode*> NodePair; typedef std::set<NodePair> NodePairSet; - const NodeSet *node_class(GepNode *N, NodeSymRel &Rel) { +} // end anonymous namespace + +static const NodeSet *node_class(GepNode *N, NodeSymRel &Rel) { for (NodeSymRel::iterator I = Rel.begin(), E = Rel.end(); I != E; ++I) if (I->count(N)) return &*I; - return 0; - } + return nullptr; +} // Create an ordered pair of GepNode pointers. The pair will be used in // determining equality. The only purpose of the ordering is to eliminate // duplication due to the commutativity of equality/non-equality. 
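The node_pair helper that follows canonicalizes operand order by address, so a pair and its swap produce the same key. A usage sketch (the wrapper function is hypothetical):

    // Both call orders produce the identical key, so lookups in the Eq/Ne
    // caches never depend on which operand happened to come first.
    static bool cachedEitherOrder(const NodePairSet &Eq,
                                  GepNode *A, GepNode *B) {
      return Eq.count(node_pair(A, B)) != 0; // == Eq.count(node_pair(B, A))
    }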
- NodePair node_pair(GepNode *N1, GepNode *N2) { +static NodePair node_pair(GepNode *N1, GepNode *N2) { uintptr_t P1 = uintptr_t(N1), P2 = uintptr_t(N2); if (P1 <= P2) return std::make_pair(N1, N2); return std::make_pair(N2, N1); - } +} - unsigned node_hash(GepNode *N) { +static unsigned node_hash(GepNode *N) { // Include everything except flags and parent. FoldingSetNodeID ID; ID.AddPointer(N->Idx); ID.AddPointer(N->PTy); return ID.ComputeHash(); - } +} - bool node_eq(GepNode *N1, GepNode *N2, NodePairSet &Eq, NodePairSet &Ne) { +static bool node_eq(GepNode *N1, GepNode *N2, NodePairSet &Eq, + NodePairSet &Ne) { // Don't cache the result for nodes with different hashes. The hash // comparison is fast enough. if (node_hash(N1) != node_hash(N2)) @@ -505,10 +518,8 @@ namespace { return true; } return false; - } } - void HexagonCommonGEP::common() { // The essence of this commoning is finding gep nodes that are equal. // To do this we need to compare all pairs of nodes. To save time, @@ -572,7 +583,6 @@ void HexagonCommonGEP::common() { } }); - // Create a projection from a NodeSet to the minimal element in it. typedef std::map<const NodeSet*,GepNode*> ProjMap; ProjMap PM; @@ -639,17 +649,14 @@ void HexagonCommonGEP::common() { // Node for removal. Erase.insert(*I); } - NodeVect::iterator NewE = std::remove_if(Nodes.begin(), Nodes.end(), - in_set(Erase)); + NodeVect::iterator NewE = remove_if(Nodes, in_set(Erase)); Nodes.resize(std::distance(Nodes.begin(), NewE)); DEBUG(dbgs() << "Gep nodes after post-commoning cleanup:\n" << Nodes); } - -namespace { - template <typename T> - BasicBlock *nearest_common_dominator(DominatorTree *DT, T &Blocks) { +template <typename T> +static BasicBlock *nearest_common_dominator(DominatorTree *DT, T &Blocks) { DEBUG({ dbgs() << "NCD of {"; for (typename T::iterator I = Blocks.begin(), E = Blocks.end(); @@ -662,23 +669,23 @@ namespace { dbgs() << " }\n"; }); - // Allow null basic blocks in Blocks. In such cases, return 0. + // Allow null basic blocks in Blocks. In such cases, return nullptr. typename T::iterator I = Blocks.begin(), E = Blocks.end(); if (I == E || !*I) - return 0; + return nullptr; BasicBlock *Dom = cast<BasicBlock>(*I); while (++I != E) { BasicBlock *B = cast_or_null<BasicBlock>(*I); - Dom = B ? DT->findNearestCommonDominator(Dom, B) : 0; + Dom = B ? DT->findNearestCommonDominator(Dom, B) : nullptr; if (!Dom) - return 0; + return nullptr; } DEBUG(dbgs() << "computed:" << Dom->getName() << '\n'); return Dom; - } +} - template <typename T> - BasicBlock *nearest_common_dominatee(DominatorTree *DT, T &Blocks) { +template <typename T> +static BasicBlock *nearest_common_dominatee(DominatorTree *DT, T &Blocks) { // If two blocks, A and B, dominate a block C, then A dominates B, // or B dominates A. typename T::iterator I = Blocks.begin(), E = Blocks.end(); @@ -695,16 +702,16 @@ namespace { if (DT->dominates(B, DomB)) continue; if (!DT->dominates(DomB, B)) - return 0; + return nullptr; DomB = B; } return DomB; - } +} - // Find the first use in B of any value from Values. If no such use, - // return B->end(). - template <typename T> - BasicBlock::iterator first_use_of_in_block(T &Values, BasicBlock *B) { +// Find the first use in B of any value from Values. If no such use, +// return B->end(). 
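A small worked example of the dominator helpers above: for a diamond CFG entry -> {L, R} -> exit (block names hypothetical), nearest_common_dominator over {L, R} returns entry, and a null element anywhere in the container collapses the result to nullptr. A usage sketch:

    // Choose a hoisting point that dominates every block in UserBlocks,
    // or give up (nullptr) when no single block does.
    static BasicBlock *hoistPointFor(DominatorTree *DT,
                                     std::vector<BasicBlock*> &UserBlocks) {
      return nearest_common_dominator(DT, UserBlocks);
    }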
+template <typename T> +static BasicBlock::iterator first_use_of_in_block(T &Values, BasicBlock *B) { BasicBlock::iterator FirstUse = B->end(), BEnd = B->end(); typedef typename T::iterator iterator; for (iterator I = Values.begin(), E = Values.end(); I != E; ++I) { @@ -726,20 +733,18 @@ namespace { FirstUse = It; } return FirstUse; - } +} - bool is_empty(const BasicBlock *B) { +static bool is_empty(const BasicBlock *B) { return B->empty() || (&*B->begin() == B->getTerminator()); - } } - BasicBlock *HexagonCommonGEP::recalculatePlacement(GepNode *Node, NodeChildrenMap &NCM, NodeToValueMap &Loc) { DEBUG(dbgs() << "Loc for node:" << Node << '\n'); // Recalculate the placement for Node, assuming that the locations of // its children in Loc are valid. - // Return 0 if there is no valid placement for Node (for example, it + // Return nullptr if there is no valid placement for Node (for example, it // uses an index value that is not available at the location required // to dominate all children, etc.). @@ -782,11 +787,11 @@ BasicBlock *HexagonCommonGEP::recalculatePlacement(GepNode *Node, BasicBlock *DomB = nearest_common_dominator(DT, Bs); if (!DomB) - return 0; + return nullptr; // Check if the index used by Node dominates the computed dominator. Instruction *IdxI = dyn_cast<Instruction>(Node->Idx); if (IdxI && !DT->dominates(IdxI->getParent(), DomB)) - return 0; + return nullptr; // Avoid putting nodes into empty blocks. while (is_empty(DomB)) { @@ -801,7 +806,6 @@ BasicBlock *HexagonCommonGEP::recalculatePlacement(GepNode *Node, return DomB; } - BasicBlock *HexagonCommonGEP::recalculatePlacementRec(GepNode *Node, NodeChildrenMap &NCM, NodeToValueMap &Loc) { DEBUG(dbgs() << "LocRec begin for node:" << Node << '\n'); @@ -818,7 +822,6 @@ BasicBlock *HexagonCommonGEP::recalculatePlacementRec(GepNode *Node, return LB; } - bool HexagonCommonGEP::isInvariantIn(Value *Val, Loop *L) { if (isa<Constant>(Val) || isa<Argument>(Val)) return true; @@ -829,7 +832,6 @@ bool HexagonCommonGEP::isInvariantIn(Value *Val, Loop *L) { return DT->properlyDominates(DefB, HdrB); } - bool HexagonCommonGEP::isInvariantIn(GepNode *Node, Loop *L) { if (Node->Flags & GepNode::Root) if (!isInvariantIn(Node->BaseVal, L)) @@ -837,7 +839,6 @@ bool HexagonCommonGEP::isInvariantIn(GepNode *Node, Loop *L) { return isInvariantIn(Node->Idx, L); } - bool HexagonCommonGEP::isInMainPath(BasicBlock *B, Loop *L) { BasicBlock *HB = L->getHeader(); BasicBlock *LB = L->getLoopLatch(); @@ -849,21 +850,17 @@ bool HexagonCommonGEP::isInMainPath(BasicBlock *B, Loop *L) { return false; } - -namespace { - BasicBlock *preheader(DominatorTree *DT, Loop *L) { - if (BasicBlock *PH = L->getLoopPreheader()) - return PH; - if (!OptSpeculate) - return 0; - DomTreeNode *DN = DT->getNode(L->getHeader()); - if (!DN) - return 0; - return DN->getIDom()->getBlock(); - } +static BasicBlock *preheader(DominatorTree *DT, Loop *L) { + if (BasicBlock *PH = L->getLoopPreheader()) + return PH; + if (!OptSpeculate) + return nullptr; + DomTreeNode *DN = DT->getNode(L->getHeader()); + if (!DN) + return nullptr; + return DN->getIDom()->getBlock(); } - BasicBlock *HexagonCommonGEP::adjustForInvariance(GepNode *Node, NodeChildrenMap &NCM, NodeToValueMap &Loc) { // Find the "topmost" location for Node: it must be dominated by both, @@ -913,10 +910,11 @@ BasicBlock *HexagonCommonGEP::adjustForInvariance(GepNode *Node, return LocB; } - namespace { + struct LocationAsBlock { LocationAsBlock(const NodeToValueMap &L) : Map(L) {} + const NodeToValueMap ⤅ }; @@ -936,8 +934,8 @@ 
namespace { inline bool is_constant(GepNode *N) { return isa<ConstantInt>(N->Idx); } -} +} // end anonymous namespace void HexagonCommonGEP::separateChainForNode(GepNode *Node, Use *U, NodeToValueMap &Loc) { @@ -947,7 +945,7 @@ void HexagonCommonGEP::separateChainForNode(GepNode *Node, Use *U, BasicBlock *PB = cast<Instruction>(R)->getParent(); GepNode *N = Node; - GepNode *C = 0, *NewNode = 0; + GepNode *C = nullptr, *NewNode = nullptr; while (is_constant(N) && !(N->Flags & GepNode::Root)) { // XXX if (single-use) dont-replicate; GepNode *NewN = new (*Mem) GepNode(N); @@ -991,7 +989,6 @@ void HexagonCommonGEP::separateChainForNode(GepNode *Node, Use *U, Uses[NewNode] = NewUs; } - void HexagonCommonGEP::separateConstantChains(GepNode *Node, NodeChildrenMap &NCM, NodeToValueMap &Loc) { // First approximation: extract all chains. @@ -1045,7 +1042,6 @@ void HexagonCommonGEP::separateConstantChains(GepNode *Node, } } - void HexagonCommonGEP::computeNodePlacement(NodeToValueMap &Loc) { // Compute the inverse of the Node.Parent links. Also, collect the set // of root nodes. @@ -1080,7 +1076,6 @@ void HexagonCommonGEP::computeNodePlacement(NodeToValueMap &Loc) { DEBUG(dbgs() << "Final node placement:\n" << LocationAsBlock(Loc)); } - Value *HexagonCommonGEP::fabricateGEP(NodeVect &NA, BasicBlock::iterator At, BasicBlock *LocB) { DEBUG(dbgs() << "Fabricating GEP in " << LocB->getName() @@ -1089,7 +1084,7 @@ Value *HexagonCommonGEP::fabricateGEP(NodeVect &NA, BasicBlock::iterator At, GepNode *RN = NA[0]; assert((RN->Flags & GepNode::Root) && "Creating GEP for non-root"); - Value *NewInst = 0; + Value *NewInst = nullptr; Value *Input = RN->BaseVal; Value **IdxList = new Value*[Num+1]; unsigned nax = 0; @@ -1128,7 +1123,6 @@ Value *HexagonCommonGEP::fabricateGEP(NodeVect &NA, BasicBlock::iterator At, return NewInst; } - void HexagonCommonGEP::getAllUsersForNode(GepNode *Node, ValueVect &Values, NodeChildrenMap &NCM) { NodeVect Work; @@ -1153,7 +1147,6 @@ void HexagonCommonGEP::getAllUsersForNode(GepNode *Node, ValueVect &Values, } } - void HexagonCommonGEP::materialize(NodeToValueMap &Loc) { DEBUG(dbgs() << "Nodes before materialization:\n" << Nodes << '\n'); NodeChildrenMap NCM; @@ -1192,7 +1185,7 @@ void HexagonCommonGEP::materialize(NodeToValueMap &Loc) { break; GepNode *Child = CF->second.front(); BasicBlock *ChildB = cast_or_null<BasicBlock>(Loc[Child]); - if (ChildB != 0 && LastB != ChildB) + if (ChildB != nullptr && LastB != ChildB) break; Last = Child; } while (true); @@ -1236,7 +1229,6 @@ void HexagonCommonGEP::materialize(NodeToValueMap &Loc) { } } - void HexagonCommonGEP::removeDeadCode() { ValueVect BO; BO.push_back(&Fn->front()); @@ -1265,7 +1257,6 @@ void HexagonCommonGEP::removeDeadCode() { } } - bool HexagonCommonGEP::runOnFunction(Function &F) { if (skipFunction(F)) return false; @@ -1304,9 +1295,10 @@ bool HexagonCommonGEP::runOnFunction(Function &F) { return true; } - namespace llvm { + FunctionPass *createHexagonCommonGEP() { return new HexagonCommonGEP(); } -} + +} // end namespace llvm diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp new file mode 100644 index 0000000..783b916 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp @@ -0,0 +1,3149 @@ +//===--- HexagonConstPropagation.cpp --------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "hcp" + +#include "HexagonInstrInfo.h" +#include "HexagonRegisterInfo.h" +#include "HexagonSubtarget.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include <cassert> +#include <cstdint> +#include <cstring> +#include <iterator> +#include <map> +#include <queue> +#include <set> +#include <utility> +#include <vector> + +using namespace llvm; + +namespace { + + // Properties of a value that are tracked by the propagation. + // A property that is marked as present (i.e. bit is set) denotes that the + // value is known (proven) to have this property. Not all combinations + // of bits make sense, for example Zero and NonZero are mutually exclusive, + // but on the other hand, Zero implies Finite. In this case, whenever + // the Zero property is present, Finite should also be present. + class ConstantProperties { + public: + enum { + Unknown = 0x0000, + Zero = 0x0001, + NonZero = 0x0002, + Finite = 0x0004, + Infinity = 0x0008, + NaN = 0x0010, + SignedZero = 0x0020, + NumericProperties = (Zero|NonZero|Finite|Infinity|NaN|SignedZero), + PosOrZero = 0x0100, + NegOrZero = 0x0200, + SignProperties = (PosOrZero|NegOrZero), + Everything = (NumericProperties|SignProperties) + }; + + // For a given constant, deduce the set of trackable properties that this + // constant has. + static uint32_t deduce(const Constant *C); + }; + + // A representation of a register as it can appear in a MachineOperand, + // i.e. a pair register:subregister. + struct Register { + unsigned Reg, SubReg; + + explicit Register(unsigned R, unsigned SR = 0) : Reg(R), SubReg(SR) {} + explicit Register(const MachineOperand &MO) + : Reg(MO.getReg()), SubReg(MO.getSubReg()) {} + + void print(const TargetRegisterInfo *TRI = nullptr) const { + dbgs() << PrintReg(Reg, TRI, SubReg); + } + + bool operator== (const Register &R) const { + return (Reg == R.Reg) && (SubReg == R.SubReg); + } + }; + + // Lattice cell, based on the one described in the W-Z paper on constant + // propagation. + // A lattice cell will be allowed to hold multiple constant values. While + // multiple values would normally indicate "bottom", we can still derive + // some useful information from them. For example, comparison X > 0 + // could be folded if all the values in the cell associated with X are + // positive.
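To make the cell semantics concrete before the LatticeCell class that follows, a hedged usage sketch (the i32 constants stand in for values met during propagation):

    static void cellLifecycle(LLVMContext &CX) {
      IntegerType *I32 = Type::getInt32Ty(CX);
      LatticeCell C;                      // starts out as "top"
      C.add(ConstantInt::get(I32, 1));    // { 1 }
      C.add(ConstantInt::get(I32, 7));    // { 1, 7 }
      // A fifth distinct constant would overflow MaxCellSize and convert C
      // into a property cell; here every value is nonzero, finite and
      // positive, so a comparison X > 0 against C could still be folded.
    }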
+ class LatticeCell { + private: + enum { Normal, Top, Bottom }; + + static const unsigned MaxCellSize = 4; + + unsigned Kind:2; + unsigned Size:3; + unsigned IsSpecial:1; + unsigned :0; + + public: + union { + uint32_t Properties; + const Constant *Value; + const Constant *Values[MaxCellSize]; + }; + + LatticeCell() : Kind(Top), Size(0), IsSpecial(false) { + for (unsigned i = 0; i < MaxCellSize; ++i) + Values[i] = nullptr; + } + + bool meet(const LatticeCell &L); + bool add(const Constant *C); + bool add(uint32_t Property); + uint32_t properties() const; + unsigned size() const { return Size; } + + LatticeCell &operator= (const LatticeCell &L) { + if (this != &L) { + // This memcpy also copies Properties (when L.Size == 0). + uint32_t N = L.IsSpecial ? sizeof L.Properties + : L.Size*sizeof(const Constant*); + memcpy(Values, L.Values, N); + Kind = L.Kind; + Size = L.Size; + IsSpecial = L.IsSpecial; + } + return *this; + } + + bool isSingle() const { return size() == 1; } + bool isProperty() const { return IsSpecial; } + bool isTop() const { return Kind == Top; } + bool isBottom() const { return Kind == Bottom; } + + bool setBottom() { + bool Changed = (Kind != Bottom); + Kind = Bottom; + Size = 0; + IsSpecial = false; + return Changed; + } + + void print(raw_ostream &os) const; + + private: + void setProperty() { + IsSpecial = true; + Size = 0; + Kind = Normal; + } + + bool convertToProperty(); + }; + + raw_ostream &operator<< (raw_ostream &os, const LatticeCell &L) { + L.print(os); + return os; + } + + class MachineConstEvaluator; + + class MachineConstPropagator { + public: + MachineConstPropagator(MachineConstEvaluator &E) : MCE(E) { + Bottom.setBottom(); + } + + // Mapping: vreg -> cell + // The keys are registers _without_ subregisters. This won't allow + // definitions in the form of "vreg:subreg<def> = ...". Such definitions + // would be questionable from the point of view of SSA, since the "vreg" + // could not be initialized in its entirety (specifically, an instruction + // defining the "other part" of "vreg" would also count as a definition + // of "vreg", which would violate the SSA). + // If a value of a pair vreg:subreg needs to be obtained, the cell for + // "vreg" needs to be looked up, and then the value of subregister "subreg" + // needs to be evaluated. + class CellMap { + public: + CellMap() { + assert(Top.isTop()); + Bottom.setBottom(); + } + + void clear() { Map.clear(); } + + bool has(unsigned R) const { + // All non-virtual registers are considered "bottom". + if (!TargetRegisterInfo::isVirtualRegister(R)) + return true; + MapType::const_iterator F = Map.find(R); + return F != Map.end(); + } + + const LatticeCell &get(unsigned R) const { + if (!TargetRegisterInfo::isVirtualRegister(R)) + return Bottom; + MapType::const_iterator F = Map.find(R); + if (F != Map.end()) + return F->second; + return Top; + } + + // Invalidates any const references. + void update(unsigned R, const LatticeCell &L) { + Map[R] = L; + } + + void print(raw_ostream &os, const TargetRegisterInfo &TRI) const; + + private: + typedef std::map<unsigned,LatticeCell> MapType; + MapType Map; + // To avoid creating "top" entries, return a const reference to + // this cell in "get". Also, have a "Bottom" cell to return from + // get when a value of a physical register is requested. 
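A brief sketch of the CellMap contract described above: get() never creates entries, an unmapped virtual register reads as "top", and any physical register reads as the shared "bottom" cell (helper name hypothetical):

    static bool readsAsTop(const MachineConstPropagator::CellMap &Cells,
                           unsigned VirtReg) {
      // True for a virtual register that has not been assigned a cell yet.
      return !Cells.has(VirtReg) && Cells.get(VirtReg).isTop();
    }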
+ LatticeCell Top, Bottom; + + public: + typedef MapType::const_iterator const_iterator; + const_iterator begin() const { return Map.begin(); } + const_iterator end() const { return Map.end(); } + }; + + bool run(MachineFunction &MF); + + private: + void visitPHI(const MachineInstr &PN); + void visitNonBranch(const MachineInstr &MI); + void visitBranchesFrom(const MachineInstr &BrI); + void visitUsesOf(unsigned R); + bool computeBlockSuccessors(const MachineBasicBlock *MB, + SetVector<const MachineBasicBlock*> &Targets); + void removeCFGEdge(MachineBasicBlock *From, MachineBasicBlock *To); + + void propagate(MachineFunction &MF); + bool rewrite(MachineFunction &MF); + + MachineRegisterInfo *MRI; + MachineConstEvaluator &MCE; + + typedef std::pair<unsigned,unsigned> CFGEdge; + typedef std::set<CFGEdge> SetOfCFGEdge; + typedef std::set<const MachineInstr*> SetOfInstr; + typedef std::queue<CFGEdge> QueueOfCFGEdge; + + LatticeCell Bottom; + CellMap Cells; + SetOfCFGEdge EdgeExec; + SetOfInstr InstrExec; + QueueOfCFGEdge FlowQ; + }; + + // The "evaluator/rewriter" of machine instructions. This is an abstract + // base class that provides the interface that the propagator will use, + // as well as some helper functions that are target-independent. + class MachineConstEvaluator { + public: + MachineConstEvaluator(MachineFunction &Fn) + : TRI(*Fn.getSubtarget().getRegisterInfo()), + MF(Fn), CX(Fn.getFunction()->getContext()) {} + virtual ~MachineConstEvaluator() = default; + + // The required interface: + // - A set of three "evaluate" functions. Each returns "true" if the + // computation succeeded, "false" otherwise. + // (1) Given an instruction MI, and the map with input values "Inputs", + // compute the set of output values "Outputs". An example of when + // the computation can "fail" is if MI is not an instruction that + // is recognized by the evaluator. + // (2) Given a register R (as reg:subreg), compute the cell that + // corresponds to the "subreg" part of the given register. + // (3) Given a branch instruction BrI, compute the set of target blocks. + // If the branch can fall-through, add null (0) to the list of + // possible targets. + // - A function "rewrite", that given the cell map after propagation, + // could rewrite instruction MI in a more beneficial form. Return + // "true" if a change has been made, "false" otherwise. + typedef MachineConstPropagator::CellMap CellMap; + virtual bool evaluate(const MachineInstr &MI, const CellMap &Inputs, + CellMap &Outputs) = 0; + virtual bool evaluate(const Register &R, const LatticeCell &SrcC, + LatticeCell &Result) = 0; + virtual bool evaluate(const MachineInstr &BrI, const CellMap &Inputs, + SetVector<const MachineBasicBlock*> &Targets, + bool &CanFallThru) = 0; + virtual bool rewrite(MachineInstr &MI, const CellMap &Inputs) = 0; + + const TargetRegisterInfo &TRI; + + protected: + MachineFunction &MF; + LLVMContext &CX; + + struct Comparison { + enum { + Unk = 0x00, + EQ = 0x01, + NE = 0x02, + L = 0x04, // Less-than property. + G = 0x08, // Greater-than property. + U = 0x40, // Unsigned property. + LTs = L, + LEs = L | EQ, + GTs = G, + GEs = G | EQ, + LTu = L | U, + LEu = L | EQ | U, + GTu = G | U, + GEu = G | EQ | U + }; + + static uint32_t negate(uint32_t Cmp) { + if (Cmp == EQ) + return NE; + if (Cmp == NE) + return EQ; + assert((Cmp & (L|G)) != (L|G)); + return Cmp ^ (L|G); + } + }; + + // Helper functions. 
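The comment block above pins down the evaluator interface. A minimal conforming skeleton, purely as a sketch (this is not the Hexagon evaluator, which appears later in the file):

    class TrivialConstEvaluator : public MachineConstEvaluator {
    public:
      TrivialConstEvaluator(MachineFunction &Fn) : MachineConstEvaluator(Fn) {}
      // (1) Recognize no instructions: every def will be set to bottom.
      bool evaluate(const MachineInstr &, const CellMap &,
                    CellMap &) override {
        return false;
      }
      // (2) No subregister refinement: pass the source cell through.
      bool evaluate(const Register &, const LatticeCell &SrcC,
                    LatticeCell &Result) override {
        Result = SrcC;
        return true;
      }
      // (3) Claim no knowledge of branch targets; assume fall-through.
      bool evaluate(const MachineInstr &, const CellMap &,
                    SetVector<const MachineBasicBlock*> &,
                    bool &CanFallThru) override {
        CanFallThru = true;
        return false;
      }
      // Never rewrite anything.
      bool rewrite(MachineInstr &, const CellMap &) override { return false; }
    };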
+ + bool getCell(const Register &R, const CellMap &Inputs, LatticeCell &RC); + bool constToInt(const Constant *C, APInt &Val) const; + bool constToFloat(const Constant *C, APFloat &Val) const; + const ConstantInt *intToConst(const APInt &Val) const; + + // Compares. + bool evaluateCMPrr(uint32_t Cmp, const Register &R1, const Register &R2, + const CellMap &Inputs, bool &Result); + bool evaluateCMPri(uint32_t Cmp, const Register &R1, const APInt &A2, + const CellMap &Inputs, bool &Result); + bool evaluateCMPrp(uint32_t Cmp, const Register &R1, uint64_t Props2, + const CellMap &Inputs, bool &Result); + bool evaluateCMPii(uint32_t Cmp, const APInt &A1, const APInt &A2, + bool &Result); + bool evaluateCMPpi(uint32_t Cmp, uint32_t Props, const APInt &A2, + bool &Result); + bool evaluateCMPpp(uint32_t Cmp, uint32_t Props1, uint32_t Props2, + bool &Result); + + bool evaluateCOPY(const Register &R1, const CellMap &Inputs, + LatticeCell &Result); + + // Logical operations. + bool evaluateANDrr(const Register &R1, const Register &R2, + const CellMap &Inputs, LatticeCell &Result); + bool evaluateANDri(const Register &R1, const APInt &A2, + const CellMap &Inputs, LatticeCell &Result); + bool evaluateANDii(const APInt &A1, const APInt &A2, APInt &Result); + bool evaluateORrr(const Register &R1, const Register &R2, + const CellMap &Inputs, LatticeCell &Result); + bool evaluateORri(const Register &R1, const APInt &A2, + const CellMap &Inputs, LatticeCell &Result); + bool evaluateORii(const APInt &A1, const APInt &A2, APInt &Result); + bool evaluateXORrr(const Register &R1, const Register &R2, + const CellMap &Inputs, LatticeCell &Result); + bool evaluateXORri(const Register &R1, const APInt &A2, + const CellMap &Inputs, LatticeCell &Result); + bool evaluateXORii(const APInt &A1, const APInt &A2, APInt &Result); + + // Extensions. + bool evaluateZEXTr(const Register &R1, unsigned Width, unsigned Bits, + const CellMap &Inputs, LatticeCell &Result); + bool evaluateZEXTi(const APInt &A1, unsigned Width, unsigned Bits, + APInt &Result); + bool evaluateSEXTr(const Register &R1, unsigned Width, unsigned Bits, + const CellMap &Inputs, LatticeCell &Result); + bool evaluateSEXTi(const APInt &A1, unsigned Width, unsigned Bits, + APInt &Result); + + // Leading/trailing bits. + bool evaluateCLBr(const Register &R1, bool Zeros, bool Ones, + const CellMap &Inputs, LatticeCell &Result); + bool evaluateCLBi(const APInt &A1, bool Zeros, bool Ones, APInt &Result); + bool evaluateCTBr(const Register &R1, bool Zeros, bool Ones, + const CellMap &Inputs, LatticeCell &Result); + bool evaluateCTBi(const APInt &A1, bool Zeros, bool Ones, APInt &Result); + + // Bitfield extract. + bool evaluateEXTRACTr(const Register &R1, unsigned Width, unsigned Bits, + unsigned Offset, bool Signed, const CellMap &Inputs, + LatticeCell &Result); + bool evaluateEXTRACTi(const APInt &A1, unsigned Bits, unsigned Offset, + bool Signed, APInt &Result); + // Vector operations. 
+ bool evaluateSplatr(const Register &R1, unsigned Bits, unsigned Count, + const CellMap &Inputs, LatticeCell &Result); + bool evaluateSplati(const APInt &A1, unsigned Bits, unsigned Count, + APInt &Result); + }; + +} // end anonymous namespace + +uint32_t ConstantProperties::deduce(const Constant *C) { + if (isa<ConstantInt>(C)) { + const ConstantInt *CI = cast<ConstantInt>(C); + if (CI->isZero()) + return Zero | PosOrZero | NegOrZero | Finite; + uint32_t Props = (NonZero | Finite); + if (CI->isNegative()) + return Props | NegOrZero; + return Props | PosOrZero; + } + + if (isa<ConstantFP>(C)) { + const ConstantFP *CF = cast<ConstantFP>(C); + uint32_t Props = CF->isNegative() ? (NegOrZero|NonZero) + : PosOrZero; + if (CF->isZero()) + return (Props & ~NumericProperties) | (Zero|Finite); + Props = (Props & ~NumericProperties) | NonZero; + if (CF->isNaN()) + return (Props & ~NumericProperties) | NaN; + const APFloat &Val = CF->getValueAPF(); + if (Val.isInfinity()) + return (Props & ~NumericProperties) | Infinity; + Props |= Finite; + return Props; + } + + return Unknown; +} + +// Convert a cell from a set of specific values to a cell that tracks +// properties. +bool LatticeCell::convertToProperty() { + if (isProperty()) + return false; + // Corner case: converting a fresh (top) cell to "special". + // This can happen, when adding a property to a top cell. + uint32_t Everything = ConstantProperties::Everything; + uint32_t Ps = !isTop() ? properties() + : Everything; + if (Ps != ConstantProperties::Unknown) { + Properties = Ps; + setProperty(); + } else { + setBottom(); + } + return true; +} + +void LatticeCell::print(raw_ostream &os) const { + if (isProperty()) { + os << "{ "; + uint32_t Ps = properties(); + if (Ps & ConstantProperties::Zero) + os << "zero "; + if (Ps & ConstantProperties::NonZero) + os << "nonzero "; + if (Ps & ConstantProperties::Finite) + os << "finite "; + if (Ps & ConstantProperties::Infinity) + os << "infinity "; + if (Ps & ConstantProperties::NaN) + os << "nan "; + if (Ps & ConstantProperties::PosOrZero) + os << "poz "; + if (Ps & ConstantProperties::NegOrZero) + os << "nez "; + os << '}'; + return; + } + + os << "{ "; + if (isBottom()) { + os << "bottom"; + } else if (isTop()) { + os << "top"; + } else { + for (unsigned i = 0; i < size(); ++i) { + const Constant *C = Values[i]; + if (i != 0) + os << ", "; + C->print(os); + } + } + os << " }"; +} + +// "Meet" operation on two cells. This is the key of the propagation +// algorithm. +bool LatticeCell::meet(const LatticeCell &L) { + bool Changed = false; + if (L.isBottom()) + Changed = setBottom(); + if (isBottom() || L.isTop()) + return Changed; + if (isTop()) { + *this = L; + // L can be neither Top nor Bottom, so *this must have changed. + return true; + } + + // Top/bottom cases covered. Need to integrate L's set into ours. + if (L.isProperty()) + return add(L.properties()); + for (unsigned i = 0; i < L.size(); ++i) { + const Constant *LC = L.Values[i]; + Changed |= add(LC); + } + return Changed; +} + +// Add a new constant to the cell. This is actually where the cell update +// happens. If a cell has room for more constants, the new constant is added. +// Otherwise, the cell is converted to a "property" cell (i.e. a cell that +// will track properties of the associated values, and not the values +// themselves. Care is taken to handle special cases, like "bottom", etc. +bool LatticeCell::add(const Constant *LC) { + assert(LC); + if (isBottom()) + return false; + + if (!isProperty()) { + // Cell is not special. 
Try to add the constant here first, + // if there is room. + unsigned Index = 0; + while (Index < Size) { + const Constant *C = Values[Index]; + // If the constant is already here, no change is needed. + if (C == LC) + return false; + Index++; + } + if (Index < MaxCellSize) { + Values[Index] = LC; + Kind = Normal; + Size++; + return true; + } + } + + bool Changed = false; + + // This cell is special, or is not special, but is full. After this + // it will be special. + Changed = convertToProperty(); + uint32_t Ps = properties(); + uint32_t NewPs = Ps & ConstantProperties::deduce(LC); + if (NewPs == ConstantProperties::Unknown) { + setBottom(); + return true; + } + if (Ps != NewPs) { + Properties = NewPs; + Changed = true; + } + return Changed; +} + +// Add a property to the cell. This will force the cell to become a property- +// tracking cell. +bool LatticeCell::add(uint32_t Property) { + bool Changed = convertToProperty(); + uint32_t Ps = properties(); + if (Ps == (Ps & Property)) + return Changed; + Properties = Property & Ps; + return true; +} + +// Return the properties of the values in the cell. This is valid for any +// cell, and does not alter the cell itself. +uint32_t LatticeCell::properties() const { + if (isProperty()) + return Properties; + assert(!isTop() && "Should not call this for a top cell"); + if (isBottom()) + return ConstantProperties::Unknown; + + assert(size() > 0 && "Empty cell"); + uint32_t Ps = ConstantProperties::deduce(Values[0]); + for (unsigned i = 1; i < size(); ++i) { + if (Ps == ConstantProperties::Unknown) + break; + Ps &= ConstantProperties::deduce(Values[i]); + } + return Ps; +} + +void MachineConstPropagator::CellMap::print(raw_ostream &os, + const TargetRegisterInfo &TRI) const { + for (auto &I : Map) + dbgs() << " " << PrintReg(I.first, &TRI) << " -> " << I.second << '\n'; +} + +void MachineConstPropagator::visitPHI(const MachineInstr &PN) { + const MachineBasicBlock *MB = PN.getParent(); + unsigned MBN = MB->getNumber(); + DEBUG(dbgs() << "Visiting FI(BB#" << MBN << "): " << PN); + + const MachineOperand &MD = PN.getOperand(0); + Register DefR(MD); + assert(TargetRegisterInfo::isVirtualRegister(DefR.Reg)); + + bool Changed = false; + + // If the def has a sub-register, set the corresponding cell to "bottom". + if (DefR.SubReg) { +Bottomize: + const LatticeCell &T = Cells.get(DefR.Reg); + Changed = !T.isBottom(); + Cells.update(DefR.Reg, Bottom); + if (Changed) + visitUsesOf(DefR.Reg); + return; + } + + LatticeCell DefC = Cells.get(DefR.Reg); + + for (unsigned i = 1, n = PN.getNumOperands(); i < n; i += 2) { + const MachineBasicBlock *PB = PN.getOperand(i+1).getMBB(); + unsigned PBN = PB->getNumber(); + if (!EdgeExec.count(CFGEdge(PBN, MBN))) { + DEBUG(dbgs() << " edge BB#" << PBN << "->BB#" << MBN + << " not executable\n"); + continue; + } + const MachineOperand &SO = PN.getOperand(i); + Register UseR(SO); + // If the input is not a virtual register, we don't really know what + // value it holds. + if (!TargetRegisterInfo::isVirtualRegister(UseR.Reg)) + goto Bottomize; + // If there is no cell for an input register, it means top. + if (!Cells.has(UseR.Reg)) + continue; + + LatticeCell SrcC; + bool Eval = MCE.evaluate(UseR, Cells.get(UseR.Reg), SrcC); + DEBUG(dbgs() << " edge from BB#" << PBN << ": " + << PrintReg(UseR.Reg, &MCE.TRI, UseR.SubReg) + << SrcC << '\n'); + Changed |= Eval ? 
DefC.meet(SrcC) + : DefC.setBottom(); + Cells.update(DefR.Reg, DefC); + if (DefC.isBottom()) + break; + } + if (Changed) + visitUsesOf(DefR.Reg); +} + +void MachineConstPropagator::visitNonBranch(const MachineInstr &MI) { + DEBUG(dbgs() << "Visiting MI(BB#" << MI.getParent()->getNumber() + << "): " << MI); + CellMap Outputs; + bool Eval = MCE.evaluate(MI, Cells, Outputs); + DEBUG({ + if (Eval) { + dbgs() << " outputs:"; + for (auto &I : Outputs) + dbgs() << ' ' << I.second; + dbgs() << '\n'; + } + }); + + // Update outputs. If the value was not computed, set all the + // def cells to bottom. + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isReg() || !MO.isDef()) + continue; + Register DefR(MO); + // Only track virtual registers. + if (!TargetRegisterInfo::isVirtualRegister(DefR.Reg)) + continue; + bool Changed = false; + // If the evaluation failed, set cells for all output registers to bottom. + if (!Eval) { + const LatticeCell &T = Cells.get(DefR.Reg); + Changed = !T.isBottom(); + Cells.update(DefR.Reg, Bottom); + } else { + // Find the corresponding cell in the computed outputs. + // If it's not there, go on to the next def. + if (!Outputs.has(DefR.Reg)) + continue; + LatticeCell RC = Cells.get(DefR.Reg); + Changed = RC.meet(Outputs.get(DefR.Reg)); + Cells.update(DefR.Reg, RC); + } + if (Changed) + visitUsesOf(DefR.Reg); + } +} + +// \brief Starting at a given branch, visit remaining branches in the block. +// Traverse over the subsequent branches for as long as the preceding one +// can fall through. Add all the possible targets to the flow work queue, +// including the potential fall-through to the layout-successor block. +void MachineConstPropagator::visitBranchesFrom(const MachineInstr &BrI) { + const MachineBasicBlock &B = *BrI.getParent(); + unsigned MBN = B.getNumber(); + MachineBasicBlock::const_iterator It = BrI.getIterator(); + MachineBasicBlock::const_iterator End = B.end(); + + SetVector<const MachineBasicBlock*> Targets; + bool EvalOk = true, FallsThru = true; + while (It != End) { + const MachineInstr &MI = *It; + InstrExec.insert(&MI); + DEBUG(dbgs() << "Visiting " << (EvalOk ? "BR" : "br") << "(BB#" + << MBN << "): " << MI); + // Do not evaluate subsequent branches if the evaluation of any of the + // previous branches failed. Keep iterating over the branches only + // to mark them as executable. + EvalOk = EvalOk && MCE.evaluate(MI, Cells, Targets, FallsThru); + if (!EvalOk) + FallsThru = true; + if (!FallsThru) + break; + ++It; + } + + if (EvalOk) { + // Need to add all CFG successors that lead to EH landing pads. + // There won't be explicit branches to these blocks, but they must + // be processed. + for (const MachineBasicBlock *SB : B.successors()) { + if (SB->isEHPad()) + Targets.insert(SB); + } + if (FallsThru) { + const MachineFunction &MF = *B.getParent(); + MachineFunction::const_iterator BI = B.getIterator(); + MachineFunction::const_iterator Next = std::next(BI); + if (Next != MF.end()) + Targets.insert(&*Next); + } + } else { + // If the evaluation of the branches failed, make "Targets" to be the + // set of all successors of the block from the CFG. + // If the evaluation succeeded for all visited branches, then if the + // last one set "FallsThru", then add an edge to the layout successor + // to the targets. 
+    Targets.clear();
+    DEBUG(dbgs() << " failed to evaluate a branch...adding all CFG "
+                    "successors\n");
+    for (const MachineBasicBlock *SB : B.successors())
+      Targets.insert(SB);
+  }
+
+  for (const MachineBasicBlock *TB : Targets) {
+    unsigned TBN = TB->getNumber();
+    DEBUG(dbgs() << " pushing edge BB#" << MBN << " -> BB#" << TBN << "\n");
+    FlowQ.push(CFGEdge(MBN, TBN));
+  }
+}
+
+void MachineConstPropagator::visitUsesOf(unsigned Reg) {
+  DEBUG(dbgs() << "Visiting uses of " << PrintReg(Reg, &MCE.TRI)
+               << Cells.get(Reg) << '\n');
+  for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
+    // Do not process non-executable instructions. They can become executable
+    // later (via a flow-edge in the work queue). In such a case, the
+    // instruction will be visited at that time.
+    if (!InstrExec.count(&MI))
+      continue;
+    if (MI.isPHI())
+      visitPHI(MI);
+    else if (!MI.isBranch())
+      visitNonBranch(MI);
+    else
+      visitBranchesFrom(MI);
+  }
+}
+
+bool MachineConstPropagator::computeBlockSuccessors(const MachineBasicBlock *MB,
+      SetVector<const MachineBasicBlock*> &Targets) {
+  MachineBasicBlock::const_iterator FirstBr = MB->end();
+  for (const MachineInstr &MI : *MB) {
+    if (MI.isDebugValue())
+      continue;
+    if (MI.isBranch()) {
+      FirstBr = MI.getIterator();
+      break;
+    }
+  }
+
+  Targets.clear();
+  MachineBasicBlock::const_iterator End = MB->end();
+
+  bool DoNext = true;
+  for (MachineBasicBlock::const_iterator I = FirstBr; I != End; ++I) {
+    const MachineInstr &MI = *I;
+    // Can there be debug instructions between branches?
+    if (MI.isDebugValue())
+      continue;
+    if (!InstrExec.count(&MI))
+      continue;
+    bool Eval = MCE.evaluate(MI, Cells, Targets, DoNext);
+    if (!Eval)
+      return false;
+    if (!DoNext)
+      break;
+  }
+  // If the last branch could fall through, add the block's layout successor.
+  if (DoNext) {
+    MachineFunction::const_iterator BI = MB->getIterator();
+    MachineFunction::const_iterator NextI = std::next(BI);
+    if (NextI != MB->getParent()->end())
+      Targets.insert(&*NextI);
+  }
+
+  // Add all the EH landing pads.
+  for (const MachineBasicBlock *SB : MB->successors())
+    if (SB->isEHPad())
+      Targets.insert(SB);
+
+  return true;
+}
+
+void MachineConstPropagator::removeCFGEdge(MachineBasicBlock *From,
+      MachineBasicBlock *To) {
+  // First, remove the CFG successor/predecessor information.
+  From->removeSuccessor(To);
+  // Remove all corresponding PHI operands in the To block.
+  for (auto I = To->begin(), E = To->getFirstNonPHI(); I != E; ++I) {
+    MachineInstr *PN = &*I;
+    // reg0 = PHI reg1, bb2, reg3, bb4, ...
+    int N = PN->getNumOperands()-2;
+    while (N > 0) {
+      if (PN->getOperand(N+1).getMBB() == From) {
+        PN->RemoveOperand(N+1);
+        PN->RemoveOperand(N);
+      }
+      N -= 2;
+    }
+  }
+}
+
+void MachineConstPropagator::propagate(MachineFunction &MF) {
+  MachineBasicBlock *Entry = GraphTraits<MachineFunction*>::getEntryNode(&MF);
+  unsigned EntryNum = Entry->getNumber();
+
+  // Start with a fake edge, just to process the entry node.
+  FlowQ.push(CFGEdge(EntryNum, EntryNum));
+
+  while (!FlowQ.empty()) {
+    CFGEdge Edge = FlowQ.front();
+    FlowQ.pop();
+
+    DEBUG(dbgs() << "Picked edge BB#" << Edge.first << "->BB#"
+                 << Edge.second << '\n');
+    if (Edge.first != EntryNum)
+      if (EdgeExec.count(Edge))
+        continue;
+    EdgeExec.insert(Edge);
+    MachineBasicBlock *SB = MF.getBlockNumbered(Edge.second);
+
+    // Process the block in three stages:
+    // - visit all PHI nodes,
+    // - visit all non-branch instructions,
+    // - visit block branches.
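+    // Note that the PHI nodes are re-visited for every edge taken off the
+    // queue, since the newly executable edge may contribute a new incoming
+    // value. Only the non-PHI instructions are skipped when the block has
+    // already been visited; those are re-examined via visitUsesOf instead.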
+    MachineBasicBlock::const_iterator It = SB->begin(), End = SB->end();
+
+    // Visit PHI nodes in the successor block.
+    while (It != End && It->isPHI()) {
+      InstrExec.insert(&*It);
+      visitPHI(*It);
+      ++It;
+    }
+
+    // If the successor block just became executable, visit all instructions.
+    // To see if this is the first time we're visiting it, check the first
+    // non-debug instruction to see if it is executable.
+    while (It != End && It->isDebugValue())
+      ++It;
+    assert(It == End || !It->isPHI());
+    // If this block has been visited, go on to the next one.
+    if (It != End && InstrExec.count(&*It))
+      continue;
+    // For now, scan all non-branch instructions. Branches require different
+    // processing.
+    while (It != End && !It->isBranch()) {
+      if (!It->isDebugValue()) {
+        InstrExec.insert(&*It);
+        visitNonBranch(*It);
+      }
+      ++It;
+    }
+
+    // Time to process the end of the block. This is different from
+    // processing regular (non-branch) instructions, because there can
+    // be multiple branches in a block, and they can cause the block to
+    // terminate early.
+    if (It != End) {
+      visitBranchesFrom(*It);
+    } else {
+      // If the block didn't have a branch, add all successor edges to the
+      // work queue. (There should really be only one successor in such a
+      // case.)
+      unsigned SBN = SB->getNumber();
+      for (const MachineBasicBlock *SSB : SB->successors())
+        FlowQ.push(CFGEdge(SBN, SSB->getNumber()));
+    }
+  } // while (FlowQ)
+
+  DEBUG({
+    dbgs() << "Cells after propagation:\n";
+    Cells.print(dbgs(), MCE.TRI);
+    dbgs() << "Dead CFG edges:\n";
+    for (const MachineBasicBlock &B : MF) {
+      unsigned BN = B.getNumber();
+      for (const MachineBasicBlock *SB : B.successors()) {
+        unsigned SN = SB->getNumber();
+        if (!EdgeExec.count(CFGEdge(BN, SN)))
+          dbgs() << " BB#" << BN << " -> BB#" << SN << '\n';
+      }
+    }
+  });
+}
+
+bool MachineConstPropagator::rewrite(MachineFunction &MF) {
+  bool Changed = false;
+  // Rewrite all instructions based on the collected cell information.
+  //
+  // Traverse the instructions in a post-order, so that rewriting an
+  // instruction can make changes "downstream" in terms of control-flow
+  // without affecting the rewriting process. (We should not change
+  // instructions that have not yet been visited by the rewriter.)
+  // The reason for this is that the rewriter can introduce new vregs,
+  // and replace uses of old vregs (which had corresponding cells
+  // computed during propagation) with these new vregs (which at this
+  // point would not have any cells, and would appear to be "top").
+  // If an attempt was made to evaluate an instruction with a fresh
+  // "top" vreg, it would cause an error (abend) in the evaluator.
+
+  // Collect the post-order-traversal block ordering. The subsequent
+  // traversal/rewrite will update block successors, so it's safer
+  // if the visiting order is computed ahead of time.
+  std::vector<MachineBasicBlock*> POT;
+  for (MachineBasicBlock *B : post_order(&MF))
+    if (!B->empty())
+      POT.push_back(B);
+
+  for (MachineBasicBlock *B : POT) {
+    // Walk the block backwards (which usually begins with the branches).
+    // If any branch is rewritten, we may need to update the successor
+    // information for this block. Unless the block's successors can be
+    // precisely determined (which may not be the case for indirect
+    // branches), we cannot modify any branch.
+
+    // Compute the successor information.
+    SetVector<const MachineBasicBlock*> Targets;
+    bool HaveTargets = computeBlockSuccessors(B, Targets);
+    // Rewrite the executable instructions.
Skip branches if we don't + // have block successor information. + for (auto I = B->rbegin(), E = B->rend(); I != E; ++I) { + MachineInstr &MI = *I; + if (InstrExec.count(&MI)) { + if (MI.isBranch() && !HaveTargets) + continue; + Changed |= MCE.rewrite(MI, Cells); + } + } + // The rewriting could rewrite PHI nodes to non-PHI nodes, causing + // regular instructions to appear in between PHI nodes. Bring all + // the PHI nodes to the beginning of the block. + for (auto I = B->begin(), E = B->end(); I != E; ++I) { + if (I->isPHI()) + continue; + // I is not PHI. Find the next PHI node P. + auto P = I; + while (++P != E) + if (P->isPHI()) + break; + // Not found. + if (P == E) + break; + // Splice P right before I. + B->splice(I, B, P); + // Reset I to point at the just spliced PHI node. + --I; + } + // Update the block successor information: remove unnecessary successors. + if (HaveTargets) { + SmallVector<MachineBasicBlock*,2> ToRemove; + for (MachineBasicBlock *SB : B->successors()) { + if (!Targets.count(SB)) + ToRemove.push_back(const_cast<MachineBasicBlock*>(SB)); + Targets.remove(SB); + } + for (unsigned i = 0, n = ToRemove.size(); i < n; ++i) + removeCFGEdge(B, ToRemove[i]); + // If there are any blocks left in the computed targets, it means that + // we think that the block could go somewhere, but the CFG does not. + // This could legitimately happen in blocks that have non-returning + // calls---we would think that the execution can continue, but the + // CFG will not have a successor edge. + } + } + // Need to do some final post-processing. + // If a branch was not executable, it will not get rewritten, but should + // be removed (or replaced with something equivalent to a A2_nop). We can't + // erase instructions during rewriting, so this needs to be delayed until + // now. + for (MachineBasicBlock &B : MF) { + MachineBasicBlock::iterator I = B.begin(), E = B.end(); + while (I != E) { + auto Next = std::next(I); + if (I->isBranch() && !InstrExec.count(&*I)) + B.erase(I); + I = Next; + } + } + return Changed; +} + +// This is the constant propagation algorithm as described by Wegman-Zadeck. +// Most of the terminology comes from there. +bool MachineConstPropagator::run(MachineFunction &MF) { + DEBUG(MF.print(dbgs() << "Starting MachineConstPropagator\n", 0)); + + MRI = &MF.getRegInfo(); + + Cells.clear(); + EdgeExec.clear(); + InstrExec.clear(); + assert(FlowQ.empty()); + + propagate(MF); + bool Changed = rewrite(MF); + + DEBUG({ + dbgs() << "End of MachineConstPropagator (Changed=" << Changed << ")\n"; + if (Changed) + MF.print(dbgs(), 0); + }); + return Changed; +} + +// -------------------------------------------------------------------- +// Machine const evaluator. 
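Both the propagator above and the evaluator routines below manipulate lattice cells: top (nothing known yet), a small bounded set of known constants, or bottom (could hold anything), with meet computing the greatest lower bound of two cells. The following minimal, self-contained C++ sketch models that structure; the names Cell and MaxSize are illustrative stand-ins, not the classes in this patch:

    #include <algorithm>
    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Hypothetical stand-in for LatticeCell: top = "no information yet",
    // a bounded set of known constants, or bottom = "any value".
    struct Cell {
      enum Kind { Top, Consts, Bottom } K = Top;
      std::vector<int64_t> Vals;             // valid only when K == Consts
      static constexpr unsigned MaxSize = 4; // mirrors the cell size limit

      // Add one constant; returns true if the cell changed.
      bool add(int64_t V) {
        if (K == Bottom)
          return false;
        if (std::find(Vals.begin(), Vals.end(), V) != Vals.end())
          return false;
        if (Vals.size() == MaxSize) {        // too many values: give up
          K = Bottom;
          Vals.clear();
          return true;
        }
        K = Consts;
        Vals.push_back(V);
        return true;
      }

      // Greatest lower bound with another cell; returns true if changed.
      bool meet(const Cell &Other) {
        if (K == Bottom || Other.K == Top)
          return false;
        if (Other.K == Bottom) {
          K = Bottom;
          Vals.clear();
          return true;
        }
        bool Changed = false;
        for (int64_t V : Other.Vals)
          Changed |= add(V);
        return Changed;
      }
    };

    int main() {
      Cell A, B;
      A.add(1);
      B.add(2);
      B.add(3);
      A.meet(B);                    // A = {1, 2, 3}
      assert(A.K == Cell::Consts && A.Vals.size() == 3);
      B.add(4);
      B.add(5);
      B.add(6);                     // fifth distinct value saturates to bottom
      assert(B.K == Cell::Bottom);
      A.meet(B);                    // meet with bottom is bottom
      assert(A.K == Cell::Bottom);
      return 0;
    }

Saturating to bottom once a cell outgrows MaxSize is what keeps the propagation finite: a cell can only move down the lattice, and only a bounded number of times, so the flow work queue must eventually drain.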
+ +bool MachineConstEvaluator::getCell(const Register &R, const CellMap &Inputs, + LatticeCell &RC) { + if (!TargetRegisterInfo::isVirtualRegister(R.Reg)) + return false; + const LatticeCell &L = Inputs.get(R.Reg); + if (!R.SubReg) { + RC = L; + return !RC.isBottom(); + } + bool Eval = evaluate(R, L, RC); + return Eval && !RC.isBottom(); +} + +bool MachineConstEvaluator::constToInt(const Constant *C, + APInt &Val) const { + const ConstantInt *CI = dyn_cast<ConstantInt>(C); + if (!CI) + return false; + Val = CI->getValue(); + return true; +} + +const ConstantInt *MachineConstEvaluator::intToConst(const APInt &Val) const { + return ConstantInt::get(CX, Val); +} + +bool MachineConstEvaluator::evaluateCMPrr(uint32_t Cmp, const Register &R1, + const Register &R2, const CellMap &Inputs, bool &Result) { + assert(Inputs.has(R1.Reg) && Inputs.has(R2.Reg)); + LatticeCell LS1, LS2; + if (!getCell(R1, Inputs, LS1) || !getCell(R2, Inputs, LS2)) + return false; + + bool IsProp1 = LS1.isProperty(); + bool IsProp2 = LS2.isProperty(); + if (IsProp1) { + uint32_t Prop1 = LS1.properties(); + if (IsProp2) + return evaluateCMPpp(Cmp, Prop1, LS2.properties(), Result); + uint32_t NegCmp = Comparison::negate(Cmp); + return evaluateCMPrp(NegCmp, R2, Prop1, Inputs, Result); + } + if (IsProp2) { + uint32_t Prop2 = LS2.properties(); + return evaluateCMPrp(Cmp, R1, Prop2, Inputs, Result); + } + + APInt A; + bool IsTrue = true, IsFalse = true; + for (unsigned i = 0; i < LS2.size(); ++i) { + bool Res; + bool Computed = constToInt(LS2.Values[i], A) && + evaluateCMPri(Cmp, R1, A, Inputs, Res); + if (!Computed) + return false; + IsTrue &= Res; + IsFalse &= !Res; + } + assert(!IsTrue || !IsFalse); + // The actual logical value of the comparison is same as IsTrue. + Result = IsTrue; + // Return true if the result was proven to be true or proven to be false. + return IsTrue || IsFalse; +} + +bool MachineConstEvaluator::evaluateCMPri(uint32_t Cmp, const Register &R1, + const APInt &A2, const CellMap &Inputs, bool &Result) { + assert(Inputs.has(R1.Reg)); + LatticeCell LS; + if (!getCell(R1, Inputs, LS)) + return false; + if (LS.isProperty()) + return evaluateCMPpi(Cmp, LS.properties(), A2, Result); + + APInt A; + bool IsTrue = true, IsFalse = true; + for (unsigned i = 0; i < LS.size(); ++i) { + bool Res; + bool Computed = constToInt(LS.Values[i], A) && + evaluateCMPii(Cmp, A, A2, Res); + if (!Computed) + return false; + IsTrue &= Res; + IsFalse &= !Res; + } + assert(!IsTrue || !IsFalse); + // The actual logical value of the comparison is same as IsTrue. + Result = IsTrue; + // Return true if the result was proven to be true or proven to be false. 
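+  // For example, if the cell for R1 is {2, 4} and Cmp is "signed greater
+  // than 1", both values satisfy the comparison, so IsTrue survives and
+  // the result is proven true. For a cell of {0, 4} neither IsTrue nor
+  // IsFalse survives, and the comparison remains undecided.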
+ return IsTrue || IsFalse; +} + +bool MachineConstEvaluator::evaluateCMPrp(uint32_t Cmp, const Register &R1, + uint64_t Props2, const CellMap &Inputs, bool &Result) { + assert(Inputs.has(R1.Reg)); + LatticeCell LS; + if (!getCell(R1, Inputs, LS)) + return false; + if (LS.isProperty()) + return evaluateCMPpp(Cmp, LS.properties(), Props2, Result); + + APInt A; + uint32_t NegCmp = Comparison::negate(Cmp); + bool IsTrue = true, IsFalse = true; + for (unsigned i = 0; i < LS.size(); ++i) { + bool Res; + bool Computed = constToInt(LS.Values[i], A) && + evaluateCMPpi(NegCmp, Props2, A, Res); + if (!Computed) + return false; + IsTrue &= Res; + IsFalse &= !Res; + } + assert(!IsTrue || !IsFalse); + Result = IsTrue; + return IsTrue || IsFalse; +} + +bool MachineConstEvaluator::evaluateCMPii(uint32_t Cmp, const APInt &A1, + const APInt &A2, bool &Result) { + // NE is a special kind of comparison (not composed of smaller properties). + if (Cmp == Comparison::NE) { + Result = !APInt::isSameValue(A1, A2); + return true; + } + if (Cmp == Comparison::EQ) { + Result = APInt::isSameValue(A1, A2); + return true; + } + if (Cmp & Comparison::EQ) { + if (APInt::isSameValue(A1, A2)) + return (Result = true); + } + assert((Cmp & (Comparison::L | Comparison::G)) && "Malformed comparison"); + Result = false; + + unsigned W1 = A1.getBitWidth(); + unsigned W2 = A2.getBitWidth(); + unsigned MaxW = (W1 >= W2) ? W1 : W2; + if (Cmp & Comparison::U) { + const APInt Zx1 = A1.zextOrSelf(MaxW); + const APInt Zx2 = A2.zextOrSelf(MaxW); + if (Cmp & Comparison::L) + Result = Zx1.ult(Zx2); + else if (Cmp & Comparison::G) + Result = Zx2.ult(Zx1); + return true; + } + + // Signed comparison. + const APInt Sx1 = A1.sextOrSelf(MaxW); + const APInt Sx2 = A2.sextOrSelf(MaxW); + if (Cmp & Comparison::L) + Result = Sx1.slt(Sx2); + else if (Cmp & Comparison::G) + Result = Sx2.slt(Sx1); + return true; +} + +bool MachineConstEvaluator::evaluateCMPpi(uint32_t Cmp, uint32_t Props, + const APInt &A2, bool &Result) { + if (Props == ConstantProperties::Unknown) + return false; + + // Should never see NaN here, but check for it for completeness. + if (Props & ConstantProperties::NaN) + return false; + // Infinity could theoretically be compared to a number, but the + // presence of infinity here would be very suspicious. If we don't + // know for sure that the number is finite, bail out. + if (!(Props & ConstantProperties::Finite)) + return false; + + // Let X be a number that has properties Props. + + if (Cmp & Comparison::U) { + // In case of unsigned comparisons, we can only compare against 0. + if (A2 == 0) { + // Any x!=0 will be considered >0 in an unsigned comparison. + if (Props & ConstantProperties::Zero) + Result = (Cmp & Comparison::EQ); + else if (Props & ConstantProperties::NonZero) + Result = (Cmp & Comparison::G) || (Cmp == Comparison::NE); + else + return false; + return true; + } + // A2 is not zero. The only handled case is if X = 0. + if (Props & ConstantProperties::Zero) { + Result = (Cmp & Comparison::L) || (Cmp == Comparison::NE); + return true; + } + return false; + } + + // Signed comparisons are different. 
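+  // Each property only bounds the unknown value X (e.g. PosOrZero means
+  // X >= 0), so a signed comparison against A2 can only be decided when
+  // the two ranges cannot overlap: X >= 0 proves X > A2 for any negative
+  // A2, but proves nothing when A2 >= 0.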
+  if (Props & ConstantProperties::Zero) {
+    if (A2 == 0)
+      Result = (Cmp & Comparison::EQ);
+    else
+      Result = (Cmp == Comparison::NE) ||
+               ((Cmp & Comparison::L) && !A2.isNegative()) ||
+               ((Cmp & Comparison::G) && A2.isNegative());
+    return true;
+  }
+  if (Props & ConstantProperties::PosOrZero) {
+    // X >= 0 and !(A2 < 0) => cannot compare
+    if (!A2.isNegative())
+      return false;
+    // X >= 0 and A2 < 0
+    Result = (Cmp & Comparison::G) || (Cmp == Comparison::NE);
+    return true;
+  }
+  if (Props & ConstantProperties::NegOrZero) {
+    // X <= 0 and A2 <= 0 => cannot compare
+    if (A2 == 0 || A2.isNegative())
+      return false;
+    // X <= 0 and A2 > 0
+    Result = (Cmp & Comparison::L) || (Cmp == Comparison::NE);
+    return true;
+  }
+
+  return false;
+}
+
+bool MachineConstEvaluator::evaluateCMPpp(uint32_t Cmp, uint32_t Props1,
+      uint32_t Props2, bool &Result) {
+  typedef ConstantProperties P;
+  if ((Props1 & P::NaN) && (Props2 & P::NaN))
+    return false;
+  if (!(Props1 & P::Finite) || !(Props2 & P::Finite))
+    return false;
+
+  bool Zero1 = (Props1 & P::Zero), Zero2 = (Props2 & P::Zero);
+  bool NonZero1 = (Props1 & P::NonZero), NonZero2 = (Props2 & P::NonZero);
+  if (Zero1 && Zero2) {
+    Result = (Cmp & Comparison::EQ);
+    return true;
+  }
+  if (Cmp == Comparison::NE) {
+    if ((Zero1 && NonZero2) || (NonZero1 && Zero2))
+      return (Result = true);
+    return false;
+  }
+
+  if (Cmp & Comparison::U) {
+    // In unsigned comparisons, we can only compare against a known zero,
+    // or a known non-zero.
+    if (Zero1 && NonZero2) {
+      Result = (Cmp & Comparison::L);
+      return true;
+    }
+    if (NonZero1 && Zero2) {
+      Result = (Cmp & Comparison::G);
+      return true;
+    }
+    return false;
+  }
+
+  // Signed comparison. The comparison is not NE.
+  bool Poz1 = (Props1 & P::PosOrZero), Poz2 = (Props2 & P::PosOrZero);
+  bool Nez1 = (Props1 & P::NegOrZero), Nez2 = (Props2 & P::NegOrZero);
+  if (Nez1 && Poz2) {
+    if (NonZero1 || NonZero2) {
+      Result = (Cmp & Comparison::L);
+      return true;
+    }
+    // Either (or both) could be zero. Can only say that X <= Y.
+    if ((Cmp & Comparison::EQ) && (Cmp & Comparison::L))
+      return (Result = true);
+  }
+  if (Poz1 && Nez2) {
+    if (NonZero1 || NonZero2) {
+      Result = (Cmp & Comparison::G);
+      return true;
+    }
+    // Either (or both) could be zero. Can only say that X >= Y.
+    if ((Cmp & Comparison::EQ) && (Cmp & Comparison::G))
+      return (Result = true);
+  }
+
+  return false;
+}
+
+bool MachineConstEvaluator::evaluateCOPY(const Register &R1,
+      const CellMap &Inputs, LatticeCell &Result) {
+  return getCell(R1, Inputs, Result);
+}
+
+bool MachineConstEvaluator::evaluateANDrr(const Register &R1,
+      const Register &R2, const CellMap &Inputs, LatticeCell &Result) {
+  assert(Inputs.has(R1.Reg) && Inputs.has(R2.Reg));
+  const LatticeCell &L1 = Inputs.get(R1.Reg);
+  const LatticeCell &L2 = Inputs.get(R2.Reg);
+  // If both sources are bottom, exit. Otherwise try to evaluate ANDri
+  // with the non-bottom argument passed as the immediate. This is to
+  // catch cases of ANDing with 0.
+  if (L2.isBottom()) {
+    if (L1.isBottom())
+      return false;
+    return evaluateANDrr(R2, R1, Inputs, Result);
+  }
+  LatticeCell LS2;
+  if (!evaluate(R2, L2, LS2))
+    return false;
+  if (LS2.isBottom() || LS2.isProperty())
+    return false;
+
+  APInt A;
+  for (unsigned i = 0; i < LS2.size(); ++i) {
+    LatticeCell RC;
+    bool Eval = constToInt(LS2.Values[i], A) &&
+                evaluateANDri(R1, A, Inputs, RC);
+    if (!Eval)
+      return false;
+    Result.meet(RC);
+  }
+  return !Result.isBottom();
+}
+
+bool MachineConstEvaluator::evaluateANDri(const Register &R1,
+      const APInt &A2, const CellMap &Inputs, LatticeCell &Result) {
+  assert(Inputs.has(R1.Reg));
+  if (A2 == -1)
+    return getCell(R1, Inputs, Result);
+  if (A2 == 0) {
+    LatticeCell RC;
+    RC.add(intToConst(A2));
+    // Overwrite Result.
+    Result = RC;
+    return true;
+  }
+  LatticeCell LS1;
+  if (!getCell(R1, Inputs, LS1))
+    return false;
+  if (LS1.isBottom() || LS1.isProperty())
+    return false;
+
+  APInt A, ResA;
+  for (unsigned i = 0; i < LS1.size(); ++i) {
+    bool Eval = constToInt(LS1.Values[i], A) &&
+                evaluateANDii(A, A2, ResA);
+    if (!Eval)
+      return false;
+    const Constant *C = intToConst(ResA);
+    Result.add(C);
+  }
+  return !Result.isBottom();
+}
+
+bool MachineConstEvaluator::evaluateANDii(const APInt &A1,
+      const APInt &A2, APInt &Result) {
+  Result = A1 & A2;
+  return true;
+}
+
+bool MachineConstEvaluator::evaluateORrr(const Register &R1,
+      const Register &R2, const CellMap &Inputs, LatticeCell &Result) {
+  assert(Inputs.has(R1.Reg) && Inputs.has(R2.Reg));
+  const LatticeCell &L1 = Inputs.get(R1.Reg);
+  const LatticeCell &L2 = Inputs.get(R2.Reg);
+  // If both sources are bottom, exit. Otherwise try to evaluate ORri
+  // with the non-bottom argument passed as the immediate. This is to
+  // catch cases of ORing with -1.
+  if (L2.isBottom()) {
+    if (L1.isBottom())
+      return false;
+    return evaluateORrr(R2, R1, Inputs, Result);
+  }
+  LatticeCell LS2;
+  if (!evaluate(R2, L2, LS2))
+    return false;
+  if (LS2.isBottom() || LS2.isProperty())
+    return false;
+
+  APInt A;
+  for (unsigned i = 0; i < LS2.size(); ++i) {
+    LatticeCell RC;
+    bool Eval = constToInt(LS2.Values[i], A) &&
+                evaluateORri(R1, A, Inputs, RC);
+    if (!Eval)
+      return false;
+    Result.meet(RC);
+  }
+  return !Result.isBottom();
+}
+
+bool MachineConstEvaluator::evaluateORri(const Register &R1,
+      const APInt &A2, const CellMap &Inputs, LatticeCell &Result) {
+  assert(Inputs.has(R1.Reg));
+  if (A2 == 0)
+    return getCell(R1, Inputs, Result);
+  if (A2 == -1) {
+    LatticeCell RC;
+    RC.add(intToConst(A2));
+    // Overwrite Result.
+ Result = RC; + return true; + } + LatticeCell LS1; + if (!getCell(R1, Inputs, LS1)) + return false; + if (LS1.isBottom() || LS1.isProperty()) + return false; + + APInt A, ResA; + for (unsigned i = 0; i < LS1.size(); ++i) { + bool Eval = constToInt(LS1.Values[i], A) && + evaluateORii(A, A2, ResA); + if (!Eval) + return false; + const Constant *C = intToConst(ResA); + Result.add(C); + } + return !Result.isBottom(); +} + +bool MachineConstEvaluator::evaluateORii(const APInt &A1, + const APInt &A2, APInt &Result) { + Result = A1 | A2; + return true; +} + +bool MachineConstEvaluator::evaluateXORrr(const Register &R1, + const Register &R2, const CellMap &Inputs, LatticeCell &Result) { + assert(Inputs.has(R1.Reg) && Inputs.has(R2.Reg)); + LatticeCell LS1, LS2; + if (!getCell(R1, Inputs, LS1) || !getCell(R2, Inputs, LS2)) + return false; + if (LS1.isProperty()) { + if (LS1.properties() & ConstantProperties::Zero) + return !(Result = LS2).isBottom(); + return false; + } + if (LS2.isProperty()) { + if (LS2.properties() & ConstantProperties::Zero) + return !(Result = LS1).isBottom(); + return false; + } + + APInt A; + for (unsigned i = 0; i < LS2.size(); ++i) { + LatticeCell RC; + bool Eval = constToInt(LS2.Values[i], A) && + evaluateXORri(R1, A, Inputs, RC); + if (!Eval) + return false; + Result.meet(RC); + } + return !Result.isBottom(); +} + +bool MachineConstEvaluator::evaluateXORri(const Register &R1, + const APInt &A2, const CellMap &Inputs, LatticeCell &Result) { + assert(Inputs.has(R1.Reg)); + LatticeCell LS1; + if (!getCell(R1, Inputs, LS1)) + return false; + if (LS1.isProperty()) { + if (LS1.properties() & ConstantProperties::Zero) { + const Constant *C = intToConst(A2); + Result.add(C); + return !Result.isBottom(); + } + return false; + } + + APInt A, XA; + for (unsigned i = 0; i < LS1.size(); ++i) { + bool Eval = constToInt(LS1.Values[i], A) && + evaluateXORii(A, A2, XA); + if (!Eval) + return false; + const Constant *C = intToConst(XA); + Result.add(C); + } + return !Result.isBottom(); +} + +bool MachineConstEvaluator::evaluateXORii(const APInt &A1, + const APInt &A2, APInt &Result) { + Result = A1 ^ A2; + return true; +} + +bool MachineConstEvaluator::evaluateZEXTr(const Register &R1, unsigned Width, + unsigned Bits, const CellMap &Inputs, LatticeCell &Result) { + assert(Inputs.has(R1.Reg)); + LatticeCell LS1; + if (!getCell(R1, Inputs, LS1)) + return false; + if (LS1.isProperty()) + return false; + + APInt A, XA; + for (unsigned i = 0; i < LS1.size(); ++i) { + bool Eval = constToInt(LS1.Values[i], A) && + evaluateZEXTi(A, Width, Bits, XA); + if (!Eval) + return false; + const Constant *C = intToConst(XA); + Result.add(C); + } + return true; +} + +bool MachineConstEvaluator::evaluateZEXTi(const APInt &A1, unsigned Width, + unsigned Bits, APInt &Result) { + unsigned BW = A1.getBitWidth(); + (void)BW; + assert(Width >= Bits && BW >= Bits); + APInt Mask = APInt::getLowBitsSet(Width, Bits); + Result = A1.zextOrTrunc(Width) & Mask; + return true; +} + +bool MachineConstEvaluator::evaluateSEXTr(const Register &R1, unsigned Width, + unsigned Bits, const CellMap &Inputs, LatticeCell &Result) { + assert(Inputs.has(R1.Reg)); + LatticeCell LS1; + if (!getCell(R1, Inputs, LS1)) + return false; + if (LS1.isBottom() || LS1.isProperty()) + return false; + + APInt A, XA; + for (unsigned i = 0; i < LS1.size(); ++i) { + bool Eval = constToInt(LS1.Values[i], A) && + evaluateSEXTi(A, Width, Bits, XA); + if (!Eval) + return false; + const Constant *C = intToConst(XA); + Result.add(C); + } + return true; +} 
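For values that fit in 64 bits, the extension evaluators reduce to two familiar bit tricks: masking for zero extension, and shift-up/arithmetic-shift-down for sign extension, which the next function uses. A standalone sketch, assuming that >> on int64_t is an arithmetic shift (true of the compilers LLVM supports); zext_bits and sext_bits are hypothetical helper names, not part of this patch:

    #include <cassert>
    #include <cstdint>

    // Keep only the low 'Bits' bits of V (models the zero-extension mask).
    uint64_t zext_bits(uint64_t V, unsigned Bits) {
      assert(Bits >= 1 && Bits <= 64);
      if (Bits == 64)
        return V;
      return V & ((UINT64_C(1) << Bits) - 1);
    }

    // Sign-extend from bit position 'Bits': shift left so the sign bit
    // lands in bit 63, then arithmetic-shift back down.
    int64_t sext_bits(uint64_t V, unsigned Bits) {
      assert(Bits >= 1 && Bits <= 64);
      unsigned Sh = 64 - Bits;
      // The uint64_t -> int64_t cast and the arithmetic >> are
      // implementation-defined in pre-C++20 C++, but behave as expected
      // on the usual two's-complement targets.
      return (int64_t)(V << Sh) >> Sh;
    }

    int main() {
      assert(sext_bits(0xFF, 8) == -1);        // 0xFF as an 8-bit value is -1
      assert(sext_bits(0x7F, 8) == 127);       // sign bit clear: unchanged
      assert(zext_bits(0xFFFF, 8) == 0xFF);    // keep only the low 8 bits
      assert(sext_bits(0x8000, 16) == -32768);
      return 0;
    }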
+ +bool MachineConstEvaluator::evaluateSEXTi(const APInt &A1, unsigned Width, + unsigned Bits, APInt &Result) { + unsigned BW = A1.getBitWidth(); + assert(Width >= Bits && BW >= Bits); + // Special case to make things faster for smaller source widths. + // Sign extension of 0 bits generates 0 as a result. This is consistent + // with what the HW does. + if (Bits == 0) { + Result = APInt(Width, 0); + return true; + } + // In C, shifts by 64 invoke undefined behavior: handle that case in APInt. + if (BW <= 64 && Bits != 0) { + int64_t V = A1.getSExtValue(); + switch (Bits) { + case 8: + V = static_cast<int8_t>(V); + break; + case 16: + V = static_cast<int16_t>(V); + break; + case 32: + V = static_cast<int32_t>(V); + break; + default: + // Shift left to lose all bits except lower "Bits" bits, then shift + // the value back, replicating what was a sign bit after the first + // shift. + V = (V << (64-Bits)) >> (64-Bits); + break; + } + // V is a 64-bit sign-extended value. Convert it to APInt of desired + // width. + Result = APInt(Width, V, true); + return true; + } + // Slow case: the value doesn't fit in int64_t. + if (Bits < BW) + Result = A1.trunc(Bits).sext(Width); + else // Bits == BW + Result = A1.sext(Width); + return true; +} + +bool MachineConstEvaluator::evaluateCLBr(const Register &R1, bool Zeros, + bool Ones, const CellMap &Inputs, LatticeCell &Result) { + assert(Inputs.has(R1.Reg)); + LatticeCell LS1; + if (!getCell(R1, Inputs, LS1)) + return false; + if (LS1.isBottom() || LS1.isProperty()) + return false; + + APInt A, CA; + for (unsigned i = 0; i < LS1.size(); ++i) { + bool Eval = constToInt(LS1.Values[i], A) && + evaluateCLBi(A, Zeros, Ones, CA); + if (!Eval) + return false; + const Constant *C = intToConst(CA); + Result.add(C); + } + return true; +} + +bool MachineConstEvaluator::evaluateCLBi(const APInt &A1, bool Zeros, + bool Ones, APInt &Result) { + unsigned BW = A1.getBitWidth(); + if (!Zeros && !Ones) + return false; + unsigned Count = 0; + if (Zeros && (Count == 0)) + Count = A1.countLeadingZeros(); + if (Ones && (Count == 0)) + Count = A1.countLeadingOnes(); + Result = APInt(BW, static_cast<uint64_t>(Count), false); + return true; +} + +bool MachineConstEvaluator::evaluateCTBr(const Register &R1, bool Zeros, + bool Ones, const CellMap &Inputs, LatticeCell &Result) { + assert(Inputs.has(R1.Reg)); + LatticeCell LS1; + if (!getCell(R1, Inputs, LS1)) + return false; + if (LS1.isBottom() || LS1.isProperty()) + return false; + + APInt A, CA; + for (unsigned i = 0; i < LS1.size(); ++i) { + bool Eval = constToInt(LS1.Values[i], A) && + evaluateCTBi(A, Zeros, Ones, CA); + if (!Eval) + return false; + const Constant *C = intToConst(CA); + Result.add(C); + } + return true; +} + +bool MachineConstEvaluator::evaluateCTBi(const APInt &A1, bool Zeros, + bool Ones, APInt &Result) { + unsigned BW = A1.getBitWidth(); + if (!Zeros && !Ones) + return false; + unsigned Count = 0; + if (Zeros && (Count == 0)) + Count = A1.countTrailingZeros(); + if (Ones && (Count == 0)) + Count = A1.countTrailingOnes(); + Result = APInt(BW, static_cast<uint64_t>(Count), false); + return true; +} + +bool MachineConstEvaluator::evaluateEXTRACTr(const Register &R1, + unsigned Width, unsigned Bits, unsigned Offset, bool Signed, + const CellMap &Inputs, LatticeCell &Result) { + assert(Inputs.has(R1.Reg)); + assert(Bits+Offset <= Width); + LatticeCell LS1; + if (!getCell(R1, Inputs, LS1)) + return false; + if (LS1.isBottom()) + return false; + if (LS1.isProperty()) { + uint32_t Ps = LS1.properties(); + if (Ps & 
ConstantProperties::Zero) { + const Constant *C = intToConst(APInt(Width, 0, false)); + Result.add(C); + return true; + } + return false; + } + + APInt A, CA; + for (unsigned i = 0; i < LS1.size(); ++i) { + bool Eval = constToInt(LS1.Values[i], A) && + evaluateEXTRACTi(A, Bits, Offset, Signed, CA); + if (!Eval) + return false; + const Constant *C = intToConst(CA); + Result.add(C); + } + return true; +} + +bool MachineConstEvaluator::evaluateEXTRACTi(const APInt &A1, unsigned Bits, + unsigned Offset, bool Signed, APInt &Result) { + unsigned BW = A1.getBitWidth(); + assert(Bits+Offset <= BW); + // Extracting 0 bits generates 0 as a result (as indicated by the HW people). + if (Bits == 0) { + Result = APInt(BW, 0); + return true; + } + if (BW <= 64) { + int64_t V = A1.getZExtValue(); + V <<= (64-Bits-Offset); + if (Signed) + V >>= (64-Bits); + else + V = static_cast<uint64_t>(V) >> (64-Bits); + Result = APInt(BW, V, Signed); + return true; + } + if (Signed) + Result = A1.shl(BW-Bits-Offset).ashr(BW-Bits); + else + Result = A1.shl(BW-Bits-Offset).lshr(BW-Bits); + return true; +} + +bool MachineConstEvaluator::evaluateSplatr(const Register &R1, + unsigned Bits, unsigned Count, const CellMap &Inputs, + LatticeCell &Result) { + assert(Inputs.has(R1.Reg)); + LatticeCell LS1; + if (!getCell(R1, Inputs, LS1)) + return false; + if (LS1.isBottom() || LS1.isProperty()) + return false; + + APInt A, SA; + for (unsigned i = 0; i < LS1.size(); ++i) { + bool Eval = constToInt(LS1.Values[i], A) && + evaluateSplati(A, Bits, Count, SA); + if (!Eval) + return false; + const Constant *C = intToConst(SA); + Result.add(C); + } + return true; +} + +bool MachineConstEvaluator::evaluateSplati(const APInt &A1, unsigned Bits, + unsigned Count, APInt &Result) { + assert(Count > 0); + unsigned BW = A1.getBitWidth(), SW = Count*Bits; + APInt LoBits = (Bits < BW) ? A1.trunc(Bits) : A1.zextOrSelf(Bits); + if (Count > 1) + LoBits = LoBits.zext(SW); + + APInt Res(SW, 0, false); + for (unsigned i = 0; i < Count; ++i) { + Res <<= Bits; + Res |= LoBits; + } + Result = Res; + return true; +} + +// ---------------------------------------------------------------------- +// Hexagon-specific code. + +namespace llvm { + + FunctionPass *createHexagonConstPropagationPass(); + void initializeHexagonConstPropagationPass(PassRegistry &Registry); + +} // end namespace llvm + +namespace { + + class HexagonConstEvaluator : public MachineConstEvaluator { + public: + HexagonConstEvaluator(MachineFunction &Fn); + + bool evaluate(const MachineInstr &MI, const CellMap &Inputs, + CellMap &Outputs) override; + bool evaluate(const Register &R, const LatticeCell &SrcC, + LatticeCell &Result) override; + bool evaluate(const MachineInstr &BrI, const CellMap &Inputs, + SetVector<const MachineBasicBlock*> &Targets, bool &FallsThru) + override; + bool rewrite(MachineInstr &MI, const CellMap &Inputs) override; + + private: + unsigned getRegBitWidth(unsigned Reg) const; + + static uint32_t getCmp(unsigned Opc); + static APInt getCmpImm(unsigned Opc, unsigned OpX, + const MachineOperand &MO); + void replaceWithNop(MachineInstr &MI); + + bool evaluateHexRSEQ32(Register RL, Register RH, const CellMap &Inputs, + LatticeCell &Result); + bool evaluateHexCompare(const MachineInstr &MI, const CellMap &Inputs, + CellMap &Outputs); + // This is suitable to be called for compare-and-jump instructions. 
+ bool evaluateHexCompare2(uint32_t Cmp, const MachineOperand &Src1, + const MachineOperand &Src2, const CellMap &Inputs, bool &Result); + bool evaluateHexLogical(const MachineInstr &MI, const CellMap &Inputs, + CellMap &Outputs); + bool evaluateHexCondMove(const MachineInstr &MI, const CellMap &Inputs, + CellMap &Outputs); + bool evaluateHexExt(const MachineInstr &MI, const CellMap &Inputs, + CellMap &Outputs); + bool evaluateHexVector1(const MachineInstr &MI, const CellMap &Inputs, + CellMap &Outputs); + bool evaluateHexVector2(const MachineInstr &MI, const CellMap &Inputs, + CellMap &Outputs); + + void replaceAllRegUsesWith(unsigned FromReg, unsigned ToReg); + bool rewriteHexBranch(MachineInstr &BrI, const CellMap &Inputs); + bool rewriteHexConstDefs(MachineInstr &MI, const CellMap &Inputs, + bool &AllDefs); + bool rewriteHexConstUses(MachineInstr &MI, const CellMap &Inputs); + + MachineRegisterInfo *MRI; + const HexagonInstrInfo &HII; + const HexagonRegisterInfo &HRI; + }; + + class HexagonConstPropagation : public MachineFunctionPass { + public: + static char ID; + + HexagonConstPropagation() : MachineFunctionPass(ID) { + PassRegistry &Registry = *PassRegistry::getPassRegistry(); + initializeHexagonConstPropagationPass(Registry); + } + + StringRef getPassName() const override { + return "Hexagon Constant Propagation"; + } + + bool runOnMachineFunction(MachineFunction &MF) override { + const Function *F = MF.getFunction(); + if (!F) + return false; + if (skipFunction(*F)) + return false; + + HexagonConstEvaluator HCE(MF); + return MachineConstPropagator(HCE).run(MF); + } + }; + + char HexagonConstPropagation::ID = 0; + +} // end anonymous namespace + +INITIALIZE_PASS(HexagonConstPropagation, "hcp", "Hexagon Constant Propagation", + false, false) + +HexagonConstEvaluator::HexagonConstEvaluator(MachineFunction &Fn) + : MachineConstEvaluator(Fn), + HII(*Fn.getSubtarget<HexagonSubtarget>().getInstrInfo()), + HRI(*Fn.getSubtarget<HexagonSubtarget>().getRegisterInfo()) { + MRI = &Fn.getRegInfo(); +} + +bool HexagonConstEvaluator::evaluate(const MachineInstr &MI, + const CellMap &Inputs, CellMap &Outputs) { + if (MI.isCall()) + return false; + if (MI.getNumOperands() == 0 || !MI.getOperand(0).isReg()) + return false; + const MachineOperand &MD = MI.getOperand(0); + if (!MD.isDef()) + return false; + + unsigned Opc = MI.getOpcode(); + Register DefR(MD); + assert(!DefR.SubReg); + if (!TargetRegisterInfo::isVirtualRegister(DefR.Reg)) + return false; + + if (MI.isCopy()) { + LatticeCell RC; + Register SrcR(MI.getOperand(1)); + bool Eval = evaluateCOPY(SrcR, Inputs, RC); + if (!Eval) + return false; + Outputs.update(DefR.Reg, RC); + return true; + } + if (MI.isRegSequence()) { + unsigned Sub1 = MI.getOperand(2).getImm(); + unsigned Sub2 = MI.getOperand(4).getImm(); + const TargetRegisterClass *DefRC = MRI->getRegClass(DefR.Reg); + unsigned SubLo = HRI.getHexagonSubRegIndex(DefRC, Hexagon::ps_sub_lo); + unsigned SubHi = HRI.getHexagonSubRegIndex(DefRC, Hexagon::ps_sub_hi); + if (Sub1 != SubLo && Sub1 != SubHi) + return false; + if (Sub2 != SubLo && Sub2 != SubHi) + return false; + assert(Sub1 != Sub2); + bool LoIs1 = (Sub1 == SubLo); + const MachineOperand &OpLo = LoIs1 ? MI.getOperand(1) : MI.getOperand(3); + const MachineOperand &OpHi = LoIs1 ? 
MI.getOperand(3) : MI.getOperand(1); + LatticeCell RC; + Register SrcRL(OpLo), SrcRH(OpHi); + bool Eval = evaluateHexRSEQ32(SrcRL, SrcRH, Inputs, RC); + if (!Eval) + return false; + Outputs.update(DefR.Reg, RC); + return true; + } + if (MI.isCompare()) { + bool Eval = evaluateHexCompare(MI, Inputs, Outputs); + return Eval; + } + + switch (Opc) { + default: + return false; + case Hexagon::A2_tfrsi: + case Hexagon::A2_tfrpi: + case Hexagon::CONST32: + case Hexagon::CONST64: + { + const MachineOperand &VO = MI.getOperand(1); + // The operand of CONST32 can be a blockaddress, e.g. + // %vreg0<def> = CONST32 <blockaddress(@eat, %L)> + // Do this check for all instructions for safety. + if (!VO.isImm()) + return false; + int64_t V = MI.getOperand(1).getImm(); + unsigned W = getRegBitWidth(DefR.Reg); + if (W != 32 && W != 64) + return false; + IntegerType *Ty = (W == 32) ? Type::getInt32Ty(CX) + : Type::getInt64Ty(CX); + const ConstantInt *CI = ConstantInt::get(Ty, V, true); + LatticeCell RC = Outputs.get(DefR.Reg); + RC.add(CI); + Outputs.update(DefR.Reg, RC); + break; + } + + case Hexagon::PS_true: + case Hexagon::PS_false: + { + LatticeCell RC = Outputs.get(DefR.Reg); + bool NonZero = (Opc == Hexagon::PS_true); + uint32_t P = NonZero ? ConstantProperties::NonZero + : ConstantProperties::Zero; + RC.add(P); + Outputs.update(DefR.Reg, RC); + break; + } + + case Hexagon::A2_and: + case Hexagon::A2_andir: + case Hexagon::A2_andp: + case Hexagon::A2_or: + case Hexagon::A2_orir: + case Hexagon::A2_orp: + case Hexagon::A2_xor: + case Hexagon::A2_xorp: + { + bool Eval = evaluateHexLogical(MI, Inputs, Outputs); + if (!Eval) + return false; + break; + } + + case Hexagon::A2_combineii: // combine(#s8Ext, #s8) + case Hexagon::A4_combineii: // combine(#s8, #u6Ext) + { + uint64_t Hi = MI.getOperand(1).getImm(); + uint64_t Lo = MI.getOperand(2).getImm(); + uint64_t Res = (Hi << 32) | (Lo & 0xFFFFFFFF); + IntegerType *Ty = Type::getInt64Ty(CX); + const ConstantInt *CI = ConstantInt::get(Ty, Res, false); + LatticeCell RC = Outputs.get(DefR.Reg); + RC.add(CI); + Outputs.update(DefR.Reg, RC); + break; + } + + case Hexagon::S2_setbit_i: + { + int64_t B = MI.getOperand(2).getImm(); + assert(B >=0 && B < 32); + APInt A(32, (1ull << B), false); + Register R(MI.getOperand(1)); + LatticeCell RC = Outputs.get(DefR.Reg); + bool Eval = evaluateORri(R, A, Inputs, RC); + if (!Eval) + return false; + Outputs.update(DefR.Reg, RC); + break; + } + + case Hexagon::C2_mux: + case Hexagon::C2_muxir: + case Hexagon::C2_muxri: + case Hexagon::C2_muxii: + { + bool Eval = evaluateHexCondMove(MI, Inputs, Outputs); + if (!Eval) + return false; + break; + } + + case Hexagon::A2_sxtb: + case Hexagon::A2_sxth: + case Hexagon::A2_sxtw: + case Hexagon::A2_zxtb: + case Hexagon::A2_zxth: + { + bool Eval = evaluateHexExt(MI, Inputs, Outputs); + if (!Eval) + return false; + break; + } + + case Hexagon::S2_ct0: + case Hexagon::S2_ct0p: + case Hexagon::S2_ct1: + case Hexagon::S2_ct1p: + { + using namespace Hexagon; + + bool Ones = (Opc == S2_ct1) || (Opc == S2_ct1p); + Register R1(MI.getOperand(1)); + assert(Inputs.has(R1.Reg)); + LatticeCell T; + bool Eval = evaluateCTBr(R1, !Ones, Ones, Inputs, T); + if (!Eval) + return false; + // All of these instructions return a 32-bit value. The evaluate + // will generate the same type as the operand, so truncate the + // result if necessary. 
+ APInt C; + LatticeCell RC = Outputs.get(DefR.Reg); + for (unsigned i = 0; i < T.size(); ++i) { + const Constant *CI = T.Values[i]; + if (constToInt(CI, C) && C.getBitWidth() > 32) + CI = intToConst(C.trunc(32)); + RC.add(CI); + } + Outputs.update(DefR.Reg, RC); + break; + } + + case Hexagon::S2_cl0: + case Hexagon::S2_cl0p: + case Hexagon::S2_cl1: + case Hexagon::S2_cl1p: + case Hexagon::S2_clb: + case Hexagon::S2_clbp: + { + using namespace Hexagon; + + bool OnlyZeros = (Opc == S2_cl0) || (Opc == S2_cl0p); + bool OnlyOnes = (Opc == S2_cl1) || (Opc == S2_cl1p); + Register R1(MI.getOperand(1)); + assert(Inputs.has(R1.Reg)); + LatticeCell T; + bool Eval = evaluateCLBr(R1, !OnlyOnes, !OnlyZeros, Inputs, T); + if (!Eval) + return false; + // All of these instructions return a 32-bit value. The evaluate + // will generate the same type as the operand, so truncate the + // result if necessary. + APInt C; + LatticeCell RC = Outputs.get(DefR.Reg); + for (unsigned i = 0; i < T.size(); ++i) { + const Constant *CI = T.Values[i]; + if (constToInt(CI, C) && C.getBitWidth() > 32) + CI = intToConst(C.trunc(32)); + RC.add(CI); + } + Outputs.update(DefR.Reg, RC); + break; + } + + case Hexagon::S4_extract: + case Hexagon::S4_extractp: + case Hexagon::S2_extractu: + case Hexagon::S2_extractup: + { + bool Signed = (Opc == Hexagon::S4_extract) || + (Opc == Hexagon::S4_extractp); + Register R1(MI.getOperand(1)); + unsigned BW = getRegBitWidth(R1.Reg); + unsigned Bits = MI.getOperand(2).getImm(); + unsigned Offset = MI.getOperand(3).getImm(); + LatticeCell RC = Outputs.get(DefR.Reg); + if (Offset >= BW) { + APInt Zero(BW, 0, false); + RC.add(intToConst(Zero)); + break; + } + if (Offset+Bits > BW) { + // If the requested bitfield extends beyond the most significant bit, + // the extra bits are treated as 0s. To emulate this behavior, reduce + // the number of requested bits, and make the extract unsigned. + Bits = BW-Offset; + Signed = false; + } + bool Eval = evaluateEXTRACTr(R1, BW, Bits, Offset, Signed, Inputs, RC); + if (!Eval) + return false; + Outputs.update(DefR.Reg, RC); + break; + } + + case Hexagon::S2_vsplatrb: + case Hexagon::S2_vsplatrh: + // vabsh, vabsh:sat + // vabsw, vabsw:sat + // vconj:sat + // vrndwh, vrndwh:sat + // vsathb, vsathub, vsatwuh + // vsxtbh, vsxthw + // vtrunehb, vtrunohb + // vzxtbh, vzxthw + { + bool Eval = evaluateHexVector1(MI, Inputs, Outputs); + if (!Eval) + return false; + break; + } + + // TODO: + // A2_vaddh + // A2_vaddhs + // A2_vaddw + // A2_vaddws + } + + return true; +} + +bool HexagonConstEvaluator::evaluate(const Register &R, + const LatticeCell &Input, LatticeCell &Result) { + if (!R.SubReg) { + Result = Input; + return true; + } + const TargetRegisterClass *RC = MRI->getRegClass(R.Reg); + if (RC != &Hexagon::DoubleRegsRegClass) + return false; + if (R.SubReg != Hexagon::isub_lo && R.SubReg != Hexagon::isub_hi) + return false; + + assert(!Input.isTop()); + if (Input.isBottom()) + return false; + + typedef ConstantProperties P; + if (Input.isProperty()) { + uint32_t Ps = Input.properties(); + if (Ps & (P::Zero|P::NaN)) { + uint32_t Ns = (Ps & (P::Zero|P::NaN|P::SignProperties)); + Result.add(Ns); + return true; + } + if (R.SubReg == Hexagon::isub_hi) { + uint32_t Ns = (Ps & P::SignProperties); + Result.add(Ns); + return true; + } + return false; + } + + // The Input cell contains some known values. Pick the word corresponding + // to the subregister. 
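+  // For example, a 64-bit cell value 0x11223344AABBCCDD yields 0xAABBCCDD
+  // for isub_lo and 0x11223344 for isub_hi.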
+ APInt A; + for (unsigned i = 0; i < Input.size(); ++i) { + const Constant *C = Input.Values[i]; + if (!constToInt(C, A)) + return false; + if (!A.isIntN(64)) + return false; + uint64_t U = A.getZExtValue(); + if (R.SubReg == Hexagon::isub_hi) + U >>= 32; + U &= 0xFFFFFFFFULL; + uint32_t U32 = Lo_32(U); + int32_t V32; + memcpy(&V32, &U32, sizeof V32); + IntegerType *Ty = Type::getInt32Ty(CX); + const ConstantInt *C32 = ConstantInt::get(Ty, static_cast<int64_t>(V32)); + Result.add(C32); + } + return true; +} + +bool HexagonConstEvaluator::evaluate(const MachineInstr &BrI, + const CellMap &Inputs, SetVector<const MachineBasicBlock*> &Targets, + bool &FallsThru) { + // We need to evaluate one branch at a time. TII::analyzeBranch checks + // all the branches in a basic block at once, so we cannot use it. + unsigned Opc = BrI.getOpcode(); + bool SimpleBranch = false; + bool Negated = false; + switch (Opc) { + case Hexagon::J2_jumpf: + case Hexagon::J2_jumpfnew: + case Hexagon::J2_jumpfnewpt: + Negated = true; + case Hexagon::J2_jumpt: + case Hexagon::J2_jumptnew: + case Hexagon::J2_jumptnewpt: + // Simple branch: if([!]Pn) jump ... + // i.e. Op0 = predicate, Op1 = branch target. + SimpleBranch = true; + break; + case Hexagon::J2_jump: + Targets.insert(BrI.getOperand(0).getMBB()); + FallsThru = false; + return true; + default: +Undetermined: + // If the branch is of unknown type, assume that all successors are + // executable. + FallsThru = !BrI.isUnconditionalBranch(); + return false; + } + + if (SimpleBranch) { + const MachineOperand &MD = BrI.getOperand(0); + Register PR(MD); + // If the condition operand has a subregister, this is not something + // we currently recognize. + if (PR.SubReg) + goto Undetermined; + assert(Inputs.has(PR.Reg)); + const LatticeCell &PredC = Inputs.get(PR.Reg); + if (PredC.isBottom()) + goto Undetermined; + + uint32_t Props = PredC.properties(); + bool CTrue = false, CFalse = false;; + if (Props & ConstantProperties::Zero) + CFalse = true; + else if (Props & ConstantProperties::NonZero) + CTrue = true; + // If the condition is not known to be either, bail out. + if (!CTrue && !CFalse) + goto Undetermined; + + const MachineBasicBlock *BranchTarget = BrI.getOperand(1).getMBB(); + + FallsThru = false; + if ((!Negated && CTrue) || (Negated && CFalse)) + Targets.insert(BranchTarget); + else if ((!Negated && CFalse) || (Negated && CTrue)) + FallsThru = true; + else + goto Undetermined; + } + + return true; +} + +bool HexagonConstEvaluator::rewrite(MachineInstr &MI, const CellMap &Inputs) { + if (MI.isBranch()) + return rewriteHexBranch(MI, Inputs); + + unsigned Opc = MI.getOpcode(); + switch (Opc) { + default: + break; + case Hexagon::A2_tfrsi: + case Hexagon::A2_tfrpi: + case Hexagon::CONST32: + case Hexagon::CONST64: + case Hexagon::PS_true: + case Hexagon::PS_false: + return false; + } + + unsigned NumOp = MI.getNumOperands(); + if (NumOp == 0) + return false; + + bool AllDefs, Changed; + Changed = rewriteHexConstDefs(MI, Inputs, AllDefs); + // If not all defs have been rewritten (i.e. the instruction defines + // a register that is not compile-time constant), then try to rewrite + // register operands that are known to be constant with immediates. 
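+  // For example, in R0 = A2_add R1, R2 where only R2 has a known constant
+  // value, the def R0 cannot be replaced, but the constant use may still
+  // be foldable into an immediate form of the instruction.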
+ if (!AllDefs) + Changed |= rewriteHexConstUses(MI, Inputs); + + return Changed; +} + +unsigned HexagonConstEvaluator::getRegBitWidth(unsigned Reg) const { + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + if (Hexagon::IntRegsRegClass.hasSubClassEq(RC)) + return 32; + if (Hexagon::DoubleRegsRegClass.hasSubClassEq(RC)) + return 64; + if (Hexagon::PredRegsRegClass.hasSubClassEq(RC)) + return 8; + llvm_unreachable("Invalid register"); + return 0; +} + +uint32_t HexagonConstEvaluator::getCmp(unsigned Opc) { + switch (Opc) { + case Hexagon::C2_cmpeq: + case Hexagon::C2_cmpeqp: + case Hexagon::A4_cmpbeq: + case Hexagon::A4_cmpheq: + case Hexagon::A4_cmpbeqi: + case Hexagon::A4_cmpheqi: + case Hexagon::C2_cmpeqi: + case Hexagon::J4_cmpeqn1_t_jumpnv_nt: + case Hexagon::J4_cmpeqn1_t_jumpnv_t: + case Hexagon::J4_cmpeqi_t_jumpnv_nt: + case Hexagon::J4_cmpeqi_t_jumpnv_t: + case Hexagon::J4_cmpeq_t_jumpnv_nt: + case Hexagon::J4_cmpeq_t_jumpnv_t: + return Comparison::EQ; + + case Hexagon::C4_cmpneq: + case Hexagon::C4_cmpneqi: + case Hexagon::J4_cmpeqn1_f_jumpnv_nt: + case Hexagon::J4_cmpeqn1_f_jumpnv_t: + case Hexagon::J4_cmpeqi_f_jumpnv_nt: + case Hexagon::J4_cmpeqi_f_jumpnv_t: + case Hexagon::J4_cmpeq_f_jumpnv_nt: + case Hexagon::J4_cmpeq_f_jumpnv_t: + return Comparison::NE; + + case Hexagon::C2_cmpgt: + case Hexagon::C2_cmpgtp: + case Hexagon::A4_cmpbgt: + case Hexagon::A4_cmphgt: + case Hexagon::A4_cmpbgti: + case Hexagon::A4_cmphgti: + case Hexagon::C2_cmpgti: + case Hexagon::J4_cmpgtn1_t_jumpnv_nt: + case Hexagon::J4_cmpgtn1_t_jumpnv_t: + case Hexagon::J4_cmpgti_t_jumpnv_nt: + case Hexagon::J4_cmpgti_t_jumpnv_t: + case Hexagon::J4_cmpgt_t_jumpnv_nt: + case Hexagon::J4_cmpgt_t_jumpnv_t: + return Comparison::GTs; + + case Hexagon::C4_cmplte: + case Hexagon::C4_cmpltei: + case Hexagon::J4_cmpgtn1_f_jumpnv_nt: + case Hexagon::J4_cmpgtn1_f_jumpnv_t: + case Hexagon::J4_cmpgti_f_jumpnv_nt: + case Hexagon::J4_cmpgti_f_jumpnv_t: + case Hexagon::J4_cmpgt_f_jumpnv_nt: + case Hexagon::J4_cmpgt_f_jumpnv_t: + return Comparison::LEs; + + case Hexagon::C2_cmpgtu: + case Hexagon::C2_cmpgtup: + case Hexagon::A4_cmpbgtu: + case Hexagon::A4_cmpbgtui: + case Hexagon::A4_cmphgtu: + case Hexagon::A4_cmphgtui: + case Hexagon::C2_cmpgtui: + case Hexagon::J4_cmpgtui_t_jumpnv_nt: + case Hexagon::J4_cmpgtui_t_jumpnv_t: + case Hexagon::J4_cmpgtu_t_jumpnv_nt: + case Hexagon::J4_cmpgtu_t_jumpnv_t: + return Comparison::GTu; + + case Hexagon::J4_cmpltu_f_jumpnv_nt: + case Hexagon::J4_cmpltu_f_jumpnv_t: + return Comparison::GEu; + + case Hexagon::J4_cmpltu_t_jumpnv_nt: + case Hexagon::J4_cmpltu_t_jumpnv_t: + return Comparison::LTu; + + case Hexagon::J4_cmplt_f_jumpnv_nt: + case Hexagon::J4_cmplt_f_jumpnv_t: + return Comparison::GEs; + + case Hexagon::C4_cmplteu: + case Hexagon::C4_cmplteui: + case Hexagon::J4_cmpgtui_f_jumpnv_nt: + case Hexagon::J4_cmpgtui_f_jumpnv_t: + case Hexagon::J4_cmpgtu_f_jumpnv_nt: + case Hexagon::J4_cmpgtu_f_jumpnv_t: + return Comparison::LEu; + + case Hexagon::J4_cmplt_t_jumpnv_nt: + case Hexagon::J4_cmplt_t_jumpnv_t: + return Comparison::LTs; + + default: + break; + } + return Comparison::Unk; +} + +APInt HexagonConstEvaluator::getCmpImm(unsigned Opc, unsigned OpX, + const MachineOperand &MO) { + bool Signed = false; + switch (Opc) { + case Hexagon::A4_cmpbgtui: // u7 + case Hexagon::A4_cmphgtui: // u7 + break; + case Hexagon::A4_cmpheqi: // s8 + case Hexagon::C4_cmpneqi: // s8 + Signed = true; + case Hexagon::A4_cmpbeqi: // u8 + break; + case Hexagon::C2_cmpgtui: // u9 + case 
Hexagon::C4_cmplteui: // u9 + break; + case Hexagon::C2_cmpeqi: // s10 + case Hexagon::C2_cmpgti: // s10 + case Hexagon::C4_cmpltei: // s10 + Signed = true; + break; + case Hexagon::J4_cmpeqi_f_jumpnv_nt: // u5 + case Hexagon::J4_cmpeqi_f_jumpnv_t: // u5 + case Hexagon::J4_cmpeqi_t_jumpnv_nt: // u5 + case Hexagon::J4_cmpeqi_t_jumpnv_t: // u5 + case Hexagon::J4_cmpgti_f_jumpnv_nt: // u5 + case Hexagon::J4_cmpgti_f_jumpnv_t: // u5 + case Hexagon::J4_cmpgti_t_jumpnv_nt: // u5 + case Hexagon::J4_cmpgti_t_jumpnv_t: // u5 + case Hexagon::J4_cmpgtui_f_jumpnv_nt: // u5 + case Hexagon::J4_cmpgtui_f_jumpnv_t: // u5 + case Hexagon::J4_cmpgtui_t_jumpnv_nt: // u5 + case Hexagon::J4_cmpgtui_t_jumpnv_t: // u5 + break; + default: + llvm_unreachable("Unhandled instruction"); + break; + } + + uint64_t Val = MO.getImm(); + return APInt(32, Val, Signed); +} + +void HexagonConstEvaluator::replaceWithNop(MachineInstr &MI) { + MI.setDesc(HII.get(Hexagon::A2_nop)); + while (MI.getNumOperands() > 0) + MI.RemoveOperand(0); +} + +bool HexagonConstEvaluator::evaluateHexRSEQ32(Register RL, Register RH, + const CellMap &Inputs, LatticeCell &Result) { + assert(Inputs.has(RL.Reg) && Inputs.has(RH.Reg)); + LatticeCell LSL, LSH; + if (!getCell(RL, Inputs, LSL) || !getCell(RH, Inputs, LSH)) + return false; + if (LSL.isProperty() || LSH.isProperty()) + return false; + + unsigned LN = LSL.size(), HN = LSH.size(); + SmallVector<APInt,4> LoVs(LN), HiVs(HN); + for (unsigned i = 0; i < LN; ++i) { + bool Eval = constToInt(LSL.Values[i], LoVs[i]); + if (!Eval) + return false; + assert(LoVs[i].getBitWidth() == 32); + } + for (unsigned i = 0; i < HN; ++i) { + bool Eval = constToInt(LSH.Values[i], HiVs[i]); + if (!Eval) + return false; + assert(HiVs[i].getBitWidth() == 32); + } + + for (unsigned i = 0; i < HiVs.size(); ++i) { + APInt HV = HiVs[i].zextOrSelf(64) << 32; + for (unsigned j = 0; j < LoVs.size(); ++j) { + APInt LV = LoVs[j].zextOrSelf(64); + const Constant *C = intToConst(HV | LV); + Result.add(C); + if (Result.isBottom()) + return false; + } + } + return !Result.isBottom(); +} + +bool HexagonConstEvaluator::evaluateHexCompare(const MachineInstr &MI, + const CellMap &Inputs, CellMap &Outputs) { + unsigned Opc = MI.getOpcode(); + bool Classic = false; + switch (Opc) { + case Hexagon::C2_cmpeq: + case Hexagon::C2_cmpeqp: + case Hexagon::C2_cmpgt: + case Hexagon::C2_cmpgtp: + case Hexagon::C2_cmpgtu: + case Hexagon::C2_cmpgtup: + case Hexagon::C2_cmpeqi: + case Hexagon::C2_cmpgti: + case Hexagon::C2_cmpgtui: + // Classic compare: Dst0 = CMP Src1, Src2 + Classic = true; + break; + default: + // Not handling other compare instructions now. + return false; + } + + if (Classic) { + const MachineOperand &Src1 = MI.getOperand(1); + const MachineOperand &Src2 = MI.getOperand(2); + + bool Result; + unsigned Opc = MI.getOpcode(); + bool Computed = evaluateHexCompare2(Opc, Src1, Src2, Inputs, Result); + if (Computed) { + // Only create a zero/non-zero cell. At this time there isn't really + // much need for specific values. + Register DefR(MI.getOperand(0)); + LatticeCell L = Outputs.get(DefR.Reg); + uint32_t P = Result ? 
ConstantProperties::NonZero + : ConstantProperties::Zero; + L.add(P); + Outputs.update(DefR.Reg, L); + return true; + } + } + + return false; +} + +bool HexagonConstEvaluator::evaluateHexCompare2(unsigned Opc, + const MachineOperand &Src1, const MachineOperand &Src2, + const CellMap &Inputs, bool &Result) { + uint32_t Cmp = getCmp(Opc); + bool Reg1 = Src1.isReg(), Reg2 = Src2.isReg(); + bool Imm1 = Src1.isImm(), Imm2 = Src2.isImm(); + if (Reg1) { + Register R1(Src1); + if (Reg2) { + Register R2(Src2); + return evaluateCMPrr(Cmp, R1, R2, Inputs, Result); + } else if (Imm2) { + APInt A2 = getCmpImm(Opc, 2, Src2); + return evaluateCMPri(Cmp, R1, A2, Inputs, Result); + } + } else if (Imm1) { + APInt A1 = getCmpImm(Opc, 1, Src1); + if (Reg2) { + Register R2(Src2); + uint32_t NegCmp = Comparison::negate(Cmp); + return evaluateCMPri(NegCmp, R2, A1, Inputs, Result); + } else if (Imm2) { + APInt A2 = getCmpImm(Opc, 2, Src2); + return evaluateCMPii(Cmp, A1, A2, Result); + } + } + // Unknown kind of comparison. + return false; +} + +bool HexagonConstEvaluator::evaluateHexLogical(const MachineInstr &MI, + const CellMap &Inputs, CellMap &Outputs) { + unsigned Opc = MI.getOpcode(); + if (MI.getNumOperands() != 3) + return false; + const MachineOperand &Src1 = MI.getOperand(1); + const MachineOperand &Src2 = MI.getOperand(2); + Register R1(Src1); + bool Eval = false; + LatticeCell RC; + switch (Opc) { + default: + return false; + case Hexagon::A2_and: + case Hexagon::A2_andp: + Eval = evaluateANDrr(R1, Register(Src2), Inputs, RC); + break; + case Hexagon::A2_andir: { + APInt A(32, Src2.getImm(), true); + Eval = evaluateANDri(R1, A, Inputs, RC); + break; + } + case Hexagon::A2_or: + case Hexagon::A2_orp: + Eval = evaluateORrr(R1, Register(Src2), Inputs, RC); + break; + case Hexagon::A2_orir: { + APInt A(32, Src2.getImm(), true); + Eval = evaluateORri(R1, A, Inputs, RC); + break; + } + case Hexagon::A2_xor: + case Hexagon::A2_xorp: + Eval = evaluateXORrr(R1, Register(Src2), Inputs, RC); + break; + } + if (Eval) { + Register DefR(MI.getOperand(0)); + Outputs.update(DefR.Reg, RC); + } + return Eval; +} + +bool HexagonConstEvaluator::evaluateHexCondMove(const MachineInstr &MI, + const CellMap &Inputs, CellMap &Outputs) { + // Dst0 = Cond1 ? 
Src2 : Src3 + Register CR(MI.getOperand(1)); + assert(Inputs.has(CR.Reg)); + LatticeCell LS; + if (!getCell(CR, Inputs, LS)) + return false; + uint32_t Ps = LS.properties(); + unsigned TakeOp; + if (Ps & ConstantProperties::Zero) + TakeOp = 3; + else if (Ps & ConstantProperties::NonZero) + TakeOp = 2; + else + return false; + + const MachineOperand &ValOp = MI.getOperand(TakeOp); + Register DefR(MI.getOperand(0)); + LatticeCell RC = Outputs.get(DefR.Reg); + + if (ValOp.isImm()) { + int64_t V = ValOp.getImm(); + unsigned W = getRegBitWidth(DefR.Reg); + APInt A(W, V, true); + const Constant *C = intToConst(A); + RC.add(C); + Outputs.update(DefR.Reg, RC); + return true; + } + if (ValOp.isReg()) { + Register R(ValOp); + const LatticeCell &LR = Inputs.get(R.Reg); + LatticeCell LSR; + if (!evaluate(R, LR, LSR)) + return false; + RC.meet(LSR); + Outputs.update(DefR.Reg, RC); + return true; + } + return false; +} + +bool HexagonConstEvaluator::evaluateHexExt(const MachineInstr &MI, + const CellMap &Inputs, CellMap &Outputs) { + // Dst0 = ext R1 + Register R1(MI.getOperand(1)); + assert(Inputs.has(R1.Reg)); + + unsigned Opc = MI.getOpcode(); + unsigned Bits; + switch (Opc) { + case Hexagon::A2_sxtb: + case Hexagon::A2_zxtb: + Bits = 8; + break; + case Hexagon::A2_sxth: + case Hexagon::A2_zxth: + Bits = 16; + break; + case Hexagon::A2_sxtw: + Bits = 32; + break; + } + + bool Signed = false; + switch (Opc) { + case Hexagon::A2_sxtb: + case Hexagon::A2_sxth: + case Hexagon::A2_sxtw: + Signed = true; + break; + } + + Register DefR(MI.getOperand(0)); + unsigned BW = getRegBitWidth(DefR.Reg); + LatticeCell RC = Outputs.get(DefR.Reg); + bool Eval = Signed ? evaluateSEXTr(R1, BW, Bits, Inputs, RC) + : evaluateZEXTr(R1, BW, Bits, Inputs, RC); + if (!Eval) + return false; + Outputs.update(DefR.Reg, RC); + return true; +} + +bool HexagonConstEvaluator::evaluateHexVector1(const MachineInstr &MI, + const CellMap &Inputs, CellMap &Outputs) { + // DefR = op R1 + Register DefR(MI.getOperand(0)); + Register R1(MI.getOperand(1)); + assert(Inputs.has(R1.Reg)); + LatticeCell RC = Outputs.get(DefR.Reg); + bool Eval; + + unsigned Opc = MI.getOpcode(); + switch (Opc) { + case Hexagon::S2_vsplatrb: + // Rd = 4 times Rs:0..7 + Eval = evaluateSplatr(R1, 8, 4, Inputs, RC); + break; + case Hexagon::S2_vsplatrh: + // Rdd = 4 times Rs:0..15 + Eval = evaluateSplatr(R1, 16, 4, Inputs, RC); + break; + default: + return false; + } + + if (!Eval) + return false; + Outputs.update(DefR.Reg, RC); + return true; +} + +bool HexagonConstEvaluator::rewriteHexConstDefs(MachineInstr &MI, + const CellMap &Inputs, bool &AllDefs) { + AllDefs = false; + + // Some diagnostics. + // DEBUG({...}) gets confused with all this code as an argument. +#ifndef NDEBUG + bool Debugging = DebugFlag && isCurrentDebugType(DEBUG_TYPE); + if (Debugging) { + bool Const = true, HasUse = false; + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isReg() || !MO.isUse() || MO.isImplicit()) + continue; + Register R(MO); + if (!TargetRegisterInfo::isVirtualRegister(R.Reg)) + continue; + HasUse = true; + // PHIs can legitimately have "top" cells after propagation. 
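+      // (An input register that is only defined on a never-executed path
+      // may never have received a cell, since visitPHI skips operands on
+      // non-executable edges.)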
+ if (!MI.isPHI() && !Inputs.has(R.Reg)) { + dbgs() << "Top " << PrintReg(R.Reg, &HRI, R.SubReg) + << " in MI: " << MI; + continue; + } + const LatticeCell &L = Inputs.get(R.Reg); + Const &= L.isSingle(); + if (!Const) + break; + } + if (HasUse && Const) { + if (!MI.isCopy()) { + dbgs() << "CONST: " << MI; + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isReg() || !MO.isUse() || MO.isImplicit()) + continue; + unsigned R = MO.getReg(); + dbgs() << PrintReg(R, &TRI) << ": " << Inputs.get(R) << "\n"; + } + } + } + } +#endif + + // Avoid generating TFRIs for register transfers---this preserves the + // coalescing opportunities. + if (MI.isCopy()) + return false; + + // Collect all virtual register-def operands. + SmallVector<unsigned,2> DefRegs; + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned R = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(R)) + continue; + assert(!MO.getSubReg()); + assert(Inputs.has(R)); + DefRegs.push_back(R); + } + + MachineBasicBlock &B = *MI.getParent(); + const DebugLoc &DL = MI.getDebugLoc(); + unsigned ChangedNum = 0; +#ifndef NDEBUG + SmallVector<const MachineInstr*,4> NewInstrs; +#endif + + // For each defined register, if it is a constant, create an instruction + // NewR = const + // and replace all uses of the defined register with NewR. + for (unsigned i = 0, n = DefRegs.size(); i < n; ++i) { + unsigned R = DefRegs[i]; + const LatticeCell &L = Inputs.get(R); + if (L.isBottom()) + continue; + const TargetRegisterClass *RC = MRI->getRegClass(R); + MachineBasicBlock::iterator At = MI.getIterator(); + + if (!L.isSingle()) { + // If this is a zero/non-zero cell, we can fold a definition + // of a predicate register. + typedef ConstantProperties P; + uint64_t Ps = L.properties(); + if (!(Ps & (P::Zero|P::NonZero))) + continue; + const TargetRegisterClass *PredRC = &Hexagon::PredRegsRegClass; + if (RC != PredRC) + continue; + const MCInstrDesc *NewD = (Ps & P::Zero) ? + &HII.get(Hexagon::PS_false) : + &HII.get(Hexagon::PS_true); + unsigned NewR = MRI->createVirtualRegister(PredRC); + const MachineInstrBuilder &MIB = BuildMI(B, At, DL, *NewD, NewR); + (void)MIB; +#ifndef NDEBUG + NewInstrs.push_back(&*MIB); +#endif + replaceAllRegUsesWith(R, NewR); + } else { + // This cell has a single value. 
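For the predicate-register fold just above, only the zero/non-zero property of a non-single cell matters. A hedged sketch (PS_false and PS_true are the opcode names from the patch; everything else is illustrative):

// Pick the pseudo that materializes a known predicate value; an empty
// result means the cell's properties do not pin the value down.
#include <cstdint>
#include <string>

enum ConstantProperties : uint32_t { Zero = 1u << 0, NonZero = 1u << 1 };

std::string foldPredicateDef(uint64_t Ps) {
  if (!(Ps & (Zero | NonZero)))
    return "";                         // property unknown: keep the def
  return (Ps & Zero) ? "PS_false" : "PS_true";
}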
+ APInt A; + if (!constToInt(L.Value, A) || !A.isSignedIntN(64)) + continue; + const TargetRegisterClass *NewRC; + const MCInstrDesc *NewD; + + unsigned W = getRegBitWidth(R); + int64_t V = A.getSExtValue(); + assert(W == 32 || W == 64); + if (W == 32) + NewRC = &Hexagon::IntRegsRegClass; + else + NewRC = &Hexagon::DoubleRegsRegClass; + unsigned NewR = MRI->createVirtualRegister(NewRC); + const MachineInstr *NewMI; + + if (W == 32) { + NewD = &HII.get(Hexagon::A2_tfrsi); + NewMI = BuildMI(B, At, DL, *NewD, NewR) + .addImm(V); + } else { + if (A.isSignedIntN(8)) { + NewD = &HII.get(Hexagon::A2_tfrpi); + NewMI = BuildMI(B, At, DL, *NewD, NewR) + .addImm(V); + } else { + int32_t Hi = V >> 32; + int32_t Lo = V & 0xFFFFFFFFLL; + if (isInt<8>(Hi) && isInt<8>(Lo)) { + NewD = &HII.get(Hexagon::A2_combineii); + NewMI = BuildMI(B, At, DL, *NewD, NewR) + .addImm(Hi) + .addImm(Lo); + } else { + NewD = &HII.get(Hexagon::CONST64); + NewMI = BuildMI(B, At, DL, *NewD, NewR) + .addImm(V); + } + } + } + (void)NewMI; +#ifndef NDEBUG + NewInstrs.push_back(NewMI); +#endif + replaceAllRegUsesWith(R, NewR); + } + ChangedNum++; + } + + DEBUG({ + if (!NewInstrs.empty()) { + MachineFunction &MF = *MI.getParent()->getParent(); + dbgs() << "In function: " << MF.getFunction()->getName() << "\n"; + dbgs() << "Rewrite: for " << MI << " created " << *NewInstrs[0]; + for (unsigned i = 1; i < NewInstrs.size(); ++i) + dbgs() << " " << *NewInstrs[i]; + } + }); + + AllDefs = (ChangedNum == DefRegs.size()); + return ChangedNum > 0; +} + +bool HexagonConstEvaluator::rewriteHexConstUses(MachineInstr &MI, + const CellMap &Inputs) { + bool Changed = false; + unsigned Opc = MI.getOpcode(); + MachineBasicBlock &B = *MI.getParent(); + const DebugLoc &DL = MI.getDebugLoc(); + MachineBasicBlock::iterator At = MI.getIterator(); + MachineInstr *NewMI = nullptr; + + switch (Opc) { + case Hexagon::M2_maci: + // Convert DefR += mpyi(R2, R3) + // to DefR += mpyi(R, #imm), + // or DefR -= mpyi(R, #imm). + { + Register DefR(MI.getOperand(0)); + assert(!DefR.SubReg); + Register R2(MI.getOperand(2)); + Register R3(MI.getOperand(3)); + assert(Inputs.has(R2.Reg) && Inputs.has(R3.Reg)); + LatticeCell LS2, LS3; + // It is enough to get one of the input cells, since we will only try + // to replace one argument---whichever happens to be a single constant. + bool HasC2 = getCell(R2, Inputs, LS2), HasC3 = getCell(R3, Inputs, LS3); + if (!HasC2 && !HasC3) + return false; + bool Zero = ((HasC2 && (LS2.properties() & ConstantProperties::Zero)) || + (HasC3 && (LS3.properties() & ConstantProperties::Zero))); + // If one of the operands is zero, eliminate the multiplication. + if (Zero) { + // DefR == R1 (tied operands). + MachineOperand &Acc = MI.getOperand(1); + Register R1(Acc); + unsigned NewR = R1.Reg; + if (R1.SubReg) { + // Generate COPY. FIXME: Replace with the register:subregister. + const TargetRegisterClass *RC = MRI->getRegClass(DefR.Reg); + NewR = MRI->createVirtualRegister(RC); + NewMI = BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR) + .addReg(R1.Reg, getRegState(Acc), R1.SubReg); + } + replaceAllRegUsesWith(DefR.Reg, NewR); + MRI->clearKillFlags(NewR); + Changed = true; + break; + } + + bool Swap = false; + if (!LS3.isSingle()) { + if (!LS2.isSingle()) + return false; + Swap = true; + } + const LatticeCell &LI = Swap ? LS2 : LS3; + const MachineOperand &OpR2 = Swap ? MI.getOperand(3) + : MI.getOperand(2); + // LI is single here. 
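The cascade above picks progressively cheaper ways to materialize a 64-bit constant. A self-contained sketch of that choice (opcode names from the patch; isInt8 mimics llvm::isInt<8> and is an assumption of this illustration):

#include <cassert>
#include <cstdint>
#include <string>

static bool isInt8(int64_t V) { return V >= -128 && V <= 127; }

std::string pickTransferOpcode(int64_t V) {
  if (isInt8(V))
    return "A2_tfrpi";      // whole value fits in a signed byte
  int32_t Hi = static_cast<int32_t>(V >> 32);
  int32_t Lo = static_cast<int32_t>(V & 0xFFFFFFFFLL);
  if (isInt8(Hi) && isInt8(Lo))
    return "A2_combineii";  // each 32-bit half fits in a signed byte
  return "CONST64";         // otherwise use the CONST64 pseudo
}

int main() {
  assert(pickTransferOpcode(-1) == "A2_tfrpi");
  assert(pickTransferOpcode((int64_t(3) << 32) | 7) == "A2_combineii");
  return 0;
}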
+ APInt A; + if (!constToInt(LI.Value, A) || !A.isSignedIntN(8)) + return false; + int64_t V = A.getSExtValue(); + const MCInstrDesc &D = (V >= 0) ? HII.get(Hexagon::M2_macsip) + : HII.get(Hexagon::M2_macsin); + if (V < 0) + V = -V; + const TargetRegisterClass *RC = MRI->getRegClass(DefR.Reg); + unsigned NewR = MRI->createVirtualRegister(RC); + const MachineOperand &Src1 = MI.getOperand(1); + NewMI = BuildMI(B, At, DL, D, NewR) + .addReg(Src1.getReg(), getRegState(Src1), Src1.getSubReg()) + .addReg(OpR2.getReg(), getRegState(OpR2), OpR2.getSubReg()) + .addImm(V); + replaceAllRegUsesWith(DefR.Reg, NewR); + Changed = true; + break; + } + + case Hexagon::A2_and: + { + Register R1(MI.getOperand(1)); + Register R2(MI.getOperand(2)); + assert(Inputs.has(R1.Reg) && Inputs.has(R2.Reg)); + LatticeCell LS1, LS2; + unsigned CopyOf = 0; + // Check if any of the operands is -1 (i.e. all bits set). + if (getCell(R1, Inputs, LS1) && LS1.isSingle()) { + APInt M1; + if (constToInt(LS1.Value, M1) && !~M1) + CopyOf = 2; + } + else if (getCell(R2, Inputs, LS2) && LS2.isSingle()) { + APInt M1; + if (constToInt(LS2.Value, M1) && !~M1) + CopyOf = 1; + } + if (!CopyOf) + return false; + MachineOperand &SO = MI.getOperand(CopyOf); + Register SR(SO); + Register DefR(MI.getOperand(0)); + unsigned NewR = SR.Reg; + if (SR.SubReg) { + const TargetRegisterClass *RC = MRI->getRegClass(DefR.Reg); + NewR = MRI->createVirtualRegister(RC); + NewMI = BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR) + .addReg(SR.Reg, getRegState(SO), SR.SubReg); + } + replaceAllRegUsesWith(DefR.Reg, NewR); + MRI->clearKillFlags(NewR); + Changed = true; + } + break; + + case Hexagon::A2_or: + { + Register R1(MI.getOperand(1)); + Register R2(MI.getOperand(2)); + assert(Inputs.has(R1.Reg) && Inputs.has(R2.Reg)); + LatticeCell LS1, LS2; + unsigned CopyOf = 0; + typedef ConstantProperties P; + if (getCell(R1, Inputs, LS1) && (LS1.properties() & P::Zero)) + CopyOf = 2; + else if (getCell(R2, Inputs, LS2) && (LS2.properties() & P::Zero)) + CopyOf = 1; + if (!CopyOf) + return false; + MachineOperand &SO = MI.getOperand(CopyOf); + Register SR(SO); + Register DefR(MI.getOperand(0)); + unsigned NewR = SR.Reg; + if (SR.SubReg) { + const TargetRegisterClass *RC = MRI->getRegClass(DefR.Reg); + NewR = MRI->createVirtualRegister(RC); + NewMI = BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR) + .addReg(SR.Reg, getRegState(SO), SR.SubReg); + } + replaceAllRegUsesWith(DefR.Reg, NewR); + MRI->clearKillFlags(NewR); + Changed = true; + } + break; + } + + if (NewMI) { + // clear all the kill flags of this new instruction. 
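Both rewrites above are algebraic identities: x & -1 == x and x | 0 == x, so the logical instruction reduces to a copy of the other operand. A minimal model under those assumptions (not the pass's real helpers):

#include <cstdint>
#include <optional>

// Given the known constant values of the two AND inputs (if any),
// return the index (1 or 2) of the operand the result copies, if any.
std::optional<unsigned> foldAndToCopy(std::optional<int64_t> C1,
                                      std::optional<int64_t> C2) {
  if (C1 && ~*C1 == 0) return 2; // first input all-ones: take the second
  if (C2 && ~*C2 == 0) return 1; // second input all-ones: take the first
  return std::nullopt;
}

The OR case is symmetric, with a test against zero instead of all-ones.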
+ for (MachineOperand &MO : NewMI->operands()) + if (MO.isReg() && MO.isUse()) + MO.setIsKill(false); + } + + DEBUG({ + if (NewMI) { + dbgs() << "Rewrite: for " << MI; + if (NewMI != &MI) + dbgs() << " created " << *NewMI; + else + dbgs() << " modified the instruction itself and created:" << *NewMI; + } + }); + + return Changed; +} + +void HexagonConstEvaluator::replaceAllRegUsesWith(unsigned FromReg, + unsigned ToReg) { + assert(TargetRegisterInfo::isVirtualRegister(FromReg)); + assert(TargetRegisterInfo::isVirtualRegister(ToReg)); + for (auto I = MRI->use_begin(FromReg), E = MRI->use_end(); I != E;) { + MachineOperand &O = *I; + ++I; + O.setReg(ToReg); + } +} + +bool HexagonConstEvaluator::rewriteHexBranch(MachineInstr &BrI, + const CellMap &Inputs) { + MachineBasicBlock &B = *BrI.getParent(); + unsigned NumOp = BrI.getNumOperands(); + if (!NumOp) + return false; + + bool FallsThru; + SetVector<const MachineBasicBlock*> Targets; + bool Eval = evaluate(BrI, Inputs, Targets, FallsThru); + unsigned NumTargets = Targets.size(); + if (!Eval || NumTargets > 1 || (NumTargets == 1 && FallsThru)) + return false; + if (BrI.getOpcode() == Hexagon::J2_jump) + return false; + + DEBUG(dbgs() << "Rewrite(BB#" << B.getNumber() << "):" << BrI); + bool Rewritten = false; + if (NumTargets > 0) { + assert(!FallsThru && "This should have been checked before"); + // MIB.addMBB needs non-const pointer. + MachineBasicBlock *TargetB = const_cast<MachineBasicBlock*>(Targets[0]); + bool Moot = B.isLayoutSuccessor(TargetB); + if (!Moot) { + // If we build a branch here, we must make sure that it won't be + // erased as "non-executable". We can't mark any new instructions + // as executable here, so we need to overwrite the BrI, which we + // know is executable. + const MCInstrDesc &JD = HII.get(Hexagon::J2_jump); + auto NI = BuildMI(B, BrI.getIterator(), BrI.getDebugLoc(), JD) + .addMBB(TargetB); + BrI.setDesc(JD); + while (BrI.getNumOperands() > 0) + BrI.RemoveOperand(0); + // This ensures that all implicit operands (e.g. %R31<imp-def>, etc) + // are present in the rewritten branch. + for (auto &Op : NI->operands()) + BrI.addOperand(Op); + NI->eraseFromParent(); + Rewritten = true; + } + } + + // Do not erase instructions. A newly created instruction could get + // the same address as an instruction marked as executable during the + // propagation. + if (!Rewritten) + replaceWithNop(BrI); + return true; +} + +FunctionPass *llvm::createHexagonConstPropagationPass() { + return new HexagonConstPropagation(); +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp index face0f3..3608099 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp @@ -11,13 +11,9 @@ // to move them together. If we can move them next to each other we do so and // replace them with a combine instruction. 
//===----------------------------------------------------------------------===// -#include "llvm/PassSupport.h" -#include "Hexagon.h" #include "HexagonInstrInfo.h" -#include "HexagonMachineFunctionInfo.h" -#include "HexagonRegisterInfo.h" #include "HexagonSubtarget.h" -#include "HexagonTargetMachine.h" +#include "llvm/PassSupport.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -64,6 +60,7 @@ namespace { class HexagonCopyToCombine : public MachineFunctionPass { const HexagonInstrInfo *TII; const TargetRegisterInfo *TRI; + const HexagonSubtarget *ST; bool ShouldCombineAggressively; DenseSet<MachineInstr *> PotentiallyNewifiableTFR; @@ -80,7 +77,7 @@ public: MachineFunctionPass::getAnalysisUsage(AU); } - const char *getPassName() const override { + StringRef getPassName() const override { return "Hexagon Copy-To-Combine Pass"; } @@ -88,7 +85,7 @@ public: MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( - MachineFunctionProperties::Property::AllVRegsAllocated); + MachineFunctionProperties::Property::NoVRegs); } private: @@ -163,6 +160,10 @@ static bool isCombinableInstType(MachineInstr &MI, const HexagonInstrInfo *TII, (ShouldCombineAggressively || NotExt); } + case Hexagon::V6_vassign: + case Hexagon::V6_vassign_128B: + return true; + default: break; } @@ -186,11 +187,22 @@ static bool areCombinableOperations(const TargetRegisterInfo *TRI, MachineInstr &LowRegInst, bool AllowC64) { unsigned HiOpc = HighRegInst.getOpcode(); unsigned LoOpc = LowRegInst.getOpcode(); - (void)HiOpc; // Fix compiler warning - (void)LoOpc; // Fix compiler warning - assert((HiOpc == Hexagon::A2_tfr || HiOpc == Hexagon::A2_tfrsi) && - (LoOpc == Hexagon::A2_tfr || LoOpc == Hexagon::A2_tfrsi) && - "Assume individual instructions are of a combinable type"); + + auto verifyOpc = [](unsigned Opc) -> void { + switch (Opc) { + case Hexagon::A2_tfr: + case Hexagon::A2_tfrsi: + case Hexagon::V6_vassign: + break; + default: + llvm_unreachable("Unexpected opcode"); + } + }; + verifyOpc(HiOpc); + verifyOpc(LoOpc); + + if (HiOpc == Hexagon::V6_vassign || LoOpc == Hexagon::V6_vassign) + return HiOpc == LoOpc; if (!AllowC64) { // There is no combine of two constant extended values. @@ -216,9 +228,13 @@ static bool areCombinableOperations(const TargetRegisterInfo *TRI, } static bool isEvenReg(unsigned Reg) { - assert(TargetRegisterInfo::isPhysicalRegister(Reg) && - Hexagon::IntRegsRegClass.contains(Reg)); - return (Reg - Hexagon::R0) % 2 == 0; + assert(TargetRegisterInfo::isPhysicalRegister(Reg)); + if (Hexagon::IntRegsRegClass.contains(Reg)) + return (Reg - Hexagon::R0) % 2 == 0; + if (Hexagon::VectorRegsRegClass.contains(Reg) || + Hexagon::VectorRegs128BRegClass.contains(Reg)) + return (Reg - Hexagon::V0) % 2 == 0; + llvm_unreachable("Invalid register"); } static void removeKillInfo(MachineInstr &MI, unsigned RegNotKilled) { @@ -385,7 +401,7 @@ HexagonCopyToCombine::findPotentialNewifiableTFRs(MachineBasicBlock &BB) { continue; // Mark TFRs that feed a potential new value store as such. - if (TII->mayBeNewStore(&MI)) { + if (TII->mayBeNewStore(MI)) { // Look for uses of TFR instructions. for (unsigned OpdIdx = 0, OpdE = MI.getNumOperands(); OpdIdx != OpdE; ++OpdIdx) { @@ -446,8 +462,9 @@ bool HexagonCopyToCombine::runOnMachineFunction(MachineFunction &MF) { bool HasChanged = false; // Get target info. 
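The isEvenReg change above encodes the pairing rule that both integer and HVX vector register pairs start at an even register. A rough standalone model (indices only; the real check works on physical register numbers):

// Two transfer destinations can only form a combine pair when they are
// adjacent and the lower one is even (R1:0, R3:2, ... and V1:0, V3:2, ...).
bool canFormPair(unsigned LoIndex, unsigned HiIndex) {
  return HiIndex == LoIndex + 1 && LoIndex % 2 == 0;
}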
- TRI = MF.getSubtarget().getRegisterInfo(); - TII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo(); + ST = &MF.getSubtarget<HexagonSubtarget>(); + TRI = ST->getRegisterInfo(); + TII = ST->getInstrInfo(); const Function *F = MF.getFunction(); bool OptForSize = F->hasFnAttribute(Attribute::OptimizeForSize); @@ -504,8 +521,7 @@ MachineInstr *HexagonCopyToCombine::findPairable(MachineInstr &I1, bool &DoInsertAtI1, bool AllowC64) { MachineBasicBlock::iterator I2 = std::next(MachineBasicBlock::iterator(I1)); - - while (I2->isDebugValue()) + while (I2 != I1.getParent()->end() && I2->isDebugValue()) ++I2; unsigned I1DestReg = I1.getOperand(0).getReg(); @@ -564,14 +580,26 @@ void HexagonCopyToCombine::combine(MachineInstr &I1, MachineInstr &I2, unsigned I2DestReg = I2.getOperand(0).getReg(); bool IsI1Loreg = (I2DestReg - I1DestReg) == 1; unsigned LoRegDef = IsI1Loreg ? I1DestReg : I2DestReg; + unsigned SubLo; + + const TargetRegisterClass *SuperRC = nullptr; + if (Hexagon::IntRegsRegClass.contains(LoRegDef)) { + SuperRC = &Hexagon::DoubleRegsRegClass; + SubLo = Hexagon::isub_lo; + } else if (Hexagon::VectorRegsRegClass.contains(LoRegDef)) { + assert(ST->useHVXOps()); + if (ST->useHVXSglOps()) + SuperRC = &Hexagon::VecDblRegsRegClass; + else + SuperRC = &Hexagon::VecDblRegs128BRegClass; + SubLo = Hexagon::vsub_lo; + } else + llvm_unreachable("Unexpected register class"); // Get the double word register. - unsigned DoubleRegDest = - TRI->getMatchingSuperReg(LoRegDef, Hexagon::subreg_loreg, - &Hexagon::DoubleRegsRegClass); + unsigned DoubleRegDest = TRI->getMatchingSuperReg(LoRegDef, SubLo, SuperRC); assert(DoubleRegDest != 0 && "Expect a valid register"); - // Setup source operands. MachineOperand &LoOperand = IsI1Loreg ? I1.getOperand(1) : I2.getOperand(1); MachineOperand &HiOperand = IsI1Loreg ? I2.getOperand(1) : I1.getOperand(1); @@ -605,7 +633,7 @@ void HexagonCopyToCombine::combine(MachineInstr &I1, MachineInstr &I2, for (auto NewMI : DbgMItoMove) { // If iterator MI is pointing to DEBUG_VAL, make sure // MI now points to next relevant instruction. - if (NewMI == (MachineInstr*)MI) + if (NewMI == MI) ++MI; BB->splice(InsertPt, BB, NewMI); } @@ -628,8 +656,7 @@ void HexagonCopyToCombine::emitConst64(MachineBasicBlock::iterator &InsertPt, int64_t V = HiOperand.getImm(); V = (V << 32) | (0x0ffffffffLL & LoOperand.getImm()); - BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::CONST64_Int_Real), - DoubleDestReg) + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::CONST64), DoubleDestReg) .addImm(V); } @@ -838,7 +865,19 @@ void HexagonCopyToCombine::emitCombineRR(MachineBasicBlock::iterator &InsertPt, // Insert new combine instruction. 
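The emitConst64 hunk above builds the 64-bit payload from two 32-bit immediates. A standalone illustration (the unsigned casts merely sidestep shifting a negative value):

#include <cassert>
#include <cstdint>

int64_t packConst64(int32_t Hi, int32_t Lo) {
  uint64_t H = static_cast<uint32_t>(Hi);
  uint64_t L = static_cast<uint32_t>(Lo);
  return static_cast<int64_t>((H << 32) | L);
}

int main() {
  assert(packConst64(1, -1) == 0x1FFFFFFFFLL);
  return 0;
}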
// DoubleRegDest = combine HiReg, LoReg - BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combinew), DoubleDestReg) + unsigned NewOpc; + if (Hexagon::DoubleRegsRegClass.contains(DoubleDestReg)) { + NewOpc = Hexagon::A2_combinew; + } else if (Hexagon::VecDblRegsRegClass.contains(DoubleDestReg)) { + assert(ST->useHVXOps()); + if (ST->useHVXSglOps()) + NewOpc = Hexagon::V6_vcombine; + else + NewOpc = Hexagon::V6_vcombine_128B; + } else + llvm_unreachable("Unexpected register"); + + BuildMI(*BB, InsertPt, DL, TII->get(NewOpc), DoubleDestReg) .addReg(HiReg, HiRegKillFlag) .addReg(LoReg, LoRegKillFlag); } diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp index 2665acd..a5351cd 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp @@ -52,7 +52,7 @@ // %vreg41<def> = S2_tstbit_i %vreg40<kill>, 0 // spec-> %vreg11<def> = A2_addp %vreg6, %vreg10 // pred-> S2_pstorerdf_io %vreg41, %vreg32, 16, %vreg11 -// %vreg46<def> = MUX64_rr %vreg41, %vreg6, %vreg11 +// %vreg46<def> = PS_pselect %vreg41, %vreg6, %vreg11 // %vreg13<def> = A2_addp %vreg7, %vreg46 // %vreg42<def> = C2_cmpeqi %vreg9, 10 // J2_jumpf %vreg42<kill>, <BB#3>, %PC<imp-def,dead> @@ -61,32 +61,46 @@ #define DEBUG_TYPE "hexagon-eif" +#include "Hexagon.h" +#include "HexagonInstrInfo.h" +#include "HexagonSubtarget.h" #include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/Pass.h" +#include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "HexagonTargetMachine.h" - -#include <functional> +#include "llvm/Target/TargetRegisterInfo.h" +#include <cassert> +#include <iterator> using namespace llvm; namespace llvm { + FunctionPass *createHexagonEarlyIfConversion(); void initializeHexagonEarlyIfConversionPass(PassRegistry& Registry); -} + +} // end namespace llvm namespace { + cl::opt<bool> EnableHexagonBP("enable-hexagon-br-prob", cl::Hidden, cl::init(false), cl::desc("Enable branch probability info")); cl::opt<unsigned> SizeLimit("eif-limit", cl::init(6), cl::Hidden, @@ -103,18 +117,22 @@ namespace { } struct FlowPattern { - FlowPattern() : SplitB(0), TrueB(0), FalseB(0), JoinB(0), PredR(0) {} + FlowPattern() = default; FlowPattern(MachineBasicBlock *B, unsigned PR, MachineBasicBlock *TB, MachineBasicBlock *FB, MachineBasicBlock *JB) : SplitB(B), TrueB(TB), FalseB(FB), JoinB(JB), PredR(PR) {} - MachineBasicBlock *SplitB; - MachineBasicBlock *TrueB, *FalseB, *JoinB; - unsigned PredR; + MachineBasicBlock *SplitB = nullptr; + MachineBasicBlock *TrueB = nullptr; + MachineBasicBlock *FalseB = nullptr; + 
MachineBasicBlock *JoinB = nullptr; + unsigned PredR = 0; }; + struct PrintFP { PrintFP(const FlowPattern &P, const TargetRegisterInfo &T) : FP(P), TRI(T) {} + const FlowPattern &FP; const TargetRegisterInfo &TRI; friend raw_ostream &operator<< (raw_ostream &OS, const PrintFP &P); @@ -133,13 +151,17 @@ namespace { class HexagonEarlyIfConversion : public MachineFunctionPass { public: static char ID; + HexagonEarlyIfConversion() : MachineFunctionPass(ID), - TII(0), TRI(0), MFN(0), MRI(0), MDT(0), MLI(0) { + HII(nullptr), TRI(nullptr), MFN(nullptr), MRI(nullptr), MDT(nullptr), + MLI(nullptr) { initializeHexagonEarlyIfConversionPass(*PassRegistry::getPassRegistry()); } - const char *getPassName() const override { + + StringRef getPassName() const override { return "Hexagon early if conversion"; } + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<MachineBranchProbabilityInfo>(); AU.addRequired<MachineDominatorTree>(); @@ -147,6 +169,7 @@ namespace { AU.addRequired<MachineLoopInfo>(); MachineFunctionPass::getAnalysisUsage(AU); } + bool runOnMachineFunction(MachineFunction &MF) override; private: @@ -185,7 +208,7 @@ namespace { void mergeBlocks(MachineBasicBlock *PredB, MachineBasicBlock *SuccB); void simplifyFlowGraph(const FlowPattern &FP); - const TargetInstrInfo *TII; + const HexagonInstrInfo *HII; const TargetRegisterInfo *TRI; MachineFunction *MFN; MachineRegisterInfo *MRI; @@ -196,7 +219,8 @@ namespace { }; char HexagonEarlyIfConversion::ID = 0; -} + +} // end anonymous namespace INITIALIZE_PASS(HexagonEarlyIfConversion, "hexagon-eif", "Hexagon early if conversion", false, false) @@ -209,7 +233,6 @@ bool HexagonEarlyIfConversion::isPreheader(const MachineBasicBlock *B) const { return L && SB == L->getHeader(); } - bool HexagonEarlyIfConversion::matchFlowPattern(MachineBasicBlock *B, MachineLoop *L, FlowPattern &FP) { DEBUG(dbgs() << "Checking flow pattern at BB#" << B->getNumber() << "\n"); @@ -217,7 +240,7 @@ bool HexagonEarlyIfConversion::matchFlowPattern(MachineBasicBlock *B, // Interested only in conditional branches, no .new, no new-value, etc. // Check the terminators directly, it's easier than handling all responses // from AnalyzeBranch. - MachineBasicBlock *TB = 0, *FB = 0; + MachineBasicBlock *TB = nullptr, *FB = nullptr; MachineBasicBlock::const_iterator T1I = B->getFirstTerminator(); if (T1I == B->end()) return false; @@ -228,7 +251,7 @@ bool HexagonEarlyIfConversion::matchFlowPattern(MachineBasicBlock *B, // Get the layout successor, or 0 if B does not have one. MachineFunction::iterator NextBI = std::next(MachineFunction::iterator(B)); - MachineBasicBlock *NextB = (NextBI != MFN->end()) ? &*NextBI : 0; + MachineBasicBlock *NextB = (NextBI != MFN->end()) ? &*NextBI : nullptr; MachineBasicBlock *T1B = T1I->getOperand(1).getMBB(); MachineBasicBlock::const_iterator T2I = std::next(T1I); @@ -273,9 +296,9 @@ bool HexagonEarlyIfConversion::matchFlowPattern(MachineBasicBlock *B, if (!TOk && !FOk) return false; - MachineBasicBlock *TSB = (TNS > 0) ? *TB->succ_begin() : 0; - MachineBasicBlock *FSB = (FNS > 0) ? *FB->succ_begin() : 0; - MachineBasicBlock *JB = 0; + MachineBasicBlock *TSB = (TNS > 0) ? *TB->succ_begin() : nullptr; + MachineBasicBlock *FSB = (FNS > 0) ? 
*FB->succ_begin() : nullptr; + MachineBasicBlock *JB = nullptr; if (TOk) { if (FOk) { @@ -286,14 +309,14 @@ bool HexagonEarlyIfConversion::matchFlowPattern(MachineBasicBlock *B, // TOk && !FOk if (TSB == FB) { JB = FB; - FB = 0; + FB = nullptr; } } } else { // !TOk && FOk (at least one must be true by now). if (FSB == TB) { JB = TB; - TB = 0; + TB = nullptr; } } // Don't try to predicate loop preheaders. @@ -308,7 +331,6 @@ bool HexagonEarlyIfConversion::matchFlowPattern(MachineBasicBlock *B, return true; } - // KLUDGE: HexagonInstrInfo::AnalyzeBranch won't work on a block that // contains EH_LABEL. bool HexagonEarlyIfConversion::hasEHLabel(const MachineBasicBlock *B) const { @@ -318,7 +340,6 @@ bool HexagonEarlyIfConversion::hasEHLabel(const MachineBasicBlock *B) const { return false; } - // KLUDGE: HexagonInstrInfo::AnalyzeBranch may be unable to recognize // that a block can never fall-through. bool HexagonEarlyIfConversion::hasUncondBranch(const MachineBasicBlock *B) @@ -332,7 +353,6 @@ bool HexagonEarlyIfConversion::hasUncondBranch(const MachineBasicBlock *B) return false; } - bool HexagonEarlyIfConversion::isValidCandidate(const MachineBasicBlock *B) const { if (!B) @@ -357,10 +377,10 @@ bool HexagonEarlyIfConversion::isValidCandidate(const MachineBasicBlock *B) // update the use of it after predication). PHI uses will be updated // to use a result of a MUX, and a MUX cannot be created for predicate // registers. - for (ConstMIOperands MO(MI); MO.isValid(); ++MO) { - if (!MO->isReg() || !MO->isDef()) + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isReg() || !MO.isDef()) continue; - unsigned R = MO->getReg(); + unsigned R = MO.getReg(); if (!TargetRegisterInfo::isVirtualRegister(R)) continue; if (MRI->getRegClass(R) != &Hexagon::PredRegsRegClass) @@ -373,12 +393,11 @@ bool HexagonEarlyIfConversion::isValidCandidate(const MachineBasicBlock *B) return true; } - bool HexagonEarlyIfConversion::usesUndefVReg(const MachineInstr *MI) const { - for (ConstMIOperands MO(*MI); MO.isValid(); ++MO) { - if (!MO->isReg() || !MO->isUse()) + for (const MachineOperand &MO : MI->operands()) { + if (!MO.isReg() || !MO.isUse()) continue; - unsigned R = MO->getReg(); + unsigned R = MO.getReg(); if (!TargetRegisterInfo::isVirtualRegister(R)) continue; const MachineInstr *DefI = MRI->getVRegDef(R); @@ -390,7 +409,6 @@ bool HexagonEarlyIfConversion::usesUndefVReg(const MachineInstr *MI) const { return false; } - bool HexagonEarlyIfConversion::isValid(const FlowPattern &FP) const { if (hasEHLabel(FP.SplitB)) // KLUDGE: see function definition return false; @@ -424,7 +442,6 @@ bool HexagonEarlyIfConversion::isValid(const FlowPattern &FP) const { return true; } - unsigned HexagonEarlyIfConversion::computePhiCost(MachineBasicBlock *B) const { assert(B->pred_size() <= 2); if (B->pred_size() < 2) @@ -443,21 +460,20 @@ unsigned HexagonEarlyIfConversion::computePhiCost(MachineBasicBlock *B) const { } MachineInstr *Def1 = MRI->getVRegDef(RO1.getReg()); MachineInstr *Def3 = MRI->getVRegDef(RO3.getReg()); - if (!TII->isPredicable(*Def1) || !TII->isPredicable(*Def3)) + if (!HII->isPredicable(*Def1) || !HII->isPredicable(*Def3)) Cost++; } return Cost; } - unsigned HexagonEarlyIfConversion::countPredicateDefs( const MachineBasicBlock *B) const { unsigned PredDefs = 0; for (auto &MI : *B) { - for (ConstMIOperands MO(MI); MO.isValid(); ++MO) { - if (!MO->isReg() || !MO->isDef()) + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isReg() || !MO.isDef()) continue; - unsigned R = MO->getReg(); + unsigned 
R = MO.getReg(); if (!TargetRegisterInfo::isVirtualRegister(R)) continue; if (MRI->getRegClass(R) == &Hexagon::PredRegsRegClass) @@ -467,7 +483,6 @@ unsigned HexagonEarlyIfConversion::countPredicateDefs( return PredDefs; } - bool HexagonEarlyIfConversion::isProfitable(const FlowPattern &FP) const { if (FP.TrueB && FP.FalseB) { @@ -547,7 +562,6 @@ bool HexagonEarlyIfConversion::isProfitable(const FlowPattern &FP) const { return true; } - bool HexagonEarlyIfConversion::visitBlock(MachineBasicBlock *B, MachineLoop *L) { bool Changed = false; @@ -593,9 +607,8 @@ bool HexagonEarlyIfConversion::visitBlock(MachineBasicBlock *B, return true; } - bool HexagonEarlyIfConversion::visitLoop(MachineLoop *L) { - MachineBasicBlock *HB = L ? L->getHeader() : 0; + MachineBasicBlock *HB = L ? L->getHeader() : nullptr; DEBUG((L ? dbgs() << "Visiting loop H:" << PrintMB(HB) : dbgs() << "Visiting function") << "\n"); bool Changed = false; @@ -609,34 +622,29 @@ bool HexagonEarlyIfConversion::visitLoop(MachineLoop *L) { return Changed; } - bool HexagonEarlyIfConversion::isPredicableStore(const MachineInstr *MI) const { - // Exclude post-increment stores. Those return a value, so we cannot - // predicate them. + // HexagonInstrInfo::isPredicable will consider these stores are non- + // -predicable if the offset would become constant-extended after + // predication. unsigned Opc = MI->getOpcode(); - using namespace Hexagon; switch (Opc) { - // Store byte: - case S2_storerb_io: case S4_storerb_rr: - case S2_storerbabs: case S4_storeirb_io: case S2_storerbgp: - // Store halfword: - case S2_storerh_io: case S4_storerh_rr: - case S2_storerhabs: case S4_storeirh_io: case S2_storerhgp: - // Store upper halfword: - case S2_storerf_io: case S4_storerf_rr: - case S2_storerfabs: case S2_storerfgp: - // Store word: - case S2_storeri_io: case S4_storeri_rr: - case S2_storeriabs: case S4_storeiri_io: case S2_storerigp: - // Store doubleword: - case S2_storerd_io: case S4_storerd_rr: - case S2_storerdabs: case S2_storerdgp: + case Hexagon::S2_storerb_io: + case Hexagon::S2_storerbnew_io: + case Hexagon::S2_storerh_io: + case Hexagon::S2_storerhnew_io: + case Hexagon::S2_storeri_io: + case Hexagon::S2_storerinew_io: + case Hexagon::S2_storerd_io: + case Hexagon::S4_storeirb_io: + case Hexagon::S4_storeirh_io: + case Hexagon::S4_storeiri_io: return true; } - return false; -} + // TargetInstrInfo::isPredicable takes a non-const pointer. + return MI->mayStore() && HII->isPredicable(const_cast<MachineInstr&>(*MI)); +} bool HexagonEarlyIfConversion::isSafeToSpeculate(const MachineInstr *MI) const { @@ -650,59 +658,11 @@ bool HexagonEarlyIfConversion::isSafeToSpeculate(const MachineInstr *MI) return true; } - unsigned HexagonEarlyIfConversion::getCondStoreOpcode(unsigned Opc, bool IfTrue) const { - // Exclude post-increment stores. - using namespace Hexagon; - switch (Opc) { - case S2_storerb_io: - return IfTrue ? S2_pstorerbt_io : S2_pstorerbf_io; - case S4_storerb_rr: - return IfTrue ? S4_pstorerbt_rr : S4_pstorerbf_rr; - case S2_storerbabs: - case S2_storerbgp: - return IfTrue ? S4_pstorerbt_abs : S4_pstorerbf_abs; - case S4_storeirb_io: - return IfTrue ? S4_storeirbt_io : S4_storeirbf_io; - case S2_storerh_io: - return IfTrue ? S2_pstorerht_io : S2_pstorerhf_io; - case S4_storerh_rr: - return IfTrue ? S4_pstorerht_rr : S4_pstorerhf_rr; - case S2_storerhabs: - case S2_storerhgp: - return IfTrue ? S4_pstorerht_abs : S4_pstorerhf_abs; - case S2_storerf_io: - return IfTrue ? 
S2_pstorerft_io : S2_pstorerff_io; - case S4_storerf_rr: - return IfTrue ? S4_pstorerft_rr : S4_pstorerff_rr; - case S2_storerfabs: - case S2_storerfgp: - return IfTrue ? S4_pstorerft_abs : S4_pstorerff_abs; - case S4_storeirh_io: - return IfTrue ? S4_storeirht_io : S4_storeirhf_io; - case S2_storeri_io: - return IfTrue ? S2_pstorerit_io : S2_pstorerif_io; - case S4_storeri_rr: - return IfTrue ? S4_pstorerit_rr : S4_pstorerif_rr; - case S2_storeriabs: - case S2_storerigp: - return IfTrue ? S4_pstorerit_abs : S4_pstorerif_abs; - case S4_storeiri_io: - return IfTrue ? S4_storeirit_io : S4_storeirif_io; - case S2_storerd_io: - return IfTrue ? S2_pstorerdt_io : S2_pstorerdf_io; - case S4_storerd_rr: - return IfTrue ? S4_pstorerdt_rr : S4_pstorerdf_rr; - case S2_storerdabs: - case S2_storerdgp: - return IfTrue ? S4_pstorerdt_abs : S4_pstorerdf_abs; - } - llvm_unreachable("Unexpected opcode"); - return 0; + return HII->getCondOpcode(Opc, !IfTrue); } - void HexagonEarlyIfConversion::predicateInstr(MachineBasicBlock *ToB, MachineBasicBlock::iterator At, MachineInstr *MI, unsigned PredR, bool IfTrue) { @@ -717,10 +677,15 @@ void HexagonEarlyIfConversion::predicateInstr(MachineBasicBlock *ToB, if (isPredicableStore(MI)) { unsigned COpc = getCondStoreOpcode(Opc, IfTrue); assert(COpc); - MachineInstrBuilder MIB = BuildMI(*ToB, At, DL, TII->get(COpc)) - .addReg(PredR); - for (MIOperands MO(*MI); MO.isValid(); ++MO) - MIB.addOperand(*MO); + MachineInstrBuilder MIB = BuildMI(*ToB, At, DL, HII->get(COpc)); + MachineInstr::mop_iterator MOI = MI->operands_begin(); + if (HII->isPostIncrement(*MI)) { + MIB.addOperand(*MOI); + ++MOI; + } + MIB.addReg(PredR); + for (const MachineOperand &MO : make_range(MOI, MI->operands_end())) + MIB.addOperand(MO); // Set memory references. MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); @@ -733,7 +698,7 @@ void HexagonEarlyIfConversion::predicateInstr(MachineBasicBlock *ToB, if (Opc == Hexagon::J2_jump) { MachineBasicBlock *TB = MI->getOperand(0).getMBB(); - const MCInstrDesc &D = TII->get(IfTrue ? Hexagon::J2_jumpt + const MCInstrDesc &D = HII->get(IfTrue ? Hexagon::J2_jumpt : Hexagon::J2_jumpf); BuildMI(*ToB, At, DL, D) .addReg(PredR) @@ -748,7 +713,6 @@ void HexagonEarlyIfConversion::predicateInstr(MachineBasicBlock *ToB, llvm_unreachable("Unexpected instruction"); } - // Predicate/speculate non-branch instructions from FromB into block ToB. // Leave the branches alone, they will be handled later. Btw, at this point // FromB should have at most one branch, and it should be unconditional. @@ -769,7 +733,6 @@ void HexagonEarlyIfConversion::predicateBlockNB(MachineBasicBlock *ToB, } } - void HexagonEarlyIfConversion::updatePhiNodes(MachineBasicBlock *WhereB, const FlowPattern &FP) { // Visit all PHI nodes in the WhereB block and generate MUX instructions @@ -799,10 +762,25 @@ void HexagonEarlyIfConversion::updatePhiNodes(MachineBasicBlock *WhereB, assert(TR && FR); using namespace Hexagon; + unsigned DR = PN->getOperand(0).getReg(); const TargetRegisterClass *RC = MRI->getRegClass(DR); - const MCInstrDesc &D = RC == &IntRegsRegClass ? 
TII->get(C2_mux) - : TII->get(MUX64_rr); + unsigned Opc = 0; + if (RC == &IntRegsRegClass) + Opc = C2_mux; + else if (RC == &DoubleRegsRegClass) + Opc = PS_pselect; + else if (RC == &VectorRegsRegClass) + Opc = PS_vselect; + else if (RC == &VecDblRegsRegClass) + Opc = PS_wselect; + else if (RC == &VectorRegs128BRegClass) + Opc = PS_vselect_128B; + else if (RC == &VecDblRegs128BRegClass) + Opc = PS_wselect_128B; + else + llvm_unreachable("unexpected register type"); + const MCInstrDesc &D = HII->get(Opc); MachineBasicBlock::iterator MuxAt = FP.SplitB->getFirstTerminator(); DebugLoc DL; @@ -819,9 +797,8 @@ void HexagonEarlyIfConversion::updatePhiNodes(MachineBasicBlock *WhereB, } } - void HexagonEarlyIfConversion::convert(const FlowPattern &FP) { - MachineBasicBlock *TSB = 0, *FSB = 0; + MachineBasicBlock *TSB = nullptr, *FSB = nullptr; MachineBasicBlock::iterator OldTI = FP.SplitB->getFirstTerminator(); assert(OldTI != FP.SplitB->end()); DebugLoc DL = OldTI->getDebugLoc(); @@ -839,7 +816,7 @@ void HexagonEarlyIfConversion::convert(const FlowPattern &FP) { // Regenerate new terminators in the split block and update the successors. // First, remember any information that may be needed later and remove the // existing terminators/successors from the split block. - MachineBasicBlock *SSB = 0; + MachineBasicBlock *SSB = nullptr; FP.SplitB->erase(OldTI, FP.SplitB->end()); while (FP.SplitB->succ_size() > 0) { MachineBasicBlock *T = *FP.SplitB->succ_begin(); @@ -870,21 +847,21 @@ void HexagonEarlyIfConversion::convert(const FlowPattern &FP) { // generated. if (FP.JoinB) { assert(!SSB || SSB == FP.JoinB); - BuildMI(*FP.SplitB, FP.SplitB->end(), DL, TII->get(Hexagon::J2_jump)) + BuildMI(*FP.SplitB, FP.SplitB->end(), DL, HII->get(Hexagon::J2_jump)) .addMBB(FP.JoinB); FP.SplitB->addSuccessor(FP.JoinB); } else { bool HasBranch = false; if (TSB) { - BuildMI(*FP.SplitB, FP.SplitB->end(), DL, TII->get(Hexagon::J2_jumpt)) + BuildMI(*FP.SplitB, FP.SplitB->end(), DL, HII->get(Hexagon::J2_jumpt)) .addReg(FP.PredR) .addMBB(TSB); FP.SplitB->addSuccessor(TSB); HasBranch = true; } if (FSB) { - const MCInstrDesc &D = HasBranch ? TII->get(Hexagon::J2_jump) - : TII->get(Hexagon::J2_jumpf); + const MCInstrDesc &D = HasBranch ? HII->get(Hexagon::J2_jump) + : HII->get(Hexagon::J2_jumpf); MachineInstrBuilder MIB = BuildMI(*FP.SplitB, FP.SplitB->end(), DL, D); if (!HasBranch) MIB.addReg(FP.PredR); @@ -896,7 +873,7 @@ void HexagonEarlyIfConversion::convert(const FlowPattern &FP) { // successor blocks of the TrueB and FalseB (or null if the TrueB // or FalseB block is null). SSB is the potential successor block // of the SplitB that is neither TrueB nor FalseB. 
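The chain of register-class tests above is effectively a table lookup. A sketch of the same mapping (class and opcode names as they appear in the patch; the string map itself is purely illustrative):

#include <map>
#include <string>

std::string selectOpcodeFor(const std::string &RegClass) {
  static const std::map<std::string, std::string> OpcFor = {
      {"IntRegs", "C2_mux"},
      {"DoubleRegs", "PS_pselect"},
      {"VectorRegs", "PS_vselect"},
      {"VecDblRegs", "PS_wselect"},
      {"VectorRegs128B", "PS_vselect_128B"},
      {"VecDblRegs128B", "PS_wselect_128B"},
  };
  auto It = OpcFor.find(RegClass);
  return It == OpcFor.end() ? "<unsupported>" : It->second;
}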
- BuildMI(*FP.SplitB, FP.SplitB->end(), DL, TII->get(Hexagon::J2_jump)) + BuildMI(*FP.SplitB, FP.SplitB->end(), DL, HII->get(Hexagon::J2_jump)) .addMBB(SSB); FP.SplitB->addSuccessor(SSB); } @@ -915,7 +892,6 @@ void HexagonEarlyIfConversion::convert(const FlowPattern &FP) { } } - void HexagonEarlyIfConversion::removeBlock(MachineBasicBlock *B) { DEBUG(dbgs() << "Removing block " << PrintMB(B) << "\n"); @@ -944,7 +920,6 @@ void HexagonEarlyIfConversion::removeBlock(MachineBasicBlock *B) { MFN->erase(B->getIterator()); } - void HexagonEarlyIfConversion::eliminatePhis(MachineBasicBlock *B) { DEBUG(dbgs() << "Removing phi nodes from block " << PrintMB(B) << "\n"); MachineBasicBlock::iterator I, NextI, NonPHI = B->getFirstNonPHI(); @@ -963,7 +938,7 @@ void HexagonEarlyIfConversion::eliminatePhis(MachineBasicBlock *B) { const DebugLoc &DL = PN->getDebugLoc(); const TargetRegisterClass *RC = MRI->getRegClass(DefR); NewR = MRI->createVirtualRegister(RC); - NonPHI = BuildMI(*B, NonPHI, DL, TII->get(TargetOpcode::COPY), NewR) + NonPHI = BuildMI(*B, NonPHI, DL, HII->get(TargetOpcode::COPY), NewR) .addReg(UseR, 0, UseSR); } MRI->replaceRegWith(DefR, NewR); @@ -971,7 +946,6 @@ void HexagonEarlyIfConversion::eliminatePhis(MachineBasicBlock *B) { } } - void HexagonEarlyIfConversion::replacePhiEdges(MachineBasicBlock *OldB, MachineBasicBlock *NewB) { for (auto I = OldB->succ_begin(), E = OldB->succ_end(); I != E; ++I) { @@ -979,21 +953,20 @@ void HexagonEarlyIfConversion::replacePhiEdges(MachineBasicBlock *OldB, MachineBasicBlock::iterator P, N = SB->getFirstNonPHI(); for (P = SB->begin(); P != N; ++P) { MachineInstr &PN = *P; - for (MIOperands MO(PN); MO.isValid(); ++MO) - if (MO->isMBB() && MO->getMBB() == OldB) - MO->setMBB(NewB); + for (MachineOperand &MO : PN.operands()) + if (MO.isMBB() && MO.getMBB() == OldB) + MO.setMBB(NewB); } } } - void HexagonEarlyIfConversion::mergeBlocks(MachineBasicBlock *PredB, MachineBasicBlock *SuccB) { DEBUG(dbgs() << "Merging blocks " << PrintMB(PredB) << " and " << PrintMB(SuccB) << "\n"); bool TermOk = hasUncondBranch(SuccB); eliminatePhis(SuccB); - TII->RemoveBranch(*PredB); + HII->removeBranch(*PredB); PredB->removeSuccessor(SuccB); PredB->splice(PredB->end(), SuccB, SuccB->begin(), SuccB->end()); MachineBasicBlock::succ_iterator I, E = SuccB->succ_end(); @@ -1006,7 +979,6 @@ void HexagonEarlyIfConversion::mergeBlocks(MachineBasicBlock *PredB, PredB->updateTerminator(); } - void HexagonEarlyIfConversion::simplifyFlowGraph(const FlowPattern &FP) { if (FP.TrueB) removeBlock(FP.TrueB); @@ -1030,13 +1002,12 @@ void HexagonEarlyIfConversion::simplifyFlowGraph(const FlowPattern &FP) { mergeBlocks(FP.SplitB, SB); } - bool HexagonEarlyIfConversion::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(*MF.getFunction())) return false; - auto &ST = MF.getSubtarget(); - TII = ST.getInstrInfo(); + auto &ST = MF.getSubtarget<HexagonSubtarget>(); + HII = ST.getInstrInfo(); TRI = ST.getRegisterInfo(); MFN = &MF; MRI = &MF.getRegInfo(); @@ -1050,7 +1021,7 @@ bool HexagonEarlyIfConversion::runOnMachineFunction(MachineFunction &MF) { for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); I != E; ++I) Changed |= visitLoop(*I); - Changed |= visitLoop(0); + Changed |= visitLoop(nullptr); return Changed; } @@ -1061,4 +1032,3 @@ bool HexagonEarlyIfConversion::runOnMachineFunction(MachineFunction &MF) { FunctionPass *llvm::createHexagonEarlyIfConversion() { return new HexagonEarlyIfConversion(); } - diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp 
b/contrib/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp index bd5bb9c..8f070d8 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp @@ -85,78 +85,33 @@ // implicit uses will be added later, after predication. The extra price, // however, is that finding the locations where the implicit uses need // to be added, and updating the live ranges will be more involved. -// -// An additional problem appears when subregister liveness tracking is -// enabled. In such a scenario, the live interval for the super-register -// will have live ranges for each subregister (i.e. subranges). This sub- -// range contains all liveness information about the subregister, except -// for one case: a "read-undef" flag from another subregister will not -// be reflected: given -// vreg1:subreg_hireg<def,read-undef> = ... ; "undefines" subreg_loreg -// the subrange for subreg_loreg will not have any indication that it is -// undefined at this point. Calculating subregister liveness based only -// on the information from the subrange may create a segment which spans -// over such a "read-undef" flag. This would create inconsistencies in -// the liveness data, resulting in assertions or incorrect code. -// Example: -// vreg1:subreg_loreg<def> = ... -// vreg1:subreg_hireg<def, read-undef> = ... ; "undefines" subreg_loreg -// ... -// vreg1:subreg_loreg<def> = A2_tfrt ... ; may end up with imp-use -// ; of subreg_loreg -// The remedy takes advantage of the fact, that at this point we have -// an unconditional definition of the subregister. What this means is -// that any preceding value in this subregister will be overwritten, -// or in other words, the last use before this def is a kill. This also -// implies that the first of the predicated transfers at this location -// should not have any implicit uses. -// Assume for a moment that no part of the corresponding super-register -// is used as a source. In such case, the entire super-register can be -// considered undefined immediately before this instruction. Because of -// that, we can insert an IMPLICIT_DEF of the super-register at this -// location, which will cause it to be reflected in all the associated -// subranges. What is important here is that if an IMPLICIT_DEF of -// subreg_loreg was used, we would lose the indication that subreg_hireg -// is also considered undefined. This could lead to having implicit uses -// incorrectly added. -// -// What is left is the two cases when the super-register is used as a -// source. -// * Case 1: the used part is the same as the one that is defined: -// vreg1<def> = ... -// ... -// vreg1:subreg_loreg<def,read-undef> = C2_mux ..., vreg1:subreg_loreg -// In the end, the subreg_loreg should be marked as live at the point of -// the splitting: -// vreg1:subreg_loreg<def,read-undef> = A2_tfrt ; should have imp-use -// vreg1:subreg_loreg<def,read-undef> = A2_tfrf ; should have imp-use -// Hence, an IMPLICIT_DEF of only vreg1:subreg_hireg would be sufficient. -// * Case 2: the used part does not overlap the part being defined: -// vreg1<def> = ... -// ... -// vreg1:subreg_loreg<def,read-undef> = C2_mux ..., vreg1:subreg_hireg -// For this case, we insert an IMPLICIT_DEF of vreg1:subreg_hireg after -// the C2_mux. 
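The comment block removed above walks through the liveness subtleties of this pass's core rewrite: one mux becomes two transfers predicated on opposite senses of the same predicate. A toy illustration of just that shape (schematic output, not real MIR syntax):

#include <cstdio>

struct Mux { unsigned Dst, Pred, SrcTrue, SrcFalse; };

void splitMux(const Mux &M) {
  std::printf("vreg%u = A2_tfrt vreg%u, vreg%u\n", M.Dst, M.Pred, M.SrcTrue);
  std::printf("vreg%u = A2_tfrf vreg%u, vreg%u\n", M.Dst, M.Pred, M.SrcFalse);
}

int main() {
  splitMux({1, 2, 3, 4}); // from: vreg1 = C2_mux vreg2, vreg3, vreg4
  return 0;
}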
#define DEBUG_TYPE "expand-condsets" -#include "HexagonTargetMachine.h" +#include "HexagonInstrInfo.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SetVector.h" -#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" - -#include <algorithm> +#include "llvm/Target/TargetRegisterInfo.h" +#include <cassert> #include <iterator> #include <set> #include <utility> @@ -169,17 +124,21 @@ static cl::opt<unsigned> OptCoaLimit("expand-condsets-coa-limit", cl::init(~0U), cl::Hidden, cl::desc("Max number of segment coalescings")); namespace llvm { + void initializeHexagonExpandCondsetsPass(PassRegistry&); FunctionPass *createHexagonExpandCondsets(); -} + +} // end namespace llvm namespace { + class HexagonExpandCondsets : public MachineFunctionPass { public: static char ID; + HexagonExpandCondsets() : - MachineFunctionPass(ID), HII(0), TRI(0), MRI(0), - LIS(0), CoaLimitActive(false), + MachineFunctionPass(ID), HII(nullptr), TRI(nullptr), MRI(nullptr), + LIS(nullptr), CoaLimitActive(false), TfrLimitActive(false), CoaCounter(0), TfrCounter(0) { if (OptCoaLimit.getPosition()) CoaLimitActive = true, CoaLimit = OptCoaLimit; @@ -188,9 +147,8 @@ namespace { initializeHexagonExpandCondsetsPass(*PassRegistry::getPassRegistry()); } - const char *getPassName() const override { - return "Hexagon Expand Condsets"; - } + StringRef getPassName() const override { return "Hexagon Expand Condsets"; } + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<LiveIntervals>(); AU.addPreserved<LiveIntervals>(); @@ -199,6 +157,7 @@ namespace { AU.addPreserved<MachineDominatorTree>(); MachineFunctionPass::getAnalysisUsage(AU); } + bool runOnMachineFunction(MachineFunction &MF) override; private: @@ -207,7 +166,6 @@ namespace { MachineDominatorTree *MDT; MachineRegisterInfo *MRI; LiveIntervals *LIS; - std::set<MachineInstr*> LocalImpDefs; bool CoaLimitActive, TfrLimitActive; unsigned CoaLimit, TfrLimit, CoaCounter, TfrCounter; @@ -216,6 +174,7 @@ namespace { RegisterRef(const MachineOperand &Op) : Reg(Op.getReg()), Sub(Op.getSubReg()) {} RegisterRef(unsigned R = 0, unsigned S = 0) : Reg(R), Sub(S) {} + bool operator== (RegisterRef RR) const { return Reg == RR.Reg && Sub == RR.Sub; } @@ -223,6 +182,7 @@ namespace { bool operator< (RegisterRef RR) const { return Reg < RR.Reg || (Reg == RR.Reg && Sub < RR.Sub); } + unsigned Reg, Sub; }; @@ -236,7 +196,6 @@ namespace { void addRefToMap(RegisterRef RR, ReferenceMap &Map, unsigned Exec); bool isRefInMap(RegisterRef, ReferenceMap &Map, unsigned Exec); - void removeImpDefSegments(LiveRange &Range); void updateDeadsInRange(unsigned Reg, LaneBitmask LM, LiveRange &Range); void updateKillFlags(unsigned Reg); void 
updateDeadFlags(unsigned Reg); @@ -251,7 +210,6 @@ namespace { unsigned DstSR, const MachineOperand &PredOp, bool PredSense, bool ReadUndef, bool ImpUse); bool split(MachineInstr &MI, std::set<unsigned> &UpdRegs); - bool splitInBlock(MachineBasicBlock &B, std::set<unsigned> &UpdRegs); bool isPredicable(MachineInstr *MI); MachineInstr *getReachingDefForPred(RegisterRef RD, @@ -272,12 +230,20 @@ namespace { bool isIntReg(RegisterRef RR, unsigned &BW); bool isIntraBlocks(LiveInterval &LI); bool coalesceRegisters(RegisterRef R1, RegisterRef R2); - bool coalesceSegments(MachineFunction &MF); + bool coalesceSegments(const SmallVectorImpl<MachineInstr*> &Condsets, + std::set<unsigned> &UpdRegs); }; -} + +} // end anonymous namespace char HexagonExpandCondsets::ID = 0; +namespace llvm { + + char &HexagonExpandCondsetsID = HexagonExpandCondsets::ID; + +} // end namespace llvm + INITIALIZE_PASS_BEGIN(HexagonExpandCondsets, "expand-condsets", "Hexagon Expand Condsets", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) @@ -288,9 +254,11 @@ INITIALIZE_PASS_END(HexagonExpandCondsets, "expand-condsets", unsigned HexagonExpandCondsets::getMaskForSub(unsigned Sub) { switch (Sub) { - case Hexagon::subreg_loreg: + case Hexagon::isub_lo: + case Hexagon::vsub_lo: return Sub_Low; - case Hexagon::subreg_hireg: + case Hexagon::isub_hi: + case Hexagon::vsub_hi: return Sub_High; case Hexagon::NoSubRegister: return Sub_None; @@ -305,21 +273,19 @@ bool HexagonExpandCondsets::isCondset(const MachineInstr &MI) { case Hexagon::C2_muxii: case Hexagon::C2_muxir: case Hexagon::C2_muxri: - case Hexagon::MUX64_rr: + case Hexagon::PS_pselect: return true; break; } return false; } - LaneBitmask HexagonExpandCondsets::getLaneMask(unsigned Reg, unsigned Sub) { assert(TargetRegisterInfo::isVirtualRegister(Reg)); return Sub != 0 ? TRI->getSubRegIndexLaneMask(Sub) : MRI->getMaxLaneMaskForVReg(Reg); } - void HexagonExpandCondsets::addRefToMap(RegisterRef RR, ReferenceMap &Map, unsigned Exec) { unsigned Mask = getMaskForSub(RR.Sub) | Exec; @@ -330,7 +296,6 @@ void HexagonExpandCondsets::addRefToMap(RegisterRef RR, ReferenceMap &Map, F->second |= Mask; } - bool HexagonExpandCondsets::isRefInMap(RegisterRef RR, ReferenceMap &Map, unsigned Exec) { ReferenceMap::iterator F = Map.find(RR.Reg); @@ -342,7 +307,6 @@ bool HexagonExpandCondsets::isRefInMap(RegisterRef RR, ReferenceMap &Map, return false; } - void HexagonExpandCondsets::updateKillFlags(unsigned Reg) { auto KillAt = [this,Reg] (SlotIndex K, LaneBitmask LM) -> void { // Set the <kill> flag on a use of Reg whose lane mask is contained in LM. 
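addRefToMap and isRefInMap above fold a sub-register slot and an execution sense into one bitmask per register. A simplified model (the Sub_*/Exec_* bit values are assumptions of this sketch, not the pass's actual constants):

#include <map>

enum : unsigned {
  Sub_None = 1, Sub_Low = 2, Sub_High = 4, // which part was referenced
  Exec_Then = 8, Exec_Else = 16            // under which predicate sense
};

using ReferenceMap = std::map<unsigned, unsigned>;

void addRef(ReferenceMap &Map, unsigned Reg, unsigned Mask) {
  Map[Reg] |= Mask;                        // merge with earlier references
}

bool isRefIn(const ReferenceMap &Map, unsigned Reg, unsigned Mask) {
  auto F = Map.find(Reg);
  return F != Map.end() && (F->second & Mask) != 0;
}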
@@ -392,16 +356,6 @@ void HexagonExpandCondsets::updateKillFlags(unsigned Reg) { } } - -void HexagonExpandCondsets::removeImpDefSegments(LiveRange &Range) { - auto StartImpDef = [this] (LiveRange::Segment &S) -> bool { - return S.start.isRegister() && - LocalImpDefs.count(LIS->getInstructionFromIndex(S.start)); - }; - Range.segments.erase(std::remove_if(Range.begin(), Range.end(), StartImpDef), - Range.end()); -} - void HexagonExpandCondsets::updateDeadsInRange(unsigned Reg, LaneBitmask LM, LiveRange &Range) { assert(TargetRegisterInfo::isVirtualRegister(Reg)); @@ -415,7 +369,7 @@ void HexagonExpandCondsets::updateDeadsInRange(unsigned Reg, LaneBitmask LM, if (!TargetRegisterInfo::isVirtualRegister(DR) || DR != Reg) return false; LaneBitmask SLM = getLaneMask(DR, DSR); - return (SLM & LM) != 0; + return (SLM & LM).any(); }; // The splitting step will create pairs of predicated definitions without @@ -425,7 +379,6 @@ void HexagonExpandCondsets::updateDeadsInRange(unsigned Reg, LaneBitmask LM, // We need to identify predicated defs that need implicit uses, and // dead defs that are not really dead, and correct both problems. - SetVector<MachineBasicBlock*> Defs; auto Dominate = [this] (SetVector<MachineBasicBlock*> &Defs, MachineBasicBlock *Dest) -> bool { for (MachineBasicBlock *D : Defs) @@ -449,20 +402,25 @@ void HexagonExpandCondsets::updateDeadsInRange(unsigned Reg, LaneBitmask LM, // First, try to extend live range within individual basic blocks. This // will leave us only with dead defs that do not reach any predicated // defs in the same block. + SetVector<MachineBasicBlock*> Defs; SmallVector<SlotIndex,4> PredDefs; for (auto &Seg : Range) { if (!Seg.start.isRegister()) continue; MachineInstr *DefI = LIS->getInstructionFromIndex(Seg.start); - if (LocalImpDefs.count(DefI)) - continue; Defs.insert(DefI->getParent()); if (HII->isPredicated(*DefI)) PredDefs.push_back(Seg.start); } + + SmallVector<SlotIndex,8> Undefs; + LiveInterval &LI = LIS->getInterval(Reg); + LI.computeSubRangeUndefs(Undefs, LM, *MRI, *LIS->getSlotIndexes()); + for (auto &SI : PredDefs) { MachineBasicBlock *BB = LIS->getMBBFromIndex(SI); - if (Range.extendInBlock(LIS->getMBBStartIdx(BB), SI)) + auto P = Range.extendInBlock(Undefs, LIS->getMBBStartIdx(BB), SI); + if (P.first != nullptr || P.second) SI = SlotIndex(); } @@ -476,10 +434,21 @@ void HexagonExpandCondsets::updateDeadsInRange(unsigned Reg, LaneBitmask LM, if (BB->pred_empty()) continue; // If the defs from this range reach SI via all predecessors, it is live. + // It can happen that SI is reached by the defs through some paths, but + // not all. In the IR coming into this optimization, SI would not be + // considered live, since the defs would then not jointly dominate SI. + // That means that SI is an overwriting def, and no implicit use is + // needed at this point. Do not add SI to the extension points, since + // extendToIndices will abort if there is no joint dominance. + // If the abort was avoided by adding extra undefs added to Undefs, + // extendToIndices could actually indicate that SI is live, contrary + // to the original IR. if (Dominate(Defs, BB)) ExtTo.push_back(SI); } - LIS->extendToIndices(Range, ExtTo); + + if (!ExtTo.empty()) + LIS->extendToIndices(Range, ExtTo, Undefs); // Remove <dead> flags from all defs that are not dead after live range // extension, and collect all def operands. 
They will be used to generate @@ -489,8 +458,6 @@ void HexagonExpandCondsets::updateDeadsInRange(unsigned Reg, LaneBitmask LM, if (!Seg.start.isRegister()) continue; MachineInstr *DefI = LIS->getInstructionFromIndex(Seg.start); - if (LocalImpDefs.count(DefI)) - continue; for (auto &Op : DefI->operands()) { if (Seg.start.isDead() || !IsRegDef(Op)) continue; @@ -499,40 +466,34 @@ void HexagonExpandCondsets::updateDeadsInRange(unsigned Reg, LaneBitmask LM, } } - // Finally, add implicit uses to each predicated def that is reached - // by other defs. Remove segments started by implicit-defs first, since - // they do not define registers. - removeImpDefSegments(Range); - + // by other defs. for (auto &Seg : Range) { if (!Seg.start.isRegister() || !Range.liveAt(Seg.start.getPrevSlot())) continue; MachineInstr *DefI = LIS->getInstructionFromIndex(Seg.start); if (!HII->isPredicated(*DefI)) continue; - MachineFunction &MF = *DefI->getParent()->getParent(); // Construct the set of all necessary implicit uses, based on the def // operands in the instruction. std::set<RegisterRef> ImpUses; for (auto &Op : DefI->operands()) if (Op.isReg() && Op.isDef() && DefRegs.count(Op)) ImpUses.insert(Op); + if (ImpUses.empty()) + continue; + MachineFunction &MF = *DefI->getParent()->getParent(); for (RegisterRef R : ImpUses) MachineInstrBuilder(MF, DefI).addReg(R.Reg, RegState::Implicit, R.Sub); } } - void HexagonExpandCondsets::updateDeadFlags(unsigned Reg) { LiveInterval &LI = LIS->getInterval(Reg); if (LI.hasSubRanges()) { for (LiveInterval::SubRange &S : LI.subranges()) { updateDeadsInRange(Reg, S.LaneMask, S); LIS->shrinkToUses(S, Reg); - // LI::shrinkToUses will add segments started by implicit-defs. - // Remove them again. - removeImpDefSegments(S); } LI.clear(); LIS->constructMainRangeFromSubranges(LI); @@ -541,7 +502,6 @@ void HexagonExpandCondsets::updateDeadFlags(unsigned Reg) { } } - void HexagonExpandCondsets::recalculateLiveInterval(unsigned Reg) { LIS->removeInterval(Reg); LIS->createAndComputeVirtRegInterval(Reg); @@ -552,7 +512,6 @@ void HexagonExpandCondsets::removeInstr(MachineInstr &MI) { MI.eraseFromParent(); } - void HexagonExpandCondsets::updateLiveness(std::set<unsigned> &RegSet, bool Recalc, bool UpdateKills, bool UpdateDeads) { UpdateKills |= UpdateDeads; @@ -571,12 +530,12 @@ void HexagonExpandCondsets::updateLiveness(std::set<unsigned> &RegSet, } } - /// Get the opcode for a conditional transfer of the value in SO (source /// operand). The condition (true/false) is given in Cond. unsigned HexagonExpandCondsets::getCondTfrOpcode(const MachineOperand &SO, bool IfTrue) { using namespace Hexagon; + if (SO.isReg()) { unsigned PhysR; RegisterRef RS = SO; @@ -603,7 +562,6 @@ unsigned HexagonExpandCondsets::getCondTfrOpcode(const MachineOperand &SO, llvm_unreachable("Unexpected source operand"); } - /// Generate a conditional transfer, copying the value SrcOp to the /// destination register DstR:DstSR, and using the predicate register from /// PredOp. The Cond argument specifies whether the predicate is to be @@ -623,19 +581,29 @@ MachineInstr *HexagonExpandCondsets::genCondTfrFor(MachineOperand &SrcOp, /// predicate. unsigned Opc = getCondTfrOpcode(SrcOp, PredSense); - unsigned State = RegState::Define | (ReadUndef ? RegState::Undef : 0); - MachineInstrBuilder MIB = BuildMI(B, At, DL, HII->get(Opc)) - .addReg(DstR, State, DstSR) - .addOperand(PredOp) - .addOperand(SrcOp); - - // We don't want any kills yet. - MIB->clearKillInfo(); + unsigned DstState = RegState::Define | (ReadUndef ? 
RegState::Undef : 0); + unsigned PredState = getRegState(PredOp) & ~RegState::Kill; + MachineInstrBuilder MIB; + + if (SrcOp.isReg()) { + unsigned SrcState = getRegState(SrcOp); + if (RegisterRef(SrcOp) == RegisterRef(DstR, DstSR)) + SrcState &= ~RegState::Kill; + MIB = BuildMI(B, At, DL, HII->get(Opc)) + .addReg(DstR, DstState, DstSR) + .addReg(PredOp.getReg(), PredState, PredOp.getSubReg()) + .addReg(SrcOp.getReg(), SrcState, SrcOp.getSubReg()); + } else { + MIB = BuildMI(B, At, DL, HII->get(Opc)) + .addReg(DstR, DstState, DstSR) + .addReg(PredOp.getReg(), PredState, PredOp.getSubReg()) + .addOperand(SrcOp); + } + DEBUG(dbgs() << "created an initial copy: " << *MIB); return &*MIB; } - /// Replace a MUX instruction MI with a pair A2_tfrt/A2_tfrf. This function /// performs all necessary changes to complete the replacement. bool HexagonExpandCondsets::split(MachineInstr &MI, @@ -649,44 +617,36 @@ bool HexagonExpandCondsets::split(MachineInstr &MI, << MI); MachineOperand &MD = MI.getOperand(0); // Definition MachineOperand &MP = MI.getOperand(1); // Predicate register - MachineOperand &MS1 = MI.getOperand(2); // Source value #1 - MachineOperand &MS2 = MI.getOperand(3); // Source value #2 assert(MD.isDef()); unsigned DR = MD.getReg(), DSR = MD.getSubReg(); bool ReadUndef = MD.isUndef(); MachineBasicBlock::iterator At = MI; - if (ReadUndef && DSR != 0 && MRI->shouldTrackSubRegLiveness(DR)) { - unsigned NewSR = 0; - MachineBasicBlock::iterator DefAt = At; - bool SameReg = (MS1.isReg() && DR == MS1.getReg()) || - (MS2.isReg() && DR == MS2.getReg()); - if (SameReg) { - NewSR = (DSR == Hexagon::subreg_loreg) ? Hexagon::subreg_hireg - : Hexagon::subreg_loreg; - // Advance the insertion point if the subregisters differ between - // the source and the target (with the same super-register). - // Note: this case has never occured during tests. - if ((MS1.isReg() && NewSR == MS1.getSubReg()) || - (MS2.isReg() && NewSR == MS2.getSubReg())) - ++DefAt; + // If this is a mux of the same register, just replace it with COPY. + // Ideally, this would happen earlier, so that register coalescing would + // see it. MachineOperand &ST = MI.getOperand(2); MachineOperand &SF = MI.getOperand(3); + if (ST.isReg() && SF.isReg()) { + RegisterRef RT(ST); + if (RT == RegisterRef(SF)) { + MI.setDesc(HII->get(TargetOpcode::COPY)); + unsigned S = getRegState(ST); + while (MI.getNumOperands() > 1) + MI.RemoveOperand(MI.getNumOperands()-1); + MachineFunction &MF = *MI.getParent()->getParent(); + MachineInstrBuilder(MF, MI).addReg(RT.Reg, S, RT.Sub); + return true; } - // Use "At", since "DefAt" may be end(). - MachineBasicBlock &B = *At->getParent(); - DebugLoc DL = At->getDebugLoc(); - auto ImpD = BuildMI(B, DefAt, DL, HII->get(TargetOpcode::IMPLICIT_DEF)) - .addReg(DR, RegState::Define, NewSR); - LIS->InsertMachineInstrInMaps(*ImpD); - LocalImpDefs.insert(&*ImpD); } // First, create the two individual conditional transfers, and add each // of them to the live intervals information. Do that first and then remove // the old instruction from live intervals. 
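As a quick aside before the split() body continues below: the function now has exactly two outcomes. A minimal, self-contained sketch of that decision, using toy instruction records with made-up fields in place of MachineInstr/MachineOperand (this illustrates the shape of the rewrite, not the pass itself):

    #include <cassert>
    #include <iostream>
    #include <vector>

    // Toy model of the split() rewrite: a mux selecting between two sources
    // under a predicate becomes a plain copy when both sources are the same
    // register, and a pair of conditional transfers otherwise.
    enum class Op { Mux, Copy, TfrTrue, TfrFalse };

    struct Inst {
      Op op;
      int dst, pred, src1, src2;  // unused operands are -1
    };

    std::vector<Inst> splitMux(const Inst &MI) {
      assert(MI.op == Op::Mux);
      if (MI.src1 >= 0 && MI.src1 == MI.src2)          // same register twice:
        return {{Op::Copy, MI.dst, -1, MI.src1, -1}};  // degenerates to a copy
      return {{Op::TfrTrue,  MI.dst, MI.pred, MI.src1, -1},   // dst = s1 if p
              {Op::TfrFalse, MI.dst, MI.pred, MI.src2, -1}};  // dst = s2 if !p
    }

    int main() {
      Inst Mux{Op::Mux, /*dst=*/0, /*pred=*/9, /*src1=*/1, /*src2=*/2};
      std::cout << splitMux(Mux).size() << '\n';  // prints 2
    }

The same-register case is worth the special handling: a mux whose inputs are identical carries no condition at all, and turning it into a COPY gives register coalescing a chance to remove it entirely.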
MachineInstr *TfrT = - genCondTfrFor(MI.getOperand(2), At, DR, DSR, MP, true, ReadUndef, false); + genCondTfrFor(ST, At, DR, DSR, MP, true, ReadUndef, false); MachineInstr *TfrF = - genCondTfrFor(MI.getOperand(3), At, DR, DSR, MP, false, ReadUndef, true); + genCondTfrFor(SF, At, DR, DSR, MP, false, ReadUndef, true); LIS->InsertMachineInstrInMaps(*TfrT); LIS->InsertMachineInstrInMaps(*TfrF); @@ -699,22 +659,6 @@ bool HexagonExpandCondsets::split(MachineInstr &MI, return true; } - -/// Split all MUX instructions in the given block into pairs of conditional -/// transfers. -bool HexagonExpandCondsets::splitInBlock(MachineBasicBlock &B, - std::set<unsigned> &UpdRegs) { - bool Changed = false; - MachineBasicBlock::iterator I, E, NextI; - for (I = B.begin(), E = B.end(); I != E; I = NextI) { - NextI = std::next(I); - if (isCondset(*I)) - Changed |= split(*I, UpdRegs); - } - return Changed; -} - - bool HexagonExpandCondsets::isPredicable(MachineInstr *MI) { if (HII->isPredicated(*MI) || !HII->isPredicable(*MI)) return false; @@ -735,7 +679,6 @@ bool HexagonExpandCondsets::isPredicable(MachineInstr *MI) { return true; } - /// Find the reaching definition for a predicated use of RD. The RD is used /// under the conditions given by PredR and Cond, and this function will ignore /// definitions that set RD under the opposite conditions. @@ -744,7 +687,7 @@ MachineInstr *HexagonExpandCondsets::getReachingDefForPred(RegisterRef RD, MachineBasicBlock &B = *UseIt->getParent(); MachineBasicBlock::iterator I = UseIt, S = B.begin(); if (I == S) - return 0; + return nullptr; bool PredValid = true; do { @@ -775,15 +718,14 @@ MachineInstr *HexagonExpandCondsets::getReachingDefForPred(RegisterRef RD, if (RR.Sub == RD.Sub) return MI; if (RR.Sub == 0 || RD.Sub == 0) - return 0; + return nullptr; // We have different subregisters, so we can continue looking. } } while (I != S); - return 0; + return nullptr; } - /// Check if the instruction MI can be safely moved over a set of instructions /// whose side-effects (in terms of register defs and uses) are expressed in /// the maps Defs and Uses. These maps reflect the conditional defs and uses @@ -813,7 +755,6 @@ bool HexagonExpandCondsets::canMoveOver(MachineInstr &MI, ReferenceMap &Defs, return true; } - /// Check if the instruction accessing memory (TheI) can be moved to the /// location ToI. bool HexagonExpandCondsets::canMoveMemTo(MachineInstr &TheI, MachineInstr &ToI, @@ -848,7 +789,6 @@ bool HexagonExpandCondsets::canMoveMemTo(MachineInstr &TheI, MachineInstr &ToI, return true; } - /// Generate a predicated version of MI (where the condition is given via /// PredR and Cond) at the point indicated by Where. void HexagonExpandCondsets::predicateAt(const MachineOperand &DefOp, @@ -909,7 +849,6 @@ void HexagonExpandCondsets::predicateAt(const MachineOperand &DefOp, UpdRegs.insert(Op.getReg()); } - /// In the range [First, Last], rename all references to the "old" register RO /// to the "new" register RN, but only in instructions predicated on the given /// condition. @@ -937,7 +876,6 @@ void HexagonExpandCondsets::renameInRange(RegisterRef RO, RegisterRef RN, } } - /// For a given conditional copy, predicate the definition of the source of /// the copy under the given condition (using the same predicate register as /// the copy). @@ -982,7 +920,7 @@ bool HexagonExpandCondsets::predicate(MachineInstr &TfrI, bool Cond, // conditions when collecting def and use information. 
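The getReachingDefForPred logic above trusts the predicate only until an intervening instruction may redefine it, and ignores defs made under the opposite condition. A compact, runnable illustration of that backward search, under the simplifying assumptions of a single predicate register and whole-register defs (toy types; the real code also reasons about subregisters):

    #include <iostream>
    #include <optional>
    #include <vector>

    struct Inst {
      int def = -1;            // register defined, -1 if none
      bool predicated = false;
      bool predTrue = false;   // sense of the predicate, if predicated
      bool writesPred = false; // may clobber the predicate register
    };

    // Walk backwards from the use; return the index of the reaching def,
    // or nothing if we must be conservative.
    std::optional<size_t> reachingDef(const std::vector<Inst> &Block,
                                      size_t UseIdx, int Reg, bool UseSense) {
      bool PredValid = true;   // false once the predicate may have changed
      for (size_t I = UseIdx; I-- > 0; ) {
        const Inst &In = Block[I];
        if (In.writesPred)
          PredValid = false;
        if (In.def != Reg)
          continue;
        if (!In.predicated)                        // unconditional def reaches
          return I;
        if (PredValid && In.predTrue == UseSense)  // same, still-valid sense
          return I;
        if (PredValid && In.predTrue != UseSense)  // opposite sense: cannot
          continue;                                // execute with the use
        return std::nullopt;  // predicate no longer trusted: give up
      }
      return std::nullopt;
    }

    int main() {
      std::vector<Inst> B = {
        {5, true, false},     // v5 = ... if !p  (opposite sense, skipped)
        {5, true, true},      // v5 = ... if p   (this one reaches)
        {7},                  // unrelated def
      };
      auto D = reachingDef(B, /*UseIdx=*/3, /*Reg=*/5, /*UseSense=*/true);
      std::cout << (D ? static_cast<int>(*D) : -1) << '\n';  // prints 1
    }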
bool PredValid = true; for (MachineBasicBlock::iterator I = std::next(DefIt); I != TfrIt; ++I) { - if (!I->modifiesRegister(PredR, 0)) + if (!I->modifiesRegister(PredR, nullptr)) continue; PredValid = false; break; @@ -1013,6 +951,13 @@ bool HexagonExpandCondsets::predicate(MachineInstr &TfrI, bool Cond, return false; ReferenceMap &Map = Op.isDef() ? Defs : Uses; + if (Op.isDef() && Op.isUndef()) { + assert(RR.Sub && "Expecting a subregister on <def,read-undef>"); + // If this is a <def,read-undef>, then it invalidates the non-written + // part of the register. For the purpose of checking the validity of + // the move, assume that it modifies the whole register. + RR.Sub = 0; + } addRefToMap(RR, Map, Exec); } } @@ -1067,7 +1012,6 @@ bool HexagonExpandCondsets::predicate(MachineInstr &TfrI, bool Cond, return true; } - /// Predicate all cases of conditional copies in the specified block. bool HexagonExpandCondsets::predicateInBlock(MachineBasicBlock &B, std::set<unsigned> &UpdRegs) { @@ -1094,7 +1038,6 @@ bool HexagonExpandCondsets::predicateInBlock(MachineBasicBlock &B, return Changed; } - bool HexagonExpandCondsets::isIntReg(RegisterRef RR, unsigned &BW) { if (!TargetRegisterInfo::isVirtualRegister(RR.Reg)) return false; @@ -1110,7 +1053,6 @@ bool HexagonExpandCondsets::isIntReg(RegisterRef RR, unsigned &BW) { return false; } - bool HexagonExpandCondsets::isIntraBlocks(LiveInterval &LI) { for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) { LiveRange::Segment &LR = *I; @@ -1124,7 +1066,6 @@ bool HexagonExpandCondsets::isIntraBlocks(LiveInterval &LI) { return true; } - bool HexagonExpandCondsets::coalesceRegisters(RegisterRef R1, RegisterRef R2) { if (CoaLimitActive) { if (CoaCounter >= CoaLimit) @@ -1141,6 +1082,10 @@ bool HexagonExpandCondsets::coalesceRegisters(RegisterRef R1, RegisterRef R2) { LiveInterval &L1 = LIS->getInterval(R1.Reg); LiveInterval &L2 = LIS->getInterval(R2.Reg); + if (L2.empty()) + return false; + if (L1.hasSubRanges() || L2.hasSubRanges()) + return false; bool Overlap = L1.overlaps(L2); DEBUG(dbgs() << "compatible registers: (" @@ -1176,6 +1121,7 @@ bool HexagonExpandCondsets::coalesceRegisters(RegisterRef R1, RegisterRef R2) { } while (L2.begin() != L2.end()) L2.removeSegment(*L2.begin()); + LIS->removeInterval(R2.Reg); updateKillFlags(R1.Reg); DEBUG(dbgs() << "coalesced: " << L1 << "\n"); @@ -1184,28 +1130,22 @@ bool HexagonExpandCondsets::coalesceRegisters(RegisterRef R1, RegisterRef R2) { return true; } - -/// Attempt to coalesce one of the source registers to a MUX intruction with +/// Attempt to coalesce one of the source registers to a MUX instruction with /// the destination register. This could lead to having only one predicated /// instruction in the end instead of two. 
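coalesceSegments, rewritten in the next hunk, ultimately calls coalesceRegisters, which as shown above now also bails out on empty or subrange-carrying intervals before the existing overlap check. The overlap check itself is the classic two-pointer sweep over sorted, disjoint segments; a toy version with half-open integer ranges standing in for SlotIndex-based live ranges (merging is legal only when this returns false):

    #include <iostream>
    #include <utility>
    #include <vector>

    using Range = std::vector<std::pair<int, int>>;  // sorted, disjoint [lo,hi)

    bool overlaps(const Range &A, const Range &B) {
      size_t I = 0, J = 0;
      while (I < A.size() && J < B.size()) {
        if (A[I].second <= B[J].first)      ++I;  // A[I] ends before B[J]
        else if (B[J].second <= A[I].first) ++J;  // B[J] ends before A[I]
        else return true;                         // the segments intersect
      }
      return false;
    }

    int main() {
      Range L1 = {{0, 10}, {20, 30}};
      Range L2 = {{10, 20}};  // abuts L1 but never overlaps it
      std::cout << std::boolalpha << overlaps(L1, L2) << '\n';  // false
    }

When the test passes, the union of the two segment lists is itself a valid interval for the coalesced register, which is why the code can simply move L2's segments into L1.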
-bool HexagonExpandCondsets::coalesceSegments(MachineFunction &MF) { - SmallVector<MachineInstr*,16> Condsets; - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { - MachineBasicBlock &B = *I; - for (MachineBasicBlock::iterator J = B.begin(), F = B.end(); J != F; ++J) { - MachineInstr *MI = &*J; - if (!isCondset(*MI)) - continue; - MachineOperand &S1 = MI->getOperand(2), &S2 = MI->getOperand(3); - if (!S1.isReg() && !S2.isReg()) - continue; - Condsets.push_back(MI); - } +bool HexagonExpandCondsets::coalesceSegments( + const SmallVectorImpl<MachineInstr*> &Condsets, + std::set<unsigned> &UpdRegs) { + SmallVector<MachineInstr*,16> TwoRegs; + for (MachineInstr *MI : Condsets) { + MachineOperand &S1 = MI->getOperand(2), &S2 = MI->getOperand(3); + if (!S1.isReg() && !S2.isReg()) + continue; + TwoRegs.push_back(MI); } bool Changed = false; - for (unsigned i = 0, n = Condsets.size(); i < n; ++i) { - MachineInstr *CI = Condsets[i]; + for (MachineInstr *CI : TwoRegs) { RegisterRef RD = CI->getOperand(0); RegisterRef RP = CI->getOperand(1); MachineOperand &S1 = CI->getOperand(2), &S2 = CI->getOperand(3); @@ -1231,21 +1171,30 @@ bool HexagonExpandCondsets::coalesceSegments(MachineFunction &MF) { if (S1.isReg()) { RegisterRef RS = S1; MachineInstr *RDef = getReachingDefForPred(RS, CI, RP.Reg, true); - if (!RDef || !HII->isPredicable(*RDef)) + if (!RDef || !HII->isPredicable(*RDef)) { Done = coalesceRegisters(RD, RegisterRef(S1)); + if (Done) { + UpdRegs.insert(RD.Reg); + UpdRegs.insert(S1.getReg()); + } + } } if (!Done && S2.isReg()) { RegisterRef RS = S2; MachineInstr *RDef = getReachingDefForPred(RS, CI, RP.Reg, false); - if (!RDef || !HII->isPredicable(*RDef)) + if (!RDef || !HII->isPredicable(*RDef)) { Done = coalesceRegisters(RD, RegisterRef(S2)); + if (Done) { + UpdRegs.insert(RD.Reg); + UpdRegs.insert(S2.getReg()); + } + } } Changed |= Done; } return Changed; } - bool HexagonExpandCondsets::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(*MF.getFunction())) return false; @@ -1255,25 +1204,54 @@ bool HexagonExpandCondsets::runOnMachineFunction(MachineFunction &MF) { MDT = &getAnalysis<MachineDominatorTree>(); LIS = &getAnalysis<LiveIntervals>(); MRI = &MF.getRegInfo(); - LocalImpDefs.clear(); DEBUG(LIS->print(dbgs() << "Before expand-condsets\n", MF.getFunction()->getParent())); bool Changed = false; - std::set<unsigned> SplitUpd, PredUpd; + std::set<unsigned> CoalUpd, PredUpd; + + SmallVector<MachineInstr*,16> Condsets; + for (auto &B : MF) + for (auto &I : B) + if (isCondset(I)) + Condsets.push_back(&I); // Try to coalesce the target of a mux with one of its sources. // This could eliminate a register copy in some circumstances. - Changed |= coalesceSegments(MF); + Changed |= coalesceSegments(Condsets, CoalUpd); + + // Update kill flags on all source operands. This is done here because + // at this moment (when expand-condsets runs), there are no kill flags + // in the IR (they have been removed by live range analysis). + // Updating them right before we split is the easiest, because splitting + // adds definitions which would interfere with updating kills afterwards. 
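The comment above is the crux of the reordering in runOnMachineFunction: kill flags must be in place before splitting, because the predicated defs that splitting introduces make them hard to recompute afterwards. Recomputing kills inside one block is a single backward sweep; a self-contained sketch, assuming (unlike the real pass, which consults LiveIntervals) that no register is live out of the block:

    #include <iostream>
    #include <unordered_set>
    #include <vector>

    struct Inst {
      std::vector<int> uses;
      std::vector<int> defs;
      std::vector<int> kills;  // filled in by markKills
    };

    // Scanning backwards, the first use of a register we meet is its last
    // use in program order, i.e. a kill; a def further up resets the state.
    void markKills(std::vector<Inst> &Block) {
      std::unordered_set<int> LiveOut;  // registers read later in the block
      for (auto It = Block.rbegin(); It != Block.rend(); ++It) {
        for (int D : It->defs)
          LiveOut.erase(D);             // a def ends the downward liveness
        for (int U : It->uses) {
          if (!LiveOut.count(U))        // no later reader: this use kills U
            It->kills.push_back(U);
          LiveOut.insert(U);
        }
      }
    }

    int main() {
      std::vector<Inst> B = {{{1}, {2}, {}},     // use v1, def v2
                             {{1, 2}, {3}, {}}}; // last uses of v1 and v2
      markKills(B);
      std::cout << B[1].kills.size() << '\n';    // prints 2
    }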
+ std::set<unsigned> KillUpd; + for (MachineInstr *MI : Condsets) + for (MachineOperand &Op : MI->operands()) + if (Op.isReg() && Op.isUse()) + if (!CoalUpd.count(Op.getReg())) + KillUpd.insert(Op.getReg()); + updateLiveness(KillUpd, false, true, false); + DEBUG(LIS->print(dbgs() << "After coalescing\n", + MF.getFunction()->getParent())); // First, simply split all muxes into a pair of conditional transfers // and update the live intervals to reflect the new arrangement. The // goal is to update the kill flags, since predication will rely on // them. - for (auto &B : MF) - Changed |= splitInBlock(B, SplitUpd); - updateLiveness(SplitUpd, true, true, false); + for (MachineInstr *MI : Condsets) + Changed |= split(*MI, PredUpd); + Condsets.clear(); // The contents of Condsets are invalid here anyway. + + // Do not update live ranges after splitting. Recalculation of live + // intervals removes kill flags, which were preserved by splitting on + // the source operands of condsets. These kill flags are needed by + // predication, and after splitting they are difficult to recalculate + // (because of predicated defs), so make sure they are left untouched. + // Predication does not use live intervals. + DEBUG(LIS->print(dbgs() << "After splitting\n", + MF.getFunction()->getParent())); // Traverse all blocks and collapse predicable instructions feeding // conditional transfers into predicated instructions. @@ -1281,18 +1259,11 @@ bool HexagonExpandCondsets::runOnMachineFunction(MachineFunction &MF) { // cases that were not created in the previous step. for (auto &B : MF) Changed |= predicateInBlock(B, PredUpd); + DEBUG(LIS->print(dbgs() << "After predicating\n", + MF.getFunction()->getParent())); + PredUpd.insert(CoalUpd.begin(), CoalUpd.end()); updateLiveness(PredUpd, true, true, true); - // Remove from SplitUpd all registers contained in PredUpd to avoid - // unnecessary liveness recalculation. 
- std::set<unsigned> Diff; - std::set_difference(SplitUpd.begin(), SplitUpd.end(), - PredUpd.begin(), PredUpd.end(), - std::inserter(Diff, Diff.begin())); - updateLiveness(Diff, false, false, true); - - for (auto *ImpD : LocalImpDefs) - removeInstr(*ImpD); DEBUG({ if (Changed) @@ -1303,7 +1274,6 @@ bool HexagonExpandCondsets::runOnMachineFunction(MachineFunction &MF) { return Changed; } - //===----------------------------------------------------------------------===// // Public Constructor Functions //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonFixupHwLoops.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonFixupHwLoops.cpp index 3de817c..dfd1f1d 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonFixupHwLoops.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonFixupHwLoops.cpp @@ -47,10 +47,10 @@ namespace { MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( - MachineFunctionProperties::Property::AllVRegsAllocated); + MachineFunctionProperties::Property::NoVRegs); } - const char *getPassName() const override { + StringRef getPassName() const override { return "Hexagon Hardware Loop Fixup"; } @@ -125,7 +125,7 @@ bool HexagonFixupHwLoops::fixupLoopInstrs(MachineFunction &MF) { BlockToInstOffset[&MBB] = InstOffset; for (const MachineInstr &MI : MBB) - InstOffset += HII->getSize(&MI); + InstOffset += HII->getSize(MI); } // Second pass - check each loop instruction to see if it needs to be @@ -138,7 +138,7 @@ bool HexagonFixupHwLoops::fixupLoopInstrs(MachineFunction &MF) { MachineBasicBlock::iterator MII = MBB.begin(); MachineBasicBlock::iterator MIE = MBB.end(); while (MII != MIE) { - InstOffset += HII->getSize(&*MII); + InstOffset += HII->getSize(*MII); if (MII->isDebugValue()) { ++MII; continue; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp index 2540214..a3f6273 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -17,25 +17,51 @@ #include "HexagonRegisterInfo.h" #include "HexagonSubtarget.h" #include "HexagonTargetMachine.h" +#include "MCTargetDesc/HexagonBaseInfo.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachinePostDominators.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" -#include "llvm/IR/Type.h" +#include "llvm/MC/MCDwarf.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Pass.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include 
"llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <iterator> +#include <limits> +#include <map> +#include <new> +#include <utility> +#include <vector> // Hexagon stack frame layout as defined by the ABI: // @@ -99,27 +125,26 @@ // cated (reserved) register, it needs to be kept live throughout the function // to be available as the base register for local object accesses. // Normally, an address of a stack objects is obtained by a pseudo-instruction -// TFR_FI. To access local objects with the AP register present, a different -// pseudo-instruction needs to be used: TFR_FIA. The TFR_FIA takes one extra -// argument compared to TFR_FI: the first input register is the AP register. +// PS_fi. To access local objects with the AP register present, a different +// pseudo-instruction needs to be used: PS_fia. The PS_fia takes one extra +// argument compared to PS_fi: the first input register is the AP register. // This keeps the register live between its definition and its uses. -// The AP register is originally set up using pseudo-instruction ALIGNA: -// AP = ALIGNA A +// The AP register is originally set up using pseudo-instruction PS_aligna: +// AP = PS_aligna A // where // A - required stack alignment // The alignment value must be the maximum of all alignments required by // any stack object. -// The dynamic allocation uses a pseudo-instruction ALLOCA: -// Rd = ALLOCA Rs, A +// The dynamic allocation uses a pseudo-instruction PS_alloca: +// Rd = PS_alloca Rs, A // where // Rd - address of the allocated space // Rs - minimum size (the actual allocated can be larger to accommodate // alignment) // A - required alignment - using namespace llvm; static cl::opt<bool> DisableDeallocRet("disable-hexagon-dealloc-ret", @@ -145,9 +170,13 @@ static cl::opt<bool> EnableShrinkWrapping("hexagon-shrink-frame", cl::init(true), cl::Hidden, cl::ZeroOrMore, cl::desc("Enable stack frame shrink wrapping")); -static cl::opt<unsigned> ShrinkLimit("shrink-frame-limit", cl::init(UINT_MAX), - cl::Hidden, cl::ZeroOrMore, cl::desc("Max count of stack frame " - "shrink-wraps")); +static cl::opt<unsigned> ShrinkLimit("shrink-frame-limit", + cl::init(std::numeric_limits<unsigned>::max()), cl::Hidden, cl::ZeroOrMore, + cl::desc("Max count of stack frame shrink-wraps")); + +static cl::opt<bool> EnableSaveRestoreLong("enable-save-restore-long", + cl::Hidden, cl::desc("Enable long calls for save-restore stubs."), + cl::init(false), cl::ZeroOrMore); static cl::opt<bool> UseAllocframe("use-allocframe", cl::init(true), cl::Hidden, cl::desc("Use allocframe more conservatively")); @@ -155,29 +184,41 @@ static cl::opt<bool> UseAllocframe("use-allocframe", cl::init(true), static cl::opt<bool> OptimizeSpillSlots("hexagon-opt-spill", cl::Hidden, cl::init(true), cl::desc("Optimize spill slots")); +#ifndef NDEBUG +static cl::opt<unsigned> SpillOptMax("spill-opt-max", cl::Hidden, + cl::init(std::numeric_limits<unsigned>::max())); +static unsigned SpillOptCount = 0; +#endif namespace llvm { + void initializeHexagonCallFrameInformationPass(PassRegistry&); FunctionPass *createHexagonCallFrameInformation(); -} + +} // end namespace llvm namespace { + class HexagonCallFrameInformation : public MachineFunctionPass { public: static char ID; + 
HexagonCallFrameInformation() : MachineFunctionPass(ID) { PassRegistry &PR = *PassRegistry::getPassRegistry(); initializeHexagonCallFrameInformationPass(PR); } + bool runOnMachineFunction(MachineFunction &MF) override; + MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( - MachineFunctionProperties::Property::AllVRegsAllocated); + MachineFunctionProperties::Property::NoVRegs); } }; char HexagonCallFrameInformation::ID = 0; -} + +} // end anonymous namespace bool HexagonCallFrameInformation::runOnMachineFunction(MachineFunction &MF) { auto &HFI = *MF.getSubtarget<HexagonSubtarget>().getFrameLowering(); @@ -197,12 +238,11 @@ FunctionPass *llvm::createHexagonCallFrameInformation() { return new HexagonCallFrameInformation(); } - -namespace { - /// Map a register pair Reg to the subregister that has the greater "number", - /// i.e. D3 (aka R7:6) will be mapped to R7, etc. - unsigned getMax32BitSubRegister(unsigned Reg, const TargetRegisterInfo &TRI, - bool hireg = true) { +/// Map a register pair Reg to the subregister that has the greater "number", +/// i.e. D3 (aka R7:6) will be mapped to R7, etc. +static unsigned getMax32BitSubRegister(unsigned Reg, + const TargetRegisterInfo &TRI, + bool hireg = true) { if (Reg < Hexagon::D0 || Reg > Hexagon::D15) return Reg; @@ -217,11 +257,11 @@ namespace { } } return RegNo; - } +} - /// Returns the callee saved register with the largest id in the vector. - unsigned getMaxCalleeSavedReg(const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo &TRI) { +/// Returns the callee saved register with the largest id in the vector. +static unsigned getMaxCalleeSavedReg(const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo &TRI) { static_assert(Hexagon::R1 > 0, "Assume physical registers are encoded as positive integers"); if (CSI.empty()) @@ -234,20 +274,20 @@ namespace { Max = Reg; } return Max; - } +} - /// Checks if the basic block contains any instruction that needs a stack - /// frame to be already in place. - bool needsStackFrame(const MachineBasicBlock &MBB, const BitVector &CSR, - const HexagonRegisterInfo &HRI) { +/// Checks if the basic block contains any instruction that needs a stack +/// frame to be already in place. +static bool needsStackFrame(const MachineBasicBlock &MBB, const BitVector &CSR, + const HexagonRegisterInfo &HRI) { for (auto &I : MBB) { const MachineInstr *MI = &I; if (MI->isCall()) return true; unsigned Opc = MI->getOpcode(); switch (Opc) { - case Hexagon::ALLOCA: - case Hexagon::ALIGNA: + case Hexagon::PS_alloca: + case Hexagon::PS_aligna: return true; default: break; @@ -274,60 +314,62 @@ namespace { } } return false; - } +} /// Returns true if MBB has a machine instructions that indicates a tail call /// in the block. - bool hasTailCall(const MachineBasicBlock &MBB) { +static bool hasTailCall(const MachineBasicBlock &MBB) { MachineBasicBlock::const_iterator I = MBB.getLastNonDebugInstr(); unsigned RetOpc = I->getOpcode(); - return RetOpc == Hexagon::TCRETURNi || RetOpc == Hexagon::TCRETURNr; - } + return RetOpc == Hexagon::PS_tailcall_i || RetOpc == Hexagon::PS_tailcall_r; +} - /// Returns true if MBB contains an instruction that returns. - bool hasReturn(const MachineBasicBlock &MBB) { +/// Returns true if MBB contains an instruction that returns. 
+static bool hasReturn(const MachineBasicBlock &MBB) { for (auto I = MBB.getFirstTerminator(), E = MBB.end(); I != E; ++I) if (I->isReturn()) return true; return false; - } +} - /// Returns the "return" instruction from this block, or nullptr if there - /// isn't any. - MachineInstr *getReturn(MachineBasicBlock &MBB) { +/// Returns the "return" instruction from this block, or nullptr if there +/// isn't any. +static MachineInstr *getReturn(MachineBasicBlock &MBB) { for (auto &I : MBB) if (I.isReturn()) return &I; return nullptr; - } +} - bool isRestoreCall(unsigned Opc) { +static bool isRestoreCall(unsigned Opc) { switch (Opc) { case Hexagon::RESTORE_DEALLOC_RET_JMP_V4: case Hexagon::RESTORE_DEALLOC_RET_JMP_V4_PIC: + case Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT: + case Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT_PIC: + case Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT: + case Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT_PIC: case Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4: case Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC: return true; } return false; - } +} - inline bool isOptNone(const MachineFunction &MF) { +static inline bool isOptNone(const MachineFunction &MF) { return MF.getFunction()->hasFnAttribute(Attribute::OptimizeNone) || MF.getTarget().getOptLevel() == CodeGenOpt::None; - } +} - inline bool isOptSize(const MachineFunction &MF) { +static inline bool isOptSize(const MachineFunction &MF) { const Function &F = *MF.getFunction(); return F.optForSize() && !F.optForMinSize(); - } +} - inline bool isMinSize(const MachineFunction &MF) { +static inline bool isMinSize(const MachineFunction &MF) { return MF.getFunction()->optForMinSize(); - } } - /// Implements shrink-wrapping of the stack frame. By default, stack frame /// is created in the function entry block, and is cleaned up in every block /// that returns. This function finds alternate blocks: one for the frame @@ -342,7 +384,7 @@ void HexagonFrameLowering::findShrunkPrologEpilog(MachineFunction &MF, ShrinkCounter++; } - auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget()); + auto &HST = MF.getSubtarget<HexagonSubtarget>(); auto &HRI = *HST.getRegisterInfo(); MachineDominatorTree MDT; @@ -432,7 +474,6 @@ void HexagonFrameLowering::findShrunkPrologEpilog(MachineFunction &MF, EpilogB = PDomB; } - /// Perform most of the PEI work here: /// - saving/restoring of the callee-saved registers, /// - stack frame creation and destruction. @@ -440,11 +481,11 @@ void HexagonFrameLowering::findShrunkPrologEpilog(MachineFunction &MF, /// in one place allows shrink-wrapping of the stack frame. 
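findShrunkPrologEpilog above relies on MachineDominatorTree and MachinePostDominatorTree to pick the frame blocks. The core step is a nearest-common-dominator walk: the prolog can sink from the entry block to the closest block that dominates everything needing a frame, and the epilog is placed symmetrically via post-dominance. A hand-rolled sketch over a flat dominator-tree encoding (the pass adds further legality checks on top of this):

    #include <iostream>
    #include <utility>
    #include <vector>

    struct DomTree {
      std::vector<int> parent;  // immediate dominator; -1 for the root
      std::vector<int> depth;
    };

    int nearestCommonDom(const DomTree &T, int A, int B) {
      while (A != B) {
        if (T.depth[A] < T.depth[B]) std::swap(A, B);
        A = T.parent[A];  // climb the deeper node until the paths meet
      }
      return A;
    }

    int main() {
      //        0
      //       / \
      //      1   2
      //         / \
      //        3   4
      DomTree T{{-1, 0, 0, 2, 2}, {0, 1, 1, 2, 2}};
      std::vector<int> NeedFrame = {3, 4};  // blocks that touch the stack
      int PrologB = NeedFrame[0];
      for (size_t I = 1; I < NeedFrame.size(); ++I)
        PrologB = nearestCommonDom(T, PrologB, NeedFrame[I]);
      std::cout << PrologB << '\n';  // prints 2: the prolog sinks out of 0
    }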
void HexagonFrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { - auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget()); + auto &HST = MF.getSubtarget<HexagonSubtarget>(); auto &HRI = *HST.getRegisterInfo(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); MachineBasicBlock *PrologB = &MF.front(), *EpilogB = nullptr; if (EnableShrinkWrapping) @@ -453,6 +494,7 @@ void HexagonFrameLowering::emitPrologue(MachineFunction &MF, bool PrologueStubs = false; insertCSRSpillsInBlock(*PrologB, CSI, HRI, PrologueStubs); insertPrologueInBlock(*PrologB, PrologueStubs); + updateEntryPaths(MF, *PrologB); if (EpilogB) { insertCSRRestoresInBlock(*EpilogB, CSI, HRI); @@ -481,50 +523,49 @@ void HexagonFrameLowering::emitPrologue(MachineFunction &MF, // If there is an epilog block, it may not have a return instruction. // In such case, we need to add the callee-saved registers as live-ins // in all blocks on all paths from the epilog to any return block. - unsigned MaxBN = 0; - for (auto &B : MF) - if (B.getNumber() >= 0) - MaxBN = std::max(MaxBN, unsigned(B.getNumber())); + unsigned MaxBN = MF.getNumBlockIDs(); BitVector DoneT(MaxBN+1), DoneF(MaxBN+1), Path(MaxBN+1); - updateExitPaths(*EpilogB, EpilogB, DoneT, DoneF, Path); + updateExitPaths(*EpilogB, *EpilogB, DoneT, DoneF, Path); } } - void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB, bool PrologueStubs) const { MachineFunction &MF = *MBB.getParent(); - MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); auto &HST = MF.getSubtarget<HexagonSubtarget>(); auto &HII = *HST.getInstrInfo(); auto &HRI = *HST.getRegisterInfo(); DebugLoc dl; - unsigned MaxAlign = std::max(MFI->getMaxAlignment(), getStackAlignment()); + unsigned MaxAlign = std::max(MFI.getMaxAlignment(), getStackAlignment()); // Calculate the total stack frame size. // Get the number of bytes to allocate from the FrameInfo. - unsigned FrameSize = MFI->getStackSize(); + unsigned FrameSize = MFI.getStackSize(); // Round up the max call frame size to the max alignment on the stack. - unsigned MaxCFA = alignTo(MFI->getMaxCallFrameSize(), MaxAlign); - MFI->setMaxCallFrameSize(MaxCFA); + unsigned MaxCFA = alignTo(MFI.getMaxCallFrameSize(), MaxAlign); + MFI.setMaxCallFrameSize(MaxCFA); FrameSize = MaxCFA + alignTo(FrameSize, MaxAlign); - MFI->setStackSize(FrameSize); + MFI.setStackSize(FrameSize); bool AlignStack = (MaxAlign > getStackAlignment()); // Get the number of bytes to allocate from the FrameInfo. 
- unsigned NumBytes = MFI->getStackSize(); + unsigned NumBytes = MFI.getStackSize(); unsigned SP = HRI.getStackRegister(); - unsigned MaxCF = MFI->getMaxCallFrameSize(); + unsigned MaxCF = MFI.getMaxCallFrameSize(); MachineBasicBlock::iterator InsertPt = MBB.begin(); - auto *FuncInfo = MF.getInfo<HexagonMachineFunctionInfo>(); - auto &AdjustRegs = FuncInfo->getAllocaAdjustInsts(); + SmallVector<MachineInstr *, 4> AdjustRegs; + for (auto &MBB : MF) + for (auto &MI : MBB) + if (MI.getOpcode() == Hexagon::PS_alloca) + AdjustRegs.push_back(&MI); for (auto MI : AdjustRegs) { - assert((MI->getOpcode() == Hexagon::ALLOCA) && "Expected alloca"); + assert((MI->getOpcode() == Hexagon::PS_alloca) && "Expected alloca"); expandAlloca(MI, HII, SP, MaxCF); MI->eraseFromParent(); } @@ -551,7 +592,7 @@ void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB, // Subtract offset from frame pointer. // We use a caller-saved non-parameter register for that. unsigned CallerSavedReg = HRI.getFirstCallerSavedNonParamReg(); - BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::CONST32_Int_Real), + BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::CONST32), CallerSavedReg).addImm(NumBytes); BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_sub), SP) .addReg(SP) @@ -572,7 +613,7 @@ void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB, // registers inline (i.e. did not use a spill function), then call // the stack checker directly. if (EnableStackOVFSanitizer && !PrologueStubs) - BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::CALLstk)) + BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::PS_call_stk)) .addExternalSymbol("__runtime_stack_check"); } @@ -581,7 +622,7 @@ void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const { if (!hasFP(MF)) return; - auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget()); + auto &HST = MF.getSubtarget<HexagonSubtarget>(); auto &HII = *HST.getInstrInfo(); auto &HRI = *HST.getRegisterInfo(); unsigned SP = HRI.getStackRegister(); @@ -608,7 +649,9 @@ void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const { // Check for RESTORE_DEALLOC_RET* tail call. Don't emit an extra dealloc- // frame instruction if we encounter it. if (RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4 || - RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4_PIC) { + RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4_PIC || + RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT || + RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT_PIC) { MachineBasicBlock::iterator It = RetI; ++It; // Delete all instructions after the RESTORE (except labels). @@ -629,16 +672,19 @@ void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const { MachineBasicBlock::iterator PrevIt = std::prev(InsertPt); unsigned COpc = PrevIt->getOpcode(); if (COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4 || - COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC) + COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC || + COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT || + COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT_PIC || + COpc == Hexagon::PS_call_nr || COpc == Hexagon::PS_callr_nr) NeedsDeallocframe = false; } if (!NeedsDeallocframe) return; - // If the returning instruction is JMPret, replace it with dealloc_return, + // If the returning instruction is PS_jmpret, replace it with dealloc_return, // otherwise just add deallocframe. The function could be returning via a // tail call. 
- if (RetOpc != Hexagon::JMPret || DisableDeallocRet) { + if (RetOpc != Hexagon::PS_jmpret || DisableDeallocRet) { BuildMI(MBB, InsertPt, DL, HII.get(Hexagon::L2_deallocframe)); return; } @@ -649,9 +695,30 @@ void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const { MBB.erase(RetI); } +void HexagonFrameLowering::updateEntryPaths(MachineFunction &MF, + MachineBasicBlock &SaveB) const { + SetVector<unsigned> Worklist; + + MachineBasicBlock &EntryB = MF.front(); + Worklist.insert(EntryB.getNumber()); + + unsigned SaveN = SaveB.getNumber(); + auto &CSI = MF.getFrameInfo().getCalleeSavedInfo(); + + for (unsigned i = 0; i < Worklist.size(); ++i) { + unsigned BN = Worklist[i]; + MachineBasicBlock &MBB = *MF.getBlockNumbered(BN); + for (auto &R : CSI) + if (!MBB.isLiveIn(R.getReg())) + MBB.addLiveIn(R.getReg()); + if (BN != SaveN) + for (auto &SB : MBB.successors()) + Worklist.insert(SB->getNumber()); + } +} bool HexagonFrameLowering::updateExitPaths(MachineBasicBlock &MBB, - MachineBasicBlock *RestoreB, BitVector &DoneT, BitVector &DoneF, + MachineBasicBlock &RestoreB, BitVector &DoneT, BitVector &DoneF, BitVector &Path) const { assert(MBB.getNumber() >= 0); unsigned BN = MBB.getNumber(); @@ -660,7 +727,7 @@ bool HexagonFrameLowering::updateExitPaths(MachineBasicBlock &MBB, if (DoneT[BN]) return true; - auto &CSI = MBB.getParent()->getFrameInfo()->getCalleeSavedInfo(); + auto &CSI = MBB.getParent()->getFrameInfo().getCalleeSavedInfo(); Path[BN] = true; bool ReachedExit = false; @@ -681,7 +748,7 @@ bool HexagonFrameLowering::updateExitPaths(MachineBasicBlock &MBB, // We don't want to add unnecessary live-ins to the restore block: since // the callee-saved registers are being defined in it, the entry of the // restore block cannot be on the path from the definitions to any exit. - if (ReachedExit && &MBB != RestoreB) { + if (ReachedExit && &MBB != &RestoreB) { for (auto &R : CSI) if (!MBB.isLiveIn(R.getReg())) MBB.addLiveIn(R.getReg()); @@ -694,42 +761,49 @@ bool HexagonFrameLowering::updateExitPaths(MachineBasicBlock &MBB, return ReachedExit; } - -namespace { - bool IsAllocFrame(MachineBasicBlock::const_iterator It) { - if (!It->isBundle()) - return It->getOpcode() == Hexagon::S2_allocframe; - auto End = It->getParent()->instr_end(); - MachineBasicBlock::const_instr_iterator I = It.getInstrIterator(); - while (++I != End && I->isBundled()) - if (I->getOpcode() == Hexagon::S2_allocframe) - return true; - return false; - } - - MachineBasicBlock::iterator FindAllocFrame(MachineBasicBlock &B) { - for (auto &I : B) - if (IsAllocFrame(I)) - return I; - return B.end(); - } +static Optional<MachineBasicBlock::iterator> +findCFILocation(MachineBasicBlock &B) { + // The CFI instructions need to be inserted right after allocframe. + // An exception to this is a situation where allocframe is bundled + // with a call: then the CFI instructions need to be inserted before + // the packet with the allocframe+call (in case the call throws an + // exception). + auto End = B.instr_end(); + + for (MachineInstr &I : B) { + MachineBasicBlock::iterator It = I.getIterator(); + if (!I.isBundle()) { + if (I.getOpcode() == Hexagon::S2_allocframe) + return std::next(It); + continue; + } + // I is a bundle. + bool HasCall = false, HasAllocFrame = false; + auto T = It.getInstrIterator(); + while (++T != End && T->isBundled()) { + if (T->getOpcode() == Hexagon::S2_allocframe) + HasAllocFrame = true; + else if (T->isCall()) + HasCall = true; + } + if (HasAllocFrame) + return HasCall ? 
It : std::next(It); + } + return None; } - void HexagonFrameLowering::insertCFIInstructions(MachineFunction &MF) const { for (auto &B : MF) { - auto AF = FindAllocFrame(B); - if (AF == B.end()) - continue; - insertCFIInstructionsAt(B, ++AF); + auto At = findCFILocation(B); + if (At.hasValue()) + insertCFIInstructionsAt(B, At.getValue()); } } - void HexagonFrameLowering::insertCFIInstructionsAt(MachineBasicBlock &MBB, MachineBasicBlock::iterator At) const { MachineFunction &MF = *MBB.getParent(); - MachineFrameInfo &MFI = *MF.getFrameInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); MachineModuleInfo &MMI = MF.getMMI(); auto &HST = MF.getSubtarget<HexagonSubtarget>(); auto &HII = *HST.getInstrInfo(); @@ -761,15 +835,15 @@ void HexagonFrameLowering::insertCFIInstructionsAt(MachineBasicBlock &MBB, // MCCFIInstruction::createOffset takes the offset without sign change. auto DefCfa = MCCFIInstruction::createDefCfa(FrameLabel, DwFPReg, -8); BuildMI(MBB, At, DL, CFID) - .addCFIIndex(MMI.addFrameInst(DefCfa)); + .addCFIIndex(MF.addFrameInst(DefCfa)); // R31 (return addr) = CFA - 4 auto OffR31 = MCCFIInstruction::createOffset(FrameLabel, DwRAReg, -4); BuildMI(MBB, At, DL, CFID) - .addCFIIndex(MMI.addFrameInst(OffR31)); + .addCFIIndex(MF.addFrameInst(OffR31)); // R30 (frame ptr) = CFA - 8 auto OffR30 = MCCFIInstruction::createOffset(FrameLabel, DwFPReg, -8); BuildMI(MBB, At, DL, CFID) - .addCFIIndex(MMI.addFrameInst(OffR30)); + .addCFIIndex(MF.addFrameInst(OffR30)); } static unsigned int RegsToMove[] = { @@ -789,7 +863,7 @@ void HexagonFrameLowering::insertCFIInstructionsAt(MachineBasicBlock &MBB, auto IfR = [Reg] (const CalleeSavedInfo &C) -> bool { return C.getReg() == Reg; }; - auto F = std::find_if(CSI.begin(), CSI.end(), IfR); + auto F = find_if(CSI, IfR); if (F == CSI.end()) continue; @@ -815,7 +889,7 @@ void HexagonFrameLowering::insertCFIInstructionsAt(MachineBasicBlock &MBB, auto OffReg = MCCFIInstruction::createOffset(FrameLabel, DwarfReg, Offset); BuildMI(MBB, At, DL, CFID) - .addCFIIndex(MMI.addFrameInst(OffReg)); + .addCFIIndex(MF.addFrameInst(OffReg)); } else { // Split the double regs into subregs, and generate appropriate // cfi_offsets. @@ -823,25 +897,24 @@ void HexagonFrameLowering::insertCFIInstructionsAt(MachineBasicBlock &MBB, // understand paired registers for cfi_offset. 
// Eg .cfi_offset r1:0, -64 - unsigned HiReg = HRI.getSubReg(Reg, Hexagon::subreg_hireg); - unsigned LoReg = HRI.getSubReg(Reg, Hexagon::subreg_loreg); + unsigned HiReg = HRI.getSubReg(Reg, Hexagon::isub_hi); + unsigned LoReg = HRI.getSubReg(Reg, Hexagon::isub_lo); unsigned HiDwarfReg = HRI.getDwarfRegNum(HiReg, true); unsigned LoDwarfReg = HRI.getDwarfRegNum(LoReg, true); auto OffHi = MCCFIInstruction::createOffset(FrameLabel, HiDwarfReg, Offset+4); BuildMI(MBB, At, DL, CFID) - .addCFIIndex(MMI.addFrameInst(OffHi)); + .addCFIIndex(MF.addFrameInst(OffHi)); auto OffLo = MCCFIInstruction::createOffset(FrameLabel, LoDwarfReg, Offset); BuildMI(MBB, At, DL, CFID) - .addCFIIndex(MMI.addFrameInst(OffLo)); + .addCFIIndex(MF.addFrameInst(OffLo)); } } } - bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const { - auto &MFI = *MF.getFrameInfo(); + auto &MFI = MF.getFrameInfo(); auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); bool HasFixed = MFI.getNumFixedObjects(); @@ -877,7 +950,6 @@ bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const { return false; } - enum SpillKind { SK_ToMem, SK_FromMem, @@ -952,13 +1024,12 @@ static const char *getSpillFunctionFor(unsigned MaxReg, SpillKind SpillType, default: llvm_unreachable("Unhandled maximum callee save register"); } - return 0; + return nullptr; } - int HexagonFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const { - auto &MFI = *MF.getFrameInfo(); + auto &MFI = MF.getFrameInfo(); auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); int Offset = MFI.getObjectOffset(FI); @@ -1039,7 +1110,6 @@ int HexagonFrameLowering::getFrameIndexReference(const MachineFunction &MF, return RealOffset; } - bool HexagonFrameLowering::insertCSRSpillsInBlock(MachineBasicBlock &MBB, const CSIVect &CSI, const HexagonRegisterInfo &HRI, bool &PrologueStubs) const { @@ -1049,7 +1119,8 @@ bool HexagonFrameLowering::insertCSRSpillsInBlock(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI = MBB.begin(); PrologueStubs = false; MachineFunction &MF = *MBB.getParent(); - auto &HII = *MF.getSubtarget<HexagonSubtarget>().getInstrInfo(); + auto &HST = MF.getSubtarget<HexagonSubtarget>(); + auto &HII = *HST.getInstrInfo(); if (useSpillFunction(MF, CSI)) { PrologueStubs = true; @@ -1059,20 +1130,31 @@ bool HexagonFrameLowering::insertCSRSpillsInBlock(MachineBasicBlock &MBB, StkOvrFlowEnabled); auto &HTM = static_cast<const HexagonTargetMachine&>(MF.getTarget()); bool IsPIC = HTM.isPositionIndependent(); + bool LongCalls = HST.useLongCalls() || EnableSaveRestoreLong; // Call spill function. DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc(); unsigned SpillOpc; - if (StkOvrFlowEnabled) - SpillOpc = IsPIC ? Hexagon::SAVE_REGISTERS_CALL_V4STK_PIC - : Hexagon::SAVE_REGISTERS_CALL_V4STK; - else - SpillOpc = IsPIC ? Hexagon::SAVE_REGISTERS_CALL_V4_PIC - : Hexagon::SAVE_REGISTERS_CALL_V4; + if (StkOvrFlowEnabled) { + if (LongCalls) + SpillOpc = IsPIC ? Hexagon::SAVE_REGISTERS_CALL_V4STK_EXT_PIC + : Hexagon::SAVE_REGISTERS_CALL_V4STK_EXT; + else + SpillOpc = IsPIC ? Hexagon::SAVE_REGISTERS_CALL_V4STK_PIC + : Hexagon::SAVE_REGISTERS_CALL_V4STK; + } else { + if (LongCalls) + SpillOpc = IsPIC ? Hexagon::SAVE_REGISTERS_CALL_V4_EXT_PIC + : Hexagon::SAVE_REGISTERS_CALL_V4_EXT; + else + SpillOpc = IsPIC ? 
Hexagon::SAVE_REGISTERS_CALL_V4_PIC + : Hexagon::SAVE_REGISTERS_CALL_V4; + } MachineInstr *SaveRegsCall = BuildMI(MBB, MI, DL, HII.get(SpillOpc)) .addExternalSymbol(SpillFun); + // Add callee-saved registers as use. addCalleeSaveRegistersAsImpOperand(SaveRegsCall, CSI, false, true); // Add live in registers. @@ -1096,7 +1178,6 @@ bool HexagonFrameLowering::insertCSRSpillsInBlock(MachineBasicBlock &MBB, return true; } - bool HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB, const CSIVect &CSI, const HexagonRegisterInfo &HRI) const { if (CSI.empty()) @@ -1104,7 +1185,8 @@ bool HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI = MBB.getFirstTerminator(); MachineFunction &MF = *MBB.getParent(); - auto &HII = *MF.getSubtarget<HexagonSubtarget>().getInstrInfo(); + auto &HST = MF.getSubtarget<HexagonSubtarget>(); + auto &HII = *HST.getInstrInfo(); if (useRestoreFunction(MF, CSI)) { bool HasTC = hasTailCall(MBB) || !hasReturn(MBB); @@ -1113,6 +1195,7 @@ bool HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB, const char *RestoreFn = getSpillFunctionFor(MaxR, Kind); auto &HTM = static_cast<const HexagonTargetMachine&>(MF.getTarget()); bool IsPIC = HTM.isPositionIndependent(); + bool LongCalls = HST.useLongCalls() || EnableSaveRestoreLong; // Call spill function. DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() @@ -1120,17 +1203,27 @@ bool HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB, MachineInstr *DeallocCall = nullptr; if (HasTC) { - unsigned ROpc = IsPIC ? Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC - : Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4; - DeallocCall = BuildMI(MBB, MI, DL, HII.get(ROpc)) + unsigned RetOpc; + if (LongCalls) + RetOpc = IsPIC ? Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT_PIC + : Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT; + else + RetOpc = IsPIC ? Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC + : Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4; + DeallocCall = BuildMI(MBB, MI, DL, HII.get(RetOpc)) .addExternalSymbol(RestoreFn); } else { // The block has a return. MachineBasicBlock::iterator It = MBB.getFirstTerminator(); assert(It->isReturn() && std::next(It) == MBB.end()); - unsigned ROpc = IsPIC ? Hexagon::RESTORE_DEALLOC_RET_JMP_V4_PIC - : Hexagon::RESTORE_DEALLOC_RET_JMP_V4; - DeallocCall = BuildMI(MBB, It, DL, HII.get(ROpc)) + unsigned RetOpc; + if (LongCalls) + RetOpc = IsPIC ? Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT_PIC + : Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT; + else + RetOpc = IsPIC ? Hexagon::RESTORE_DEALLOC_RET_JMP_V4_PIC + : Hexagon::RESTORE_DEALLOC_RET_JMP_V4; + DeallocCall = BuildMI(MBB, It, DL, HII.get(RetOpc)) .addExternalSymbol(RestoreFn); // Transfer the function live-out registers. DeallocCall->copyImplicitOps(MF, *It); @@ -1160,39 +1253,38 @@ MachineBasicBlock::iterator HexagonFrameLowering::eliminateCallFramePseudoInstr( return MBB.erase(I); } - void HexagonFrameLowering::processFunctionBeforeFrameFinalized( MachineFunction &MF, RegScavenger *RS) const { // If this function has uses aligned stack and also has variable sized stack // objects, then we need to map all spill slots to fixed positions, so that // they can be accessed through FP. Otherwise they would have to be accessed // via AP, which may not be available at the particular place in the program. 
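The comment just above explains why, when dynamic allocation and over-aligned objects coexist, spill slots get fixed FP-relative homes. The loop that follows in this hunk computes those offsets; the arithmetic in isolation looks like this (sizes and alignments invented for the example):

    #include <algorithm>
    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Round V up to the next multiple of A (A is a power of two in practice).
    uint64_t alignTo(uint64_t V, uint64_t A) {
      return (V + A - 1) / A * A;
    }

    int main() {
      struct Slot { uint64_t size, align; };
      std::vector<Slot> Slots = {{4, 4}, {8, 8}, {16, 16}};
      uint64_t LFS = 0;  // running local-frame size
      for (const Slot &S : Slots) {
        // Spacing honors the object's alignment but never less than 8,
        // mirroring the std::max(getObjectAlignment(i), 8U) in the pass.
        uint64_t A = std::max<uint64_t>(S.align, 8);
        LFS = alignTo(LFS + S.size, A);
        std::cout << "slot at FP-" << LFS << '\n';  // mapped to offset -LFS
      }
    }

Each slot's recorded alignment is then reduced to 8, so vector spills may become unaligned stores, which the surrounding comment explicitly accepts.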
- MachineFrameInfo *MFI = MF.getFrameInfo(); - bool HasAlloca = MFI->hasVarSizedObjects(); - bool NeedsAlign = (MFI->getMaxAlignment() > getStackAlignment()); + MachineFrameInfo &MFI = MF.getFrameInfo(); + bool HasAlloca = MFI.hasVarSizedObjects(); + bool NeedsAlign = (MFI.getMaxAlignment() > getStackAlignment()); if (!HasAlloca || !NeedsAlign) return; - unsigned LFS = MFI->getLocalFrameSize(); - for (int i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { - if (!MFI->isSpillSlotObjectIndex(i) || MFI->isDeadObjectIndex(i)) + unsigned LFS = MFI.getLocalFrameSize(); + for (int i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) { + if (!MFI.isSpillSlotObjectIndex(i) || MFI.isDeadObjectIndex(i)) continue; - unsigned S = MFI->getObjectSize(i); + unsigned S = MFI.getObjectSize(i); // Reduce the alignment to at most 8. This will require unaligned vector // stores if they happen here. - unsigned A = std::max(MFI->getObjectAlignment(i), 8U); - MFI->setObjectAlignment(i, 8); + unsigned A = std::max(MFI.getObjectAlignment(i), 8U); + MFI.setObjectAlignment(i, 8); LFS = alignTo(LFS+S, A); - MFI->mapLocalFrameObject(i, -LFS); + MFI.mapLocalFrameObject(i, -LFS); } - MFI->setLocalFrameSize(LFS); - unsigned A = MFI->getLocalFrameMaxAlign(); + MFI.setLocalFrameSize(LFS); + unsigned A = MFI.getLocalFrameMaxAlign(); assert(A <= 8 && "Unexpected local frame alignment"); if (A == 0) - MFI->setLocalFrameMaxAlign(8); - MFI->setUseLocalStackAllocationBlock(true); + MFI.setLocalFrameMaxAlign(8); + MFI.setUseLocalStackAllocationBlock(true); // Set the physical aligned-stack base address register. unsigned AP = 0; @@ -1224,7 +1316,6 @@ static bool needToReserveScavengingSpillSlots(MachineFunction &MF, return true; } - #ifndef NDEBUG static void dump_registers(BitVector &Regs, const TargetRegisterInfo &TRI) { dbgs() << '{'; @@ -1236,12 +1327,11 @@ static void dump_registers(BitVector &Regs, const TargetRegisterInfo &TRI) { } #endif - bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector<CalleeSavedInfo> &CSI) const { - DEBUG(dbgs() << LLVM_FUNCTION_NAME << " on " + DEBUG(dbgs() << __func__ << " on " << MF.getFunction()->getName() << '\n'); - MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); BitVector SRegs(Hexagon::NUM_TARGET_REGS); // Generate a set of unique, callee-saved registers (SRegs), where each @@ -1321,7 +1411,7 @@ bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF, if (!SRegs[S->Reg]) continue; const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(S->Reg); - int FI = MFI->CreateFixedSpillStackObject(RC->getSize(), S->Offset); + int FI = MFI.CreateFixedSpillStackObject(RC->getSize(), S->Offset); MinOffset = std::min(MinOffset, S->Offset); CSI.push_back(CalleeSavedInfo(S->Reg, FI)); SRegs[S->Reg] = false; @@ -1337,7 +1427,7 @@ bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF, unsigned Align = std::min(RC->getAlignment(), getStackAlignment()); assert(isPowerOf2_32(Align)); Off &= -Align; - int FI = MFI->CreateFixedSpillStackObject(RC->getSize(), Off); + int FI = MFI.CreateFixedSpillStackObject(RC->getSize(), Off); MinOffset = std::min(MinOffset, Off); CSI.push_back(CalleeSavedInfo(R, FI)); SRegs[R] = false; @@ -1347,7 +1437,7 @@ bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF, dbgs() << "CS information: {"; for (unsigned i = 0, n = CSI.size(); i < n; ++i) { int FI = CSI[i].getFrameIdx(); - int Off = MFI->getObjectOffset(FI); + 
int Off = MFI.getObjectOffset(FI); dbgs() << ' ' << PrintReg(CSI[i].getReg(), TRI) << ":fi#" << FI << ":sp"; if (Off >= 0) dbgs() << '+'; @@ -1371,7 +1461,6 @@ bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF, return true; } - bool HexagonFrameLowering::expandCopy(MachineBasicBlock &B, MachineBasicBlock::iterator It, MachineRegisterInfo &MRI, const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const { @@ -1398,12 +1487,13 @@ bool HexagonFrameLowering::expandStoreInt(MachineBasicBlock &B, MachineBasicBlock::iterator It, MachineRegisterInfo &MRI, const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const { MachineInstr *MI = &*It; + if (!MI->getOperand(0).isFI()) + return false; + DebugLoc DL = MI->getDebugLoc(); unsigned Opc = MI->getOpcode(); unsigned SrcR = MI->getOperand(2).getReg(); bool IsKill = MI->getOperand(2).isKill(); - - assert(MI->getOperand(0).isFI() && "Expect a frame index"); int FI = MI->getOperand(0).getIndex(); // TmpR = C2_tfrpr SrcR if SrcR is a predicate register @@ -1430,11 +1520,12 @@ bool HexagonFrameLowering::expandLoadInt(MachineBasicBlock &B, MachineBasicBlock::iterator It, MachineRegisterInfo &MRI, const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const { MachineInstr *MI = &*It; + if (!MI->getOperand(1).isFI()) + return false; + DebugLoc DL = MI->getDebugLoc(); unsigned Opc = MI->getOpcode(); unsigned DstR = MI->getOperand(0).getReg(); - - assert(MI->getOperand(1).isFI() && "Expect a frame index"); int FI = MI->getOperand(1).getIndex(); // TmpR = L2_loadri_io FI, 0 @@ -1456,17 +1547,17 @@ bool HexagonFrameLowering::expandLoadInt(MachineBasicBlock &B, return true; } - bool HexagonFrameLowering::expandStoreVecPred(MachineBasicBlock &B, MachineBasicBlock::iterator It, MachineRegisterInfo &MRI, const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const { auto &HST = B.getParent()->getSubtarget<HexagonSubtarget>(); MachineInstr *MI = &*It; + if (!MI->getOperand(0).isFI()) + return false; + DebugLoc DL = MI->getDebugLoc(); unsigned SrcR = MI->getOperand(2).getReg(); bool IsKill = MI->getOperand(2).isKill(); - - assert(MI->getOperand(0).isFI() && "Expect a frame index"); int FI = MI->getOperand(0).getIndex(); bool Is128B = HST.useHVXDblOps(); @@ -1503,10 +1594,11 @@ bool HexagonFrameLowering::expandLoadVecPred(MachineBasicBlock &B, const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const { auto &HST = B.getParent()->getSubtarget<HexagonSubtarget>(); MachineInstr *MI = &*It; + if (!MI->getOperand(1).isFI()) + return false; + DebugLoc DL = MI->getDebugLoc(); unsigned DstR = MI->getOperand(0).getReg(); - - assert(MI->getOperand(1).isFI() && "Expect a frame index"); int FI = MI->getOperand(1).getIndex(); bool Is128B = HST.useHVXDblOps(); @@ -1541,17 +1633,27 @@ bool HexagonFrameLowering::expandStoreVec2(MachineBasicBlock &B, const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const { MachineFunction &MF = *B.getParent(); auto &HST = MF.getSubtarget<HexagonSubtarget>(); - auto &MFI = *MF.getFrameInfo(); + auto &MFI = MF.getFrameInfo(); auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); MachineInstr *MI = &*It; - DebugLoc DL = MI->getDebugLoc(); + if (!MI->getOperand(0).isFI()) + return false; + + // It is possible that the double vector being stored is only partially + // defined. From the point of view of the liveness tracking, it is ok to + // store it as a whole, but if we break it up we may end up storing a + // register that is entirely undefined. 
+ LivePhysRegs LPR(&HRI); + LPR.addLiveIns(B); + SmallVector<std::pair<unsigned, const MachineOperand*>,2> Clobbers; + for (auto R = B.begin(); R != It; ++R) + LPR.stepForward(*R, Clobbers); + DebugLoc DL = MI->getDebugLoc(); unsigned SrcR = MI->getOperand(2).getReg(); - unsigned SrcLo = HRI.getSubReg(SrcR, Hexagon::subreg_loreg); - unsigned SrcHi = HRI.getSubReg(SrcR, Hexagon::subreg_hireg); + unsigned SrcLo = HRI.getSubReg(SrcR, Hexagon::vsub_lo); + unsigned SrcHi = HRI.getSubReg(SrcR, Hexagon::vsub_hi); bool IsKill = MI->getOperand(2).isKill(); - - assert(MI->getOperand(0).isFI() && "Expect a frame index"); int FI = MI->getOperand(0).getIndex(); bool Is128B = HST.useHVXDblOps(); @@ -1563,28 +1665,32 @@ bool HexagonFrameLowering::expandStoreVec2(MachineBasicBlock &B, unsigned StoreOpc; // Store low part. - if (NeedAlign <= HasAlign) - StoreOpc = !Is128B ? Hexagon::V6_vS32b_ai : Hexagon::V6_vS32b_ai_128B; - else - StoreOpc = !Is128B ? Hexagon::V6_vS32Ub_ai : Hexagon::V6_vS32Ub_ai_128B; + if (LPR.contains(SrcLo)) { + if (NeedAlign <= HasAlign) + StoreOpc = !Is128B ? Hexagon::V6_vS32b_ai : Hexagon::V6_vS32b_ai_128B; + else + StoreOpc = !Is128B ? Hexagon::V6_vS32Ub_ai : Hexagon::V6_vS32Ub_ai_128B; - BuildMI(B, It, DL, HII.get(StoreOpc)) - .addFrameIndex(FI) - .addImm(0) - .addReg(SrcLo, getKillRegState(IsKill)) - .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + BuildMI(B, It, DL, HII.get(StoreOpc)) + .addFrameIndex(FI) + .addImm(0) + .addReg(SrcLo, getKillRegState(IsKill)) + .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + } - // Load high part. - if (NeedAlign <= MinAlign(HasAlign, Size)) - StoreOpc = !Is128B ? Hexagon::V6_vS32b_ai : Hexagon::V6_vS32b_ai_128B; - else - StoreOpc = !Is128B ? Hexagon::V6_vS32Ub_ai : Hexagon::V6_vS32Ub_ai_128B; + // Store high part. + if (LPR.contains(SrcHi)) { + if (NeedAlign <= MinAlign(HasAlign, Size)) + StoreOpc = !Is128B ? Hexagon::V6_vS32b_ai : Hexagon::V6_vS32b_ai_128B; + else + StoreOpc = !Is128B ? 
Hexagon::V6_vS32Ub_ai : Hexagon::V6_vS32Ub_ai_128B; - BuildMI(B, It, DL, HII.get(StoreOpc)) - .addFrameIndex(FI) - .addImm(Size) - .addReg(SrcHi, getKillRegState(IsKill)) - .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + BuildMI(B, It, DL, HII.get(StoreOpc)) + .addFrameIndex(FI) + .addImm(Size) + .addReg(SrcHi, getKillRegState(IsKill)) + .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + } B.erase(It); return true; @@ -1595,16 +1701,16 @@ bool HexagonFrameLowering::expandLoadVec2(MachineBasicBlock &B, const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const { MachineFunction &MF = *B.getParent(); auto &HST = MF.getSubtarget<HexagonSubtarget>(); - auto &MFI = *MF.getFrameInfo(); + auto &MFI = MF.getFrameInfo(); auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); MachineInstr *MI = &*It; - DebugLoc DL = MI->getDebugLoc(); + if (!MI->getOperand(1).isFI()) + return false; + DebugLoc DL = MI->getDebugLoc(); unsigned DstR = MI->getOperand(0).getReg(); - unsigned DstHi = HRI.getSubReg(DstR, Hexagon::subreg_hireg); - unsigned DstLo = HRI.getSubReg(DstR, Hexagon::subreg_loreg); - - assert(MI->getOperand(1).isFI() && "Expect a frame index"); + unsigned DstHi = HRI.getSubReg(DstR, Hexagon::vsub_hi); + unsigned DstLo = HRI.getSubReg(DstR, Hexagon::vsub_lo); int FI = MI->getOperand(1).getIndex(); bool Is128B = HST.useHVXDblOps(); @@ -1646,14 +1752,14 @@ bool HexagonFrameLowering::expandStoreVec(MachineBasicBlock &B, const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const { MachineFunction &MF = *B.getParent(); auto &HST = MF.getSubtarget<HexagonSubtarget>(); - auto &MFI = *MF.getFrameInfo(); + auto &MFI = MF.getFrameInfo(); MachineInstr *MI = &*It; - DebugLoc DL = MI->getDebugLoc(); + if (!MI->getOperand(0).isFI()) + return false; + DebugLoc DL = MI->getDebugLoc(); unsigned SrcR = MI->getOperand(2).getReg(); bool IsKill = MI->getOperand(2).isKill(); - - assert(MI->getOperand(0).isFI() && "Expect a frame index"); int FI = MI->getOperand(0).getIndex(); bool Is128B = HST.useHVXDblOps(); @@ -1684,13 +1790,13 @@ bool HexagonFrameLowering::expandLoadVec(MachineBasicBlock &B, const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const { MachineFunction &MF = *B.getParent(); auto &HST = MF.getSubtarget<HexagonSubtarget>(); - auto &MFI = *MF.getFrameInfo(); + auto &MFI = MF.getFrameInfo(); MachineInstr *MI = &*It; - DebugLoc DL = MI->getDebugLoc(); + if (!MI->getOperand(1).isFI()) + return false; + DebugLoc DL = MI->getDebugLoc(); unsigned DstR = MI->getOperand(0).getReg(); - - assert(MI->getOperand(1).isFI() && "Expect a frame index"); int FI = MI->getOperand(1).getIndex(); bool Is128B = HST.useHVXDblOps(); @@ -1715,7 +1821,6 @@ bool HexagonFrameLowering::expandLoadVec(MachineBasicBlock &B, return true; } - bool HexagonFrameLowering::expandSpillMacros(MachineFunction &MF, SmallVectorImpl<unsigned> &NewRegs) const { auto &HST = MF.getSubtarget<HexagonSubtarget>(); @@ -1743,30 +1848,26 @@ bool HexagonFrameLowering::expandSpillMacros(MachineFunction &MF, case Hexagon::LDriw_mod: Changed |= expandLoadInt(B, I, MRI, HII, NewRegs); break; - case Hexagon::STriq_pred_V6: - case Hexagon::STriq_pred_V6_128B: + case Hexagon::PS_vstorerq_ai: + case Hexagon::PS_vstorerq_ai_128B: Changed |= expandStoreVecPred(B, I, MRI, HII, NewRegs); break; - case Hexagon::LDriq_pred_V6: - case Hexagon::LDriq_pred_V6_128B: + case Hexagon::PS_vloadrq_ai: + case Hexagon::PS_vloadrq_ai_128B: Changed |= expandLoadVecPred(B, I, MRI, HII, NewRegs); break; - case 
Hexagon::LDrivv_pseudo_V6: - case Hexagon::LDrivv_pseudo_V6_128B: + case Hexagon::PS_vloadrw_ai: + case Hexagon::PS_vloadrwu_ai: + case Hexagon::PS_vloadrw_ai_128B: + case Hexagon::PS_vloadrwu_ai_128B: Changed |= expandLoadVec2(B, I, MRI, HII, NewRegs); break; - case Hexagon::STrivv_pseudo_V6: - case Hexagon::STrivv_pseudo_V6_128B: + case Hexagon::PS_vstorerw_ai: + case Hexagon::PS_vstorerwu_ai: + case Hexagon::PS_vstorerw_ai_128B: + case Hexagon::PS_vstorerwu_ai_128B: Changed |= expandStoreVec2(B, I, MRI, HII, NewRegs); break; - case Hexagon::STriv_pseudo_V6: - case Hexagon::STriv_pseudo_V6_128B: - Changed |= expandStoreVec(B, I, MRI, HII, NewRegs); - break; - case Hexagon::LDriv_pseudo_V6: - case Hexagon::LDriv_pseudo_V6_128B: - Changed |= expandLoadVec(B, I, MRI, HII, NewRegs); - break; } } } @@ -1774,7 +1875,6 @@ bool HexagonFrameLowering::expandSpillMacros(MachineFunction &MF, return Changed; } - void HexagonFrameLowering::determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const { @@ -1797,8 +1897,8 @@ void HexagonFrameLowering::determineCalleeSaves(MachineFunction &MF, // We need to reserve a a spill slot if scavenging could potentially require // spilling a scavenged register. - if (!NewRegs.empty()) { - MachineFrameInfo &MFI = *MF.getFrameInfo(); + if (!NewRegs.empty() || mayOverflowFrameOffset(MF)) { + MachineFrameInfo &MFI = MF.getFrameInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); SetVector<const TargetRegisterClass*> SpillRCs; // Reserve an int register in any case, because it could be used to hold @@ -1823,7 +1923,6 @@ void HexagonFrameLowering::determineCalleeSaves(MachineFunction &MF, TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); } - unsigned HexagonFrameLowering::findPhysReg(MachineFunction &MF, HexagonBlockRanges::IndexRange &FIR, HexagonBlockRanges::InstrIndexMap &IndexMap, @@ -1872,29 +1971,16 @@ void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF, struct SlotInfo { BlockRangeMap Map; - unsigned Size; - const TargetRegisterClass *RC; + unsigned Size = 0; + const TargetRegisterClass *RC = nullptr; - SlotInfo() : Map(), Size(0), RC(nullptr) {} + SlotInfo() = default; }; BlockIndexMap BlockIndexes; SmallSet<int,4> BadFIs; std::map<int,SlotInfo> FIRangeMap; - auto getRegClass = [&MRI,&HRI] (HexagonBlockRanges::RegisterRef R) - -> const TargetRegisterClass* { - if (TargetRegisterInfo::isPhysicalRegister(R.Reg)) - assert(R.Sub == 0); - if (TargetRegisterInfo::isVirtualRegister(R.Reg)) { - auto *RCR = MRI.getRegClass(R.Reg); - if (R.Sub == 0) - return RCR; - unsigned PR = *RCR->begin(); - R.Reg = HRI.getSubReg(PR, R.Sub); - } - return HRI.getMinimalPhysRegClass(R.Reg); - }; // Accumulate register classes: get a common class for a pre-existing // class HaveRC and a new class NewRC. Return nullptr if a common class // cannot be found, otherwise return the resulting class. If HaveRC is @@ -1942,19 +2028,13 @@ void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF, // this restriction. if (Load || Store) { int TFI = Load ? LFI : SFI; - unsigned AM = HII.getAddrMode(&In); + unsigned AM = HII.getAddrMode(In); SlotInfo &SI = FIRangeMap[TFI]; bool Bad = (AM != HexagonII::BaseImmOffset); if (!Bad) { // If the addressing mode is ok, check the register class. 
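        // All accesses that share a slot must also agree on a register
        // class: getCommonRC (defined above) merges the class recorded so
        // far with the class of the current access, and yields nullptr when
        // the two are incompatible (e.g. an IntRegs access mixed with a
        // DoubleRegs access), in which case the slot is marked bad below.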
- const TargetRegisterClass *RC = nullptr; - if (Load) { - MachineOperand &DataOp = In.getOperand(0); - RC = getRegClass({DataOp.getReg(), DataOp.getSubReg()}); - } else { - MachineOperand &DataOp = In.getOperand(2); - RC = getRegClass({DataOp.getReg(), DataOp.getSubReg()}); - } + unsigned OpNum = Load ? 0 : 2; + auto *RC = HII.getRegClass(In.getDesc(), OpNum, &HRI, MF); RC = getCommonRC(SI.RC, RC); if (RC == nullptr) Bad = true; @@ -1963,12 +2043,20 @@ void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF, } if (!Bad) { // Check sizes. - unsigned S = (1U << (HII.getMemAccessSize(&In) - 1)); + unsigned S = (1U << (HII.getMemAccessSize(In) - 1)); if (SI.Size != 0 && SI.Size != S) Bad = true; else SI.Size = S; } + if (!Bad) { + for (auto *Mo : In.memoperands()) { + if (!Mo->isVolatile()) + continue; + Bad = true; + break; + } + } if (Bad) BadFIs.insert(TFI); } @@ -2081,6 +2169,10 @@ void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF, } }); +#ifndef NDEBUG + bool HasOptLimit = SpillOptMax.getPosition(); +#endif + // eliminate loads, when all loads eliminated, eliminate all stores. for (auto &B : MF) { auto F = BlockIndexes.find(&B); @@ -2101,26 +2193,33 @@ void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF, if (!IndexType::isInstr(Range.start()) || !IndexType::isInstr(Range.end())) continue; - MachineInstr *SI = IM.getInstr(Range.start()); - MachineInstr *EI = IM.getInstr(Range.end()); - assert(SI->mayStore() && "Unexpected start instruction"); - assert(EI->mayLoad() && "Unexpected end instruction"); - MachineOperand &SrcOp = SI->getOperand(2); + MachineInstr &SI = *IM.getInstr(Range.start()); + MachineInstr &EI = *IM.getInstr(Range.end()); + assert(SI.mayStore() && "Unexpected start instruction"); + assert(EI.mayLoad() && "Unexpected end instruction"); + MachineOperand &SrcOp = SI.getOperand(2); HexagonBlockRanges::RegisterRef SrcRR = { SrcOp.getReg(), SrcOp.getSubReg() }; - auto *RC = getRegClass({SrcOp.getReg(), SrcOp.getSubReg()}); + auto *RC = HII.getRegClass(SI.getDesc(), 2, &HRI, MF); // The this-> is needed to unconfuse MSVC. unsigned FoundR = this->findPhysReg(MF, Range, IM, DM, RC); DEBUG(dbgs() << "Replacement reg:" << PrintReg(FoundR, &HRI) << '\n'); if (FoundR == 0) continue; +#ifndef NDEBUG + if (HasOptLimit) { + if (SpillOptCount >= SpillOptMax) + return; + SpillOptCount++; + } +#endif // Generate the copy-in: "FoundR = COPY SrcR" at the store location. - MachineBasicBlock::iterator StartIt = SI, NextIt; + MachineBasicBlock::iterator StartIt = SI.getIterator(), NextIt; MachineInstr *CopyIn = nullptr; if (SrcRR.Reg != FoundR || SrcRR.Sub != 0) { - const DebugLoc &DL = SI->getDebugLoc(); + const DebugLoc &DL = SI.getDebugLoc(); CopyIn = BuildMI(B, StartIt, DL, HII.get(TargetOpcode::COPY), FoundR) .addOperand(SrcOp); } @@ -2137,33 +2236,33 @@ void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF, // We are keeping this register live. 
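        // The original store is kept on this path, so its source must stay
        // live past it. Overall, the rewrite this loop performs is, roughly
        // (opcodes are illustrative Hexagon memops, register names assumed):
        //   S2_storeri_io %fi, 0, %r1   -->  %r10 = COPY %r1
        //   %r2 = L2_loadri_io %fi, 0   -->  %r2  = COPY %r10
        // where %r10 stands for FoundR, the scavenged physical register.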
SrcOp.setIsKill(false); } else { - B.erase(SI); - IM.replaceInstr(SI, CopyIn); + B.erase(&SI); + IM.replaceInstr(&SI, CopyIn); } - auto EndIt = std::next(MachineBasicBlock::iterator(EI)); + auto EndIt = std::next(EI.getIterator()); for (auto It = StartIt; It != EndIt; It = NextIt) { - MachineInstr *MI = &*It; + MachineInstr &MI = *It; NextIt = std::next(It); int TFI; - if (!HII.isLoadFromStackSlot(*MI, TFI) || TFI != FI) + if (!HII.isLoadFromStackSlot(MI, TFI) || TFI != FI) continue; - unsigned DstR = MI->getOperand(0).getReg(); - assert(MI->getOperand(0).getSubReg() == 0); + unsigned DstR = MI.getOperand(0).getReg(); + assert(MI.getOperand(0).getSubReg() == 0); MachineInstr *CopyOut = nullptr; if (DstR != FoundR) { - DebugLoc DL = MI->getDebugLoc(); + DebugLoc DL = MI.getDebugLoc(); unsigned MemSize = (1U << (HII.getMemAccessSize(MI) - 1)); assert(HII.getAddrMode(MI) == HexagonII::BaseImmOffset); unsigned CopyOpc = TargetOpcode::COPY; - if (HII.isSignExtendingLoad(*MI)) + if (HII.isSignExtendingLoad(MI)) CopyOpc = (MemSize == 1) ? Hexagon::A2_sxtb : Hexagon::A2_sxth; - else if (HII.isZeroExtendingLoad(*MI)) + else if (HII.isZeroExtendingLoad(MI)) CopyOpc = (MemSize == 1) ? Hexagon::A2_zxtb : Hexagon::A2_zxth; CopyOut = BuildMI(B, It, DL, HII.get(CopyOpc), DstR) - .addReg(FoundR, getKillRegState(MI == EI)); + .addReg(FoundR, getKillRegState(&MI == &EI)); } - IM.replaceInstr(MI, CopyOut); + IM.replaceInstr(&MI, CopyOut); B.erase(It); } @@ -2176,7 +2275,6 @@ void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF, } } - void HexagonFrameLowering::expandAlloca(MachineInstr *AI, const HexagonInstrInfo &HII, unsigned SP, unsigned CF) const { MachineBasicBlock &MB = *AI->getParent(); @@ -2235,28 +2333,25 @@ void HexagonFrameLowering::expandAlloca(MachineInstr *AI, } } - bool HexagonFrameLowering::needsAligna(const MachineFunction &MF) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - if (!MFI->hasVarSizedObjects()) + const MachineFrameInfo &MFI = MF.getFrameInfo(); + if (!MFI.hasVarSizedObjects()) return false; - unsigned MaxA = MFI->getMaxAlignment(); + unsigned MaxA = MFI.getMaxAlignment(); if (MaxA <= getStackAlignment()) return false; return true; } - const MachineInstr *HexagonFrameLowering::getAlignaInstr( const MachineFunction &MF) const { for (auto &B : MF) for (auto &I : B) - if (I.getOpcode() == Hexagon::ALIGNA) + if (I.getOpcode() == Hexagon::PS_aligna) return &I; return nullptr; } - /// Adds all callee-saved registers as implicit uses or defs to the /// instruction. void HexagonFrameLowering::addCalleeSaveRegistersAsImpOperand(MachineInstr *MI, @@ -2266,7 +2361,6 @@ void HexagonFrameLowering::addCalleeSaveRegistersAsImpOperand(MachineInstr *MI, MI->addOperand(MachineOperand::CreateReg(R.getReg(), IsDef, true, IsKill)); } - /// Determine whether the callee-saved register saves and restores should /// be generated via inline code. If this function returns "true", inline /// code will be generated. 
If this function returns "false", additional @@ -2301,7 +2395,6 @@ bool HexagonFrameLowering::shouldInlineCSR(MachineFunction &MF, return false; } - bool HexagonFrameLowering::useSpillFunction(MachineFunction &MF, const CSIVect &CSI) const { if (shouldInlineCSR(MF, CSI)) @@ -2315,7 +2408,6 @@ bool HexagonFrameLowering::useSpillFunction(MachineFunction &MF, return Threshold < NumCSI; } - bool HexagonFrameLowering::useRestoreFunction(MachineFunction &MF, const CSIVect &CSI) const { if (shouldInlineCSR(MF, CSI)) @@ -2336,3 +2428,14 @@ bool HexagonFrameLowering::useRestoreFunction(MachineFunction &MF, : SpillFuncThreshold; return Threshold < NumCSI; } + +bool HexagonFrameLowering::mayOverflowFrameOffset(MachineFunction &MF) const { + unsigned StackSize = MF.getFrameInfo().estimateStackSize(MF); + auto &HST = MF.getSubtarget<HexagonSubtarget>(); + // A fairly simplistic guess as to whether a potential load/store to a + // stack location could require an extra register. It does not account + // for store-immediate instructions. + if (HST.useHVXOps()) + return StackSize > 256; + return false; +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h index 3e76214..529a61d 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h +++ b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h @@ -12,7 +12,11 @@ #include "Hexagon.h" #include "HexagonBlockRanges.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/Target/TargetFrameLowering.h" +#include <vector> namespace llvm { @@ -31,11 +35,13 @@ public: override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override {} + bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, const TargetRegisterInfo *TRI) const override { return true; } + bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, const TargetRegisterInfo *TRI) const override { @@ -53,6 +59,7 @@ public: bool targetHandlesStackFrameRounding() const override { return true; } + int getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const override; bool hasFP(const MachineFunction &MF) const override; @@ -91,7 +98,8 @@ private: const HexagonRegisterInfo &HRI, bool &PrologueStubs) const; bool insertCSRRestoresInBlock(MachineBasicBlock &MBB, const CSIVect &CSI, const HexagonRegisterInfo &HRI) const; - bool updateExitPaths(MachineBasicBlock &MBB, MachineBasicBlock *RestoreB, + void updateEntryPaths(MachineFunction &MF, MachineBasicBlock &SaveB) const; + bool updateExitPaths(MachineBasicBlock &MBB, MachineBasicBlock &RestoreB, BitVector &DoneT, BitVector &DoneF, BitVector &Path) const; void insertCFIInstructionsAt(MachineBasicBlock &MBB, MachineBasicBlock::iterator At) const; @@ -140,11 +148,12 @@ private: void addCalleeSaveRegistersAsImpOperand(MachineInstr *MI, const CSIVect &CSI, bool IsDef, bool IsKill) const; - bool shouldInlineCSR(llvm::MachineFunction &MF, const CSIVect &CSI) const; + bool shouldInlineCSR(MachineFunction &MF, const CSIVect &CSI) const; bool useSpillFunction(MachineFunction &MF, const CSIVect &CSI) const; bool useRestoreFunction(MachineFunction &MF, const CSIVect &CSI) const; + bool mayOverflowFrameOffset(MachineFunction &MF) const; }; -} // End llvm namespace +} // end namespace llvm -#endif +#endif // 
LLVM_LIB_TARGET_HEXAGON_HEXAGONFRAMELOWERING_H diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp index f46b6d2..bb5e379 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp @@ -7,20 +7,25 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/STLExtras.h" -#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cstdint> +#include <iterator> using namespace llvm; @@ -41,28 +46,34 @@ static cl::opt<bool> NeedAnd("extract-needand", cl::init(true), cl::Hidden, cl::desc("Require & in extract patterns")); namespace llvm { + void initializeHexagonGenExtractPass(PassRegistry&); FunctionPass *createHexagonGenExtract(); -} +} // end namespace llvm namespace { + class HexagonGenExtract : public FunctionPass { public: static char ID; + HexagonGenExtract() : FunctionPass(ID), ExtractCount(0) { initializeHexagonGenExtractPass(*PassRegistry::getPassRegistry()); } - virtual const char *getPassName() const override { + + StringRef getPassName() const override { return "Hexagon generate \"extract\" instructions"; } - virtual bool runOnFunction(Function &F) override; - virtual void getAnalysisUsage(AnalysisUsage &AU) const override { + + bool runOnFunction(Function &F) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<DominatorTreeWrapperPass>(); AU.addPreserved<DominatorTreeWrapperPass>(); - AU.addPreserved<MachineFunctionAnalysis>(); FunctionPass::getAnalysisUsage(AU); } + private: bool visitBlock(BasicBlock *B); bool convert(Instruction *In); @@ -72,7 +83,8 @@ namespace { }; char HexagonGenExtract::ID = 0; -} + +} // end anonymous namespace INITIALIZE_PASS_BEGIN(HexagonGenExtract, "hextract", "Hexagon generate " "\"extract\" instructions", false, false) @@ -80,11 +92,11 @@ INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_END(HexagonGenExtract, "hextract", "Hexagon generate " "\"extract\" instructions", false, false) - bool HexagonGenExtract::convert(Instruction *In) { using namespace PatternMatch; - Value *BF = 0; - ConstantInt *CSL = 0, *CSR = 0, *CM = 0; + + Value *BF = nullptr; + ConstantInt *CSL = nullptr, *CSR = nullptr, *CM = nullptr; BasicBlock *BB = In->getParent(); LLVMContext &Ctx = BB->getContext(); bool LogicalSR; @@ -126,14 +138,14 @@ bool HexagonGenExtract::convert(Instruction *In) { m_ConstantInt(CM))); } if (!Match) { - CM = 0; + CM = nullptr; // (shl (lshr x, #sr), #sl) LogicalSR = true; Match = match(In, m_Shl(m_LShr(m_Value(BF), m_ConstantInt(CSR)), m_ConstantInt(CSL))); } if (!Match) { - CM = 0; + CM = nullptr; // (shl (ashr x, #sr), #sl) LogicalSR = false; Match = match(In, m_Shl(m_AShr(m_Value(BF), m_ConstantInt(CSR)), @@ -207,7 +219,6 @@ bool 
HexagonGenExtract::convert(Instruction *In) { return true; } - bool HexagonGenExtract::visitBlock(BasicBlock *B) { // Depth-first, bottom-up traversal. DomTreeNode *DTN = DT->getNode(B); @@ -240,7 +251,6 @@ bool HexagonGenExtract::visitBlock(BasicBlock *B) { return Changed; } - bool HexagonGenExtract::runOnFunction(Function &F) { if (skipFunction(F)) return false; @@ -256,7 +266,6 @@ bool HexagonGenExtract::runOnFunction(Function &F) { return Changed; } - FunctionPass *llvm::createHexagonGenExtract() { return new HexagonGenExtract(); } diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp index 71d0791..5a8e392 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp @@ -9,29 +9,39 @@ #define DEBUG_TYPE "hexinsert" +#include "BitTracker.h" +#include "HexagonBitTracker.h" +#include "HexagonInstrInfo.h" +#include "HexagonRegisterInfo.h" +#include "HexagonSubtarget.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/IR/Constants.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/Pass.h" -#include "llvm/PassRegistry.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/Timer.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/Timer.h" #include "llvm/Target/TargetRegisterInfo.h" - -#include "Hexagon.h" -#include "HexagonRegisterInfo.h" -#include "HexagonTargetMachine.h" -#include "HexagonBitTracker.h" - +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <iterator> +#include <utility> #include <vector> using namespace llvm; @@ -59,20 +69,18 @@ static cl::opt<bool> OptSelectHas0("insert-has0", cl::init(false), cl::Hidden, static cl::opt<bool> OptConst("insert-const", cl::init(false), cl::Hidden, cl::ZeroOrMore); -namespace { - // The preprocessor gets confused when the DEBUG macro is passed larger - // chunks of code. Use this function to detect debugging. - inline bool isDebug() { +// The preprocessor gets confused when the DEBUG macro is passed larger +// chunks of code. Use this function to detect debugging. +inline static bool isDebug() { #ifndef NDEBUG - return ::llvm::DebugFlag && ::llvm::isCurrentDebugType(DEBUG_TYPE); + return DebugFlag && isCurrentDebugType(DEBUG_TYPE); #else - return false; + return false; #endif - } } - namespace { + // Set of virtual registers, based on BitVector. 
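  // A minimal usage sketch, using helpers that appear later in this pass:
  //   RegisterSet Defs;
  //   getInstrDefs(MI, Defs);   // collect virtual registers defined by MI
  //   if (Defs[VR]) ...         // membership test via operator[]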
struct RegisterSet : private BitVector { RegisterSet() = default; @@ -146,20 +154,23 @@ namespace { if (size() <= Idx) resize(std::max(Idx+1, 32U)); } + static inline unsigned v2x(unsigned v) { return TargetRegisterInfo::virtReg2Index(v); } + static inline unsigned x2v(unsigned x) { return TargetRegisterInfo::index2VirtReg(x); } }; - struct PrintRegSet { PrintRegSet(const RegisterSet &S, const TargetRegisterInfo *RI) : RS(S), TRI(RI) {} + friend raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P); + private: const RegisterSet &RS; const TargetRegisterInfo *TRI; @@ -172,14 +183,12 @@ namespace { OS << " }"; return OS; } -} - -namespace { // A convenience class to associate unsigned numbers (such as virtual // registers) with unsigned numbers. struct UnsignedMap : public DenseMap<unsigned,unsigned> { - UnsignedMap() : BaseType() {} + UnsignedMap() = default; + private: typedef DenseMap<unsigned,unsigned> BaseType; }; @@ -190,22 +199,21 @@ namespace { // by a potentially expensive comparison function, or obtained by a proce- // dure that should not be repeated each time two registers are compared. struct RegisterOrdering : public UnsignedMap { - RegisterOrdering() : UnsignedMap() {} + RegisterOrdering() = default; + unsigned operator[](unsigned VR) const { const_iterator F = find(VR); assert(F != end()); return F->second; } + // Add operator(), so that objects of this class can be used as // comparators in std::sort et al. bool operator() (unsigned VR1, unsigned VR2) const { return operator[](VR1) < operator[](VR2); } }; -} - -namespace { // Ordering of bit values. This class does not have operator[], but // is supplies a comparison operator() for use in std:: algorithms. // The order is as follows: @@ -214,12 +222,14 @@ namespace { // or ord(ref1.Reg) == ord(ref2.Reg), and ref1.Pos < ref2.Pos. struct BitValueOrdering { BitValueOrdering(const RegisterOrdering &RB) : BaseOrd(RB) {} + bool operator() (const BitTracker::BitValue &V1, const BitTracker::BitValue &V2) const; + const RegisterOrdering &BaseOrd; }; -} +} // end anonymous namespace bool BitValueOrdering::operator() (const BitTracker::BitValue &V1, const BitTracker::BitValue &V2) const { @@ -241,20 +251,21 @@ bool BitValueOrdering::operator() (const BitTracker::BitValue &V1, return V1.RefI.Pos < V2.RefI.Pos; } - namespace { + // Cache for the BitTracker's cell map. Map lookup has a logarithmic // complexity, this class will memoize the lookup results to reduce // the access time for repeated lookups of the same cell. struct CellMapShadow { CellMapShadow(const BitTracker &T) : BT(T) {} + const BitTracker::RegisterCell &lookup(unsigned VR) { unsigned RInd = TargetRegisterInfo::virtReg2Index(VR); // Grow the vector to at least 32 elements. if (RInd >= CVect.size()) - CVect.resize(std::max(RInd+16, 32U), 0); + CVect.resize(std::max(RInd+16, 32U), nullptr); const BitTracker::RegisterCell *CP = CVect[RInd]; - if (CP == 0) + if (CP == nullptr) CP = CVect[RInd] = &BT.lookup(VR); return *CP; } @@ -265,16 +276,15 @@ namespace { typedef std::vector<const BitTracker::RegisterCell*> CellVectType; CellVectType CVect; }; -} - -namespace { // Comparator class for lexicographic ordering of virtual registers // according to the corresponding BitTracker::RegisterCell objects. 
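  // Illustrative use (variable names assumed): ordering a vector of virtual
  // registers by the contents of their bit cells, e.g.
  //   std::sort(VRs.begin(), VRs.end(), RegisterCellLexCompare(BVO, MS));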
struct RegisterCellLexCompare { RegisterCellLexCompare(const BitValueOrdering &BO, CellMapShadow &M) : BitOrd(BO), CM(M) {} + bool operator() (unsigned VR1, unsigned VR2) const; + private: const BitValueOrdering &BitOrd; CellMapShadow &CM; @@ -290,15 +300,17 @@ namespace { RegisterCellBitCompareSel(unsigned R, unsigned B, unsigned N, const BitValueOrdering &BO, CellMapShadow &M) : SelR(R), SelB(B), BitN(N), BitOrd(BO), CM(M) {} + bool operator() (unsigned VR1, unsigned VR2) const; + private: const unsigned SelR, SelB; const unsigned BitN; const BitValueOrdering &BitOrd; CellMapShadow &CM; }; -} +} // end anonymous namespace bool RegisterCellLexCompare::operator() (unsigned VR1, unsigned VR2) const { // Ordering of registers, made up from two given orderings: @@ -327,7 +339,6 @@ bool RegisterCellLexCompare::operator() (unsigned VR1, unsigned VR2) const { return BitOrd.BaseOrd[VR1] < BitOrd.BaseOrd[VR2]; } - bool RegisterCellBitCompareSel::operator() (unsigned VR1, unsigned VR2) const { if (VR1 == VR2) return false; @@ -353,18 +364,22 @@ bool RegisterCellBitCompareSel::operator() (unsigned VR1, unsigned VR2) const { return false; } - namespace { + class OrderedRegisterList { typedef std::vector<unsigned> ListType; + public: OrderedRegisterList(const RegisterOrdering &RO) : Ord(RO) {} + void insert(unsigned VR); void remove(unsigned VR); + unsigned operator[](unsigned Idx) const { assert(Idx < Seq.size()); return Seq[Idx]; } + unsigned size() const { return Seq.size(); } @@ -378,16 +393,18 @@ namespace { // Convenience function to convert an iterator to the corresponding index. unsigned idx(iterator It) const { return It-begin(); } + private: ListType Seq; const RegisterOrdering &Ord; }; - struct PrintORL { PrintORL(const OrderedRegisterList &L, const TargetRegisterInfo *RI) : RL(L), TRI(RI) {} + friend raw_ostream &operator<< (raw_ostream &OS, const PrintORL &P); + private: const OrderedRegisterList &RL; const TargetRegisterInfo *TRI; @@ -404,8 +421,8 @@ namespace { OS << ')'; return OS; } -} +} // end anonymous namespace void OrderedRegisterList::insert(unsigned VR) { iterator L = std::lower_bound(Seq.begin(), Seq.end(), VR, Ord); @@ -415,21 +432,21 @@ void OrderedRegisterList::insert(unsigned VR) { Seq.insert(L, VR); } - void OrderedRegisterList::remove(unsigned VR) { iterator L = std::lower_bound(Seq.begin(), Seq.end(), VR, Ord); assert(L != Seq.end()); Seq.erase(L); } - namespace { + // A record of the insert form. The fields correspond to the operands // of the "insert" instruction: // ... 
= insert(SrcR, InsR, #Wdh, #Off) struct IFRecord { IFRecord(unsigned SR = 0, unsigned IR = 0, uint16_t W = 0, uint16_t O = 0) : SrcR(SR), InsR(IR), Wdh(W), Off(O) {} + unsigned SrcR, InsR; uint16_t Wdh, Off; }; @@ -437,10 +454,12 @@ namespace { struct PrintIFR { PrintIFR(const IFRecord &R, const TargetRegisterInfo *RI) : IFR(R), TRI(RI) {} + private: + friend raw_ostream &operator<< (raw_ostream &OS, const PrintIFR &P); + const IFRecord &IFR; const TargetRegisterInfo *TRI; - friend raw_ostream &operator<< (raw_ostream &OS, const PrintIFR &P); }; raw_ostream &operator<< (raw_ostream &OS, const PrintIFR &P) { @@ -451,31 +470,37 @@ namespace { } typedef std::pair<IFRecord,RegisterSet> IFRecordWithRegSet; -} +} // end anonymous namespace namespace llvm { + void initializeHexagonGenInsertPass(PassRegistry&); FunctionPass *createHexagonGenInsert(); -} +} // end namespace llvm namespace { + class HexagonGenInsert : public MachineFunctionPass { public: static char ID; - HexagonGenInsert() : MachineFunctionPass(ID), HII(0), HRI(0) { + + HexagonGenInsert() : MachineFunctionPass(ID), HII(nullptr), HRI(nullptr) { initializeHexagonGenInsertPass(*PassRegistry::getPassRegistry()); } - virtual const char *getPassName() const { + + StringRef getPassName() const override { return "Hexagon generate \"insert\" instructions"; } - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<MachineDominatorTree>(); AU.addPreserved<MachineDominatorTree>(); MachineFunctionPass::getAnalysisUsage(AU); } - virtual bool runOnMachineFunction(MachineFunction &MF); + + bool runOnMachineFunction(MachineFunction &MF) override; private: typedef DenseMap<std::pair<unsigned,unsigned>,unsigned> PairMapType; @@ -533,8 +558,8 @@ namespace { }; char HexagonGenInsert::ID = 0; -} +} // end anonymous namespace void HexagonGenInsert::dump_map() const { typedef IFMapType::const_iterator iterator; @@ -547,7 +572,6 @@ void HexagonGenInsert::dump_map() const { } } - void HexagonGenInsert::buildOrderingMF(RegisterOrdering &RO) const { unsigned Index = 0; typedef MachineFunction::const_iterator mf_iterator; @@ -574,7 +598,6 @@ void HexagonGenInsert::buildOrderingMF(RegisterOrdering &RO) const { // in the map. 
} - void HexagonGenInsert::buildOrderingBT(RegisterOrdering &RB, RegisterOrdering &RO) const { // Create a vector of all virtual registers (collect them from the base @@ -591,12 +614,10 @@ void HexagonGenInsert::buildOrderingBT(RegisterOrdering &RB, RO.insert(std::make_pair(VRs[i], i)); } - inline bool HexagonGenInsert::isIntClass(const TargetRegisterClass *RC) const { return RC == &Hexagon::IntRegsRegClass || RC == &Hexagon::DoubleRegsRegClass; } - bool HexagonGenInsert::isConstant(unsigned VR) const { const BitTracker::RegisterCell &RC = CMS->lookup(VR); uint16_t W = RC.width(); @@ -609,7 +630,6 @@ bool HexagonGenInsert::isConstant(unsigned VR) const { return true; } - bool HexagonGenInsert::isSmallConstant(unsigned VR) const { const BitTracker::RegisterCell &RC = CMS->lookup(VR); uint16_t W = RC.width(); @@ -633,7 +653,6 @@ bool HexagonGenInsert::isSmallConstant(unsigned VR) const { return isInt<8>(Lo_32(V)) && isInt<8>(Hi_32(V)); } - bool HexagonGenInsert::isValidInsertForm(unsigned DstR, unsigned SrcR, unsigned InsR, uint16_t L, uint16_t S) const { const TargetRegisterClass *DstRC = MRI->getRegClass(DstR); @@ -656,7 +675,6 @@ bool HexagonGenInsert::isValidInsertForm(unsigned DstR, unsigned SrcR, return true; } - bool HexagonGenInsert::findSelfReference(unsigned VR) const { const BitTracker::RegisterCell &RC = CMS->lookup(VR); for (uint16_t i = 0, w = RC.width(); i < w; ++i) { @@ -667,7 +685,6 @@ bool HexagonGenInsert::findSelfReference(unsigned VR) const { return false; } - bool HexagonGenInsert::findNonSelfReference(unsigned VR) const { BitTracker::RegisterCell RC = CMS->lookup(VR); for (uint16_t i = 0, w = RC.width(); i < w; ++i) { @@ -678,7 +695,6 @@ bool HexagonGenInsert::findNonSelfReference(unsigned VR) const { return false; } - void HexagonGenInsert::getInstrDefs(const MachineInstr *MI, RegisterSet &Defs) const { for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { @@ -692,7 +708,6 @@ void HexagonGenInsert::getInstrDefs(const MachineInstr *MI, } } - void HexagonGenInsert::getInstrUses(const MachineInstr *MI, RegisterSet &Uses) const { for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { @@ -706,7 +721,6 @@ void HexagonGenInsert::getInstrUses(const MachineInstr *MI, } } - unsigned HexagonGenInsert::distance(const MachineBasicBlock *FromB, const MachineBasicBlock *ToB, const UnsignedMap &RPO, PairMapType &M) const { @@ -740,7 +754,6 @@ unsigned HexagonGenInsert::distance(const MachineBasicBlock *FromB, return MaxD; } - unsigned HexagonGenInsert::distance(MachineBasicBlock::const_iterator FromI, MachineBasicBlock::const_iterator ToI, const UnsignedMap &RPO, PairMapType &M) const { @@ -753,11 +766,10 @@ unsigned HexagonGenInsert::distance(MachineBasicBlock::const_iterator FromI, return D1+D2+D3; } - bool HexagonGenInsert::findRecordInsertForms(unsigned VR, OrderedRegisterList &AVs) { if (isDebug()) { - dbgs() << LLVM_FUNCTION_NAME << ": " << PrintReg(VR, HRI) + dbgs() << __func__ << ": " << PrintReg(VR, HRI) << " AVs: " << PrintORL(AVs, HRI) << "\n"; } if (AVs.size() == 0) @@ -832,7 +844,6 @@ bool HexagonGenInsert::findRecordInsertForms(unsigned VR, } } - bool Recorded = false; for (iterator I = AVs.begin(), E = AVs.end(); I != E; ++I) { @@ -888,7 +899,6 @@ bool HexagonGenInsert::findRecordInsertForms(unsigned VR, return Recorded; } - void HexagonGenInsert::collectInBlock(MachineBasicBlock *B, OrderedRegisterList &AVs) { if (isDebug()) @@ -949,7 +959,6 @@ void HexagonGenInsert::collectInBlock(MachineBasicBlock *B, AVs.remove(VR); } - void 
HexagonGenInsert::findRemovableRegisters(unsigned VR, IFRecord IF, RegisterSet &RMs) const { // For a given register VR and a insert form, find the registers that are @@ -1001,7 +1010,6 @@ void HexagonGenInsert::findRemovableRegisters(unsigned VR, IFRecord IF, RMs.remove(VR); } - void HexagonGenInsert::computeRemovableRegisters() { for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { IFListType &LL = I->second; @@ -1010,21 +1018,19 @@ void HexagonGenInsert::computeRemovableRegisters() { } } - void HexagonGenInsert::pruneEmptyLists() { // Remove all entries from the map, where the register has no insert forms // associated with it. typedef SmallVector<IFMapType::iterator,16> IterListType; IterListType Prune; for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { - if (I->second.size() == 0) + if (I->second.empty()) Prune.push_back(I); } for (unsigned i = 0, n = Prune.size(); i < n; ++i) IFMap.erase(Prune[i]); } - void HexagonGenInsert::pruneCoveredSets(unsigned VR) { IFMapType::iterator F = IFMap.find(VR); assert(F != IFMap.end()); @@ -1038,7 +1044,7 @@ void HexagonGenInsert::pruneCoveredSets(unsigned VR) { // If there exists a candidate with a non-empty set, the ones with empty // sets will not be used and can be removed. MachineInstr *DefVR = MRI->getVRegDef(VR); - bool DefEx = HII->isConstExtended(DefVR); + bool DefEx = HII->isConstExtended(*DefVR); bool HasNE = false; for (unsigned i = 0, n = LL.size(); i < n; ++i) { if (LL[i].second.empty()) @@ -1052,7 +1058,7 @@ void HexagonGenInsert::pruneCoveredSets(unsigned VR) { auto IsEmpty = [] (const IFRecordWithRegSet &IR) -> bool { return IR.second.empty(); }; - auto End = std::remove_if(LL.begin(), LL.end(), IsEmpty); + auto End = llvm::remove_if(LL, IsEmpty); if (End != LL.end()) LL.erase(End, LL.end()); } else { @@ -1112,7 +1118,6 @@ void HexagonGenInsert::pruneCoveredSets(unsigned VR) { } } - void HexagonGenInsert::pruneUsesTooFar(unsigned VR, const UnsignedMap &RPO, PairMapType &M) { IFMapType::iterator F = IFMap.find(VR); @@ -1135,7 +1140,6 @@ void HexagonGenInsert::pruneUsesTooFar(unsigned VR, const UnsignedMap &RPO, } } - void HexagonGenInsert::pruneRegCopies(unsigned VR) { IFMapType::iterator F = IFMap.find(VR); assert(F != IFMap.end()); @@ -1144,12 +1148,11 @@ void HexagonGenInsert::pruneRegCopies(unsigned VR) { auto IsCopy = [] (const IFRecordWithRegSet &IR) -> bool { return IR.first.Wdh == 32 && (IR.first.Off == 0 || IR.first.Off == 32); }; - auto End = std::remove_if(LL.begin(), LL.end(), IsCopy); + auto End = llvm::remove_if(LL, IsCopy); if (End != LL.end()) LL.erase(End, LL.end()); } - void HexagonGenInsert::pruneCandidates() { // Remove candidates that are not beneficial, regardless of the final // selection method. @@ -1176,8 +1179,8 @@ void HexagonGenInsert::pruneCandidates() { pruneRegCopies(I->first); } - namespace { + // Class for comparing IF candidates for registers that have multiple of // them. The smaller the candidate, according to this ordering, the better. 
// First, compare the number of zeros in the associated potentially remova- @@ -1189,16 +1192,19 @@ namespace { struct IFOrdering { IFOrdering(const UnsignedMap &UC, const RegisterOrdering &BO) : UseC(UC), BaseOrd(BO) {} + bool operator() (const IFRecordWithRegSet &A, const IFRecordWithRegSet &B) const; + private: void stats(const RegisterSet &Rs, unsigned &Size, unsigned &Zero, unsigned &Sum) const; + const UnsignedMap &UseC; const RegisterOrdering &BaseOrd; }; -} +} // end anonymous namespace bool IFOrdering::operator() (const IFRecordWithRegSet &A, const IFRecordWithRegSet &B) const { @@ -1228,7 +1234,6 @@ bool IFOrdering::operator() (const IFRecordWithRegSet &A, return A.first.Off < B.first.Off; } - void IFOrdering::stats(const RegisterSet &Rs, unsigned &Size, unsigned &Zero, unsigned &Sum) const { for (unsigned R = Rs.find_first(); R; R = Rs.find_next(R)) { @@ -1242,7 +1247,6 @@ void IFOrdering::stats(const RegisterSet &Rs, unsigned &Size, unsigned &Zero, } } - void HexagonGenInsert::selectCandidates() { // Some registers may have multiple valid candidates. Pick the best one // (or decide not to use any). @@ -1280,7 +1284,6 @@ void HexagonGenInsert::selectCandidates() { UseC[R] = (C > D) ? C-D : 0; // doz } - bool SelectAll0 = OptSelectAll0, SelectHas0 = OptSelectHas0; if (!SelectAll0 && !SelectHas0) SelectAll0 = true; @@ -1345,12 +1348,12 @@ void HexagonGenInsert::selectCandidates() { AllRMs.clear(); for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) { const IFListType &LL = I->second; - if (LL.size() > 0) + if (!LL.empty()) AllRMs.insert(LL[0].second); } for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) { IFListType &LL = I->second; - if (LL.size() == 0) + if (LL.empty()) continue; unsigned SR = LL[0].first.SrcR, IR = LL[0].first.InsR; if (AllRMs[SR] || AllRMs[IR]) @@ -1360,7 +1363,6 @@ void HexagonGenInsert::selectCandidates() { pruneEmptyLists(); } - bool HexagonGenInsert::generateInserts() { // Create a new register for each one from IFMap, and store them in the // map. @@ -1387,9 +1389,9 @@ bool HexagonGenInsert::generateInserts() { unsigned Wdh = IF.Wdh, Off = IF.Off; unsigned InsS = 0; if (R32 && MRI->getRegClass(IF.InsR) == &Hexagon::DoubleRegsRegClass) { - InsS = Hexagon::subreg_loreg; + InsS = Hexagon::isub_lo; if (Off >= 32) { - InsS = Hexagon::subreg_hireg; + InsS = Hexagon::isub_hi; Off -= 32; } } @@ -1418,7 +1420,6 @@ bool HexagonGenInsert::generateInserts() { return true; } - bool HexagonGenInsert::removeDeadCode(MachineDomTreeNode *N) { bool Changed = false; typedef GraphTraits<MachineDomTreeNode*> GTN; @@ -1444,10 +1445,10 @@ bool HexagonGenInsert::removeDeadCode(MachineDomTreeNode *N) { bool AllDead = true; SmallVector<unsigned,2> Regs; - for (ConstMIOperands Op(*MI); Op.isValid(); ++Op) { - if (!Op->isReg() || !Op->isDef()) + for (const MachineOperand &MO : MI->operands()) { + if (!MO.isReg() || !MO.isDef()) continue; - unsigned R = Op->getReg(); + unsigned R = MO.getReg(); if (!TargetRegisterInfo::isVirtualRegister(R) || !MRI->use_nodbg_empty(R)) { AllDead = false; @@ -1467,15 +1468,12 @@ bool HexagonGenInsert::removeDeadCode(MachineDomTreeNode *N) { return Changed; } - bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(*MF.getFunction())) return false; bool Timing = OptTiming, TimingDetail = Timing && OptTimingDetail; bool Changed = false; - TimerGroup __G("hexinsert"); - NamedRegionTimer __T("hexinsert", Timing && !TimingDetail); // Sanity check: one, but not both. 
assert(!OptSelectAll0 || !OptSelectHas0); @@ -1521,8 +1519,12 @@ bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) { MachineBasicBlock *RootB = MDT->getRoot(); OrderedRegisterList AvailR(CellOrd); + const char *const TGName = "hexinsert"; + const char *const TGDesc = "Generate Insert Instructions"; + { - NamedRegionTimer _T("collection", "hexinsert", TimingDetail); + NamedRegionTimer _T("collection", "collection", TGName, TGDesc, + TimingDetail); collectInBlock(RootB, AvailR); // Complete the information gathered in IFMap. computeRemovableRegisters(); @@ -1537,7 +1539,7 @@ bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) { return Changed; { - NamedRegionTimer _T("pruning", "hexinsert", TimingDetail); + NamedRegionTimer _T("pruning", "pruning", TGName, TGDesc, TimingDetail); pruneCandidates(); } @@ -1550,7 +1552,7 @@ bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) { return Changed; { - NamedRegionTimer _T("selection", "hexinsert", TimingDetail); + NamedRegionTimer _T("selection", "selection", TGName, TGDesc, TimingDetail); selectCandidates(); } @@ -1576,19 +1578,18 @@ bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) { return Changed; { - NamedRegionTimer _T("generation", "hexinsert", TimingDetail); + NamedRegionTimer _T("generation", "generation", TGName, TGDesc, + TimingDetail); generateInserts(); } return true; } - FunctionPass *llvm::createHexagonGenInsert() { return new HexagonGenInsert(); } - //===----------------------------------------------------------------------===// // Public Constructor Functions //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonGenMux.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonGenMux.cpp index bb9256d..a718df9 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonGenMux.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonGenMux.cpp @@ -22,36 +22,60 @@ #define DEBUG_TYPE "hexmux" -#include "llvm/CodeGen/Passes.h" +#include "HexagonInstrInfo.h" +#include "HexagonRegisterInfo.h" +#include "HexagonSubtarget.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "HexagonTargetMachine.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Pass.h" +#include "llvm/Support/MathExtras.h" +#include <algorithm> +#include <limits> +#include <iterator> +#include <utility> using namespace llvm; namespace llvm { + FunctionPass *createHexagonGenMux(); void initializeHexagonGenMuxPass(PassRegistry& Registry); -} + +} // end namespace llvm namespace { + class HexagonGenMux : public MachineFunctionPass { public: static char ID; - HexagonGenMux() : MachineFunctionPass(ID), HII(0), HRI(0) { + + HexagonGenMux() : MachineFunctionPass(ID), HII(nullptr), HRI(nullptr) { initializeHexagonGenMuxPass(*PassRegistry::getPassRegistry()); } - const char *getPassName() const override { + + StringRef getPassName() const override { return "Hexagon generate mux instructions"; } + void getAnalysisUsage(AnalysisUsage &AU) const override { MachineFunctionPass::getAnalysisUsage(AU); } + bool runOnMachineFunction(MachineFunction &MF) 
override; + MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( - MachineFunctionProperties::Property::AllVRegsAllocated); + MachineFunctionProperties::Property::NoVRegs); } private: @@ -59,26 +83,33 @@ namespace { const HexagonRegisterInfo *HRI; struct CondsetInfo { - unsigned PredR; - unsigned TrueX, FalseX; - CondsetInfo() : PredR(0), TrueX(UINT_MAX), FalseX(UINT_MAX) {} + unsigned PredR = 0; + unsigned TrueX = std::numeric_limits<unsigned>::max(); + unsigned FalseX = std::numeric_limits<unsigned>::max(); + + CondsetInfo() = default; }; + struct DefUseInfo { BitVector Defs, Uses; - DefUseInfo() : Defs(), Uses() {} + + DefUseInfo() = default; DefUseInfo(const BitVector &D, const BitVector &U) : Defs(D), Uses(U) {} }; + struct MuxInfo { MachineBasicBlock::iterator At; unsigned DefR, PredR; MachineOperand *SrcT, *SrcF; MachineInstr *Def1, *Def2; + MuxInfo(MachineBasicBlock::iterator It, unsigned DR, unsigned PR, MachineOperand *TOp, MachineOperand *FOp, MachineInstr &D1, MachineInstr &D2) : At(It), DefR(DR), PredR(PR), SrcT(TOp), SrcF(FOp), Def1(&D1), Def2(&D2) {} }; + typedef DenseMap<MachineInstr*,unsigned> InstrIndexMap; typedef DenseMap<unsigned,DefUseInfo> DefUseInfoMap; typedef SmallVector<MuxInfo,4> MuxInfoList; @@ -86,6 +117,7 @@ namespace { bool isRegPair(unsigned Reg) const { return Hexagon::DoubleRegsRegClass.contains(Reg); } + void getSubRegs(unsigned Reg, BitVector &SRs) const; void expandReg(unsigned Reg, BitVector &Set) const; void getDefsUses(const MachineInstr *MI, BitVector &Defs, @@ -99,18 +131,17 @@ namespace { }; char HexagonGenMux::ID = 0; -} + +} // end anonymous namespace INITIALIZE_PASS(HexagonGenMux, "hexagon-mux", "Hexagon generate mux instructions", false, false) - void HexagonGenMux::getSubRegs(unsigned Reg, BitVector &SRs) const { for (MCSubRegIterator I(Reg, HRI); I.isValid(); ++I) SRs[*I] = true; } - void HexagonGenMux::expandReg(unsigned Reg, BitVector &Set) const { if (isRegPair(Reg)) getSubRegs(Reg, Set); @@ -118,7 +149,6 @@ void HexagonGenMux::expandReg(unsigned Reg, BitVector &Set) const { Set[Reg] = true; } - void HexagonGenMux::getDefsUses(const MachineInstr *MI, BitVector &Defs, BitVector &Uses) const { // First, get the implicit defs and uses for this instruction. @@ -132,16 +162,15 @@ void HexagonGenMux::getDefsUses(const MachineInstr *MI, BitVector &Defs, expandReg(*R++, Uses); // Look over all operands, and collect explicit defs and uses. - for (ConstMIOperands Mo(*MI); Mo.isValid(); ++Mo) { - if (!Mo->isReg() || Mo->isImplicit()) + for (const MachineOperand &MO : MI->operands()) { + if (!MO.isReg() || MO.isImplicit()) continue; - unsigned R = Mo->getReg(); - BitVector &Set = Mo->isDef() ? Defs : Uses; + unsigned R = MO.getReg(); + BitVector &Set = MO.isDef() ? 
Defs : Uses; expandReg(R, Set); } } - void HexagonGenMux::buildMaps(MachineBasicBlock &B, InstrIndexMap &I2X, DefUseInfoMap &DUM) { unsigned Index = 0; @@ -159,7 +188,6 @@ void HexagonGenMux::buildMaps(MachineBasicBlock &B, InstrIndexMap &I2X, } } - bool HexagonGenMux::isCondTransfer(unsigned Opc) const { switch (Opc) { case Hexagon::A2_tfrt: @@ -171,7 +199,6 @@ bool HexagonGenMux::isCondTransfer(unsigned Opc) const { return false; } - unsigned HexagonGenMux::getMuxOpcode(const MachineOperand &Src1, const MachineOperand &Src2) const { bool IsReg1 = Src1.isReg(), IsReg2 = Src2.isReg(); @@ -188,7 +215,6 @@ unsigned HexagonGenMux::getMuxOpcode(const MachineOperand &Src1, return 0; } - bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) { bool Changed = false; InstrIndexMap I2X; @@ -231,7 +257,8 @@ bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) { CI.TrueX = Idx; else CI.FalseX = Idx; - if (CI.TrueX == UINT_MAX || CI.FalseX == UINT_MAX) + if (CI.TrueX == std::numeric_limits<unsigned>::max() || + CI.FalseX == std::numeric_limits<unsigned>::max()) continue; // There is now a complete definition of DR, i.e. we have the predicate diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp index dcfd3e8..f14c733 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp @@ -9,49 +9,68 @@ #define DEBUG_TYPE "gen-pred" -#include "HexagonTargetMachine.h" +#include "HexagonInstrInfo.h" +#include "HexagonSubtarget.h" #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/Pass.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" - -#include <functional> +#include "llvm/Target/TargetRegisterInfo.h" +#include <cassert> +#include <iterator> +#include <map> #include <queue> #include <set> +#include <utility> using namespace llvm; namespace llvm { + void initializeHexagonGenPredicatePass(PassRegistry& Registry); FunctionPass *createHexagonGenPredicate(); -} + +} // end namespace llvm namespace { + struct Register { unsigned R, S; + Register(unsigned r = 0, unsigned s = 0) : R(r), S(s) {} Register(const MachineOperand &MO) : R(MO.getReg()), S(MO.getSubReg()) {} + bool operator== (const Register &Reg) const { return R == Reg.R && S == Reg.S; } + bool operator< (const Register &Reg) const { return R < Reg.R || (R == Reg.R && S < Reg.S); } }; + struct PrintRegister { - PrintRegister(Register R, const TargetRegisterInfo &I) : Reg(R), TRI(I) {} friend raw_ostream &operator<< (raw_ostream &OS, const PrintRegister &PR); + + PrintRegister(Register R, const TargetRegisterInfo &I) : Reg(R), TRI(I) {} + private: Register Reg; const TargetRegisterInfo &TRI; }; + raw_ostream &operator<< (raw_ostream &OS, const PrintRegister &PR) LLVM_ATTRIBUTE_UNUSED; raw_ostream &operator<< (raw_ostream &OS, const PrintRegister &PR) { @@ 
-61,18 +80,23 @@ namespace { class HexagonGenPredicate : public MachineFunctionPass { public: static char ID; - HexagonGenPredicate() : MachineFunctionPass(ID), TII(0), TRI(0), MRI(0) { + + HexagonGenPredicate() : MachineFunctionPass(ID), TII(nullptr), TRI(nullptr), + MRI(nullptr) { initializeHexagonGenPredicatePass(*PassRegistry::getPassRegistry()); } - virtual const char *getPassName() const { + + StringRef getPassName() const override { return "Hexagon generate predicate operations"; } - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<MachineDominatorTree>(); AU.addPreserved<MachineDominatorTree>(); MachineFunctionPass::getAnalysisUsage(AU); } - virtual bool runOnMachineFunction(MachineFunction &MF); + + bool runOnMachineFunction(MachineFunction &MF) override; private: typedef SetVector<MachineInstr*> VectOfInst; @@ -99,7 +123,8 @@ namespace { }; char HexagonGenPredicate::ID = 0; -} + +} // end anonymous namespace INITIALIZE_PASS_BEGIN(HexagonGenPredicate, "hexagon-gen-pred", "Hexagon generate predicate operations", false, false) @@ -114,7 +139,6 @@ bool HexagonGenPredicate::isPredReg(unsigned R) { return RC == &Hexagon::PredRegsRegClass; } - unsigned HexagonGenPredicate::getPredForm(unsigned Opc) { using namespace Hexagon; @@ -159,7 +183,6 @@ unsigned HexagonGenPredicate::getPredForm(unsigned Opc) { return 0; } - bool HexagonGenPredicate::isConvertibleToPredForm(const MachineInstr *MI) { unsigned Opc = MI->getOpcode(); if (getPredForm(Opc) != 0) @@ -179,7 +202,6 @@ bool HexagonGenPredicate::isConvertibleToPredForm(const MachineInstr *MI) { return false; } - void HexagonGenPredicate::collectPredicateGPR(MachineFunction &MF) { for (MachineFunction::iterator A = MF.begin(), Z = MF.end(); A != Z; ++A) { MachineBasicBlock &B = *A; @@ -200,9 +222,8 @@ void HexagonGenPredicate::collectPredicateGPR(MachineFunction &MF) { } } - void HexagonGenPredicate::processPredicateGPR(const Register &Reg) { - DEBUG(dbgs() << LLVM_FUNCTION_NAME << ": " + DEBUG(dbgs() << __func__ << ": " << PrintReg(Reg.R, TRI, Reg.S) << "\n"); typedef MachineRegisterInfo::use_iterator use_iterator; use_iterator I = MRI->use_begin(Reg.R), E = MRI->use_end(); @@ -220,7 +241,6 @@ void HexagonGenPredicate::processPredicateGPR(const Register &Reg) { } } - Register HexagonGenPredicate::getPredRegFor(const Register &Reg) { // Create a predicate register for a given Reg. The newly created register // will have its value copied from Reg, so that it can be later used as @@ -230,7 +250,7 @@ Register HexagonGenPredicate::getPredRegFor(const Register &Reg) { if (F != G2P.end()) return F->second; - DEBUG(dbgs() << LLVM_FUNCTION_NAME << ": " << PrintRegister(Reg, *TRI)); + DEBUG(dbgs() << __func__ << ": " << PrintRegister(Reg, *TRI)); MachineInstr *DefI = MRI->getVRegDef(Reg.R); assert(DefI); unsigned Opc = DefI->getOpcode(); @@ -261,7 +281,6 @@ Register HexagonGenPredicate::getPredRegFor(const Register &Reg) { llvm_unreachable("Invalid argument"); } - bool HexagonGenPredicate::isScalarCmp(unsigned Opc) { switch (Opc) { case Hexagon::C2_cmpeq: @@ -298,7 +317,6 @@ bool HexagonGenPredicate::isScalarCmp(unsigned Opc) { return false; } - bool HexagonGenPredicate::isScalarPred(Register PredReg) { std::queue<Register> WorkQ; WorkQ.push(PredReg); @@ -330,9 +348,9 @@ bool HexagonGenPredicate::isScalarPred(Register PredReg) { case Hexagon::C4_or_orn: case Hexagon::C2_xor: // Add operands to the queue. 
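      // (For example, for %p1 = C2_xor(%p2, %p3), both %p2 and %p3 are
      // queued and checked the same way; register names assumed.)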
- for (ConstMIOperands Mo(*DefI); Mo.isValid(); ++Mo) - if (Mo->isReg() && Mo->isUse()) - WorkQ.push(Register(Mo->getReg())); + for (const MachineOperand &MO : DefI->operands()) + if (MO.isReg() && MO.isUse()) + WorkQ.push(Register(MO.getReg())); break; // All non-vector compares are ok, everything else is bad. @@ -344,9 +362,8 @@ bool HexagonGenPredicate::isScalarPred(Register PredReg) { return true; } - bool HexagonGenPredicate::convertToPredForm(MachineInstr *MI) { - DEBUG(dbgs() << LLVM_FUNCTION_NAME << ": " << MI << " " << *MI); + DEBUG(dbgs() << __func__ << ": " << MI << " " << *MI); unsigned Opc = MI->getOpcode(); assert(isConvertibleToPredForm(MI)); @@ -356,7 +373,7 @@ bool HexagonGenPredicate::convertToPredForm(MachineInstr *MI) { if (!MO.isReg() || !MO.isUse()) continue; Register Reg(MO); - if (Reg.S && Reg.S != Hexagon::subreg_loreg) + if (Reg.S && Reg.S != Hexagon::isub_lo) return false; if (!PredGPRs.count(Reg)) return false; @@ -430,9 +447,8 @@ bool HexagonGenPredicate::convertToPredForm(MachineInstr *MI) { return true; } - bool HexagonGenPredicate::eliminatePredCopies(MachineFunction &MF) { - DEBUG(dbgs() << LLVM_FUNCTION_NAME << "\n"); + DEBUG(dbgs() << __func__ << "\n"); const TargetRegisterClass *PredRC = &Hexagon::PredRegsRegClass; bool Changed = false; VectOfInst Erase; @@ -474,7 +490,6 @@ bool HexagonGenPredicate::eliminatePredCopies(MachineFunction &MF) { return Changed; } - bool HexagonGenPredicate::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(*MF.getFunction())) return false; @@ -518,8 +533,6 @@ bool HexagonGenPredicate::runOnMachineFunction(MachineFunction &MF) { return Changed; } - FunctionPass *llvm::createHexagonGenPredicate() { return new HexagonGenPredicate(); } - diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp index cc154c4..e477dcc 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp @@ -25,22 +25,37 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/SmallSet.h" -#include "Hexagon.h" +#include "HexagonInstrInfo.h" #include "HexagonSubtarget.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/PassSupport.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include <algorithm> +#include "llvm/Target/TargetRegisterInfo.h" +#include <cassert> +#include <cstdint> +#include <cstdlib> +#include <iterator> +#include <map> +#include <set> +#include <utility> #include <vector> using namespace llvm; @@ -60,15 +75,26 @@ static cl::opt<bool> HWCreatePreheader("hexagon-hwloop-preheader", cl::Hidden, cl::init(true), cl::desc("Add a preheader to a hardware loop if one doesn't exist")); +// Turn it 
off by default. If a preheader block is not created here, the +// software pipeliner may be unable to find a block suitable to serve as +// a preheader. In that case SWP will not run. +static cl::opt<bool> SpecPreheader("hwloop-spec-preheader", cl::init(false), + cl::Hidden, cl::ZeroOrMore, cl::desc("Allow speculation of preheader " + "instructions")); + STATISTIC(NumHWLoops, "Number of loops converted to hardware loops"); namespace llvm { + FunctionPass *createHexagonHardwareLoops(); void initializeHexagonHardwareLoopsPass(PassRegistry&); -} + +} // end namespace llvm namespace { + class CountValue; + struct HexagonHardwareLoops : public MachineFunctionPass { MachineLoopInfo *MLI; MachineRegisterInfo *MRI; @@ -87,7 +113,7 @@ namespace { bool runOnMachineFunction(MachineFunction &MF) override; - const char *getPassName() const override { return "Hexagon Hardware Loops"; } + StringRef getPassName() const override { return "Hexagon Hardware Loops"; } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<MachineDominatorTree>(); @@ -138,7 +164,6 @@ namespace { static bool isUnsigned(Kind Cmp) { return (Cmp & U); } - }; /// \brief Find the register that contains the loop controlling @@ -289,6 +314,7 @@ namespace { CV_Register, CV_Immediate }; + private: CountValueType Kind; union Values { @@ -309,6 +335,7 @@ namespace { Contents.ImmVal = v; } } + bool isReg() const { return Kind == CV_Register; } bool isImm() const { return Kind == CV_Immediate; } @@ -330,8 +357,8 @@ namespace { if (isImm()) { OS << Contents.ImmVal; } } }; -} // end anonymous namespace +} // end anonymous namespace INITIALIZE_PASS_BEGIN(HexagonHardwareLoops, "hwloops", "Hexagon Hardware Loops", false, false) @@ -366,28 +393,15 @@ bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) { return Changed; } -/// \brief Return the latch block if it's one of the exiting blocks. Otherwise, -/// return the exiting block. Return 'null' when multiple exiting blocks are -/// present. -static MachineBasicBlock* getExitingBlock(MachineLoop *L) { - if (MachineBasicBlock *Latch = L->getLoopLatch()) { - if (L->isLoopExiting(Latch)) - return Latch; - else - return L->getExitingBlock(); - } - return nullptr; -} - bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L, unsigned &Reg, int64_t &IVBump, MachineInstr *&IVOp ) const { MachineBasicBlock *Header = L->getHeader(); - MachineBasicBlock *Preheader = L->getLoopPreheader(); + MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader); MachineBasicBlock *Latch = L->getLoopLatch(); - MachineBasicBlock *ExitingBlock = getExitingBlock(L); + MachineBasicBlock *ExitingBlock = L->findLoopControlBlock(); if (!Header || !Preheader || !Latch || !ExitingBlock) return false; @@ -417,10 +431,8 @@ bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L, unsigned PhiOpReg = Phi->getOperand(i).getReg(); MachineInstr *DI = MRI->getVRegDef(PhiOpReg); - unsigned UpdOpc = DI->getOpcode(); - bool isAdd = (UpdOpc == Hexagon::A2_addi || UpdOpc == Hexagon::A2_addp); - if (isAdd) { + if (DI->getDesc().isAdd()) { // If the register operand to the add is the PHI we're looking at, this // meets the induction pattern. unsigned IndReg = DI->getOperand(1).getReg(); @@ -555,7 +567,7 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L, // Look for the cmp instruction to determine if we can get a useful trip // count. The trip count can be either a register or an immediate. The // location of the value depends upon the type (reg or imm). 
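  // For example, with an assumed C-level loop shape:
  //   for (i = 0; i < 10; ++i)  -->  trip count is an immediate (CV_Immediate)
  //   for (i = 0; i < n;  ++i)  -->  trip count lives in a register (CV_Register)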
- MachineBasicBlock *ExitingBlock = getExitingBlock(L); + MachineBasicBlock *ExitingBlock = L->findLoopControlBlock(); if (!ExitingBlock) return nullptr; @@ -566,7 +578,7 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L, if (!FoundIV) return nullptr; - MachineBasicBlock *Preheader = L->getLoopPreheader(); + MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader); MachineOperand *InitialValue = nullptr; MachineInstr *IV_Phi = MRI->getVRegDef(IVReg); @@ -593,7 +605,7 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L, // the fall through can go to the header. assert (TB && "Exit block without a branch?"); if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) { - MachineBasicBlock *LTB = 0, *LFB = 0; + MachineBasicBlock *LTB = nullptr, *LFB = nullptr; SmallVector<MachineOperand,2> LCond; bool NotAnalyzed = TII->analyzeBranch(*Latch, LTB, LFB, LCond, false); if (NotAnalyzed) @@ -787,7 +799,7 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop, if (!isPowerOf2_64(std::abs(IVBump))) return nullptr; - MachineBasicBlock *PH = Loop->getLoopPreheader(); + MachineBasicBlock *PH = MLI->findLoopPreheader(Loop, SpecPreheader); assert (PH && "Should have a preheader by now"); MachineBasicBlock::iterator InsertPos = PH->getFirstTerminator(); DebugLoc DL; @@ -951,8 +963,8 @@ bool HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI, // Call is not allowed because the callee may use a hardware loop except for // the case when the call never returns. - if (MI->getDesc().isCall() && MI->getOpcode() != Hexagon::CALLv3nr) - return true; + if (MI->getDesc().isCall()) + return !TII->doesNotReturn(*MI); // Check if the instruction defines a hardware loop register. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { @@ -1138,7 +1150,7 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L, if (containsInvalidInstruction(L, IsInnerHWLoop)) return false; - MachineBasicBlock *LastMBB = getExitingBlock(L); + MachineBasicBlock *LastMBB = L->findLoopControlBlock(); // Don't generate hw loop if the loop has more than one exit. if (!LastMBB) return false; @@ -1153,7 +1165,7 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L, // Ensure the loop has a preheader: the loop instruction will be // placed there. - MachineBasicBlock *Preheader = L->getLoopPreheader(); + MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader); if (!Preheader) { Preheader = createPreheaderForLoop(L); if (!Preheader) @@ -1180,10 +1192,10 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L, // Determine the loop start. MachineBasicBlock *TopBlock = L->getTopBlock(); - MachineBasicBlock *ExitingBlock = getExitingBlock(L); - MachineBasicBlock *LoopStart = 0; + MachineBasicBlock *ExitingBlock = L->findLoopControlBlock(); + MachineBasicBlock *LoopStart = nullptr; if (ExitingBlock != L->getLoopLatch()) { - MachineBasicBlock *TB = 0, *FB = 0; + MachineBasicBlock *TB = nullptr, *FB = nullptr; SmallVector<MachineOperand, 2> Cond; if (TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false)) @@ -1254,7 +1266,7 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L, if (LastI != LastMBB->end()) LastI = LastMBB->erase(LastI); SmallVector<MachineOperand, 0> Cond; - TII->InsertBranch(*LastMBB, BranchTarget, nullptr, Cond, LastIDL); + TII->insertBranch(*LastMBB, BranchTarget, nullptr, Cond, LastIDL); } } else { // Conditional branch to loop start; just delete it. 
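// A minimal sketch of the distance/bump arithmetic behind computeCount for
// the simplest case, where start, end and bump are known immediates and the
// comparison is a plain less-than (the in-tree code additionally emits
// instructions for register operands, handles the other comparison kinds and
// guards against overflow; the helper name below is illustrative):
#include <cassert>
#include <cstdint>

int64_t tripCountSketch(int64_t Start, int64_t End, int64_t IVBump) {
  assert(IVBump > 0 && "Sketch assumes a positive, nonzero bump");
  int64_t Dist = End - Start;
  // Round up so that a partial final iteration still counts as one trip.
  return (Dist + IVBump - 1) / IVBump;
}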
@@ -1423,12 +1435,13 @@ bool HexagonHardwareLoops::loopCountMayWrapOrUnderFlow( if (!TII->analyzeCompare(*MI, CmpReg1, CmpReg2, CmpMask, CmpValue)) continue; - MachineBasicBlock *TBB = 0, *FBB = 0; + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector<MachineOperand, 2> Cond; if (TII->analyzeBranch(*MI->getParent(), TBB, FBB, Cond, false)) continue; - Comparison::Kind Cmp = getComparisonKind(MI->getOpcode(), 0, 0, 0); + Comparison::Kind Cmp = + getComparisonKind(MI->getOpcode(), nullptr, nullptr, 0); if (Cmp == 0) continue; if (TII->predOpcodeHasNot(Cond) ^ (TBB != MBB)) @@ -1479,8 +1492,8 @@ bool HexagonHardwareLoops::checkForImmediate(const MachineOperand &MO, case TargetOpcode::COPY: case Hexagon::A2_tfrsi: case Hexagon::A2_tfrpi: - case Hexagon::CONST32_Int_Real: - case Hexagon::CONST64_Int_Real: { + case Hexagon::CONST32: + case Hexagon::CONST64: { // Call recursively to avoid an extra check whether operand(1) is // indeed an immediate (it could be a global address, for example), // plus we can handle COPY at the same time. @@ -1509,9 +1522,9 @@ bool HexagonHardwareLoops::checkForImmediate(const MachineOperand &MO, return false; unsigned Sub2 = DI->getOperand(2).getImm(); unsigned Sub4 = DI->getOperand(4).getImm(); - if (Sub2 == Hexagon::subreg_loreg && Sub4 == Hexagon::subreg_hireg) + if (Sub2 == Hexagon::isub_lo && Sub4 == Hexagon::isub_hi) TV = V1 | (V3 << 32); - else if (Sub2 == Hexagon::subreg_hireg && Sub4 == Hexagon::subreg_loreg) + else if (Sub2 == Hexagon::isub_hi && Sub4 == Hexagon::isub_lo) TV = V3 | (V1 << 32); else llvm_unreachable("Unexpected form of REG_SEQUENCE"); @@ -1522,13 +1535,13 @@ bool HexagonHardwareLoops::checkForImmediate(const MachineOperand &MO, return false; } - // By now, we should have successfuly obtained the immediate value defining + // By now, we should have successfully obtained the immediate value defining // the register referenced in MO. Handle a potential use of a subregister. switch (MO.getSubReg()) { - case Hexagon::subreg_loreg: + case Hexagon::isub_lo: Val = TV & 0xFFFFFFFFULL; break; - case Hexagon::subreg_hireg: + case Hexagon::isub_hi: Val = (TV >> 32) & 0xFFFFFFFFULL; break; default: @@ -1569,7 +1582,7 @@ static bool isImmValidForOpcode(unsigned CmpOpc, int64_t Imm) { bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) { MachineBasicBlock *Header = L->getHeader(); MachineBasicBlock *Latch = L->getLoopLatch(); - MachineBasicBlock *ExitingBlock = getExitingBlock(L); + MachineBasicBlock *ExitingBlock = L->findLoopControlBlock(); if (!(Header && Latch && ExitingBlock)) return false; @@ -1598,10 +1611,8 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) { unsigned PhiReg = Phi->getOperand(i).getReg(); MachineInstr *DI = MRI->getVRegDef(PhiReg); - unsigned UpdOpc = DI->getOpcode(); - bool isAdd = (UpdOpc == Hexagon::A2_addi || UpdOpc == Hexagon::A2_addp); - if (isAdd) { + if (DI->getDesc().isAdd()) { // If the register operand to the add/sub is the PHI we are looking // at, this meets the induction pattern. 
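// For example (illustrative machine IR, not taken from the patch):
//   %vreg1 = PHI %vreg0, <%preheader>, %vreg2, <%latch>
//   %vreg2 = A2_addi %vreg1, 4
// The add consumes the PHI value and feeds it back through the latch, so
// %vreg1 is an induction register with a bump of 4.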
unsigned IndReg = DI->getOperand(1).getReg(); @@ -1626,7 +1637,7 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) { return false; if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) { - MachineBasicBlock *LTB = 0, *LFB = 0; + MachineBasicBlock *LTB = nullptr, *LFB = nullptr; SmallVector<MachineOperand,2> LCond; bool NotAnalyzed = TII->analyzeBranch(*Latch, LTB, LFB, LCond, false); if (NotAnalyzed) @@ -1764,7 +1775,8 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) { // It is not valid to do this transformation on an unsigned comparison // because it may underflow. - Comparison::Kind Cmp = getComparisonKind(PredDef->getOpcode(), 0, 0, 0); + Comparison::Kind Cmp = + getComparisonKind(PredDef->getOpcode(), nullptr, nullptr, 0); if (!Cmp || Comparison::isUnsigned(Cmp)) return false; @@ -1807,18 +1819,17 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) { return false; } -/// \brief Create a preheader for a given loop. +/// createPreheaderForLoop - Create a preheader for a given loop. MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop( MachineLoop *L) { - if (MachineBasicBlock *TmpPH = L->getLoopPreheader()) + if (MachineBasicBlock *TmpPH = MLI->findLoopPreheader(L, SpecPreheader)) return TmpPH; - if (!HWCreatePreheader) return nullptr; MachineBasicBlock *Header = L->getHeader(); MachineBasicBlock *Latch = L->getLoopLatch(); - MachineBasicBlock *ExitingBlock = getExitingBlock(L); + MachineBasicBlock *ExitingBlock = L->findLoopControlBlock(); MachineFunction *MF = Header->getParent(); DebugLoc DL; @@ -1898,7 +1909,6 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop( PN->addOperand(MachineOperand::CreateReg(NewPR, false)); PN->addOperand(MachineOperand::CreateMBB(NewPH)); } - } else { assert(Header->pred_size() == 2); @@ -1934,7 +1944,7 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop( (void)NotAnalyzed; // suppress compiler warning assert (!NotAnalyzed && "Should be analyzable!"); if (TB != Header && (Tmp2.empty() || FB != Header)) - TII->InsertBranch(*PB, NewPH, nullptr, EmptyCond, DL); + TII->insertBranch(*PB, NewPH, nullptr, EmptyCond, DL); PB->ReplaceUsesOfBlockWith(Header, NewPH); } } @@ -1946,10 +1956,10 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop( (void)LatchNotAnalyzed; // suppress compiler warning assert (!LatchNotAnalyzed && "Should be analyzable!"); if (!TB && !FB) - TII->InsertBranch(*Latch, Header, nullptr, EmptyCond, DL); + TII->insertBranch(*Latch, Header, nullptr, EmptyCond, DL); // Finally, the branch from the preheader to the header. - TII->InsertBranch(*NewPH, Header, nullptr, EmptyCond, DL); + TII->insertBranch(*NewPH, Header, nullptr, EmptyCond, DL); NewPH->addSuccessor(Header); MachineLoop *ParentLoop = L->getParentLoop(); @@ -1958,9 +1968,12 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop( // Update the dominator information with the new preheader. 
if (MDT) { - MachineDomTreeNode *HDom = MDT->getNode(Header); - MDT->addNewBlock(NewPH, HDom->getIDom()->getBlock()); - MDT->changeImmediateDominator(Header, NewPH); + if (MachineDomTreeNode *HN = MDT->getNode(Header)) { + if (MachineDomTreeNode *DHN = HN->getIDom()) { + MDT->addNewBlock(NewPH, DHN->getBlock()); + MDT->changeImmediateDominator(Header, NewPH); + } + } } return NewPH; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonHazardRecognizer.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonHazardRecognizer.cpp new file mode 100644 index 0000000..036b186 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonHazardRecognizer.cpp @@ -0,0 +1,140 @@ +//===-- HexagonHazardRecognizer.cpp - Hexagon Post RA Hazard Recognizer ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the hazard recognizer for scheduling on Hexagon. +// Use a DFA based hazard recognizer. +// +//===----------------------------------------------------------------------===// + +#include "HexagonHazardRecognizer.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include <cassert> + +using namespace llvm; + +#define DEBUG_TYPE "post-RA-sched" + +void HexagonHazardRecognizer::Reset() { + DEBUG(dbgs() << "Reset hazard recognizer\n"); + Resources->clearResources(); + PacketNum = 0; + UsesDotCur = nullptr; + DotCurPNum = -1; + RegDefs.clear(); +} + +ScheduleHazardRecognizer::HazardType +HexagonHazardRecognizer::getHazardType(SUnit *SU, int stalls) { + MachineInstr *MI = SU->getInstr(); + if (!MI || TII->isZeroCost(MI->getOpcode())) + return NoHazard; + + if (!Resources->canReserveResources(*MI)) { + DEBUG(dbgs() << "*** Hazard in cycle " << PacketNum << ", " << *MI); + HazardType RetVal = Hazard; + if (TII->mayBeNewStore(*MI)) { + // Make sure the register to be stored is defined by an instruction in the + // packet. + MachineOperand &MO = MI->getOperand(MI->getNumOperands() - 1); + if (!MO.isReg() || RegDefs.count(MO.getReg()) == 0) + return Hazard; + // The .new store version uses different resources so check if it + // causes a hazard. + MachineFunction *MF = MI->getParent()->getParent(); + MachineInstr *NewMI = + MF->CreateMachineInstr(TII->get(TII->getDotNewOp(*MI)), + MI->getDebugLoc()); + if (Resources->canReserveResources(*NewMI)) + RetVal = NoHazard; + DEBUG(dbgs() << "*** Try .new version? " << (RetVal == NoHazard) << "\n"); + MF->DeleteMachineInstr(NewMI); + } + return RetVal; + } + + if (SU == UsesDotCur && DotCurPNum != (int)PacketNum) { + DEBUG(dbgs() << "*** .cur Hazard in cycle " << PacketNum << ", " << *MI); + return Hazard; + } + + return NoHazard; +} + +void HexagonHazardRecognizer::AdvanceCycle() { + DEBUG(dbgs() << "Advance cycle, clear state\n"); + Resources->clearResources(); + if (DotCurPNum != -1 && DotCurPNum != (int)PacketNum) { + UsesDotCur = nullptr; + DotCurPNum = -1; + } + PacketNum++; + RegDefs.clear(); +} + +/// If a packet contains a dot cur instruction, then we may prefer the +/// instruction that can use the dot cur result. Or, if the use +/// isn't scheduled in the same packet, then prefer other instructions +/// in the subsequent packet. 
+bool HexagonHazardRecognizer::ShouldPreferAnother(SUnit *SU) { + return UsesDotCur && ((SU == UsesDotCur) ^ (DotCurPNum == (int)PacketNum)); +} + +void HexagonHazardRecognizer::EmitInstruction(SUnit *SU) { + MachineInstr *MI = SU->getInstr(); + if (!MI) + return; + + // Keep the set of definitions for each packet, which is used to determine + // if a .new can be used. + for (const MachineOperand &MO : MI->operands()) + if (MO.isReg() && MO.isDef() && !MO.isImplicit()) + RegDefs.insert(MO.getReg()); + + if (TII->isZeroCost(MI->getOpcode())) + return; + + if (!Resources->canReserveResources(*MI)) { + // It must be a .new store since other instructions must be able to be + // reserved at this point. + assert(TII->mayBeNewStore(*MI) && "Expecting .new store"); + MachineFunction *MF = MI->getParent()->getParent(); + MachineInstr *NewMI = + MF->CreateMachineInstr(TII->get(TII->getDotNewOp(*MI)), + MI->getDebugLoc()); + assert(Resources->canReserveResources(*NewMI)); + Resources->reserveResources(*NewMI); + MF->DeleteMachineInstr(NewMI); + } + else + Resources->reserveResources(*MI); + DEBUG(dbgs() << " Add instruction " << *MI); + + // When scheduling a dot cur instruction, check if there is an instruction + // that can use the dot cur in the same packet. If so, we'll attempt to + // schedule it before other instructions. We only do this if the use has + // the same height as the dot cur. Otherwise, we may miss scheduling an + // instruction with a greater height, which is more important. + if (TII->mayBeCurLoad(*MI)) + for (auto &S : SU->Succs) + if (S.isAssignedRegDep() && S.getLatency() == 0 && + SU->getHeight() == S.getSUnit()->getHeight()) { + UsesDotCur = S.getSUnit(); + DotCurPNum = PacketNum; + break; + } + if (SU == UsesDotCur) { + UsesDotCur = nullptr; + DotCurPNum = -1; + } +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonHazardRecognizer.h b/contrib/llvm/lib/Target/Hexagon/HexagonHazardRecognizer.h new file mode 100644 index 0000000..70efcb7 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonHazardRecognizer.h @@ -0,0 +1,78 @@ +//===--- HexagonHazardRecognizer.h - Hexagon Post RA Hazard Recognizer ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This file defines the hazard recognizer for scheduling on Hexagon. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONPROFITRECOGNIZER_H +#define LLVM_LIB_TARGET_HEXAGON_HEXAGONPROFITRECOGNIZER_H + +#include "HexagonInstrInfo.h" +#include "HexagonSubtarget.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/CodeGen/DFAPacketizer.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" + +namespace llvm { + +class HexagonHazardRecognizer : public ScheduleHazardRecognizer { + DFAPacketizer *Resources; + const HexagonInstrInfo *TII; + unsigned PacketNum; + // If the packet contains a potential dot cur instruction. This is + // used for the scheduling priority function. + SUnit *UsesDotCur; + // The packet number when a dot cur is emitted. If its use is not generated + // in the same packet, then try to wait another cycle before emitting. + int DotCurPNum; + // The set of registers defined by instructions in the current packet. 
+ SmallSet<unsigned, 8> RegDefs; + +public: + HexagonHazardRecognizer(const InstrItineraryData *II, + const HexagonInstrInfo *HII, + const HexagonSubtarget &ST) + : Resources(ST.createDFAPacketizer(II)), TII(HII), PacketNum(0), + UsesDotCur(nullptr), DotCurPNum(-1) { } + + ~HexagonHazardRecognizer() override { + if (Resources) + delete Resources; + } + + /// This callback is invoked when a new block of instructions is about to be + /// scheduled. The hazard state is set to an initialized state. + void Reset() override; + + /// Return the hazard type of emitting this node. There are two + /// possible results. Either: + /// * NoHazard: it is legal to issue this instruction on this cycle. + /// * Hazard: issuing this instruction would stall the machine. If some + /// other instruction is available, issue it first. + HazardType getHazardType(SUnit *SU, int stalls) override; + + /// This callback is invoked when an instruction is emitted to be scheduled, + /// to advance the hazard state. + void EmitInstruction(SUnit *) override; + + /// This callback may be invoked if getHazardType returns NoHazard. If, even + /// though there is no hazard, it would be better to schedule another + /// available instruction, this callback should return true. + bool ShouldPreferAnother(SUnit *) override; + + /// This callback is invoked whenever the next top-down instruction to be + /// scheduled cannot issue in the current cycle, either because of latency + /// or resource conflicts. This should increment the internal state of the + /// hazard recognizer so that previously "Hazard" instructions will now not + /// be hazards. + void AdvanceCycle() override; +}; + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONPROFITRECOGNIZER_H diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp index 22247aa..f6012d2 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp @@ -26,11 +26,22 @@ using namespace llvm; #define DEBUG_TYPE "hexagon-isel" static -cl::opt<unsigned> -MaxNumOfUsesForConstExtenders("ga-max-num-uses-for-constant-extenders", - cl::Hidden, cl::init(2), - cl::desc("Maximum number of uses of a global address such that we still us a" - "constant extended instruction")); +cl::opt<bool> +EnableAddressRebalancing("isel-rebalance-addr", cl::Hidden, cl::init(true), + cl::desc("Rebalance address calculation trees to improve " + "instruction selection")); + +// Rebalance only if this allows e.g. combining a GA with an offset or +// factoring out a shift. 
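// For instance (illustrative shapes, not text from the patch), rebalancing
// can turn ADD(ADD(GA, x), 4) into ADD(GA+4, x), folding the offset into the
// global address, or ADD(SHL(a, 2), SHL(b, 2)) into SHL(ADD(a, b), 2),
// leaving a single shift for the addressing mode to absorb.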
+static +cl::opt<bool> +RebalanceOnlyForOptimizations("rebalance-only-opt", cl::Hidden, cl::init(false), + cl::desc("Rebalance address tree only if this allows optimizations")); + +static +cl::opt<bool> +RebalanceOnlyImbalancedTrees("rebalance-only-imbal", cl::Hidden, + cl::init(false), cl::desc("Rebalance address tree only if it is imbalanced")); //===----------------------------------------------------------------------===// // Instruction Selector Implementation @@ -42,14 +53,13 @@ MaxNumOfUsesForConstExtenders("ga-max-num-uses-for-constant-extenders", /// namespace { class HexagonDAGToDAGISel : public SelectionDAGISel { - const HexagonTargetMachine &HTM; const HexagonSubtarget *HST; const HexagonInstrInfo *HII; const HexagonRegisterInfo *HRI; public: explicit HexagonDAGToDAGISel(HexagonTargetMachine &tm, CodeGenOpt::Level OptLevel) - : SelectionDAGISel(tm, OptLevel), HTM(tm), HST(nullptr), HII(nullptr), + : SelectionDAGISel(tm, OptLevel), HST(nullptr), HII(nullptr), HRI(nullptr) {} bool runOnMachineFunction(MachineFunction &MF) override { @@ -61,8 +71,8 @@ public: return true; } - virtual void PreprocessISelDAG() override; - virtual void EmitFunctionEntryCode() override; + void PreprocessISelDAG() override; + void EmitFunctionEntryCode() override; void Select(SDNode *N) override; @@ -72,7 +82,7 @@ public: bool SelectGlobalAddress(SDValue &N, SDValue &R, bool UseGP); bool SelectAddrFI(SDValue &N, SDValue &R); - const char *getPassName() const override { + StringRef getPassName() const override { return "Hexagon DAG->DAG Pattern Instruction Selection"; } @@ -92,7 +102,6 @@ public: std::vector<SDValue> &OutOps) override; bool tryLoadOfLoadIntrinsic(LoadSDNode *N); void SelectLoad(SDNode *N); - void SelectBaseOffsetLoad(LoadSDNode *LD, SDLoc dl); void SelectIndexedLoad(LoadSDNode *LD, const SDLoc &dl); void SelectIndexedStore(StoreSDNode *ST, const SDLoc &dl); void SelectStore(SDNode *N); @@ -103,82 +112,27 @@ public: void SelectIntrinsicWOChain(SDNode *N); void SelectConstant(SDNode *N); void SelectConstantFP(SDNode *N); - void SelectAdd(SDNode *N); void SelectBitcast(SDNode *N); - void SelectBitOp(SDNode *N); - - // XformMskToBitPosU5Imm - Returns the bit position which - // the single bit 32 bit mask represents. - // Used in Clr and Set bit immediate memops. - SDValue XformMskToBitPosU5Imm(uint32_t Imm, const SDLoc &DL) { - int32_t bitPos; - bitPos = Log2_32(Imm); - assert(bitPos >= 0 && bitPos < 32 && - "Constant out of range for 32 BitPos Memops"); - return CurDAG->getTargetConstant(bitPos, DL, MVT::i32); - } - - // XformMskToBitPosU4Imm - Returns the bit position which the single-bit - // 16 bit mask represents. Used in Clr and Set bit immediate memops. - SDValue XformMskToBitPosU4Imm(uint16_t Imm, const SDLoc &DL) { - return XformMskToBitPosU5Imm(Imm, DL); - } - - // XformMskToBitPosU3Imm - Returns the bit position which the single-bit - // 8 bit mask represents. Used in Clr and Set bit immediate memops. - SDValue XformMskToBitPosU3Imm(uint8_t Imm, const SDLoc &DL) { - return XformMskToBitPosU5Imm(Imm, DL); - } - - // Return true if there is exactly one bit set in V, i.e., if V is one of the - // following integers: 2^0, 2^1, ..., 2^31. - bool ImmIsSingleBit(uint32_t v) const { - return isPowerOf2_32(v); - } - - // XformM5ToU5Imm - Return a target constant with the specified value, of - // type i32 where the negative literal is transformed into a positive literal - // for use in -= memops. 
- inline SDValue XformM5ToU5Imm(signed Imm, const SDLoc &DL) { - assert((Imm >= -31 && Imm <= -1) && "Constant out of range for Memops"); - return CurDAG->getTargetConstant(-Imm, DL, MVT::i32); - } - - // XformU7ToU7M1Imm - Return a target constant decremented by 1, in range - // [1..128], used in cmpb.gtu instructions. - inline SDValue XformU7ToU7M1Imm(signed Imm, const SDLoc &DL) { - assert((Imm >= 1 && Imm <= 128) && "Constant out of range for cmpb op"); - return CurDAG->getTargetConstant(Imm - 1, DL, MVT::i8); - } - - // XformS8ToS8M1Imm - Return a target constant decremented by 1. - inline SDValue XformSToSM1Imm(signed Imm, const SDLoc &DL) { - return CurDAG->getTargetConstant(Imm - 1, DL, MVT::i32); - } - - // XformU8ToU8M1Imm - Return a target constant decremented by 1. - inline SDValue XformUToUM1Imm(unsigned Imm, const SDLoc &DL) { - assert((Imm >= 1) && "Cannot decrement unsigned int less than 1"); - return CurDAG->getTargetConstant(Imm - 1, DL, MVT::i32); - } - - // XformSToSM2Imm - Return a target constant decremented by 2. - inline SDValue XformSToSM2Imm(unsigned Imm, const SDLoc &DL) { - return CurDAG->getTargetConstant(Imm - 2, DL, MVT::i32); - } - - // XformSToSM3Imm - Return a target constant decremented by 3. - inline SDValue XformSToSM3Imm(unsigned Imm, const SDLoc &DL) { - return CurDAG->getTargetConstant(Imm - 3, DL, MVT::i32); - } // Include the pieces autogenerated from the target description. #include "HexagonGenDAGISel.inc" private: bool isValueExtension(const SDValue &Val, unsigned FromBits, SDValue &Src); - bool orIsAdd(const SDNode *N) const; + bool isOrEquivalentToAdd(const SDNode *N) const; bool isAlignedMemNode(const MemSDNode *N) const; + bool isPositiveHalfWord(const SDNode *N) const; + + SmallDenseMap<SDNode *,int> RootWeights; + SmallDenseMap<SDNode *,int> RootHeights; + SmallDenseMap<const Value *,int> GAUsesInFunction; + int getWeight(SDNode *N); + int getHeight(SDNode *N); + SDValue getMultiplierForSHL(SDNode *N); + SDValue factorOutPowerOf2(SDValue V, unsigned Power); + unsigned getUsesInFunction(const Value *V); + SDValue balanceSubTree(SDNode *N, bool Factorize = false); + void rebalanceAddressTrees(); }; // end HexagonDAGToDAGISel } // end anonymous namespace @@ -588,7 +542,7 @@ void HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, const SDLoc &dl) { if (ST->isTruncatingStore() && ValueVT.getSizeInBits() == 64) { assert(StoredVT.getSizeInBits() < 64 && "Not a truncating store"); - Value = CurDAG->getTargetExtractSubreg(Hexagon::subreg_loreg, + Value = CurDAG->getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, Value); } @@ -640,7 +594,6 @@ void HexagonDAGToDAGISel::SelectStore(SDNode *N) { void HexagonDAGToDAGISel::SelectMul(SDNode *N) { SDLoc dl(N); - // // %conv.i = sext i32 %tmp1 to i64 // %conv2.i = sext i32 %add to i64 // %mul.i = mul nsw i64 %conv2.i, %conv.i @@ -665,7 +618,6 @@ void HexagonDAGToDAGISel::SelectMul(SDNode *N) { SelectCode(N); return; } - OP0 = Sext0; } else if (MulOp0.getOpcode() == ISD::LOAD) { LoadSDNode *LD = cast<LoadSDNode>(MulOp0.getNode()); @@ -675,7 +627,6 @@ void HexagonDAGToDAGISel::SelectMul(SDNode *N) { SelectCode(N); return; } - SDValue Chain = LD->getChain(); SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32); OP0 = SDValue(CurDAG->getMachineNode(Hexagon::L2_loadri_io, dl, MVT::i32, @@ -694,7 +645,6 @@ void HexagonDAGToDAGISel::SelectMul(SDNode *N) { SelectCode(N); return; } - OP1 = Sext1; } else if (MulOp1.getOpcode() == ISD::LOAD) { LoadSDNode *LD = cast<LoadSDNode>(MulOp1.getNode()); 
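// At the source level, the pattern SelectMul matches here corresponds to a
// widening 32x32->64 signed multiply (an illustrative C equivalent; the pass
// itself operates on SelectionDAG nodes):
long long widening_mul(int a, int b) {
  // Both operands are sign-extended to 64 bits before the multiply; on
  // Hexagon this selects to a single M2_dpmpyss_s0 rather than a full
  // 64-bit multiplication.
  return (long long)a * (long long)b;
}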
@@ -704,7 +654,6 @@ void HexagonDAGToDAGISel::SelectMul(SDNode *N) { SelectCode(N); return; } - SDValue Chain = LD->getChain(); SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32); OP1 = SDValue(CurDAG->getMachineNode(Hexagon::L2_loadri_io, dl, MVT::i32, @@ -717,8 +666,8 @@ void HexagonDAGToDAGISel::SelectMul(SDNode *N) { } // Generate a mpy instruction. - SDNode *Result = CurDAG->getMachineNode(Hexagon::M2_dpmpyss_s0, dl, MVT::i64, - OP0, OP1); + SDNode *Result = CurDAG->getMachineNode(Hexagon::M2_dpmpyss_s0, dl, + MVT::i64, OP0, OP1); ReplaceNode(N, Result); return; } @@ -728,68 +677,56 @@ void HexagonDAGToDAGISel::SelectMul(SDNode *N) { void HexagonDAGToDAGISel::SelectSHL(SDNode *N) { SDLoc dl(N); - if (N->getValueType(0) == MVT::i32) { - SDValue Shl_0 = N->getOperand(0); - SDValue Shl_1 = N->getOperand(1); - // RHS is const. - if (Shl_1.getOpcode() == ISD::Constant) { - if (Shl_0.getOpcode() == ISD::MUL) { - SDValue Mul_0 = Shl_0.getOperand(0); // Val - SDValue Mul_1 = Shl_0.getOperand(1); // Const - // RHS of mul is const. - if (Mul_1.getOpcode() == ISD::Constant) { - int32_t ShlConst = - cast<ConstantSDNode>(Shl_1.getNode())->getSExtValue(); - int32_t MulConst = - cast<ConstantSDNode>(Mul_1.getNode())->getSExtValue(); - int32_t ValConst = MulConst << ShlConst; - SDValue Val = CurDAG->getTargetConstant(ValConst, dl, - MVT::i32); - if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val.getNode())) - if (isInt<9>(CN->getSExtValue())) { - SDNode* Result = - CurDAG->getMachineNode(Hexagon::M2_mpysmi, dl, - MVT::i32, Mul_0, Val); - ReplaceNode(N, Result); - return; - } + SDValue Shl_0 = N->getOperand(0); + SDValue Shl_1 = N->getOperand(1); + + auto Default = [this,N] () -> void { SelectCode(N); }; + + if (N->getValueType(0) != MVT::i32 || Shl_1.getOpcode() != ISD::Constant) + return Default(); + + // RHS is const. + int32_t ShlConst = cast<ConstantSDNode>(Shl_1)->getSExtValue(); + + if (Shl_0.getOpcode() == ISD::MUL) { + SDValue Mul_0 = Shl_0.getOperand(0); // Val + SDValue Mul_1 = Shl_0.getOperand(1); // Const + // RHS of mul is const. 
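// Worked example (illustrative): for (x * 3) << 2, ValConst becomes
// 3 << 2 = 12, which fits in a signed 9-bit field, so the whole expression
// folds into a single M2_mpysmi of x by 12.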
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Mul_1)) { + int32_t ValConst = C->getSExtValue() << ShlConst; + if (isInt<9>(ValConst)) { + SDValue Val = CurDAG->getTargetConstant(ValConst, dl, MVT::i32); + SDNode *Result = CurDAG->getMachineNode(Hexagon::M2_mpysmi, dl, + MVT::i32, Mul_0, Val); + ReplaceNode(N, Result); + return; + } + } + return Default(); + } - } - } else if (Shl_0.getOpcode() == ISD::SUB) { - SDValue Sub_0 = Shl_0.getOperand(0); // Const 0 - SDValue Sub_1 = Shl_0.getOperand(1); // Val - if (Sub_0.getOpcode() == ISD::Constant) { - int32_t SubConst = - cast<ConstantSDNode>(Sub_0.getNode())->getSExtValue(); - if (SubConst == 0) { - if (Sub_1.getOpcode() == ISD::SHL) { - SDValue Shl2_0 = Sub_1.getOperand(0); // Val - SDValue Shl2_1 = Sub_1.getOperand(1); // Const - if (Shl2_1.getOpcode() == ISD::Constant) { - int32_t ShlConst = - cast<ConstantSDNode>(Shl_1.getNode())->getSExtValue(); - int32_t Shl2Const = - cast<ConstantSDNode>(Shl2_1.getNode())->getSExtValue(); - int32_t ValConst = 1 << (ShlConst+Shl2Const); - SDValue Val = CurDAG->getTargetConstant(-ValConst, dl, - MVT::i32); - if (ConstantSDNode *CN = - dyn_cast<ConstantSDNode>(Val.getNode())) - if (isInt<9>(CN->getSExtValue())) { - SDNode* Result = - CurDAG->getMachineNode(Hexagon::M2_mpysmi, dl, MVT::i32, - Shl2_0, Val); - ReplaceNode(N, Result); - return; - } - } - } - } + if (Shl_0.getOpcode() == ISD::SUB) { + SDValue Sub_0 = Shl_0.getOperand(0); // Const 0 + SDValue Sub_1 = Shl_0.getOperand(1); // Val + if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Sub_0)) { + if (C1->getSExtValue() != 0 || Sub_1.getOpcode() != ISD::SHL) + return Default(); + SDValue Shl2_0 = Sub_1.getOperand(0); // Val + SDValue Shl2_1 = Sub_1.getOperand(1); // Const + if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(Shl2_1)) { + int32_t ValConst = 1 << (ShlConst + C2->getSExtValue()); + if (isInt<9>(-ValConst)) { + SDValue Val = CurDAG->getTargetConstant(-ValConst, dl, MVT::i32); + SDNode *Result = CurDAG->getMachineNode(Hexagon::M2_mpysmi, dl, + MVT::i32, Shl2_0, Val); + ReplaceNode(N, Result); + return; } } } } - SelectCode(N); + + return Default(); } @@ -815,20 +752,19 @@ void HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) { SDNode *Mask = CurDAG->getMachineNode(Hexagon::C2_mask, dl, MVT::i64, Op0); unsigned NE = OpVT.getVectorNumElements(); EVT ExVT = N->getValueType(0); - unsigned ES = ExVT.getVectorElementType().getSizeInBits(); + unsigned ES = ExVT.getScalarSizeInBits(); uint64_t MV = 0, Bit = 1; for (unsigned i = 0; i < NE; ++i) { MV |= Bit; Bit <<= ES; } SDValue Ones = CurDAG->getTargetConstant(MV, dl, MVT::i64); - SDNode *OnesReg = CurDAG->getMachineNode(Hexagon::CONST64_Int_Real, dl, + SDNode *OnesReg = CurDAG->getMachineNode(Hexagon::CONST64, dl, MVT::i64, Ones); if (ExVT.getSizeInBits() == 32) { SDNode *And = CurDAG->getMachineNode(Hexagon::A2_andp, dl, MVT::i64, SDValue(Mask,0), SDValue(OnesReg,0)); - SDValue SubR = CurDAG->getTargetConstant(Hexagon::subreg_loreg, dl, - MVT::i32); + SDValue SubR = CurDAG->getTargetConstant(Hexagon::isub_lo, dl, MVT::i32); ReplaceNode(N, CurDAG->getMachineNode(Hexagon::EXTRACT_SUBREG, dl, ExVT, SDValue(And, 0), SubR)); return; @@ -839,21 +775,18 @@ void HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) { return; } - SDNode *IsIntrinsic = N->getOperand(0).getNode(); - if ((IsIntrinsic->getOpcode() == ISD::INTRINSIC_WO_CHAIN)) { - unsigned ID = - cast<ConstantSDNode>(IsIntrinsic->getOperand(0))->getZExtValue(); + SDNode *Int = N->getOperand(0).getNode(); + if ((Int->getOpcode() == 
ISD::INTRINSIC_WO_CHAIN)) { + unsigned ID = cast<ConstantSDNode>(Int->getOperand(0))->getZExtValue(); if (doesIntrinsicReturnPredicate(ID)) { // Now we need to differentiate target data types. if (N->getValueType(0) == MVT::i64) { // Convert the zero_extend to Rs = Pd followed by A2_combinew(0,Rs). SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32); SDNode *Result_1 = CurDAG->getMachineNode(Hexagon::C2_tfrpr, dl, - MVT::i32, - SDValue(IsIntrinsic, 0)); + MVT::i32, SDValue(Int, 0)); SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, - MVT::i32, - TargetConst0); + MVT::i32, TargetConst0); SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::A2_combinew, dl, MVT::i64, MVT::Other, SDValue(Result_2, 0), @@ -864,8 +797,7 @@ void HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) { if (N->getValueType(0) == MVT::i32) { // Convert the zero_extend to Rs = Pd SDNode* RsPd = CurDAG->getMachineNode(Hexagon::C2_tfrpr, dl, - MVT::i32, - SDValue(IsIntrinsic, 0)); + MVT::i32, SDValue(Int, 0)); ReplaceNode(N, RsPd); return; } @@ -921,19 +853,15 @@ void HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) { void HexagonDAGToDAGISel::SelectConstantFP(SDNode *N) { SDLoc dl(N); ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N); - const APFloat &APF = CN->getValueAPF(); + APInt A = CN->getValueAPF().bitcastToAPInt(); if (N->getValueType(0) == MVT::f32) { - ReplaceNode( - N, CurDAG->getMachineNode(Hexagon::TFRI_f, dl, MVT::f32, - CurDAG->getTargetConstantFP( - APF.convertToFloat(), dl, MVT::f32))); + SDValue V = CurDAG->getTargetConstant(A.getZExtValue(), dl, MVT::i32); + ReplaceNode(N, CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::f32, V)); return; } - else if (N->getValueType(0) == MVT::f64) { - ReplaceNode( - N, CurDAG->getMachineNode(Hexagon::CONST64_Float_Real, dl, MVT::f64, - CurDAG->getTargetConstantFP( - APF.convertToDouble(), dl, MVT::f64))); + if (N->getValueType(0) == MVT::f64) { + SDValue V = CurDAG->getTargetConstant(A.getZExtValue(), dl, MVT::i64); + ReplaceNode(N, CurDAG->getMachineNode(Hexagon::CONST64, dl, MVT::f64, V)); return; } @@ -941,226 +869,46 @@ void HexagonDAGToDAGISel::SelectConstantFP(SDNode *N) { } // -// Map predicate true (encoded as -1 in LLVM) to a XOR. +// Map boolean values. // void HexagonDAGToDAGISel::SelectConstant(SDNode *N) { - SDLoc dl(N); if (N->getValueType(0) == MVT::i1) { - SDNode* Result = 0; - int32_t Val = cast<ConstantSDNode>(N)->getSExtValue(); - if (Val == -1) { - Result = CurDAG->getMachineNode(Hexagon::TFR_PdTrue, dl, MVT::i1); - } else if (Val == 0) { - Result = CurDAG->getMachineNode(Hexagon::TFR_PdFalse, dl, MVT::i1); - } - if (Result) { - ReplaceNode(N, Result); - return; - } - } - - SelectCode(N); -} - - -// -// Map add followed by a asr -> asr +=. -// -void HexagonDAGToDAGISel::SelectAdd(SDNode *N) { - SDLoc dl(N); - if (N->getValueType(0) != MVT::i32) { - SelectCode(N); - return; - } - // Identify nodes of the form: add(asr(...)). - SDNode* Src1 = N->getOperand(0).getNode(); - if (Src1->getOpcode() != ISD::SRA || !Src1->hasOneUse() - || Src1->getValueType(0) != MVT::i32) { - SelectCode(N); - return; - } - - // Build Rd = Rd' + asr(Rs, Rt). The machine constraints will ensure that - // Rd and Rd' are assigned to the same register - SDNode* Result = CurDAG->getMachineNode(Hexagon::S2_asr_r_r_acc, dl, MVT::i32, - N->getOperand(1), - Src1->getOperand(0), - Src1->getOperand(1)); - ReplaceNode(N, Result); -} - -// -// Map the following, where possible. -// AND/FABS -> clrbit -// OR -> setbit -// XOR/FNEG ->toggle_bit. 
-// -void HexagonDAGToDAGISel::SelectBitOp(SDNode *N) { - SDLoc dl(N); - EVT ValueVT = N->getValueType(0); - - // We handle only 32 and 64-bit bit ops. - if (!(ValueVT == MVT::i32 || ValueVT == MVT::i64 || - ValueVT == MVT::f32 || ValueVT == MVT::f64)) { - SelectCode(N); + assert(!(cast<ConstantSDNode>(N)->getZExtValue() >> 1)); + unsigned Opc = (cast<ConstantSDNode>(N)->getSExtValue() != 0) + ? Hexagon::PS_true + : Hexagon::PS_false; + ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), MVT::i1)); return; } - // We handly only fabs and fneg for V5. - unsigned Opc = N->getOpcode(); - if ((Opc == ISD::FABS || Opc == ISD::FNEG) && !HST->hasV5TOps()) { - SelectCode(N); - return; - } - - int64_t Val = 0; - if (Opc != ISD::FABS && Opc != ISD::FNEG) { - if (N->getOperand(1).getOpcode() == ISD::Constant) - Val = cast<ConstantSDNode>((N)->getOperand(1))->getSExtValue(); - else { - SelectCode(N); - return; - } - } - - if (Opc == ISD::AND) { - // Check if this is a bit-clearing AND, if not select code the usual way. - if ((ValueVT == MVT::i32 && isPowerOf2_32(~Val)) || - (ValueVT == MVT::i64 && isPowerOf2_64(~Val))) - Val = ~Val; - else { - SelectCode(N); - return; - } - } - - // If OR or AND is being fed by shl, srl and, sra don't do this change, - // because Hexagon provide |= &= on shl, srl, and sra. - // Traverse the DAG to see if there is shl, srl and sra. - if (Opc == ISD::OR || Opc == ISD::AND) { - switch (N->getOperand(0)->getOpcode()) { - default: - break; - case ISD::SRA: - case ISD::SRL: - case ISD::SHL: - SelectCode(N); - return; - } - } - - // Make sure it's power of 2. - unsigned BitPos = 0; - if (Opc != ISD::FABS && Opc != ISD::FNEG) { - if ((ValueVT == MVT::i32 && !isPowerOf2_32(Val)) || - (ValueVT == MVT::i64 && !isPowerOf2_64(Val))) { - SelectCode(N); - return; - } - - // Get the bit position. - BitPos = countTrailingZeros(uint64_t(Val)); - } else { - // For fabs and fneg, it's always the 31st bit. - BitPos = 31; - } - - unsigned BitOpc = 0; - // Set the right opcode for bitwise operations. - switch (Opc) { - default: - llvm_unreachable("Only bit-wise/abs/neg operations are allowed."); - case ISD::AND: - case ISD::FABS: - BitOpc = Hexagon::S2_clrbit_i; - break; - case ISD::OR: - BitOpc = Hexagon::S2_setbit_i; - break; - case ISD::XOR: - case ISD::FNEG: - BitOpc = Hexagon::S2_togglebit_i; - break; - } - - SDNode *Result; - // Get the right SDVal for the opcode. - SDValue SDVal = CurDAG->getTargetConstant(BitPos, dl, MVT::i32); - - if (ValueVT == MVT::i32 || ValueVT == MVT::f32) { - Result = CurDAG->getMachineNode(BitOpc, dl, ValueVT, - N->getOperand(0), SDVal); - } else { - // 64-bit gymnastic to use REG_SEQUENCE. But it's worth it. - EVT SubValueVT; - if (ValueVT == MVT::i64) - SubValueVT = MVT::i32; - else - SubValueVT = MVT::f32; - - SDNode *Reg = N->getOperand(0).getNode(); - SDValue RegClass = CurDAG->getTargetConstant(Hexagon::DoubleRegsRegClassID, - dl, MVT::i64); - - SDValue SubregHiIdx = CurDAG->getTargetConstant(Hexagon::subreg_hireg, dl, - MVT::i32); - SDValue SubregLoIdx = CurDAG->getTargetConstant(Hexagon::subreg_loreg, dl, - MVT::i32); - - SDValue SubregHI = CurDAG->getTargetExtractSubreg(Hexagon::subreg_hireg, dl, - MVT::i32, SDValue(Reg, 0)); - - SDValue SubregLO = CurDAG->getTargetExtractSubreg(Hexagon::subreg_loreg, dl, - MVT::i32, SDValue(Reg, 0)); - - // Clear/set/toggle hi or lo registers depending on the bit position. 
- if (SubValueVT != MVT::f32 && BitPos < 32) { - SDNode *Result0 = CurDAG->getMachineNode(BitOpc, dl, SubValueVT, - SubregLO, SDVal); - const SDValue Ops[] = { RegClass, SubregHI, SubregHiIdx, - SDValue(Result0, 0), SubregLoIdx }; - Result = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, - dl, ValueVT, Ops); - } else { - if (Opc != ISD::FABS && Opc != ISD::FNEG) - SDVal = CurDAG->getTargetConstant(BitPos-32, dl, MVT::i32); - SDNode *Result0 = CurDAG->getMachineNode(BitOpc, dl, SubValueVT, - SubregHI, SDVal); - const SDValue Ops[] = { RegClass, SDValue(Result0, 0), SubregHiIdx, - SubregLO, SubregLoIdx }; - Result = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, - dl, ValueVT, Ops); - } - } - - ReplaceNode(N, Result); + SelectCode(N); } void HexagonDAGToDAGISel::SelectFrameIndex(SDNode *N) { - MachineFrameInfo *MFI = MF->getFrameInfo(); + MachineFrameInfo &MFI = MF->getFrameInfo(); const HexagonFrameLowering *HFI = HST->getFrameLowering(); int FX = cast<FrameIndexSDNode>(N)->getIndex(); unsigned StkA = HFI->getStackAlignment(); - unsigned MaxA = MFI->getMaxAlignment(); + unsigned MaxA = MFI.getMaxAlignment(); SDValue FI = CurDAG->getTargetFrameIndex(FX, MVT::i32); SDLoc DL(N); SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); - SDNode *R = 0; + SDNode *R = nullptr; - // Use TFR_FI when: + // Use PS_fi when: // - the object is fixed, or // - there are no objects with higher-than-default alignment, or // - there are no dynamically allocated objects. - // Otherwise, use TFR_FIA. - if (FX < 0 || MaxA <= StkA || !MFI->hasVarSizedObjects()) { - R = CurDAG->getMachineNode(Hexagon::TFR_FI, DL, MVT::i32, FI, Zero); + // Otherwise, use PS_fia. + if (FX < 0 || MaxA <= StkA || !MFI.hasVarSizedObjects()) { + R = CurDAG->getMachineNode(Hexagon::PS_fi, DL, MVT::i32, FI, Zero); } else { auto &HMFI = *MF->getInfo<HexagonMachineFunctionInfo>(); unsigned AR = HMFI.getStackAlignBaseVReg(); SDValue CH = CurDAG->getEntryNode(); SDValue Ops[] = { CurDAG->getCopyFromReg(CH, DL, AR, MVT::i32), FI, Zero }; - R = CurDAG->getMachineNode(Hexagon::TFR_FIA, DL, MVT::i32, Ops); + R = CurDAG->getMachineNode(Hexagon::PS_fia, DL, MVT::i32, Ops); } ReplaceNode(N, R); @@ -1202,10 +950,6 @@ void HexagonDAGToDAGISel::Select(SDNode *N) { SelectFrameIndex(N); return; - case ISD::ADD: - SelectAdd(N); - return; - case ISD::BITCAST: SelectBitcast(N); return; @@ -1226,14 +970,6 @@ void HexagonDAGToDAGISel::Select(SDNode *N) { SelectMul(N); return; - case ISD::AND: - case ISD::OR: - case ISD::XOR: - case ISD::FABS: - case ISD::FNEG: - SelectBitOp(N); - return; - case ISD::ZERO_EXTEND: SelectZeroExtend(N); return; @@ -1373,6 +1109,16 @@ void HexagonDAGToDAGISel::PreprocessISelDAG() { SDValue NewShl = DAG.getNode(ISD::SHL, DL, VT, NewAdd, C); ReplaceNode(T0.getNode(), NewShl.getNode()); } + + if (EnableAddressRebalancing) { + rebalanceAddressTrees(); + + DEBUG( + dbgs() << "************* SelectionDAG after preprocessing: ***********\n"; + CurDAG->dump(); + dbgs() << "************* End SelectionDAG after preprocessing ********\n"; + ); + } } void HexagonDAGToDAGISel::EmitFunctionEntryCode() { @@ -1381,11 +1127,11 @@ void HexagonDAGToDAGISel::EmitFunctionEntryCode() { if (!HFI.needsAligna(*MF)) return; - MachineFrameInfo *MFI = MF->getFrameInfo(); + MachineFrameInfo &MFI = MF->getFrameInfo(); MachineBasicBlock *EntryBB = &MF->front(); unsigned AR = FuncInfo->CreateReg(MVT::i32); - unsigned MaxA = MFI->getMaxAlignment(); - BuildMI(EntryBB, DebugLoc(), HII->get(Hexagon::ALIGNA), AR) + unsigned MaxA = MFI.getMaxAlignment(); + 
BuildMI(EntryBB, DebugLoc(), HII->get(Hexagon::PS_aligna), AR) .addImm(MaxA); MF->getInfo<HexagonMachineFunctionInfo>()->setStackAlignBaseVReg(AR); } @@ -1395,9 +1141,9 @@ bool HexagonDAGToDAGISel::SelectAddrFI(SDValue& N, SDValue &R) { if (N.getOpcode() != ISD::FrameIndex) return false; auto &HFI = *HST->getFrameLowering(); - MachineFrameInfo *MFI = MF->getFrameInfo(); + MachineFrameInfo &MFI = MF->getFrameInfo(); int FX = cast<FrameIndexSDNode>(N)->getIndex(); - if (!MFI->isFixedObjectIndex(FX) && HFI.needsAligna(*MF)) + if (!MFI.isFixedObjectIndex(FX) && HFI.needsAligna(*MF)) return false; R = CurDAG->getTargetFrameIndex(FX, MVT::i32); return true; @@ -1519,15 +1265,15 @@ bool HexagonDAGToDAGISel::isValueExtension(const SDValue &Val, } -bool HexagonDAGToDAGISel::orIsAdd(const SDNode *N) const { +bool HexagonDAGToDAGISel::isOrEquivalentToAdd(const SDNode *N) const { assert(N->getOpcode() == ISD::OR); auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); assert(C); // Detect when "or" is used to add an offset to a stack object. if (auto *FN = dyn_cast<FrameIndexSDNode>(N->getOperand(0))) { - MachineFrameInfo *MFI = MF->getFrameInfo(); - unsigned A = MFI->getObjectAlignment(FN->getIndex()); + MachineFrameInfo &MFI = MF->getFrameInfo(); + unsigned A = MFI.getObjectAlignment(FN->getIndex()); assert(isPowerOf2_32(A)); int32_t Off = C->getSExtValue(); // If the alleged offset fits in the zero bits guaranteed by @@ -1540,3 +1286,717 @@ bool HexagonDAGToDAGISel::orIsAdd(const SDNode *N) const { bool HexagonDAGToDAGISel::isAlignedMemNode(const MemSDNode *N) const { return N->getAlignment() >= N->getMemoryVT().getStoreSize(); } + +// Return true when the given node fits in a positive half word. +bool HexagonDAGToDAGISel::isPositiveHalfWord(const SDNode *N) const { + if (const ConstantSDNode *CN = dyn_cast<const ConstantSDNode>(N)) { + int64_t V = CN->getSExtValue(); + return V > 0 && isInt<16>(V); + } + if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) { + const VTSDNode *VN = dyn_cast<const VTSDNode>(N->getOperand(1)); + return VN->getVT().getSizeInBits() <= 16; + } + return false; +} + +//////////////////////////////////////////////////////////////////////////////// +// Rebalancing of address calculation trees + +static bool isOpcodeHandled(const SDNode *N) { + switch (N->getOpcode()) { + case ISD::ADD: + case ISD::MUL: + return true; + case ISD::SHL: + // We only handle constant shifts because these can be easily flattened + // into multiplications by 2^Op1. 
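// For example, (x << 3) takes part in the rebalancing below as (x * 8).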
+ return isa<ConstantSDNode>(N->getOperand(1).getNode()); + default: + return false; + } +} + +/// \brief Return the weight of an SDNode +int HexagonDAGToDAGISel::getWeight(SDNode *N) { + if (!isOpcodeHandled(N)) + return 1; + assert(RootWeights.count(N) && "Cannot get weight of unseen root!"); + assert(RootWeights[N] != -1 && "Cannot get weight of unvisited root!"); + assert(RootWeights[N] != -2 && "Cannot get weight of RAUW'd root!"); + return RootWeights[N]; +} + +int HexagonDAGToDAGISel::getHeight(SDNode *N) { + if (!isOpcodeHandled(N)) + return 0; + assert(RootWeights.count(N) && RootWeights[N] >= 0 && + "Cannot query height of unvisited/RAUW'd node!"); + return RootHeights[N]; +} + +namespace { +struct WeightedLeaf { + SDValue Value; + int Weight; + int InsertionOrder; + + WeightedLeaf() : Value(SDValue()) { } + + WeightedLeaf(SDValue Value, int Weight, int InsertionOrder) : + Value(Value), Weight(Weight), InsertionOrder(InsertionOrder) { + assert(Weight >= 0 && "Weight must be >= 0"); + } + + static bool Compare(const WeightedLeaf &A, const WeightedLeaf &B) { + assert(A.Value.getNode() && B.Value.getNode()); + return A.Weight == B.Weight ? + (A.InsertionOrder > B.InsertionOrder) : + (A.Weight > B.Weight); + } +}; + +/// A specialized priority queue for WeightedLeaves. It automatically folds +/// constants and allows removal of non-top elements while maintaining the +/// priority order. +class LeafPrioQueue { + SmallVector<WeightedLeaf, 8> Q; + bool HaveConst; + WeightedLeaf ConstElt; + unsigned Opcode; + +public: + bool empty() { + return (!HaveConst && Q.empty()); + } + + size_t size() { + return Q.size() + HaveConst; + } + + bool hasConst() { + return HaveConst; + } + + const WeightedLeaf &top() { + if (HaveConst) + return ConstElt; + return Q.front(); + } + + WeightedLeaf pop() { + if (HaveConst) { + HaveConst = false; + return ConstElt; + } + std::pop_heap(Q.begin(), Q.end(), WeightedLeaf::Compare); + return Q.pop_back_val(); + } + + void push(WeightedLeaf L, bool SeparateConst=true) { + if (!HaveConst && SeparateConst && isa<ConstantSDNode>(L.Value)) { + if (Opcode == ISD::MUL && + cast<ConstantSDNode>(L.Value)->getSExtValue() == 1) + return; + if (Opcode == ISD::ADD && + cast<ConstantSDNode>(L.Value)->getSExtValue() == 0) + return; + + HaveConst = true; + ConstElt = L; + } else { + Q.push_back(L); + std::push_heap(Q.begin(), Q.end(), WeightedLeaf::Compare); + } + } + + /// Push L to the bottom of the queue regardless of its weight. If L is + /// constant, it will not be folded with other constants in the queue. + void pushToBottom(WeightedLeaf L) { + L.Weight = 1000; + push(L, false); + } + + /// Search for a SHL(x, [<=MaxAmount]) subtree in the queue, return the one of + /// lowest weight and remove it from the queue. 
+ WeightedLeaf findSHL(uint64_t MaxAmount); + + WeightedLeaf findMULbyConst(); + + LeafPrioQueue(unsigned Opcode) : + HaveConst(false), Opcode(Opcode) { } +}; +} // end anonymous namespace + +WeightedLeaf LeafPrioQueue::findSHL(uint64_t MaxAmount) { + int ResultPos; + WeightedLeaf Result; + + for (int Pos = 0, End = Q.size(); Pos != End; ++Pos) { + const WeightedLeaf &L = Q[Pos]; + const SDValue &Val = L.Value; + if (Val.getOpcode() != ISD::SHL || + !isa<ConstantSDNode>(Val.getOperand(1)) || + Val.getConstantOperandVal(1) > MaxAmount) + continue; + if (!Result.Value.getNode() || Result.Weight > L.Weight || + (Result.Weight == L.Weight && Result.InsertionOrder > L.InsertionOrder)) + { + Result = L; + ResultPos = Pos; + } + } + + if (Result.Value.getNode()) { + Q.erase(&Q[ResultPos]); + std::make_heap(Q.begin(), Q.end(), WeightedLeaf::Compare); + } + + return Result; +} + +WeightedLeaf LeafPrioQueue::findMULbyConst() { + int ResultPos; + WeightedLeaf Result; + + for (int Pos = 0, End = Q.size(); Pos != End; ++Pos) { + const WeightedLeaf &L = Q[Pos]; + const SDValue &Val = L.Value; + if (Val.getOpcode() != ISD::MUL || + !isa<ConstantSDNode>(Val.getOperand(1)) || + Val.getConstantOperandVal(1) > 127) + continue; + if (!Result.Value.getNode() || Result.Weight > L.Weight || + (Result.Weight == L.Weight && Result.InsertionOrder > L.InsertionOrder)) + { + Result = L; + ResultPos = Pos; + } + } + + if (Result.Value.getNode()) { + Q.erase(&Q[ResultPos]); + std::make_heap(Q.begin(), Q.end(), WeightedLeaf::Compare); + } + + return Result; +} + +SDValue HexagonDAGToDAGISel::getMultiplierForSHL(SDNode *N) { + uint64_t MulFactor = 1ull << N->getConstantOperandVal(1); + return CurDAG->getConstant(MulFactor, SDLoc(N), + N->getOperand(1).getValueType()); +} + +/// @returns the value x for which 2^x is a factor of Val +static unsigned getPowerOf2Factor(SDValue Val) { + if (Val.getOpcode() == ISD::MUL) { + unsigned MaxFactor = 0; + for (int i = 0; i < 2; ++i) { + ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(i)); + if (!C) + continue; + const APInt &CInt = C->getAPIntValue(); + if (CInt.getBoolValue()) + MaxFactor = CInt.countTrailingZeros(); + } + return MaxFactor; + } + if (Val.getOpcode() == ISD::SHL) { + if (!isa<ConstantSDNode>(Val.getOperand(1).getNode())) + return 0; + return (unsigned) Val.getConstantOperandVal(1); + } + + return 0; +} + +/// @returns true if V>>Amount will eliminate V's operation on its child +static bool willShiftRightEliminate(SDValue V, unsigned Amount) { + if (V.getOpcode() == ISD::MUL) { + SDValue Ops[] = { V.getOperand(0), V.getOperand(1) }; + for (int i = 0; i < 2; ++i) + if (isa<ConstantSDNode>(Ops[i].getNode()) && + V.getConstantOperandVal(i) % (1ULL << Amount) == 0) { + uint64_t NewConst = V.getConstantOperandVal(i) >> Amount; + return (NewConst == 1); + } + } else if (V.getOpcode() == ISD::SHL) { + return (Amount == V.getConstantOperandVal(1)); + } + + return false; +} + +SDValue HexagonDAGToDAGISel::factorOutPowerOf2(SDValue V, unsigned Power) { + SDValue Ops[] = { V.getOperand(0), V.getOperand(1) }; + if (V.getOpcode() == ISD::MUL) { + for (int i=0; i < 2; ++i) { + if (isa<ConstantSDNode>(Ops[i].getNode()) && + V.getConstantOperandVal(i) % ((uint64_t)1 << Power) == 0) { + uint64_t NewConst = V.getConstantOperandVal(i) >> Power; + if (NewConst == 1) + return Ops[!i]; + Ops[i] = CurDAG->getConstant(NewConst, + SDLoc(V), V.getValueType()); + break; + } + } + } else if (V.getOpcode() == ISD::SHL) { + uint64_t ShiftAmount = V.getConstantOperandVal(1); + if 
(ShiftAmount == Power) + return Ops[0]; + Ops[1] = CurDAG->getConstant(ShiftAmount - Power, + SDLoc(V), V.getValueType()); + } + + return CurDAG->getNode(V.getOpcode(), SDLoc(V), V.getValueType(), Ops); +} + +static bool isTargetConstant(const SDValue &V) { + return V.getOpcode() == HexagonISD::CONST32 || + V.getOpcode() == HexagonISD::CONST32_GP; +} + +unsigned HexagonDAGToDAGISel::getUsesInFunction(const Value *V) { + if (GAUsesInFunction.count(V)) + return GAUsesInFunction[V]; + + unsigned Result = 0; + const Function *CurF = CurDAG->getMachineFunction().getFunction(); + for (const User *U : V->users()) { + if (isa<Instruction>(U) && + cast<Instruction>(U)->getParent()->getParent() == CurF) + ++Result; + } + + GAUsesInFunction[V] = Result; + + return Result; +} + +/// Note - After calling this, N may be dead. It may have been replaced by a +/// new node, so always use the returned value in place of N. +/// +/// @returns The SDValue taking the place of N (which could be N if it is +/// unchanged) +SDValue HexagonDAGToDAGISel::balanceSubTree(SDNode *N, bool TopLevel) { + assert(RootWeights.count(N) && "Cannot balance non-root node."); + assert(RootWeights[N] != -2 && "This node was RAUW'd!"); + assert(!TopLevel || N->getOpcode() == ISD::ADD); + + // Return early if this node was already visited + if (RootWeights[N] != -1) + return SDValue(N, 0); + + assert(isOpcodeHandled(N)); + + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + + // Return early if the operands will remain unchanged or are all roots + if ((!isOpcodeHandled(Op0.getNode()) || RootWeights.count(Op0.getNode())) && + (!isOpcodeHandled(Op1.getNode()) || RootWeights.count(Op1.getNode()))) { + SDNode *Op0N = Op0.getNode(); + int Weight; + if (isOpcodeHandled(Op0N) && RootWeights[Op0N] == -1) { + Weight = getWeight(balanceSubTree(Op0N).getNode()); + // Weight = calculateWeight(Op0N); + } else + Weight = getWeight(Op0N); + + SDNode *Op1N = N->getOperand(1).getNode(); // Op1 may have been RAUWd + if (isOpcodeHandled(Op1N) && RootWeights[Op1N] == -1) { + Weight += getWeight(balanceSubTree(Op1N).getNode()); + // Weight += calculateWeight(Op1N); + } else + Weight += getWeight(Op1N); + + RootWeights[N] = Weight; + RootHeights[N] = std::max(getHeight(N->getOperand(0).getNode()), + getHeight(N->getOperand(1).getNode())) + 1; + + DEBUG(dbgs() << "--> No need to balance root (Weight=" << Weight + << " Height=" << RootHeights[N] << "): "); + DEBUG(N->dump()); + + return SDValue(N, 0); + } + + DEBUG(dbgs() << "** Balancing root node: "); + DEBUG(N->dump()); + + unsigned NOpcode = N->getOpcode(); + + LeafPrioQueue Leaves(NOpcode); + SmallVector<SDValue, 4> Worklist; + Worklist.push_back(SDValue(N, 0)); + + // SHL nodes will be converted to MUL nodes + if (NOpcode == ISD::SHL) + NOpcode = ISD::MUL; + + bool CanFactorize = false; + WeightedLeaf Mul1, Mul2; + unsigned MaxPowerOf2 = 0; + WeightedLeaf GA; + + // Do not try to factor out a shift if there is already a shift at the tip of + // the tree. + bool HaveTopLevelShift = false; + if (TopLevel && + ((isOpcodeHandled(Op0.getNode()) && Op0.getOpcode() == ISD::SHL && + Op0.getConstantOperandVal(1) < 4) || + (isOpcodeHandled(Op1.getNode()) && Op1.getOpcode() == ISD::SHL && + Op1.getConstantOperandVal(1) < 4))) + HaveTopLevelShift = true; + + // Flatten the subtree into an ordered list of leaves; at the same time + // determine whether the tree is already balanced. 
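// E.g. (illustrative): ADD(ADD(ADD(a, b), c), d) flattens to the leaves
// {a, b, c, d} and has height 3, while the balanced form
// ADD(ADD(a, b), ADD(c, d)) has height 2, so the left-leaning tree is
// flagged as imbalanced.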
+ int InsertionOrder = 0; + SmallDenseMap<SDValue, int> NodeHeights; + bool Imbalanced = false; + int CurrentWeight = 0; + while (!Worklist.empty()) { + SDValue Child = Worklist.pop_back_val(); + + if (Child.getNode() != N && RootWeights.count(Child.getNode())) { + // CASE 1: Child is a root node + + int Weight = RootWeights[Child.getNode()]; + if (Weight == -1) { + Child = balanceSubTree(Child.getNode()); + // calculateWeight(Child.getNode()); + Weight = getWeight(Child.getNode()); + } else if (Weight == -2) { + // Whoops, this node was RAUWd by one of the balanceSubTree calls we + // made. Our worklist isn't up to date anymore. + // Restart the whole process. + DEBUG(dbgs() << "--> Subtree was RAUWd. Restarting...\n"); + return balanceSubTree(N, TopLevel); + } + + NodeHeights[Child] = 1; + CurrentWeight += Weight; + + unsigned PowerOf2; + if (TopLevel && !CanFactorize && !HaveTopLevelShift && + (Child.getOpcode() == ISD::MUL || Child.getOpcode() == ISD::SHL) && + Child.hasOneUse() && (PowerOf2 = getPowerOf2Factor(Child))) { + // Try to identify two factorizable MUL/SHL children greedily. Leave + // them out of the priority queue for now so we can deal with them + // after. + if (!Mul1.Value.getNode()) { + Mul1 = WeightedLeaf(Child, Weight, InsertionOrder++); + MaxPowerOf2 = PowerOf2; + } else { + Mul2 = WeightedLeaf(Child, Weight, InsertionOrder++); + MaxPowerOf2 = std::min(MaxPowerOf2, PowerOf2); + + // Our addressing modes can only shift by a maximum of 3 + if (MaxPowerOf2 > 3) + MaxPowerOf2 = 3; + + CanFactorize = true; + } + } else + Leaves.push(WeightedLeaf(Child, Weight, InsertionOrder++)); + } else if (!isOpcodeHandled(Child.getNode())) { + // CASE 2: Child is an unhandled kind of node (e.g. constant) + int Weight = getWeight(Child.getNode()); + + NodeHeights[Child] = getHeight(Child.getNode()); + CurrentWeight += Weight; + + if (isTargetConstant(Child) && !GA.Value.getNode()) + GA = WeightedLeaf(Child, Weight, InsertionOrder++); + else + Leaves.push(WeightedLeaf(Child, Weight, InsertionOrder++)); + } else { + // CASE 3: Child is a subtree of same opcode + // Visit children first, then flatten. + unsigned ChildOpcode = Child.getOpcode(); + assert(ChildOpcode == NOpcode || + (NOpcode == ISD::MUL && ChildOpcode == ISD::SHL)); + + // Convert SHL to MUL + SDValue Op1; + if (ChildOpcode == ISD::SHL) + Op1 = getMultiplierForSHL(Child.getNode()); + else + Op1 = Child->getOperand(1); + + if (!NodeHeights.count(Op1) || !NodeHeights.count(Child->getOperand(0))) { + assert(!NodeHeights.count(Child) && "Parent visited before children?"); + // Visit children first, then re-visit this node + Worklist.push_back(Child); + Worklist.push_back(Op1); + Worklist.push_back(Child->getOperand(0)); + } else { + // Back at this node after visiting the children + if (std::abs(NodeHeights[Op1] - NodeHeights[Child->getOperand(0)]) > 1) + Imbalanced = true; + + NodeHeights[Child] = std::max(NodeHeights[Op1], + NodeHeights[Child->getOperand(0)]) + 1; + } + } + } + + DEBUG(dbgs() << "--> Current height=" << NodeHeights[SDValue(N, 0)] + << " weight=" << CurrentWeight << " imbalanced=" + << Imbalanced << "\n"); + + // Transform MUL(x, C * 2^Y) + SHL(z, Y) -> SHL(ADD(MUL(x, C), z), Y) + // This factors out a shift in order to match memw(a<<Y+b). 
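// Worked example (illustrative): MUL(x, 4) + SHL(z, 2) share the factor 2^2;
// factoring it out yields SHL(ADD(x, z), 2), since MUL(x, 4) >> 2 reduces to
// plain x, and the resulting shifted add matches the scaled addressing mode.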
+ if (CanFactorize && (willShiftRightEliminate(Mul1.Value, MaxPowerOf2) || + willShiftRightEliminate(Mul2.Value, MaxPowerOf2))) { + DEBUG(dbgs() << "--> Found common factor for two MUL children!\n"); + int Weight = Mul1.Weight + Mul2.Weight; + int Height = std::max(NodeHeights[Mul1.Value], NodeHeights[Mul2.Value]) + 1; + SDValue Mul1Factored = factorOutPowerOf2(Mul1.Value, MaxPowerOf2); + SDValue Mul2Factored = factorOutPowerOf2(Mul2.Value, MaxPowerOf2); + SDValue Sum = CurDAG->getNode(ISD::ADD, SDLoc(N), Mul1.Value.getValueType(), + Mul1Factored, Mul2Factored); + SDValue Const = CurDAG->getConstant(MaxPowerOf2, SDLoc(N), + Mul1.Value.getValueType()); + SDValue New = CurDAG->getNode(ISD::SHL, SDLoc(N), Mul1.Value.getValueType(), + Sum, Const); + NodeHeights[New] = Height; + Leaves.push(WeightedLeaf(New, Weight, Mul1.InsertionOrder)); + } else if (Mul1.Value.getNode()) { + // We failed to factorize two MULs, so now the Muls are left outside the + // queue... add them back. + Leaves.push(Mul1); + if (Mul2.Value.getNode()) + Leaves.push(Mul2); + CanFactorize = false; + } + + // Combine GA + Constant -> GA+Offset, but only if GA is not used elsewhere + // and the root node itself is not used more than twice. This reduces the + // amount of additional constant extenders introduced by this optimization. + bool CombinedGA = false; + if (NOpcode == ISD::ADD && GA.Value.getNode() && Leaves.hasConst() && + GA.Value.hasOneUse() && N->use_size() < 3) { + GlobalAddressSDNode *GANode = + cast<GlobalAddressSDNode>(GA.Value.getOperand(0)); + ConstantSDNode *Offset = cast<ConstantSDNode>(Leaves.top().Value); + + if (getUsesInFunction(GANode->getGlobal()) == 1 && Offset->hasOneUse() && + getTargetLowering()->isOffsetFoldingLegal(GANode)) { + DEBUG(dbgs() << "--> Combining GA and offset (" << Offset->getSExtValue() + << "): "); + DEBUG(GANode->dump()); + + SDValue NewTGA = + CurDAG->getTargetGlobalAddress(GANode->getGlobal(), SDLoc(GA.Value), + GANode->getValueType(0), + GANode->getOffset() + (uint64_t)Offset->getSExtValue()); + GA.Value = CurDAG->getNode(GA.Value.getOpcode(), SDLoc(GA.Value), + GA.Value.getValueType(), NewTGA); + GA.Weight += Leaves.top().Weight; + + NodeHeights[GA.Value] = getHeight(GA.Value.getNode()); + CombinedGA = true; + + Leaves.pop(); // Remove the offset constant from the queue + } + } + + if ((RebalanceOnlyForOptimizations && !CanFactorize && !CombinedGA) || + (RebalanceOnlyImbalancedTrees && !Imbalanced)) { + RootWeights[N] = CurrentWeight; + RootHeights[N] = NodeHeights[SDValue(N, 0)]; + + return SDValue(N, 0); + } + + // Combine GA + SHL(x, C<=31) so we will match Rx=add(#u8,asl(Rx,#U5)) + if (NOpcode == ISD::ADD && GA.Value.getNode()) { + WeightedLeaf SHL = Leaves.findSHL(31); + if (SHL.Value.getNode()) { + int Height = std::max(NodeHeights[GA.Value], NodeHeights[SHL.Value]) + 1; + GA.Value = CurDAG->getNode(ISD::ADD, SDLoc(GA.Value), + GA.Value.getValueType(), + GA.Value, SHL.Value); + GA.Weight = SHL.Weight; // Specifically ignore the GA weight here + NodeHeights[GA.Value] = Height; + } + } + + if (GA.Value.getNode()) + Leaves.push(GA); + + // If this is the top level and we haven't factored out a shift, we should try + // to move a constant to the bottom to match addressing modes like memw(rX+C) + if (TopLevel && !CanFactorize && Leaves.hasConst()) { + DEBUG(dbgs() << "--> Pushing constant to tip of tree."); + Leaves.pushToBottom(Leaves.pop()); + } + + const DataLayout &DL = CurDAG->getDataLayout(); + const TargetLowering &TLI = *getTargetLowering(); + + // Rebuild the tree 
using Huffman's algorithm + while (Leaves.size() > 1) { + WeightedLeaf L0 = Leaves.pop(); + + // See whether we can grab a MUL to form an add(Rx,mpyi(Ry,#u6)), + // otherwise just get the next leaf + WeightedLeaf L1 = Leaves.findMULbyConst(); + if (!L1.Value.getNode()) + L1 = Leaves.pop(); + + assert(L0.Weight <= L1.Weight && "Priority queue is broken!"); + + SDValue V0 = L0.Value; + int V0Weight = L0.Weight; + SDValue V1 = L1.Value; + int V1Weight = L1.Weight; + + // Make sure that none of these nodes have been RAUW'd + if ((RootWeights.count(V0.getNode()) && RootWeights[V0.getNode()] == -2) || + (RootWeights.count(V1.getNode()) && RootWeights[V1.getNode()] == -2)) { + DEBUG(dbgs() << "--> Subtree was RAUWd. Restarting...\n"); + return balanceSubTree(N, TopLevel); + } + + ConstantSDNode *V0C = dyn_cast<ConstantSDNode>(V0); + ConstantSDNode *V1C = dyn_cast<ConstantSDNode>(V1); + EVT VT = N->getValueType(0); + SDValue NewNode; + + if (V0C && !V1C) { + std::swap(V0, V1); + std::swap(V0C, V1C); + } + + // Calculate height of this node + assert(NodeHeights.count(V0) && NodeHeights.count(V1) && + "Children must have been visited before re-combining them!"); + int Height = std::max(NodeHeights[V0], NodeHeights[V1]) + 1; + + // Rebuild this node (and restore SHL from MUL if needed) + if (V1C && NOpcode == ISD::MUL && V1C->getAPIntValue().isPowerOf2()) + NewNode = CurDAG->getNode( + ISD::SHL, SDLoc(V0), VT, V0, + CurDAG->getConstant( + V1C->getAPIntValue().logBase2(), SDLoc(N), + TLI.getScalarShiftAmountTy(DL, V0.getValueType()))); + else + NewNode = CurDAG->getNode(NOpcode, SDLoc(N), VT, V0, V1); + + NodeHeights[NewNode] = Height; + + int Weight = V0Weight + V1Weight; + Leaves.push(WeightedLeaf(NewNode, Weight, L0.InsertionOrder)); + + DEBUG(dbgs() << "--> Built new node (Weight=" << Weight << ",Height=" + << Height << "):\n"); + DEBUG(NewNode.dump()); + } + + assert(Leaves.size() == 1); + SDValue NewRoot = Leaves.top().Value; + + assert(NodeHeights.count(NewRoot)); + int Height = NodeHeights[NewRoot]; + + // Restore SHL if we earlier converted it to a MUL + if (NewRoot.getOpcode() == ISD::MUL) { + ConstantSDNode *V1C = dyn_cast<ConstantSDNode>(NewRoot.getOperand(1)); + if (V1C && V1C->getAPIntValue().isPowerOf2()) { + EVT VT = NewRoot.getValueType(); + SDValue V0 = NewRoot.getOperand(0); + NewRoot = CurDAG->getNode( + ISD::SHL, SDLoc(NewRoot), VT, V0, + CurDAG->getConstant( + V1C->getAPIntValue().logBase2(), SDLoc(NewRoot), + TLI.getScalarShiftAmountTy(DL, V0.getValueType()))); + } + } + + if (N != NewRoot.getNode()) { + DEBUG(dbgs() << "--> Root is now: "); + DEBUG(NewRoot.dump()); + + // Replace all uses of old root by new root + CurDAG->ReplaceAllUsesWith(N, NewRoot.getNode()); + // Mark that we have RAUW'd N + RootWeights[N] = -2; + } else { + DEBUG(dbgs() << "--> Root unchanged.\n"); + } + + RootWeights[NewRoot.getNode()] = Leaves.top().Weight; + RootHeights[NewRoot.getNode()] = Height; + + return NewRoot; +} + +void HexagonDAGToDAGISel::rebalanceAddressTrees() { + for (auto I = CurDAG->allnodes_begin(), E = CurDAG->allnodes_end(); I != E;) { + SDNode *N = &*I++; + if (N->getOpcode() != ISD::LOAD && N->getOpcode() != ISD::STORE) + continue; + + SDValue BasePtr = cast<MemSDNode>(N)->getBasePtr(); + if (BasePtr.getOpcode() != ISD::ADD) + continue; + + // We've already processed this node + if (RootWeights.count(BasePtr.getNode())) + continue; + + DEBUG(dbgs() << "** Rebalancing address calculation in node: "); + DEBUG(N->dump()); + + // FindRoots + SmallVector<SDNode *, 4> Worklist; + + 
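+    // Seed the worklist with both addends of the base pointer. A reachable
+    // node becomes a balancing root unless it has a single use under the
+    // same opcode, in which case it is folded into its parent's tree.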
Worklist.push_back(BasePtr.getOperand(0).getNode()); + Worklist.push_back(BasePtr.getOperand(1).getNode()); + + while (!Worklist.empty()) { + SDNode *N = Worklist.pop_back_val(); + unsigned Opcode = N->getOpcode(); + + if (!isOpcodeHandled(N)) + continue; + + Worklist.push_back(N->getOperand(0).getNode()); + Worklist.push_back(N->getOperand(1).getNode()); + + // Not a root if it has only one use and same opcode as its parent + if (N->hasOneUse() && Opcode == N->use_begin()->getOpcode()) + continue; + + // This root node has already been processed + if (RootWeights.count(N)) + continue; + + RootWeights[N] = -1; + } + + // Balance node itself + RootWeights[BasePtr.getNode()] = -1; + SDValue NewBasePtr = balanceSubTree(BasePtr.getNode(), /*TopLevel=*/ true); + + if (N->getOpcode() == ISD::LOAD) + N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), + NewBasePtr, N->getOperand(2)); + else + N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1), + NewBasePtr, N->getOperand(3)); + + DEBUG(dbgs() << "--> Final node: "); + DEBUG(N->dump()); + } + + CurDAG->RemoveDeadNodes(); + GAUsesInFunction.clear(); + RootHeights.clear(); + RootWeights.clear(); +} + diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp index cdd4c2f..e87e1e6 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -12,30 +12,52 @@ // //===----------------------------------------------------------------------===// +#include "Hexagon.h" #include "HexagonISelLowering.h" #include "HexagonMachineFunctionInfo.h" +#include "HexagonRegisterInfo.h" #include "HexagonSubtarget.h" #include "HexagonTargetMachine.h" #include "HexagonTargetObjectFile.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallingConv.h" +#include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" -#include "llvm/IR/GlobalAlias.h" -#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetCallingConv.h" +#include "llvm/Target/TargetMachine.h" +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <limits> +#include <utility> using namespace llvm; @@ -83,23 +105,31 @@ static cl::opt<int> MaxStoresPerMemsetOptSizeCL("max-store-memset-Os", namespace { -class HexagonCCState : public CCState { - unsigned NumNamedVarArgParams; -public: - 
HexagonCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF, - SmallVectorImpl<CCValAssign> &locs, LLVMContext &C, - int NumNamedVarArgParams) - : CCState(CC, isVarArg, MF, locs, C), - NumNamedVarArgParams(NumNamedVarArgParams) {} + class HexagonCCState : public CCState { + unsigned NumNamedVarArgParams; - unsigned getNumNamedVarArgParams() const { return NumNamedVarArgParams; } -}; -} + public: + HexagonCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF, + SmallVectorImpl<CCValAssign> &locs, LLVMContext &C, + int NumNamedVarArgParams) + : CCState(CC, isVarArg, MF, locs, C), + NumNamedVarArgParams(NumNamedVarArgParams) {} + + unsigned getNumNamedVarArgParams() const { return NumNamedVarArgParams; } + }; + + enum StridedLoadKind { + Even = 0, + Odd, + NoPattern + }; + +} // end anonymous namespace // Implement calling convention for Hexagon. -static bool IsHvxVectorType(MVT ty); +static bool isHvxVectorType(MVT ty); static bool CC_Hexagon(unsigned ValNo, MVT ValVT, @@ -153,13 +183,13 @@ CC_Hexagon_VarArg (unsigned ValNo, MVT ValVT, } // Deal with un-named arguments. - unsigned ofst; + unsigned Offset; if (ArgFlags.isByVal()) { // If pass-by-value, the size allocated on stack is decided // by ArgFlags.getByValSize(), not by the size of LocVT. - ofst = State.AllocateStack(ArgFlags.getByValSize(), - ArgFlags.getByValAlign()); - State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo)); + Offset = State.AllocateStack(ArgFlags.getByValSize(), + ArgFlags.getByValAlign()); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); return false; } if (LocVT == MVT::i1 || LocVT == MVT::i8 || LocVT == MVT::i16) { @@ -173,50 +203,49 @@ CC_Hexagon_VarArg (unsigned ValNo, MVT ValVT, LocInfo = CCValAssign::AExt; } if (LocVT == MVT::i32 || LocVT == MVT::f32) { - ofst = State.AllocateStack(4, 4); - State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo)); + Offset = State.AllocateStack(4, 4); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); return false; } if (LocVT == MVT::i64 || LocVT == MVT::f64) { - ofst = State.AllocateStack(8, 8); - State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo)); + Offset = State.AllocateStack(8, 8); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); return false; } if (LocVT == MVT::v2i64 || LocVT == MVT::v4i32 || LocVT == MVT::v8i16 || LocVT == MVT::v16i8) { - ofst = State.AllocateStack(16, 16); - State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo)); + Offset = State.AllocateStack(16, 16); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); return false; } if (LocVT == MVT::v4i64 || LocVT == MVT::v8i32 || LocVT == MVT::v16i16 || LocVT == MVT::v32i8) { - ofst = State.AllocateStack(32, 32); - State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo)); + Offset = State.AllocateStack(32, 32); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); return false; } if (LocVT == MVT::v8i64 || LocVT == MVT::v16i32 || LocVT == MVT::v32i16 || LocVT == MVT::v64i8 || LocVT == MVT::v512i1) { - ofst = State.AllocateStack(64, 64); - State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo)); + Offset = State.AllocateStack(64, 64); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); return false; } if (LocVT == MVT::v16i64 || LocVT == MVT::v32i32 || LocVT == MVT::v64i16 || LocVT == MVT::v128i8 || LocVT == MVT::v1024i1) { - ofst = State.AllocateStack(128, 128); - 
State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo)); + Offset = State.AllocateStack(128, 128); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); return false; } if (LocVT == MVT::v32i64 || LocVT == MVT::v64i32 || LocVT == MVT::v128i16 || LocVT == MVT::v256i8) { - ofst = State.AllocateStack(256, 256); - State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo)); + Offset = State.AllocateStack(256, 256); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); return false; } llvm_unreachable(nullptr); } - static bool CC_Hexagon (unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) { if (ArgFlags.isByVal()) { @@ -260,7 +289,7 @@ static bool CC_Hexagon (unsigned ValNo, MVT ValVT, MVT LocVT, return false; } - if (IsHvxVectorType(LocVT)) { + if (isHvxVectorType(LocVT)) { if (!CC_HexagonVector(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State)) return false; } @@ -272,7 +301,6 @@ static bool CC_Hexagon (unsigned ValNo, MVT ValVT, MVT LocVT, static bool CC_Hexagon32(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) { - static const MCPhysReg RegList[] = { Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4, Hexagon::R5 @@ -290,7 +318,6 @@ static bool CC_Hexagon32(unsigned ValNo, MVT ValVT, static bool CC_Hexagon64(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) { - if (unsigned Reg = State.AllocateReg(Hexagon::D0)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; @@ -315,19 +342,16 @@ static bool CC_Hexagon64(unsigned ValNo, MVT ValVT, static bool CC_HexagonVector(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) { - - static const MCPhysReg VecLstS[] = { Hexagon::V0, Hexagon::V1, - Hexagon::V2, Hexagon::V3, - Hexagon::V4, Hexagon::V5, - Hexagon::V6, Hexagon::V7, - Hexagon::V8, Hexagon::V9, - Hexagon::V10, Hexagon::V11, - Hexagon::V12, Hexagon::V13, - Hexagon::V14, Hexagon::V15}; - static const MCPhysReg VecLstD[] = { Hexagon::W0, Hexagon::W1, - Hexagon::W2, Hexagon::W3, - Hexagon::W4, Hexagon::W5, - Hexagon::W6, Hexagon::W7}; + static const MCPhysReg VecLstS[] = { + Hexagon::V0, Hexagon::V1, Hexagon::V2, Hexagon::V3, Hexagon::V4, + Hexagon::V5, Hexagon::V6, Hexagon::V7, Hexagon::V8, Hexagon::V9, + Hexagon::V10, Hexagon::V11, Hexagon::V12, Hexagon::V13, Hexagon::V14, + Hexagon::V15 + }; + static const MCPhysReg VecLstD[] = { + Hexagon::W0, Hexagon::W1, Hexagon::W2, Hexagon::W3, Hexagon::W4, + Hexagon::W5, Hexagon::W6, Hexagon::W7 + }; auto &MF = State.getMachineFunction(); auto &HST = MF.getSubtarget<HexagonSubtarget>(); bool UseHVX = HST.useHVXOps(); @@ -429,16 +453,16 @@ static bool RetCC_Hexagon(unsigned ValNo, MVT ValVT, } if (LocVT == MVT::i32 || LocVT == MVT::f32) { if (!RetCC_Hexagon32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State)) - return false; + return false; } if (LocVT == MVT::i64 || LocVT == MVT::f64) { if (!RetCC_Hexagon64(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State)) - return false; + return false; } if (LocVT == MVT::v16i32 || LocVT == MVT::v32i32 || LocVT == MVT::v64i32) { if (!RetCC_HexagonVector(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State)) - return false; + return false; } return true; // CC didn't match. 
} @@ -452,7 +476,7 @@ static bool RetCC_Hexagon32(unsigned ValNo, MVT ValVT, // return structs using these additional registers. static const uint16_t RegList[] = { Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, - Hexagon::R4, Hexagon::R5}; + Hexagon::R4, Hexagon::R5 }; if (unsigned Reg = State.AllocateReg(RegList)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; @@ -525,7 +549,7 @@ void HexagonTargetLowering::promoteLdStType(MVT VT, MVT PromotedLdStVT) { SDValue HexagonTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) -const { + const { return SDValue(); } @@ -537,7 +561,6 @@ const { static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl) { - SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32); return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), /*isVolatile=*/false, /*AlwaysInline=*/false, @@ -545,14 +568,26 @@ static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, MachinePointerInfo(), MachinePointerInfo()); } -static bool IsHvxVectorType(MVT ty) { - return (ty == MVT::v8i64 || ty == MVT::v16i32 || ty == MVT::v32i16 || - ty == MVT::v64i8 || - ty == MVT::v16i64 || ty == MVT::v32i32 || ty == MVT::v64i16 || - ty == MVT::v128i8 || - ty == MVT::v32i64 || ty == MVT::v64i32 || ty == MVT::v128i16 || - ty == MVT::v256i8 || - ty == MVT::v512i1 || ty == MVT::v1024i1); +static bool isHvxVectorType(MVT Ty) { + switch (Ty.SimpleTy) { + case MVT::v8i64: + case MVT::v16i32: + case MVT::v32i16: + case MVT::v64i8: + case MVT::v16i64: + case MVT::v32i32: + case MVT::v64i16: + case MVT::v128i8: + case MVT::v32i64: + case MVT::v64i32: + case MVT::v128i16: + case MVT::v256i8: + case MVT::v512i1: + case MVT::v1024i1: + return true; + default: + return false; + } } // LowerReturn - Lower ISD::RET. If a struct is larger than 8 bytes and is @@ -564,7 +599,6 @@ HexagonTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SDLoc &dl, SelectionDAG &DAG) const { - // CCValAssign - represent the assignment of the return value to locations. SmallVector<CCValAssign, 16> RVLocs; @@ -669,17 +703,17 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; SDValue Chain = CLI.Chain; SDValue Callee = CLI.Callee; - bool &isTailCall = CLI.IsTailCall; + bool &IsTailCall = CLI.IsTailCall; CallingConv::ID CallConv = CLI.CallConv; - bool isVarArg = CLI.IsVarArg; - bool doesNotReturn = CLI.DoesNotReturn; + bool IsVarArg = CLI.IsVarArg; + bool DoesNotReturn = CLI.DoesNotReturn; bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); MachineFunction &MF = DAG.getMachineFunction(); auto PtrVT = getPointerTy(MF.getDataLayout()); // Check for varargs. - int NumNamedVarArgParams = -1; + unsigned NumNamedVarArgParams = -1U; if (GlobalAddressSDNode *GAN = dyn_cast<GlobalAddressSDNode>(Callee)) { const GlobalValue *GV = GAN->getGlobal(); Callee = DAG.getTargetGlobalAddress(GV, dl, MVT::i32); @@ -694,32 +728,32 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Analyze operands of the call, assigning locations to each operand. 
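+  // Vararg calls are analyzed with CC_Hexagon_VarArg, which assigns all
+  // unnamed arguments to stack locations rather than registers.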
SmallVector<CCValAssign, 16> ArgLocs; - HexagonCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, + HexagonCCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs, *DAG.getContext(), NumNamedVarArgParams); - if (isVarArg) + if (IsVarArg) CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon_VarArg); else CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon); auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls"); if (Attr.getValueAsString() == "true") - isTailCall = false; + IsTailCall = false; - if (isTailCall) { + if (IsTailCall) { bool StructAttrFlag = MF.getFunction()->hasStructRetAttr(); - isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, - isVarArg, IsStructRet, + IsTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, + IsVarArg, IsStructRet, StructAttrFlag, Outs, OutVals, Ins, DAG); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; if (VA.isMemLoc()) { - isTailCall = false; + IsTailCall = false; break; } } - DEBUG(dbgs() << (isTailCall ? "Eligible for Tail Call\n" + DEBUG(dbgs() << (IsTailCall ? "Eligible for Tail Call\n" : "Argument must be passed on stack. " "Not eligible for Tail Call\n")); } @@ -740,7 +774,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SDValue Arg = OutVals[i]; ISD::ArgFlagsTy Flags = Outs[i].Flags; // Record if we need > 8 byte alignment on an argument. - bool ArgAlign = IsHvxVectorType(VA.getValVT()); + bool ArgAlign = isHvxVectorType(VA.getValVT()); NeedsArgAlign |= ArgAlign; // Promote the value if needed. @@ -792,35 +826,35 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (NeedsArgAlign && Subtarget.hasV60TOps()) { DEBUG(dbgs() << "Function needs byte stack align due to call args\n"); - MachineFrameInfo* MFI = DAG.getMachineFunction().getFrameInfo(); + MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); // V6 vectors passed by value have 64 or 128 byte alignment depending // on whether we are 64 byte vector mode or 128 byte. bool UseHVXDbl = Subtarget.useHVXDblOps(); assert(Subtarget.useHVXOps()); const unsigned ObjAlign = UseHVXDbl ? 128 : 64; LargestAlignSeen = std::max(LargestAlignSeen, ObjAlign); - MFI->ensureMaxAlignment(LargestAlignSeen); + MFI.ensureMaxAlignment(LargestAlignSeen); } // Transform all store nodes into one single node because all store // nodes are independent of each other. if (!MemOpChains.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); - if (!isTailCall) { + if (!IsTailCall) { SDValue C = DAG.getConstant(NumBytes, dl, PtrVT, true); Chain = DAG.getCALLSEQ_START(Chain, C, dl); } // Build a sequence of copy-to-reg nodes chained together with token // chain and flag operands which copy the outgoing args into registers. - // The InFlag in necessary since all emitted instructions must be + // The Glue is necessary since all emitted instructions must be // stuck together. - SDValue InFlag; - if (!isTailCall) { + SDValue Glue; + if (!IsTailCall) { for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, - RegsToPass[i].second, InFlag); - InFlag = Chain.getValue(1); + RegsToPass[i].second, Glue); + Glue = Chain.getValue(1); } } else { // For tail calls lower the arguments to the 'real' stack slot. @@ -833,23 +867,26 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // on every argument instead of just those arguments it would clobber. 
// // Do not flag preceding copytoreg stuff together with the following stuff. - InFlag = SDValue(); + Glue = SDValue(); for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, - RegsToPass[i].second, InFlag); - InFlag = Chain.getValue(1); + RegsToPass[i].second, Glue); + Glue = Chain.getValue(1); } - InFlag = SDValue(); + Glue = SDValue(); } + bool LongCalls = MF.getSubtarget<HexagonSubtarget>().useLongCalls(); + unsigned Flags = LongCalls ? HexagonII::HMOTF_ConstExtended : 0; + // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol // node so that legalize doesn't hack it. if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { - Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, PtrVT); + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, PtrVT, 0, Flags); } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { - Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT); + Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, Flags); } // Returns a chain & a flag for retval copy to use. @@ -865,33 +902,32 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, RegsToPass[i].second.getValueType())); } - if (InFlag.getNode()) - Ops.push_back(InFlag); + if (Glue.getNode()) + Ops.push_back(Glue); - if (isTailCall) { - MF.getFrameInfo()->setHasTailCall(); + if (IsTailCall) { + MF.getFrameInfo().setHasTailCall(); return DAG.getNode(HexagonISD::TC_RETURN, dl, NodeTys, Ops); } - int OpCode = doesNotReturn ? HexagonISD::CALLv3nr : HexagonISD::CALLv3; + unsigned OpCode = DoesNotReturn ? HexagonISD::CALLnr : HexagonISD::CALL; Chain = DAG.getNode(OpCode, dl, NodeTys, Ops); - InFlag = Chain.getValue(1); + Glue = Chain.getValue(1); // Create the CALLSEQ_END node. Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), - DAG.getIntPtrConstant(0, dl, true), InFlag, dl); - InFlag = Chain.getValue(1); + DAG.getIntPtrConstant(0, dl, true), Glue, dl); + Glue = Chain.getValue(1); // Handle result values, copying them out of physregs into vregs that we // return. - return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG, + return LowerCallResult(Chain, Glue, CallConv, IsVarArg, Ins, dl, DAG, InVals, OutVals, Callee); } static bool getIndexedAddressParts(SDNode *Ptr, EVT VT, - bool isSEXTLoad, SDValue &Base, - SDValue &Offset, bool &isInc, - SelectionDAG &DAG) { + SDValue &Base, SDValue &Offset, + bool &IsInc, SelectionDAG &DAG) { if (Ptr->getOpcode() != ISD::ADD) return false; @@ -908,11 +944,11 @@ static bool getIndexedAddressParts(SDNode *Ptr, EVT VT, if (ValidHVXDblType || ValidHVXType || VT == MVT::i64 || VT == MVT::i32 || VT == MVT::i16 || VT == MVT::i8) { - isInc = (Ptr->getOpcode() == ISD::ADD); + IsInc = (Ptr->getOpcode() == ISD::ADD); Base = Ptr->getOperand(0); Offset = Ptr->getOperand(1); // Ensure that Offset is a constant. 
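+  // (Whether the constant is a legal auto-increment amount is checked by
+  // the caller via isValidAutoIncImm.)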
- return (isa<ConstantSDNode>(Offset)); + return isa<ConstantSDNode>(Offset); } return false; @@ -929,28 +965,24 @@ bool HexagonTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, { EVT VT; SDValue Ptr; - bool isSEXTLoad = false; if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { VT = LD->getMemoryVT(); - isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { VT = ST->getMemoryVT(); - if (ST->getValue().getValueType() == MVT::i64 && ST->isTruncatingStore()) { + if (ST->getValue().getValueType() == MVT::i64 && ST->isTruncatingStore()) return false; - } } else { return false; } - bool isInc = false; - bool isLegal = getIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, - isInc, DAG); + bool IsInc = false; + bool isLegal = getIndexedAddressParts(Op, VT, Base, Offset, IsInc, DAG); if (isLegal) { auto &HII = *Subtarget.getInstrInfo(); int32_t OffsetVal = cast<ConstantSDNode>(Offset.getNode())->getSExtValue(); if (HII.isValidAutoIncImm(VT, OffsetVal)) { - AM = isInc ? ISD::POST_INC : ISD::POST_DEC; + AM = IsInc ? ISD::POST_INC : ISD::POST_DEC; return true; } } @@ -1054,7 +1086,7 @@ HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, A = HFI.getStackAlignment(); DEBUG({ - dbgs () << LLVM_FUNCTION_NAME << " Align: " << A << " Size: "; + dbgs () << __func__ << " Align: " << A << " Size: "; Size.getNode()->dump(&DAG); dbgs() << "\n"; }); @@ -1071,9 +1103,8 @@ SDValue HexagonTargetLowering::LowerFormalArguments( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { - MachineFunction &MF = DAG.getMachineFunction(); - MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); auto &FuncInfo = *MF.getInfo<HexagonMachineFunctionInfo>(); @@ -1173,7 +1204,7 @@ SDValue HexagonTargetLowering::LowerFormalArguments( StackLocation = HEXAGON_LRFP_SIZE + VA.getLocMemOffset(); // Create the frame index object for this incoming parameter... - FI = MFI->CreateFixedObject(ObjSize, StackLocation, true); + FI = MFI.CreateFixedObject(ObjSize, StackLocation, true); // Create the SelectionDAG nodes corresponding to a load // from this parameter. @@ -1196,10 +1227,10 @@ SDValue HexagonTargetLowering::LowerFormalArguments( if (isVarArg) { // This will point to the next argument passed via stack. - int FrameIndex = MFI->CreateFixedObject(Hexagon_PointerSize, - HEXAGON_LRFP_SIZE + - CCInfo.getNextStackOffset(), - true); + int FrameIndex = MFI.CreateFixedObject(Hexagon_PointerSize, + HEXAGON_LRFP_SIZE + + CCInfo.getNextStackOffset(), + true); FuncInfo.setVarArgsFrameIndex(FrameIndex); } @@ -1392,7 +1423,6 @@ SDValue HexagonTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { return DAG.getMergeValues(Ops, DL); } - SDValue HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { EVT ValTy = Op.getValueType(); @@ -1401,11 +1431,18 @@ HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { bool IsPositionIndependent = isPositionIndependent(); unsigned char TF = IsPositionIndependent ?
HexagonII::MO_PCREL : 0; + unsigned Offset = 0; SDValue T; if (CPN->isMachineConstantPoolEntry()) - T = DAG.getTargetConstantPool(CPN->getMachineCPVal(), ValTy, Align, TF); + T = DAG.getTargetConstantPool(CPN->getMachineCPVal(), ValTy, Align, Offset, + TF); else - T = DAG.getTargetConstantPool(CPN->getConstVal(), ValTy, Align, TF); + T = DAG.getTargetConstantPool(CPN->getConstVal(), ValTy, Align, Offset, + TF); + + assert(cast<ConstantPoolSDNode>(T)->getTargetFlags() == TF && + "Inconsistent target flag encountered"); + if (IsPositionIndependent) return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), ValTy, T); return DAG.getNode(HexagonISD::CP, SDLoc(Op), ValTy, T); @@ -1428,7 +1465,7 @@ SDValue HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const { const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo(); MachineFunction &MF = DAG.getMachineFunction(); - MachineFrameInfo &MFI = *MF.getFrameInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); MFI.setReturnAddressIsTaken(true); if (verifyReturnAddressArgumentIsConstant(Op, DAG)) @@ -1453,7 +1490,7 @@ HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const { SDValue HexagonTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo(); - MachineFrameInfo &MFI = *DAG.getMachineFunction().getFrameInfo(); + MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); MFI.setFrameAddressIsTaken(true); EVT VT = Op.getValueType(); @@ -1473,7 +1510,6 @@ HexagonTargetLowering::LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const { return DAG.getNode(HexagonISD::BARRIER, dl, MVT::Other, Op.getOperand(0)); } - SDValue HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); @@ -1487,7 +1523,8 @@ HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const { if (RM == Reloc::Static) { SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset); - if (HLOF.isGlobalInSmallSection(GV, HTM)) + const GlobalObject *GO = GV->getBaseObject(); + if (GO && HLOF.isGlobalInSmallSection(GO, HTM)) return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, GA); return DAG.getNode(HexagonISD::CONST32, dl, PtrVT, GA); } @@ -1536,7 +1573,7 @@ SDValue HexagonTargetLowering::GetDynamicTLSAddr(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA, SDValue *InFlag, EVT PtrVT, unsigned ReturnReg, unsigned char OperandFlags) const { - MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); SDLoc dl(GA); SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, @@ -1554,14 +1591,14 @@ HexagonTargetLowering::GetDynamicTLSAddr(SelectionDAG &DAG, SDValue Chain, if (InFlag) { SDValue Ops[] = { Chain, TGA, DAG.getRegister(Hexagon::R0, PtrVT), *InFlag }; - Chain = DAG.getNode(HexagonISD::CALLv3, dl, NodeTys, Ops); + Chain = DAG.getNode(HexagonISD::CALL, dl, NodeTys, Ops); } else { SDValue Ops[] = { Chain, TGA, DAG.getRegister(Hexagon::R0, PtrVT)}; - Chain = DAG.getNode(HexagonISD::CALLv3, dl, NodeTys, Ops); + Chain = DAG.getNode(HexagonISD::CALL, dl, NodeTys, Ops); } // Inform MFI that function has calls. 
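+  // (The TLS address is produced by an actual HexagonISD::CALL node above,
+  // so frame lowering must treat this function as making calls.)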
- MFI->setAdjustsStack(true); + MFI.setAdjustsStack(true); SDValue Flag = Chain.getValue(1); return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Flag); @@ -1761,7 +1798,6 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, addRegisterClass(MVT::v32i64, &Hexagon::VecDblRegs128BRegClass); addRegisterClass(MVT::v1024i1, &Hexagon::VecPredRegs128BRegClass); } - } // @@ -1812,7 +1848,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, if (EmitJumpTables) setMinimumJumpTableEntries(MinimumJumpTables); else - setMinimumJumpTableEntries(INT_MAX); + setMinimumJumpTableEntries(std::numeric_limits<int>::max()); setOperationAction(ISD::BR_JT, MVT::Other, Expand); // Hexagon has instructions for add/sub with carry. The problem with @@ -1861,7 +1897,6 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, // operation. There is a pattern that will match i64 mul and transform it // to a series of instructions. setOperationAction(ISD::MUL, MVT::i64, Expand); - setOperationAction(ISD::MULHS, MVT::i64, Expand); for (unsigned IntExpOp : { ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM, @@ -1887,7 +1922,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, } // Turn FP truncstore into trunc + store. setTruncStoreAction(MVT::f64, MVT::f32, Expand); - // Turn FP extload into load/fextend. + // Turn FP extload into load/fpextend. for (MVT VT : MVT::fp_valuetypes()) setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand); @@ -1937,7 +1972,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, ISD::FRINT, ISD::FNEARBYINT, ISD::FROUND, ISD::FFLOOR, ISD::FMINNUM, ISD::FMAXNUM, ISD::FSINCOS, // Misc: - ISD::SELECT, ISD::ConstantPool, + ISD::BR_CC, ISD::SELECT_CC, ISD::ConstantPool, // Vector: ISD::BUILD_VECTOR, ISD::SCALAR_TO_VECTOR, ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT, @@ -1949,12 +1984,22 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, for (unsigned VectExpOp : VectExpOps) setOperationAction(VectExpOp, VT, Expand); - // Expand all extended loads and truncating stores: + // Expand all extending loads and truncating stores: for (MVT TargetVT : MVT::vector_valuetypes()) { + if (TargetVT == VT) + continue; setLoadExtAction(ISD::EXTLOAD, TargetVT, VT, Expand); + setLoadExtAction(ISD::ZEXTLOAD, TargetVT, VT, Expand); + setLoadExtAction(ISD::SEXTLOAD, TargetVT, VT, Expand); setTruncStoreAction(VT, TargetVT, Expand); } + // Normalize all inputs to SELECT to be vectors of i32. + if (VT.getVectorElementType() != MVT::i32) { + MVT VT32 = MVT::getVectorVT(MVT::i32, VT.getSizeInBits()/32); + setOperationAction(ISD::SELECT, VT, Promote); + AddPromotedToType(ISD::SELECT, VT, VT32); + } setOperationAction(ISD::SRA, VT, Custom); setOperationAction(ISD::SHL, VT, Custom); setOperationAction(ISD::SRL, VT, Custom); @@ -1983,17 +2028,33 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setOperationAction(ISD::VSELECT, MVT::v2i16, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom); + if (UseHVX) { if (UseHVXSgl) { setOperationAction(ISD::CONCAT_VECTORS, MVT::v128i8, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i16, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i32, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i64, Custom); + // We try to generate the vpack{e/o} instructions. If we fail + // we fall back upon ExpandOp. 
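+      // (vpacke/vpacko select the even/odd elements of a vector pair, which
+      // is the strided-mask pattern recognized in LowerVECTOR_SHUFFLE.)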
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i8, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i16, Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v64i8, Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v32i16, Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16i32, Custom); } else if (UseHVXDbl) { setOperationAction(ISD::CONCAT_VECTORS, MVT::v256i8, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v128i16, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i32, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i64, Custom); + // We try to generate the vpack{e/o} instructions. If we fail + // we fall back upon ExpandOp. + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v128i8, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i16, Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i32, Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v128i8, Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v64i16, Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v32i32, Custom); } else { llvm_unreachable("Unrecognized HVX mode"); } @@ -2006,6 +2067,9 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FSUB, MVT::f64, Expand); setOperationAction(ISD::FMUL, MVT::f64, Expand); + setOperationAction(ISD::FMINNUM, MVT::f32, Legal); + setOperationAction(ISD::FMAXNUM, MVT::f32, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote); setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote); setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote); @@ -2018,7 +2082,6 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote); setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote); setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote); - } else { // V4 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand); setOperationAction(ISD::SINT_TO_FP, MVT::i64, Expand); @@ -2052,13 +2115,20 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, // Handling of indexed loads/stores: default is "expand". 
// - for (MVT LSXTy : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) { - setIndexedLoadAction(ISD::POST_INC, LSXTy, Legal); - setIndexedStoreAction(ISD::POST_INC, LSXTy, Legal); + for (MVT VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) { + setIndexedLoadAction(ISD::POST_INC, VT, Legal); + setIndexedStoreAction(ISD::POST_INC, VT, Legal); } - if (UseHVXDbl) { - for (MVT VT : {MVT::v128i8, MVT::v64i16, MVT::v32i32, MVT::v16i64}) { + if (UseHVXSgl) { + for (MVT VT : {MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64, + MVT::v128i8, MVT::v64i16, MVT::v32i32, MVT::v16i64}) { + setIndexedLoadAction(ISD::POST_INC, VT, Legal); + setIndexedStoreAction(ISD::POST_INC, VT, Legal); + } + } else if (UseHVXDbl) { + for (MVT VT : {MVT::v128i8, MVT::v64i16, MVT::v32i32, MVT::v16i64, + MVT::v256i8, MVT::v128i16, MVT::v64i32, MVT::v32i64}) { setIndexedLoadAction(ISD::POST_INC, VT, Legal); setIndexedStoreAction(ISD::POST_INC, VT, Legal); } @@ -2177,17 +2247,15 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setLibcallName(RTLIB::SRA_I128, nullptr); } - const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const { switch ((HexagonISD::NodeType)Opcode) { case HexagonISD::ALLOCA: return "HexagonISD::ALLOCA"; - case HexagonISD::ARGEXTEND: return "HexagonISD::ARGEXTEND"; case HexagonISD::AT_GOT: return "HexagonISD::AT_GOT"; case HexagonISD::AT_PCREL: return "HexagonISD::AT_PCREL"; case HexagonISD::BARRIER: return "HexagonISD::BARRIER"; + case HexagonISD::CALL: return "HexagonISD::CALL"; + case HexagonISD::CALLnr: return "HexagonISD::CALLnr"; case HexagonISD::CALLR: return "HexagonISD::CALLR"; - case HexagonISD::CALLv3nr: return "HexagonISD::CALLv3nr"; - case HexagonISD::CALLv3: return "HexagonISD::CALLv3"; case HexagonISD::COMBINE: return "HexagonISD::COMBINE"; case HexagonISD::CONST32_GP: return "HexagonISD::CONST32_GP"; case HexagonISD::CONST32: return "HexagonISD::CONST32"; @@ -2196,7 +2264,6 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const { case HexagonISD::EH_RETURN: return "HexagonISD::EH_RETURN"; case HexagonISD::EXTRACTU: return "HexagonISD::EXTRACTU"; case HexagonISD::EXTRACTURP: return "HexagonISD::EXTRACTURP"; - case HexagonISD::FCONST32: return "HexagonISD::FCONST32"; case HexagonISD::INSERT: return "HexagonISD::INSERT"; case HexagonISD::INSERTRP: return "HexagonISD::INSERTRP"; case HexagonISD::JT: return "HexagonISD::JT"; @@ -2218,6 +2285,7 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const { case HexagonISD::VCMPWGT: return "HexagonISD::VCMPWGT"; case HexagonISD::VCMPWGTU: return "HexagonISD::VCMPWGTU"; case HexagonISD::VCOMBINE: return "HexagonISD::VCOMBINE"; + case HexagonISD::VPACK: return "HexagonISD::VPACK"; case HexagonISD::VSHLH: return "HexagonISD::VSHLH"; case HexagonISD::VSHLW: return "HexagonISD::VSHLW"; case HexagonISD::VSPLATB: return "HexagonISD::VSPLTB"; @@ -2247,12 +2315,13 @@ bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { return (VT1.getSimpleVT() == MVT::i64) && (VT2.getSimpleVT() == MVT::i32); } -// shouldExpandBuildVectorWithShuffles -// Should we expand the build vector with shuffles? -bool -HexagonTargetLowering::shouldExpandBuildVectorWithShuffles(EVT VT, - unsigned DefinedValues) const { +bool HexagonTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { + return isOperationLegalOrCustom(ISD::FMA, VT); +} +// Should we expand the build vector with shuffles? 
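+// Only for byte and halfword elements: the element-size check below rejects
+// anything else, since Hexagon's vector shuffles operate on 8- and 16-bit
+// lanes.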
+bool HexagonTargetLowering::shouldExpandBuildVectorWithShuffles(EVT VT, + unsigned DefinedValues) const { // Hexagon vector shuffle operates on element sizes of bytes or halfwords EVT EltVT = VT.getVectorElementType(); int EltBits = EltVT.getSizeInBits(); if ((EltBits != 8) && (EltBits != 16)) return false; @@ -2262,14 +2331,48 @@ HexagonTargetLowering::shouldExpandBuildVectorWithShuffles(EVT VT, return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues); } -// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3). V1 and -// V2 are the two vectors to select data from, V3 is the permutation. -static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { +static StridedLoadKind isStridedLoad(const ArrayRef<int> &Mask) { + int even_start = -2; + int odd_start = -1; + size_t mask_len = Mask.size(); + for (auto idx : Mask) { + if ((idx - even_start) == 2) + even_start = idx; + else + break; + } + if (even_start == (int)(mask_len * 2) - 2) + return StridedLoadKind::Even; + for (auto idx : Mask) { + if ((idx - odd_start) == 2) + odd_start = idx; + else + break; + } + if (odd_start == (int)(mask_len * 2) - 1) + return StridedLoadKind::Odd; + + return StridedLoadKind::NoPattern; +} + +bool HexagonTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &Mask, + EVT VT) const { + if (Subtarget.useHVXOps()) + return isStridedLoad(Mask) != StridedLoadKind::NoPattern; + return true; +} + +// Lower a vector shuffle (V1, V2, V3). V1 and V2 are the two vectors +// to select data from, V3 is the permutation. +SDValue +HexagonTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) + const { const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op); SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); SDLoc dl(Op); EVT VT = Op.getValueType(); + bool UseHVX = Subtarget.useHVXOps(); if (V2.isUndef()) V2 = V1; @@ -2288,17 +2391,42 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR && !isa<ConstantSDNode>(V1.getOperand(0))) { bool IsScalarToVector = true; - for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i) + for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i) { if (!V1.getOperand(i).isUndef()) { IsScalarToVector = false; break; } + } if (IsScalarToVector) return createSplat(DAG, dl, VT, V1.getOperand(0)); } return createSplat(DAG, dl, VT, DAG.getConstant(Lane, dl, MVT::i32)); } + if (UseHVX) { + ArrayRef<int> Mask = SVN->getMask(); + size_t MaskLen = Mask.size(); + int ElemSizeInBits = VT.getScalarSizeInBits(); + if ((Subtarget.useHVXSglOps() && (ElemSizeInBits * MaskLen) == 64 * 8) || + (Subtarget.useHVXDblOps() && (ElemSizeInBits * MaskLen) == 128 * 8)) { + // The stride pattern constant is 0 for an even-strided mask and 1 for + // an odd one (see StridedLoadKind above). + StridedLoadKind Pattern = isStridedLoad(Mask); + + if (Pattern == StridedLoadKind::NoPattern) + return SDValue(); + + SDValue Vec0 = Op.getOperand(0); + SDValue Vec1 = Op.getOperand(1); + SDValue StridePattern = DAG.getConstant(Pattern, dl, MVT::i32); + SDValue Ops[] = { Vec1, Vec0, StridePattern }; + return DAG.getNode(HexagonISD::VPACK, dl, VT, Ops); + } + // We used to assert in the "else" part here, but that is bad for Halide. + // Halide creates intermediate double registers by interleaving two + // concatenated vector registers. The interleaving requires vector_shuffle + // nodes and we shouldn't barf on a double register result of a + // vector_shuffle because it is most likely an intermediate result. + } // FIXME: We need to support more general vector shuffles. 
See // below the comment from the ARM backend that deals in the general // case with the vector shuffles. For now, let expand handle these. @@ -2321,11 +2449,12 @@ static bool isCommonSplatElement(BuildVectorSDNode *BVN) { return true; } -// LowerVECTOR_SHIFT - Lower a vector shift. Try to convert +// Lower a vector shift. Try to convert // <VT> = SHL/SRA/SRL <VT> by <VT> to Hexagon specific // <VT> = SHL/SRA/SRL <VT> by <IT/i32>. -static SDValue LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) { - BuildVectorSDNode *BVN = 0; +SDValue +HexagonTargetLowering::LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const { + BuildVectorSDNode *BVN = nullptr; SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); SDValue V3; @@ -2442,7 +2571,7 @@ HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { SDValue pack = DAG.getNode(HexagonISD::PACKHL, dl, MVT::v4i16, BVN->getOperand(1), BVN->getOperand(0)); - return DAG.getTargetExtractSubreg(Hexagon::subreg_loreg, dl, MVT::v2i16, + return DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::v2i16, pack); } } @@ -2474,6 +2603,9 @@ HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { Res = (Res << EltSize) | Val; } + if (Size > 64) + return SDValue(); + if (Size == 64) ConstVal = DAG.getConstant(Res, dl, MVT::i64); else @@ -2497,7 +2629,7 @@ HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { continue; if (VT.getSizeInBits() == 64 && - Operand.getValueType().getSizeInBits() == 32) { + Operand.getValueSizeInBits() == 32) { SDValue C = DAG.getConstant(0, dl, MVT::i32); Operand = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, Operand); } @@ -2562,7 +2694,7 @@ HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op, unsigned N = NElts-i-1; SDValue OpN = Op.getOperand(N); - if (VT.getSizeInBits() == 64 && OpN.getValueType().getSizeInBits() == 32) { + if (VT.getSizeInBits() == 64 && OpN.getValueSizeInBits() == 32) { SDValue C = DAG.getConstant(0, dl, MVT::i32); OpN = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, OpN); } @@ -2571,16 +2703,66 @@ HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, S, Offset); if (VT.getSizeInBits() == 32) V = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i32, {V, OpN, Or}); - else + else if (VT.getSizeInBits() == 64) V = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i64, {V, OpN, Or}); + else + return SDValue(); } return DAG.getNode(ISD::BITCAST, dl, VT, V); } SDValue +HexagonTargetLowering::LowerEXTRACT_SUBVECTOR_HVX(SDValue Op, + SelectionDAG &DAG) const { + EVT VT = Op.getOperand(0).getValueType(); + SDLoc dl(Op); + bool UseHVX = Subtarget.useHVXOps(); + bool UseHVXSgl = Subtarget.useHVXSglOps(); + // Just in case... + + if (!VT.isVector() || !UseHVX) + return SDValue(); + + EVT ResVT = Op.getValueType(); + unsigned ResSize = ResVT.getSizeInBits(); + unsigned VectorSizeInBits = UseHVXSgl ? (64 * 8) : (128 * 8); + unsigned OpSize = VT.getSizeInBits(); + + // We deal only with cases where the result is the vector size + // and the vector operand is a double register. + if (!(ResVT.isByteSized() && ResSize == VectorSizeInBits) || + !(VT.isByteSized() && OpSize == 2 * VectorSizeInBits)) + return SDValue(); + + ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(1)); + if (!Cst) + return SDValue(); + unsigned Val = Cst->getZExtValue(); + + // These two will get lowered to an appropriate EXTRACT_SUBREG in ISel. 
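+  // (An index of 0 selects the low half of the vector pair; an index equal
+  // to the number of result elements selects the high half.)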
+ if (Val == 0) { + SDValue Vec = Op.getOperand(0); + return DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, ResVT, Vec); + } + + if (ResVT.getVectorNumElements() == Val) { + SDValue Vec = Op.getOperand(0); + return DAG.getTargetExtractSubreg(Hexagon::vsub_hi, dl, ResVT, Vec); + } + + return SDValue(); +} + +SDValue HexagonTargetLowering::LowerEXTRACT_VECTOR(SDValue Op, SelectionDAG &DAG) const { + // If we are dealing with EXTRACT_SUBVECTOR on a HVX type, we may + // be able to simplify it to an EXTRACT_SUBREG. + if (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR && Subtarget.useHVXOps() && + isHvxVectorType(Op.getValueType().getSimpleVT())) + return LowerEXTRACT_SUBVECTOR_HVX(Op, DAG); + EVT VT = Op.getValueType(); int VTN = VT.isVector() ? VT.getVectorNumElements() : 1; SDLoc dl(Op); @@ -2607,27 +2789,28 @@ HexagonTargetLowering::LowerEXTRACT_VECTOR(SDValue Op, if (W == 32) { // Translate this node into EXTRACT_SUBREG. - unsigned Subreg = (X == 0) ? Hexagon::subreg_loreg : 0; + unsigned Subreg = (X == 0) ? Hexagon::isub_lo : 0; if (X == 0) - Subreg = Hexagon::subreg_loreg; + Subreg = Hexagon::isub_lo; else if (SVT == MVT::v2i32 && X == 1) - Subreg = Hexagon::subreg_hireg; + Subreg = Hexagon::isub_hi; else if (SVT == MVT::v4i16 && X == 2) - Subreg = Hexagon::subreg_hireg; + Subreg = Hexagon::isub_hi; else if (SVT == MVT::v8i8 && X == 4) - Subreg = Hexagon::subreg_hireg; + Subreg = Hexagon::isub_hi; else llvm_unreachable("Bad offset"); N = DAG.getTargetExtractSubreg(Subreg, dl, MVT::i32, Vec); - } else if (VecVT.getSizeInBits() == 32) { + } else if (SVT.getSizeInBits() == 32) { N = DAG.getNode(HexagonISD::EXTRACTU, dl, MVT::i32, Ops); - } else { + } else if (SVT.getSizeInBits() == 64) { N = DAG.getNode(HexagonISD::EXTRACTU, dl, MVT::i64, Ops); if (VT.getSizeInBits() == 32) - N = DAG.getTargetExtractSubreg(Hexagon::subreg_loreg, dl, MVT::i32, N); - } + N = DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, N); + } else + return SDValue(); return DAG.getNode(ISD::BITCAST, dl, VT, N); } @@ -2647,7 +2830,7 @@ HexagonTargetLowering::LowerEXTRACT_VECTOR(SDValue Op, } else { N = DAG.getNode(HexagonISD::EXTRACTURP, dl, MVT::i64, Ops); if (VT.getSizeInBits() == 32) - N = DAG.getTargetExtractSubreg(Hexagon::subreg_loreg, dl, MVT::i32, N); + N = DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, N); } return DAG.getNode(ISD::BITCAST, dl, VT, N); } @@ -2674,8 +2857,10 @@ HexagonTargetLowering::LowerINSERT_VECTOR(SDValue Op, SDValue N; if (VT.getSizeInBits() == 32) N = DAG.getNode(HexagonISD::INSERT, dl, MVT::i32, Ops); - else + else if (VT.getSizeInBits() == 64) N = DAG.getNode(HexagonISD::INSERT, dl, MVT::i64, Ops); + else + return SDValue(); return DAG.getNode(ISD::BITCAST, dl, VT, N); } @@ -2687,8 +2872,7 @@ HexagonTargetLowering::LowerINSERT_VECTOR(SDValue Op, DAG.getConstant(32, dl, MVT::i64)); SDValue Combined = DAG.getNode(ISD::OR, dl, MVT::i64, Shifted, Offset); - if (VT.getSizeInBits() == 64 && - Val.getValueType().getSizeInBits() == 32) { + if (VT.getSizeInBits() == 64 && Val.getValueSizeInBits() == 32) { SDValue C = DAG.getConstant(0, dl, MVT::i32); Val = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, Val); } @@ -2698,8 +2882,10 @@ HexagonTargetLowering::LowerINSERT_VECTOR(SDValue Op, SDValue N; if (VT.getSizeInBits() == 32) N = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i32, Ops); - else + else if (VT.getSizeInBits() == 64) N = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i64, Ops); + else + return SDValue(); return DAG.getNode(ISD::BITCAST, dl, VT, N); } @@ -2800,20 +2986,6 @@ 
HexagonTargetLowering::getPICJumpTableRelocBase(SDValue Table, return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Table), VT, T); } -MachineBasicBlock *HexagonTargetLowering::EmitInstrWithCustomInserter( - MachineInstr &MI, MachineBasicBlock *BB) const { - switch (MI.getOpcode()) { - case Hexagon::ALLOCA: { - MachineFunction *MF = BB->getParent(); - auto *FuncInfo = MF->getInfo<HexagonMachineFunctionInfo>(); - FuncInfo->addAllocaAdjustInst(&MI); - return BB; - } - default: - llvm_unreachable("Unexpected instr type to insert"); - } // switch -} - //===----------------------------------------------------------------------===// // Inline Assembly Support //===----------------------------------------------------------------------===// @@ -2832,7 +3004,7 @@ HexagonTargetLowering::getConstraintType(StringRef Constraint) const { return TargetLowering::getConstraintType(Constraint); } -std::pair<unsigned, const TargetRegisterClass *> +std::pair<unsigned, const TargetRegisterClass*> HexagonTargetLowering::getRegForInlineAsmConstraint( const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { bool UseHVX = Subtarget.useHVXOps(), UseHVXDbl = Subtarget.useHVXDblOps(); @@ -2840,53 +3012,53 @@ HexagonTargetLowering::getRegForInlineAsmConstraint( if (Constraint.size() == 1) { switch (Constraint[0]) { case 'r': // R0-R31 - switch (VT.SimpleTy) { - default: - llvm_unreachable("getRegForInlineAsmConstraint Unhandled data type"); - case MVT::i32: - case MVT::i16: - case MVT::i8: - case MVT::f32: - return std::make_pair(0U, &Hexagon::IntRegsRegClass); - case MVT::i64: - case MVT::f64: - return std::make_pair(0U, &Hexagon::DoubleRegsRegClass); + switch (VT.SimpleTy) { + default: + llvm_unreachable("getRegForInlineAsmConstraint Unhandled data type"); + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + case MVT::f32: + return std::make_pair(0U, &Hexagon::IntRegsRegClass); + case MVT::i64: + case MVT::f64: + return std::make_pair(0U, &Hexagon::DoubleRegsRegClass); } case 'q': // q0-q3 - switch (VT.SimpleTy) { - default: - llvm_unreachable("getRegForInlineAsmConstraint Unhandled data type"); - case MVT::v1024i1: - case MVT::v512i1: - case MVT::v32i16: - case MVT::v16i32: - case MVT::v64i8: - case MVT::v8i64: - return std::make_pair(0U, &Hexagon::VecPredRegsRegClass); - } + switch (VT.SimpleTy) { + default: + llvm_unreachable("getRegForInlineAsmConstraint Unhandled data type"); + case MVT::v1024i1: + case MVT::v512i1: + case MVT::v32i16: + case MVT::v16i32: + case MVT::v64i8: + case MVT::v8i64: + return std::make_pair(0U, &Hexagon::VecPredRegsRegClass); + } case 'v': // V0-V31 - switch (VT.SimpleTy) { - default: - llvm_unreachable("getRegForInlineAsmConstraint Unhandled data type"); - case MVT::v16i32: - case MVT::v32i16: - case MVT::v64i8: - case MVT::v8i64: - return std::make_pair(0U, &Hexagon::VectorRegsRegClass); - case MVT::v32i32: - case MVT::v64i16: - case MVT::v16i64: - case MVT::v128i8: - if (Subtarget.hasV60TOps() && UseHVX && UseHVXDbl) - return std::make_pair(0U, &Hexagon::VectorRegs128BRegClass); - else - return std::make_pair(0U, &Hexagon::VecDblRegsRegClass); - case MVT::v256i8: - case MVT::v128i16: - case MVT::v64i32: - case MVT::v32i64: - return std::make_pair(0U, &Hexagon::VecDblRegs128BRegClass); - } + switch (VT.SimpleTy) { + default: + llvm_unreachable("getRegForInlineAsmConstraint Unhandled data type"); + case MVT::v16i32: + case MVT::v32i16: + case MVT::v64i8: + case MVT::v8i64: + return std::make_pair(0U, &Hexagon::VectorRegsRegClass); + case MVT::v32i32: + case 
MVT::v64i16: + case MVT::v16i64: + case MVT::v128i8: + if (Subtarget.hasV60TOps() && UseHVX && UseHVXDbl) + return std::make_pair(0U, &Hexagon::VectorRegs128BRegClass); + return std::make_pair(0U, &Hexagon::VecDblRegsRegClass); + case MVT::v256i8: + case MVT::v128i16: + case MVT::v64i32: + case MVT::v32i64: + return std::make_pair(0U, &Hexagon::VecDblRegs128BRegClass); + } default: llvm_unreachable("Unknown asm register class"); @@ -2908,16 +3080,30 @@ bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { bool HexagonTargetLowering::isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const { - // Allows a signed-extended 11-bit immediate field. - if (AM.BaseOffs <= -(1LL << 13) || AM.BaseOffs >= (1LL << 13)-1) - return false; + if (Ty->isSized()) { + // When LSR detects uses of the same base address to access different + // types (e.g. unions), it will assume a conservative type for these + // uses: + // LSR Use: Kind=Address of void in addrspace(4294967295), ... + // The type Ty passed here would then be "void". Skip the alignment + // checks, but do not return false right away, since that confuses + // LSR into crashing. + unsigned A = DL.getABITypeAlignment(Ty); + // The base offset must be a multiple of the alignment. + if ((AM.BaseOffs % A) != 0) + return false; + // The shifted offset must fit in 11 bits. + if (!isInt<11>(AM.BaseOffs >> Log2_32(A))) + return false; + } // No global is ever allowed as a base. if (AM.BaseGV) return false; int Scale = AM.Scale; - if (Scale < 0) Scale = -Scale; + if (Scale < 0) + Scale = -Scale; switch (Scale) { case 0: // No scale reg, "r+i", "r", or just "i". break; @@ -2934,7 +3120,6 @@ bool HexagonTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) return HTM.getRelocationModel() == Reloc::Static; } - /// isLegalICmpImmediate - Return true if the specified immediate is legal /// icmp immediate, that is the target has icmp instructions which can compare /// a register against the immediate without having to materialize the @@ -2966,14 +3151,20 @@ bool HexagonTargetLowering::IsEligibleForTailCallOptimization( // *************************************************************************** // If this is a tail call via a function pointer, then don't do it! - if (!(isa<GlobalAddressSDNode>(Callee)) && - !(isa<ExternalSymbolSDNode>(Callee))) { + if (!isa<GlobalAddressSDNode>(Callee) && + !isa<ExternalSymbolSDNode>(Callee)) { return false; } - // Do not optimize if the calling conventions do not match. - if (!CCMatch) - return false; + // Do not optimize if the calling conventions do not match and the conventions + // used are not C or Fast. + if (!CCMatch) { + bool R = (CallerCC == CallingConv::C || CallerCC == CallingConv::Fast); + bool E = (CalleeCC == CallingConv::C || CalleeCC == CallingConv::Fast); + // If R & E, then ok. + if (!R || !E) + return false; + } // Do not tail call optimize vararg calls. if (isVarArg) @@ -2991,18 +3182,33 @@ bool HexagonTargetLowering::IsEligibleForTailCallOptimization( return true; } -// Return true when the given node fits in a positive half word. -bool llvm::isPositiveHalfWord(SDNode *N) { - ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N); - if (CN && CN->getSExtValue() > 0 && isInt<16>(CN->getSExtValue())) - return true; +/// Returns the target specific optimal type for load and store operations as +/// a result of memset, memcpy, and memmove lowering. 
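The new isLegalAddressingMode logic above states the Hexagon base+offset rule in two parts: the byte offset must be a multiple of the access type's ABI alignment, and the offset scaled down by that alignment must fit in a signed 11-bit field (both checks are skipped for unsized types such as the void that LSR reports for union-like uses). A self-contained sketch, assuming Align is a power of two:

#include <cstdint>

bool isLegalBaseOffset(int64_t BaseOffs, unsigned Align) {
  // The base offset must be a multiple of the alignment.
  if (BaseOffs % Align != 0)
    return false;
  // The shifted offset must fit in 11 signed bits: [-1024, 1023].
  int64_t Scaled = BaseOffs >> __builtin_ctz(Align);  // BaseOffs / Align
  return Scaled >= -1024 && Scaled <= 1023;
}

For a 4-byte access this admits byte offsets in [-4096, 4092], in steps of 4.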
+/// +/// If DstAlign is zero that means it's safe to destination alignment can +/// satisfy any constraint. Similarly if SrcAlign is zero it means there isn't +/// a need to check it against alignment requirement, probably because the +/// source does not need to be loaded. If 'IsMemset' is true, that means it's +/// expanding a memset. If 'ZeroMemset' is true, that means it's a memset of +/// zero. 'MemcpyStrSrc' indicates whether the memcpy source is constant so it +/// does not need to be loaded. It returns EVT::Other if the type should be +/// determined using generic target-independent logic. +EVT HexagonTargetLowering::getOptimalMemOpType(uint64_t Size, + unsigned DstAlign, unsigned SrcAlign, bool IsMemset, bool ZeroMemset, + bool MemcpyStrSrc, MachineFunction &MF) const { + + auto Aligned = [](unsigned GivenA, unsigned MinA) -> bool { + return (GivenA % MinA) == 0; + }; - switch (N->getOpcode()) { - default: - return false; - case ISD::SIGN_EXTEND_INREG: - return true; - } + if (Size >= 8 && Aligned(DstAlign, 8) && (IsMemset || Aligned(SrcAlign, 8))) + return MVT::i64; + if (Size >= 4 && Aligned(DstAlign, 4) && (IsMemset || Aligned(SrcAlign, 4))) + return MVT::i32; + if (Size >= 2 && Aligned(DstAlign, 2) && (IsMemset || Aligned(SrcAlign, 2))) + return MVT::i16; + + return MVT::Other; } bool HexagonTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, @@ -3030,7 +3236,6 @@ bool HexagonTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, return false; } - std::pair<const TargetRegisterClass*, uint8_t> HexagonTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const { diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h index 71f6734..a8ed29e 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h +++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h @@ -16,30 +16,33 @@ #define LLVM_LIB_TARGET_HEXAGON_HEXAGONISELLOWERING_H #include "Hexagon.h" -#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/MachineValueType.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/CallingConv.h" +#include "llvm/IR/InlineAsm.h" #include "llvm/Target/TargetLowering.h" +#include <cstdint> +#include <utility> namespace llvm { -// Return true when the given node fits in a positive half word. -bool isPositiveHalfWord(SDNode *N); +namespace HexagonISD { - namespace HexagonISD { enum NodeType : unsigned { OP_BEGIN = ISD::BUILTIN_OP_END, CONST32 = OP_BEGIN, CONST32_GP, // For marking data present in GP. - FCONST32, ALLOCA, - ARGEXTEND, AT_GOT, // Index in GOT. AT_PCREL, // Offset relative to PC. - CALLv3, // A V3+ call instruction. - CALLv3nr, // A V3+ call instruction that doesn't return. + CALL, // Function call. + CALLnr, // Function call that does not return. CALLR, RET_FLAG, // Return with a flag operand. @@ -79,24 +82,26 @@ bool isPositiveHalfWord(SDNode *N); EXTRACTU, EXTRACTURP, VCOMBINE, + VPACK, TC_RETURN, EH_RETURN, DCFETCH, OP_END }; - } + +} // end namespace HexagonISD class HexagonSubtarget; class HexagonTargetLowering : public TargetLowering { int VarArgsFrameOffset; // Frame offset to start of varargs area. 
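To make the getOptimalMemOpType definition above concrete: the hook picks the widest scalar type whose natural alignment both operands can guarantee, memset is exempt from the source-alignment check, and a zero alignment passes trivially (0 % n == 0), matching the "satisfy any constraint" wording in its comment. A minimal standalone sketch of the same decision:

#include <cstdint>

enum class MemTy { I64, I32, I16, Other };

MemTy optimalMemOpType(uint64_t Size, unsigned DstAlign,
                       unsigned SrcAlign, bool IsMemset) {
  auto Aligned = [](unsigned Given, unsigned Min) {
    return Given % Min == 0;  // zero alignment is unconstrained
  };
  if (Size >= 8 && Aligned(DstAlign, 8) && (IsMemset || Aligned(SrcAlign, 8)))
    return MemTy::I64;
  if (Size >= 4 && Aligned(DstAlign, 4) && (IsMemset || Aligned(SrcAlign, 4)))
    return MemTy::I32;
  if (Size >= 2 && Aligned(DstAlign, 2) && (IsMemset || Aligned(SrcAlign, 2)))
    return MemTy::I16;
  return MemTy::Other;        // defer to generic lowering
}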
+ const HexagonTargetMachine &HTM; + const HexagonSubtarget &Subtarget; bool CanReturnSmallStruct(const Function* CalleeFn, unsigned& RetSize) const; void promoteLdStType(MVT VT, MVT PromotedLdStVT); - const HexagonTargetMachine &HTM; - const HexagonSubtarget &Subtarget; public: explicit HexagonTargetLowering(const TargetMachine &TM, @@ -116,15 +121,27 @@ bool isPositiveHalfWord(SDNode *N); bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override; + /// Return true if an FMA operation is faster than a pair of mul and add + /// instructions. fmuladd intrinsics will be expanded to FMAs when this + /// method returns true (and FMAs are legal), otherwise fmuladd is + /// expanded to mul + add. + bool isFMAFasterThanFMulAndFAdd(EVT) const override; + // Should we expand the build vector with shuffles? bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override; + bool isShuffleMaskLegal(const SmallVectorImpl<int> &Mask, EVT VT) + const override; + SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; const char *getTargetNodeName(unsigned Opcode) const override; SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEXTRACT_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEXTRACT_SUBVECTOR_HVX(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINSERT_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const; @@ -174,9 +191,6 @@ bool isPositiveHalfWord(SDNode *N); const SDLoc &dl, SelectionDAG &DAG) const override; bool mayBeEmittedAsTailCall(CallInst *CI) const override; - MachineBasicBlock * - EmitInstrWithCustomInserter(MachineInstr &MI, - MachineBasicBlock *BB) const override; /// If a physical register, this returns the register that receives the /// exception address on entry to an EH pad. @@ -195,6 +209,7 @@ bool isPositiveHalfWord(SDNode *N); SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; + EVT getSetCCResultType(const DataLayout &, LLVMContext &C, EVT VT) const override { if (!VT.isVector()) @@ -243,6 +258,10 @@ bool isPositiveHalfWord(SDNode *N); /// the immediate into a register. 
bool isLegalICmpImmediate(int64_t Imm) const override; + EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, + unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, + MachineFunction &MF) const override; + bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, unsigned Align, bool *Fast) const override; @@ -269,6 +288,7 @@ bool isPositiveHalfWord(SDNode *N); findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const override; }; + } // end namespace llvm -#endif // Hexagon_ISELLOWERING_H +#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONISELLOWERING_H diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrAlias.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrAlias.td index 9cbeae7..7283d94 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrAlias.td +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrAlias.td @@ -63,34 +63,34 @@ def : InstAlias<"memw($Rs) = $Rt.new", (S2_storerinew_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>; def : InstAlias<"memb($Rs) = #$S8", - (S4_storeirb_io IntRegs:$Rs, 0, s8Ext:$S8), 0>; + (S4_storeirb_io IntRegs:$Rs, 0, s8_0Ext:$S8), 0>; def : InstAlias<"memh($Rs) = #$S8", - (S4_storeirh_io IntRegs:$Rs, 0, s8Ext:$S8), 0>; + (S4_storeirh_io IntRegs:$Rs, 0, s8_0Ext:$S8), 0>; def : InstAlias<"memw($Rs) = #$S8", - (S4_storeiri_io IntRegs:$Rs, 0, s8Ext:$S8), 0>; + (S4_storeiri_io IntRegs:$Rs, 0, s8_0Ext:$S8), 0>; def : InstAlias<"memd($Rs) = $Rtt", (S2_storerd_io IntRegs:$Rs, 0, DoubleRegs:$Rtt), 0>; def : InstAlias<"memb($Rs) = setbit(#$U5)", - (L4_ior_memopb_io IntRegs:$Rs, 0, u5Imm:$U5), 0>; + (L4_ior_memopb_io IntRegs:$Rs, 0, u5_0Imm:$U5), 0>; def : InstAlias<"memh($Rs) = setbit(#$U5)", - (L4_ior_memoph_io IntRegs:$Rs, 0, u5Imm:$U5), 0>; + (L4_ior_memoph_io IntRegs:$Rs, 0, u5_0Imm:$U5), 0>; def : InstAlias<"memw($Rs) = setbit(#$U5)", - (L4_ior_memopw_io IntRegs:$Rs, 0, u5Imm:$U5), 0>; + (L4_ior_memopw_io IntRegs:$Rs, 0, u5_0Imm:$U5), 0>; def : InstAlias<"memb($Rs) = clrbit(#$U5)", - (L4_iand_memopb_io IntRegs:$Rs, 0, u5Imm:$U5), 0>; + (L4_iand_memopb_io IntRegs:$Rs, 0, u5_0Imm:$U5), 0>; def : InstAlias<"memh($Rs) = clrbit(#$U5)", - (L4_iand_memoph_io IntRegs:$Rs, 0, u5Imm:$U5), 0>; + (L4_iand_memoph_io IntRegs:$Rs, 0, u5_0Imm:$U5), 0>; def : InstAlias<"memw($Rs) = clrbit(#$U5)", - (L4_iand_memopw_io IntRegs:$Rs, 0, u5Imm:$U5), 0>; + (L4_iand_memopw_io IntRegs:$Rs, 0, u5_0Imm:$U5), 0>; // Alias of: $Rd = memXX($Rs+#XX) to $Rd = memXX($Rs) def : InstAlias<"$Rd = memb($Rs)", @@ -241,40 +241,40 @@ def : InstAlias<"if (!$Pt.new) memw($Rs) = $Rt.new", (S4_pstorerinewfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; def : InstAlias<"if ($Pt) memb($Rs) = #$S6", - (S4_storeirbt_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + (S4_storeirbt_io PredRegs:$Pt, IntRegs:$Rs, 0, s6_0Ext:$S6), 0>; def : InstAlias<"if ($Pt) memh($Rs) = #$S6", - (S4_storeirht_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + (S4_storeirht_io PredRegs:$Pt, IntRegs:$Rs, 0, s6_0Ext:$S6), 0>; def : InstAlias<"if ($Pt) memw($Rs) = #$S6", - (S4_storeirit_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + (S4_storeirit_io PredRegs:$Pt, IntRegs:$Rs, 0, s6_0Ext:$S6), 0>; def : InstAlias<"if ($Pt.new) memb($Rs) = #$S6", - (S4_storeirbtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + (S4_storeirbtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6_0Ext:$S6), 0>; def : InstAlias<"if ($Pt.new) memh($Rs) = #$S6", - (S4_storeirhtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + (S4_storeirhtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6_0Ext:$S6), 0>; def : InstAlias<"if ($Pt.new) 
memw($Rs) = #$S6", - (S4_storeiritnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + (S4_storeiritnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6_0Ext:$S6), 0>; def : InstAlias<"if (!$Pt) memb($Rs) = #$S6", - (S4_storeirbf_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + (S4_storeirbf_io PredRegs:$Pt, IntRegs:$Rs, 0, s6_0Ext:$S6), 0>; def : InstAlias<"if (!$Pt) memh($Rs) = #$S6", - (S4_storeirhf_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + (S4_storeirhf_io PredRegs:$Pt, IntRegs:$Rs, 0, s6_0Ext:$S6), 0>; def : InstAlias<"if (!$Pt) memw($Rs) = #$S6", - (S4_storeirif_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + (S4_storeirif_io PredRegs:$Pt, IntRegs:$Rs, 0, s6_0Ext:$S6), 0>; def : InstAlias<"if (!$Pt.new) memb($Rs) = #$S6", - (S4_storeirbfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + (S4_storeirbfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6_0Ext:$S6), 0>; def : InstAlias<"if (!$Pt.new) memh($Rs) = #$S6", - (S4_storeirhfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + (S4_storeirhfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6_0Ext:$S6), 0>; def : InstAlias<"if (!$Pt.new) memw($Rs) = #$S6", - (S4_storeirifnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + (S4_storeirifnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6_0Ext:$S6), 0>; // Alias of: memXX($Rs + $u6_X) |= $Rt, also &=, +=, -= // to: memXX($Rs) |= $Rt @@ -295,11 +295,11 @@ def : InstAlias<"memb($Rs) -= $Rt", Requires<[UseMEMOP]>; def : InstAlias<"memb($Rs) += #$U5", - (L4_iadd_memopb_io IntRegs:$Rs, 0, u5Imm:$U5), 0>, + (L4_iadd_memopb_io IntRegs:$Rs, 0, u5_0Imm:$U5), 0>, Requires<[UseMEMOP]>; def : InstAlias<"memb($Rs) -= #$U5", - (L4_isub_memopb_io IntRegs:$Rs, 0, u5Imm:$U5), 0>, + (L4_isub_memopb_io IntRegs:$Rs, 0, u5_0Imm:$U5), 0>, Requires<[UseMEMOP]>; def : InstAlias<"memh($Rs) &= $Rt", @@ -319,11 +319,11 @@ def : InstAlias<"memh($Rs) -= $Rt", Requires<[UseMEMOP]>; def : InstAlias<"memh($Rs) += #$U5", - (L4_iadd_memoph_io IntRegs:$Rs, 0, u5Imm:$U5), 0>, + (L4_iadd_memoph_io IntRegs:$Rs, 0, u5_0Imm:$U5), 0>, Requires<[UseMEMOP]>; def : InstAlias<"memh($Rs) -= #$U5", - (L4_isub_memoph_io IntRegs:$Rs, 0, u5Imm:$U5), 0>, + (L4_isub_memoph_io IntRegs:$Rs, 0, u5_0Imm:$U5), 0>, Requires<[UseMEMOP]>; def : InstAlias<"memw($Rs) &= $Rt", @@ -343,11 +343,11 @@ def : InstAlias<"memw($Rs) -= $Rt", Requires<[UseMEMOP]>; def : InstAlias<"memw($Rs) += #$U5", - (L4_iadd_memopw_io IntRegs:$Rs, 0, u5Imm:$U5), 0>, + (L4_iadd_memopw_io IntRegs:$Rs, 0, u5_0Imm:$U5), 0>, Requires<[UseMEMOP]>; def : InstAlias<"memw($Rs) -= #$U5", - (L4_isub_memopw_io IntRegs:$Rs, 0, u5Imm:$U5), 0>, + (L4_isub_memopw_io IntRegs:$Rs, 0, u5_0Imm:$U5), 0>, Requires<[UseMEMOP]>; // @@ -492,12 +492,10 @@ def : InstAlias<"if ($src1) jumpr $src2", def : InstAlias<"if (!$src1) jumpr $src2", (J2_jumprf PredRegs:$src1, IntRegs:$src2), 0>; -// V6_vassignp: Vector assign mapping. 
-let hasNewValue = 1, opNewValue = 0, isAsmParserOnly = 1 in -def HEXAGON_V6_vassignpair: CVI_VA_DV_Resource < - (outs VecDblRegs:$Vdd), - (ins VecDblRegs:$Vss), - "$Vdd = $Vss">; +// maps Vdd = Vss to Vdd = V6_vassignp(Vss) +def : InstAlias<"$Vdd = $Vss", + (V6_vassignp VecDblRegs:$Vdd, VecDblRegs:$Vss)>, + Requires<[HasV60T]>; // maps Vd = #0 to Vd = vxor(Vd, Vd) def : InstAlias<"$Vd = #0", diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td index 0bfb044..fa3cccb 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td @@ -54,7 +54,7 @@ class MemAccessSize<bits<4> value> { bits<4> Value = value; } -def NoMemAccess : MemAccessSize<0>;// Not a memory acces instruction. +def NoMemAccess : MemAccessSize<0>;// Not a memory access instruction. def ByteAccess : MemAccessSize<1>;// Byte access instruction (memb). def HalfWordAccess : MemAccessSize<2>;// Half word access instruction (memh). def WordAccess : MemAccessSize<3>;// Word access instruction (memw). @@ -179,6 +179,9 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern, bits<1> isAccumulator = 0; let TSFlags{54} = isAccumulator; + bit cofMax1 = 0; + let TSFlags{60} = cofMax1; + // Fields used for relation models. bit isNonTemporal = 0; string isNT = ""; // set to "true" for non-temporal vector stores. diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td index e17f71f..493d047 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td @@ -17,7 +17,7 @@ // *** Must match BaseInfo.h *** //----------------------------------------------------------------------------// -def TypeMEMOP : IType<9>; +def TypeV4LDST : IType<9>; def TypeNV : IType<10>; def TypeDUPLEX : IType<11>; def TypeCOMPOUND : IType<12>; @@ -132,7 +132,7 @@ class NCJInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], let mayLoad = 1, mayStore = 1 in class MEMInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = V4LDST_tc_st_SLOT0> - : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeMEMOP>, + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeV4LDST>, OpcodeHexagon; class MEMInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [], diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV60.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV60.td index f3d43de..b9f4373 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV60.td +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV60.td @@ -12,7 +12,7 @@ //===----------------------------------------------------------------------===// //----------------------------------------------------------------------------// -// Hexagon Intruction Flags + +// Hexagon Instruction Flags + // // *** Must match BaseInfo.h *** //----------------------------------------------------------------------------// @@ -34,7 +34,7 @@ def TypeCVI_VM_NEW_ST : IType<26>; def TypeCVI_VM_STU : IType<27>; def TypeCVI_HIST : IType<28>; //----------------------------------------------------------------------------// -// Intruction Classes Definitions + +// Instruction Classes Definitions + //----------------------------------------------------------------------------// let validSubTargets = HasV60SubT in diff --git 
a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp index fe9f97d..0a7dc6b 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -11,24 +11,45 @@ // //===----------------------------------------------------------------------===// -#include "HexagonInstrInfo.h" #include "Hexagon.h" +#include "HexagonHazardRecognizer.h" +#include "HexagonInstrInfo.h" #include "HexagonRegisterInfo.h" #include "HexagonSubtarget.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/DFAPacketizer.h" +#include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineInstrBundle.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCInstrItineraries.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <cassert> #include <cctype> +#include <cstdint> +#include <cstring> +#include <iterator> using namespace llvm; @@ -39,8 +60,6 @@ using namespace llvm; #include "HexagonGenInstrInfo.inc" #include "HexagonGenDFAPacketizer.inc" -using namespace llvm; - cl::opt<bool> ScheduleInlineAsm("hexagon-sched-inline-asm", cl::Hidden, cl::init(false), cl::desc("Do not consider inline-asm a scheduling/" "packetization boundary.")); @@ -67,6 +86,10 @@ static cl::opt<bool> EnableACCForwarding( static cl::opt<bool> BranchRelaxAsmLarge("branch-relax-asm-large", cl::init(true), cl::Hidden, cl::ZeroOrMore, cl::desc("branch relax asm")); +static cl::opt<bool> UseDFAHazardRec("dfa-hazard-rec", + cl::init(true), cl::Hidden, cl::ZeroOrMore, + cl::desc("Use the DFA based hazard recognizer.")); + /// /// Constants for Hexagon instructions. /// @@ -104,19 +127,16 @@ HexagonInstrInfo::HexagonInstrInfo(HexagonSubtarget &ST) : HexagonGenInstrInfo(Hexagon::ADJCALLSTACKDOWN, Hexagon::ADJCALLSTACKUP), RI() {} - static bool isIntRegForSubInst(unsigned Reg) { return (Reg >= Hexagon::R0 && Reg <= Hexagon::R7) || (Reg >= Hexagon::R16 && Reg <= Hexagon::R23); } - static bool isDblRegForSubInst(unsigned Reg, const HexagonRegisterInfo &HRI) { - return isIntRegForSubInst(HRI.getSubReg(Reg, Hexagon::subreg_loreg)) && - isIntRegForSubInst(HRI.getSubReg(Reg, Hexagon::subreg_hireg)); + return isIntRegForSubInst(HRI.getSubReg(Reg, Hexagon::isub_lo)) && + isIntRegForSubInst(HRI.getSubReg(Reg, Hexagon::isub_hi)); } - /// Calculate number of instructions excluding the debug instructions. 
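The two predicates above encode a duplex-encoding limit: a sub-instruction's register field can only name the sixteen GPRs r0-r7 and r16-r23, and a 64-bit pair qualifies only when both of its isub_lo/isub_hi halves do. Restated over plain register numbers (sketch; helper names are hypothetical):

// Reg is the GPR number 0..31 (r0..r31).
bool isSubInstGpr(unsigned Reg) {
  return Reg <= 7 || (Reg >= 16 && Reg <= 23);
}

// A 64-bit pair is usable only if both 32-bit halves are.
bool isSubInstGprPair(unsigned LoReg, unsigned HiReg) {
  return isSubInstGpr(LoReg) && isSubInstGpr(HiReg);
}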
static unsigned nonDbgMICount(MachineBasicBlock::const_instr_iterator MIB, MachineBasicBlock::const_instr_iterator MIE) { @@ -128,7 +148,6 @@ static unsigned nonDbgMICount(MachineBasicBlock::const_instr_iterator MIB, return Count; } - /// Find the hardware loop instruction used to set-up the specified loop. /// On Hexagon, we have two instructions used to set-up the hardware loop /// (LOOP0, LOOP1) with corresponding endloop (ENDLOOP0, ENDLOOP1) instructions @@ -160,27 +179,26 @@ static MachineInstr *findLoopInstr(MachineBasicBlock *BB, int EndLoopOp, return &*I; // We've reached a different loop, which means the loop0 has been removed. if (Opc == EndLoopOp) - return 0; + return nullptr; } // Check the predecessors for the LOOP instruction. MachineInstr *loop = findLoopInstr(*PB, EndLoopOp, Visited); if (loop) return loop; } - return 0; + return nullptr; } - /// Gather register def/uses from MI. /// This treats possible (predicated) defs as actually happening ones /// (conservatively). -static inline void parseOperands(const MachineInstr *MI, +static inline void parseOperands(const MachineInstr &MI, SmallVector<unsigned, 4> &Defs, SmallVector<unsigned, 8> &Uses) { Defs.clear(); Uses.clear(); - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI.getOperand(i); if (!MO.isReg()) continue; @@ -197,7 +215,6 @@ static inline void parseOperands(const MachineInstr *MI, } } - // Position dependent, so check twice for swap. static bool isDuplexPairMatch(unsigned Ga, unsigned Gb) { switch (Ga) { @@ -224,8 +241,6 @@ static bool isDuplexPairMatch(unsigned Ga, unsigned Gb) { return false; } - - /// isLoadFromStackSlot - If the specified machine instruction is a direct /// load from a stack slot, return the virtual or physical register number of /// the destination along with the FrameIndex of the loaded stack slot. 
If @@ -236,10 +251,6 @@ unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, switch (MI.getOpcode()) { default: break; - case Hexagon::L2_loadrb_io: - case Hexagon::L2_loadrub_io: - case Hexagon::L2_loadrh_io: - case Hexagon::L2_loadruh_io: case Hexagon::L2_loadri_io: case Hexagon::L2_loadrd_io: case Hexagon::V6_vL32b_ai: @@ -248,14 +259,10 @@ unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, case Hexagon::V6_vL32Ub_ai_128B: case Hexagon::LDriw_pred: case Hexagon::LDriw_mod: - case Hexagon::LDriq_pred_V6: - case Hexagon::LDriq_pred_vec_V6: - case Hexagon::LDriv_pseudo_V6: - case Hexagon::LDrivv_pseudo_V6: - case Hexagon::LDriq_pred_V6_128B: - case Hexagon::LDriq_pred_vec_V6_128B: - case Hexagon::LDriv_pseudo_V6_128B: - case Hexagon::LDrivv_pseudo_V6_128B: { + case Hexagon::PS_vloadrq_ai: + case Hexagon::PS_vloadrw_ai: + case Hexagon::PS_vloadrq_ai_128B: + case Hexagon::PS_vloadrw_ai_128B: { const MachineOperand OpFI = MI.getOperand(1); if (!OpFI.isFI()) return 0; @@ -266,14 +273,6 @@ unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, return MI.getOperand(0).getReg(); } - case Hexagon::L2_ploadrbt_io: - case Hexagon::L2_ploadrbf_io: - case Hexagon::L2_ploadrubt_io: - case Hexagon::L2_ploadrubf_io: - case Hexagon::L2_ploadrht_io: - case Hexagon::L2_ploadrhf_io: - case Hexagon::L2_ploadruht_io: - case Hexagon::L2_ploadruhf_io: case Hexagon::L2_ploadrit_io: case Hexagon::L2_ploadrif_io: case Hexagon::L2_ploadrdt_io: @@ -292,7 +291,6 @@ unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, return 0; } - /// isStoreToStackSlot - If the specified machine instruction is a direct /// store to a stack slot, return the virtual or physical register number of /// the source reg along with the FrameIndex of the loaded stack slot. If @@ -313,14 +311,10 @@ unsigned HexagonInstrInfo::isStoreToStackSlot(const MachineInstr &MI, case Hexagon::V6_vS32Ub_ai_128B: case Hexagon::STriw_pred: case Hexagon::STriw_mod: - case Hexagon::STriq_pred_V6: - case Hexagon::STriq_pred_vec_V6: - case Hexagon::STriv_pseudo_V6: - case Hexagon::STrivv_pseudo_V6: - case Hexagon::STriq_pred_V6_128B: - case Hexagon::STriq_pred_vec_V6_128B: - case Hexagon::STriv_pseudo_V6_128B: - case Hexagon::STrivv_pseudo_V6_128B: { + case Hexagon::PS_vstorerq_ai: + case Hexagon::PS_vstorerw_ai: + case Hexagon::PS_vstorerq_ai_128B: + case Hexagon::PS_vstorerw_ai_128B: { const MachineOperand &OpFI = MI.getOperand(0); if (!OpFI.isFI()) return 0; @@ -353,7 +347,6 @@ unsigned HexagonInstrInfo::isStoreToStackSlot(const MachineInstr &MI, return 0; } - /// This function can analyze one/two way branching only and should (mostly) be /// called by target independent side. /// First entry is always the opcode of the branching instruction, except when @@ -417,7 +410,7 @@ bool HexagonInstrInfo::analyzeBranch(MachineBasicBlock &MBB, // Delete the J2_jump if it's equivalent to a fall-through. if (AllowModify && JumpToBlock && MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) { - DEBUG(dbgs()<< "\nErasing the jump to successor block\n";); + DEBUG(dbgs() << "\nErasing the jump to successor block\n";); I->eraseFromParent(); I = MBB.instr_end(); if (I == MBB.instr_begin()) @@ -431,7 +424,7 @@ bool HexagonInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineInstr *LastInst = &*I; MachineInstr *SecondLastInst = nullptr; // Find one more terminator if present. 
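A contract note for the isLoadFromStackSlot/isStoreToStackSlot overrides in this hunk: a nonzero return value is the register being reloaded or spilled, the FrameIndex out-parameter receives the slot, and 0 means the instruction is not a direct stack access (note the byte/halfword and predicated load variants are removed from the lists in this change). A hedged usage sketch, with HII and MI standing in for a real HexagonInstrInfo and MachineInstr:

int FI = 0;
if (unsigned Reg = HII.isLoadFromStackSlot(MI, FI)) {
  // MI directly reloads Reg from stack slot FI.
}
if (unsigned Reg = HII.isStoreToStackSlot(MI, FI)) {
  // MI directly spills Reg to stack slot FI.
}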
- for (;;) { + while (true) { if (&*I != LastInst && !I->isBundle() && isUnpredicatedTerminator(*I)) { if (!SecondLastInst) SecondLastInst = &*I; @@ -455,7 +448,7 @@ bool HexagonInstrInfo::analyzeBranch(MachineBasicBlock &MBB, return true; bool LastOpcodeHasJMP_c = PredOpcodeHasJMP_c(LastOpcode); - bool LastOpcodeHasNVJump = isNewValueJump(LastInst); + bool LastOpcodeHasNVJump = isNewValueJump(*LastInst); if (LastOpcodeHasJMP_c && !LastInst->getOperand(1).isMBB()) return true; @@ -493,7 +486,7 @@ bool HexagonInstrInfo::analyzeBranch(MachineBasicBlock &MBB, } bool SecLastOpcodeHasJMP_c = PredOpcodeHasJMP_c(SecLastOpcode); - bool SecLastOpcodeHasNVJump = isNewValueJump(SecondLastInst); + bool SecLastOpcodeHasNVJump = isNewValueJump(*SecondLastInst); if (SecLastOpcodeHasJMP_c && (LastOpcode == Hexagon::J2_jump)) { if (!SecondLastInst->getOperand(1).isMBB()) return true; @@ -540,8 +533,10 @@ bool HexagonInstrInfo::analyzeBranch(MachineBasicBlock &MBB, return true; } +unsigned HexagonInstrInfo::removeBranch(MachineBasicBlock &MBB, + int *BytesRemoved) const { + assert(!BytesRemoved && "code size not handled"); -unsigned HexagonInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { DEBUG(dbgs() << "\nRemoving branches out of BB#" << MBB.getNumber()); MachineBasicBlock::iterator I = MBB.end(); unsigned Count = 0; @@ -561,17 +556,19 @@ unsigned HexagonInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { return Count; } -unsigned HexagonInstrInfo::InsertBranch(MachineBasicBlock &MBB, +unsigned HexagonInstrInfo::insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond, - const DebugLoc &DL) const { + const DebugLoc &DL, + int *BytesAdded) const { unsigned BOpc = Hexagon::J2_jump; unsigned BccOpc = Hexagon::J2_jumpt; assert(validateBranchCond(Cond) && "Invalid branching condition"); - assert(TBB && "InsertBranch must not be told to insert a fallthrough"); + assert(TBB && "insertBranch must not be told to insert a fallthrough"); + assert(!BytesAdded && "code size not handled"); - // Check if ReverseBranchCondition has asked to reverse this branch + // Check if reverseBranchCondition has asked to reverse this branch // If we want to reverse the branch an odd number of times, we want // J2_jumpf. if (!Cond.empty() && Cond[0].isImm()) @@ -587,13 +584,11 @@ unsigned HexagonInstrInfo::InsertBranch(MachineBasicBlock &MBB, SmallVector<MachineOperand, 4> Cond; auto Term = MBB.getFirstTerminator(); if (Term != MBB.end() && isPredicated(*Term) && - !analyzeBranch(MBB, NewTBB, NewFBB, Cond, false)) { - MachineBasicBlock *NextBB = &*++MBB.getIterator(); - if (NewTBB == NextBB) { - ReverseBranchCondition(Cond); - RemoveBranch(MBB); - return InsertBranch(MBB, TBB, nullptr, Cond, DL); - } + !analyzeBranch(MBB, NewTBB, NewFBB, Cond, false) && + MachineFunction::iterator(NewTBB) == ++MBB.getIterator()) { + reverseBranchCondition(Cond); + removeBranch(MBB); + return insertBranch(MBB, TBB, nullptr, Cond, DL); } BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB); } else if (isEndLoopN(Cond[0].getImm())) { @@ -657,6 +652,85 @@ unsigned HexagonInstrInfo::InsertBranch(MachineBasicBlock &MBB, return 2; } +/// Analyze the loop code to find the loop induction variable and compare used +/// to compute the number of iterations. Currently, we analyze loop that are +/// controlled using hardware loops. In this case, the induction variable +/// instruction is null. For all other cases, this function returns true, which +/// means we're unable to analyze it. 
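One convention in this hunk worth spelling out: an analyzed branch condition stores the jump opcode itself as Cond[0] (an immediate), with the tested predicate operand following, which is why insertBranch checks Cond[0].isImm() and why reversing a condition an odd number of times simply selects J2_jumpf. A sketch of building such a condition (PredReg is a placeholder):

SmallVector<MachineOperand, 4> Cond;
// Opcode first: J2_jumpt for the normal sense, J2_jumpf if reversed.
Cond.push_back(MachineOperand::CreateImm(Hexagon::J2_jumpt));
// Then the predicate register the jump tests.
Cond.push_back(MachineOperand::CreateReg(PredReg, /*isDef=*/false));

reduceLoopCount below hands back exactly this shape when it pushes J2_jumpf and the compare it generates.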
+bool HexagonInstrInfo::analyzeLoop(MachineLoop &L, + MachineInstr *&IndVarInst, + MachineInstr *&CmpInst) const { + + MachineBasicBlock *LoopEnd = L.getBottomBlock(); + MachineBasicBlock::iterator I = LoopEnd->getFirstTerminator(); + // We really "analyze" only hardware loops right now. + if (I != LoopEnd->end() && isEndLoopN(I->getOpcode())) { + IndVarInst = nullptr; + CmpInst = &*I; + return false; + } + return true; +} + +/// Generate code to reduce the loop iteration by one and check if the loop is +/// finished. Return the value/register of the new loop count. this function +/// assumes the nth iteration is peeled first. +unsigned HexagonInstrInfo::reduceLoopCount(MachineBasicBlock &MBB, + MachineInstr *IndVar, MachineInstr &Cmp, + SmallVectorImpl<MachineOperand> &Cond, + SmallVectorImpl<MachineInstr *> &PrevInsts, + unsigned Iter, unsigned MaxIter) const { + // We expect a hardware loop currently. This means that IndVar is set + // to null, and the compare is the ENDLOOP instruction. + assert((!IndVar) && isEndLoopN(Cmp.getOpcode()) + && "Expecting a hardware loop"); + MachineFunction *MF = MBB.getParent(); + DebugLoc DL = Cmp.getDebugLoc(); + SmallPtrSet<MachineBasicBlock *, 8> VisitedBBs; + MachineInstr *Loop = findLoopInstr(&MBB, Cmp.getOpcode(), VisitedBBs); + if (!Loop) + return 0; + // If the loop trip count is a compile-time value, then just change the + // value. + if (Loop->getOpcode() == Hexagon::J2_loop0i || + Loop->getOpcode() == Hexagon::J2_loop1i) { + int64_t Offset = Loop->getOperand(1).getImm(); + if (Offset <= 1) + Loop->eraseFromParent(); + else + Loop->getOperand(1).setImm(Offset - 1); + return Offset - 1; + } + // The loop trip count is a run-time value. We generate code to subtract + // one from the trip count, and update the loop instruction. + assert(Loop->getOpcode() == Hexagon::J2_loop0r && "Unexpected instruction"); + unsigned LoopCount = Loop->getOperand(1).getReg(); + // Check if we're done with the loop. + unsigned LoopEnd = createVR(MF, MVT::i1); + MachineInstr *NewCmp = BuildMI(&MBB, DL, get(Hexagon::C2_cmpgtui), LoopEnd). + addReg(LoopCount).addImm(1); + unsigned NewLoopCount = createVR(MF, MVT::i32); + MachineInstr *NewAdd = BuildMI(&MBB, DL, get(Hexagon::A2_addi), NewLoopCount). + addReg(LoopCount).addImm(-1); + // Update the previously generated instructions with the new loop counter. + for (SmallVectorImpl<MachineInstr *>::iterator I = PrevInsts.begin(), + E = PrevInsts.end(); I != E; ++I) + (*I)->substituteRegister(LoopCount, NewLoopCount, 0, getRegisterInfo()); + PrevInsts.clear(); + PrevInsts.push_back(NewCmp); + PrevInsts.push_back(NewAdd); + // Insert the new loop instruction if this is the last time the loop is + // decremented. + if (Iter == MaxIter) + BuildMI(&MBB, DL, get(Hexagon::J2_loop0r)). + addMBB(Loop->getOperand(0).getMBB()).addReg(NewLoopCount); + // Delete the old loop instruction. 
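// (Sketch, not part of the patch: the run-time path above, in
// Hexagon terms.) For a trip count held in LoopCount the code emits:
//   p     = cmp.gtu(LoopCount, #1)   ; C2_cmpgtui -- iterations left?
//   NewLC = add(LoopCount, #-1)      ; A2_addi    -- decremented count
// Earlier peeled copies are rewritten to use NewLC, a fresh
// loop0(<header>, NewLC) (J2_loop0r) is emitted on the final peel
// (Iter == MaxIter), and the line just below erases the original
// loop setup on the first call (Iter == 0).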
+ if (Iter == 0) + Loop->eraseFromParent(); + Cond.push_back(MachineOperand::CreateImm(Hexagon::J2_jumpf)); + Cond.push_back(NewCmp->getOperand(0)); + return NewLoopCount; +} bool HexagonInstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, unsigned ExtraPredCycles, @@ -664,7 +738,6 @@ bool HexagonInstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB, return nonDbgBBSize(&MBB) <= 3; } - bool HexagonInstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumTCycles, unsigned ExtraTCycles, MachineBasicBlock &FMBB, unsigned NumFCycles, unsigned ExtraFCycles, BranchProbability Probability) @@ -672,7 +745,6 @@ bool HexagonInstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB, return nonDbgBBSize(&TMBB) <= 3 && nonDbgBBSize(&FMBB) <= 3; } - bool HexagonInstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumInstrs, BranchProbability Probability) const { return NumInstrs <= 4; @@ -743,9 +815,11 @@ void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } if (Hexagon::VecDblRegsRegClass.contains(SrcReg, DestReg)) { + unsigned LoSrc = HRI.getSubReg(SrcReg, Hexagon::vsub_lo); + unsigned HiSrc = HRI.getSubReg(SrcReg, Hexagon::vsub_hi); BuildMI(MBB, I, DL, get(Hexagon::V6_vcombine), DestReg) - .addReg(HRI.getSubReg(SrcReg, Hexagon::subreg_hireg), KillFlag) - .addReg(HRI.getSubReg(SrcReg, Hexagon::subreg_loreg), KillFlag); + .addReg(HiSrc, KillFlag) + .addReg(LoSrc, KillFlag); return; } if (Hexagon::VecPredRegsRegClass.contains(SrcReg, DestReg)) { @@ -765,12 +839,14 @@ void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } if (Hexagon::VecPredRegs128BRegClass.contains(SrcReg, DestReg)) { - unsigned DstHi = HRI.getSubReg(DestReg, Hexagon::subreg_hireg); - BuildMI(MBB, I, DL, get(Hexagon::V6_pred_and), DstHi) - .addReg(HRI.getSubReg(SrcReg, Hexagon::subreg_hireg), KillFlag); - unsigned DstLo = HRI.getSubReg(DestReg, Hexagon::subreg_loreg); - BuildMI(MBB, I, DL, get(Hexagon::V6_pred_and), DstLo) - .addReg(HRI.getSubReg(SrcReg, Hexagon::subreg_loreg), KillFlag); + unsigned HiDst = HRI.getSubReg(DestReg, Hexagon::vsub_hi); + unsigned LoDst = HRI.getSubReg(DestReg, Hexagon::vsub_lo); + unsigned HiSrc = HRI.getSubReg(SrcReg, Hexagon::vsub_hi); + unsigned LoSrc = HRI.getSubReg(SrcReg, Hexagon::vsub_lo); + BuildMI(MBB, I, DL, get(Hexagon::V6_pred_and), HiDst) + .addReg(HiSrc, KillFlag); + BuildMI(MBB, I, DL, get(Hexagon::V6_pred_and), LoDst) + .addReg(LoSrc, KillFlag); return; } @@ -783,13 +859,12 @@ void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB, llvm_unreachable("Unimplemented"); } - void HexagonInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { DebugLoc DL = MBB.findDebugLoc(I); MachineFunction &MF = *MBB.getParent(); - MachineFrameInfo &MFI = *MF.getFrameInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); unsigned Align = MFI.getObjectAlignment(FI); unsigned KillFlag = getKillRegState(isKill); @@ -814,31 +889,35 @@ void HexagonInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, .addFrameIndex(FI).addImm(0) .addReg(SrcReg, KillFlag).addMemOperand(MMO); } else if (Hexagon::VecPredRegs128BRegClass.hasSubClassEq(RC)) { - BuildMI(MBB, I, DL, get(Hexagon::STriq_pred_V6_128B)) + BuildMI(MBB, I, DL, get(Hexagon::PS_vstorerq_ai_128B)) .addFrameIndex(FI).addImm(0) .addReg(SrcReg, KillFlag).addMemOperand(MMO); } else if (Hexagon::VecPredRegsRegClass.hasSubClassEq(RC)) { - BuildMI(MBB, I, DL, 
get(Hexagon::STriq_pred_V6)) + BuildMI(MBB, I, DL, get(Hexagon::PS_vstorerq_ai)) .addFrameIndex(FI).addImm(0) .addReg(SrcReg, KillFlag).addMemOperand(MMO); } else if (Hexagon::VectorRegs128BRegClass.hasSubClassEq(RC)) { - DEBUG(dbgs() << "++Generating 128B vector spill"); - BuildMI(MBB, I, DL, get(Hexagon::STriv_pseudo_V6_128B)) + unsigned Opc = Align < 128 ? Hexagon::V6_vS32Ub_ai_128B + : Hexagon::V6_vS32b_ai_128B; + BuildMI(MBB, I, DL, get(Opc)) .addFrameIndex(FI).addImm(0) .addReg(SrcReg, KillFlag).addMemOperand(MMO); } else if (Hexagon::VectorRegsRegClass.hasSubClassEq(RC)) { - DEBUG(dbgs() << "++Generating vector spill"); - BuildMI(MBB, I, DL, get(Hexagon::STriv_pseudo_V6)) + unsigned Opc = Align < 64 ? Hexagon::V6_vS32Ub_ai + : Hexagon::V6_vS32b_ai; + BuildMI(MBB, I, DL, get(Opc)) .addFrameIndex(FI).addImm(0) .addReg(SrcReg, KillFlag).addMemOperand(MMO); } else if (Hexagon::VecDblRegsRegClass.hasSubClassEq(RC)) { - DEBUG(dbgs() << "++Generating double vector spill"); - BuildMI(MBB, I, DL, get(Hexagon::STrivv_pseudo_V6)) + unsigned Opc = Align < 64 ? Hexagon::PS_vstorerwu_ai + : Hexagon::PS_vstorerw_ai; + BuildMI(MBB, I, DL, get(Opc)) .addFrameIndex(FI).addImm(0) .addReg(SrcReg, KillFlag).addMemOperand(MMO); } else if (Hexagon::VecDblRegs128BRegClass.hasSubClassEq(RC)) { - DEBUG(dbgs() << "++Generating 128B double vector spill"); - BuildMI(MBB, I, DL, get(Hexagon::STrivv_pseudo_V6_128B)) + unsigned Opc = Align < 128 ? Hexagon::PS_vstorerwu_ai_128B + : Hexagon::PS_vstorerw_ai_128B; + BuildMI(MBB, I, DL, get(Opc)) .addFrameIndex(FI).addImm(0) .addReg(SrcReg, KillFlag).addMemOperand(MMO); } else { @@ -852,7 +931,7 @@ void HexagonInstrInfo::loadRegFromStackSlot( const TargetRegisterInfo *TRI) const { DebugLoc DL = MBB.findDebugLoc(I); MachineFunction &MF = *MBB.getParent(); - MachineFrameInfo &MFI = *MF.getFrameInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); unsigned Align = MFI.getObjectAlignment(FI); MachineMemOperand *MMO = MF.getMachineMemOperand( @@ -872,32 +951,43 @@ void HexagonInstrInfo::loadRegFromStackSlot( BuildMI(MBB, I, DL, get(Hexagon::LDriw_mod), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO); } else if (Hexagon::VecPredRegs128BRegClass.hasSubClassEq(RC)) { - BuildMI(MBB, I, DL, get(Hexagon::LDriq_pred_V6_128B), DestReg) + BuildMI(MBB, I, DL, get(Hexagon::PS_vloadrq_ai_128B), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO); } else if (Hexagon::VecPredRegsRegClass.hasSubClassEq(RC)) { - BuildMI(MBB, I, DL, get(Hexagon::LDriq_pred_V6), DestReg) + BuildMI(MBB, I, DL, get(Hexagon::PS_vloadrq_ai), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO); } else if (Hexagon::VecDblRegs128BRegClass.hasSubClassEq(RC)) { - DEBUG(dbgs() << "++Generating 128B double vector restore"); - BuildMI(MBB, I, DL, get(Hexagon::LDrivv_pseudo_V6_128B), DestReg) + unsigned Opc = Align < 128 ? Hexagon::PS_vloadrwu_ai_128B + : Hexagon::PS_vloadrw_ai_128B; + BuildMI(MBB, I, DL, get(Opc), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO); } else if (Hexagon::VectorRegs128BRegClass.hasSubClassEq(RC)) { - DEBUG(dbgs() << "++Generating 128B vector restore"); - BuildMI(MBB, I, DL, get(Hexagon::LDriv_pseudo_V6_128B), DestReg) + unsigned Opc = Align < 128 ? 
Hexagon::V6_vL32Ub_ai_128B + : Hexagon::V6_vL32b_ai_128B; + BuildMI(MBB, I, DL, get(Opc), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO); } else if (Hexagon::VectorRegsRegClass.hasSubClassEq(RC)) { - DEBUG(dbgs() << "++Generating vector restore"); - BuildMI(MBB, I, DL, get(Hexagon::LDriv_pseudo_V6), DestReg) + unsigned Opc = Align < 64 ? Hexagon::V6_vL32Ub_ai + : Hexagon::V6_vL32b_ai; + BuildMI(MBB, I, DL, get(Opc), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO); } else if (Hexagon::VecDblRegsRegClass.hasSubClassEq(RC)) { - DEBUG(dbgs() << "++Generating double vector restore"); - BuildMI(MBB, I, DL, get(Hexagon::LDrivv_pseudo_V6), DestReg) + unsigned Opc = Align < 64 ? Hexagon::PS_vloadrwu_ai + : Hexagon::PS_vloadrw_ai; + BuildMI(MBB, I, DL, get(Opc), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO); } else { llvm_unreachable("Can't store this register to stack slot"); } } +static void getLiveRegsAt(LivePhysRegs &Regs, const MachineInstr &MI) { + const MachineBasicBlock &B = *MI.getParent(); + Regs.addLiveOuts(B); + auto E = ++MachineBasicBlock::const_iterator(MI.getIterator()).getReverse(); + for (auto I = B.rbegin(); I != E; ++I) + Regs.stepBackward(*I); +} /// expandPostRAPseudo - This function is called for all pseudo instructions /// that remain after register allocation. Many pseudo instructions are @@ -912,7 +1002,6 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { DebugLoc DL = MI.getDebugLoc(); unsigned Opc = MI.getOpcode(); const unsigned VecOffset = 1; - bool Is128B = false; switch (Opc) { case TargetOpcode::COPY: { @@ -926,58 +1015,71 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { MBB.erase(MBBI); return true; } - case Hexagon::ALIGNA: + case Hexagon::PS_aligna: BuildMI(MBB, MI, DL, get(Hexagon::A2_andir), MI.getOperand(0).getReg()) .addReg(HRI.getFrameRegister()) .addImm(-MI.getOperand(1).getImm()); MBB.erase(MI); return true; - case Hexagon::HEXAGON_V6_vassignp_128B: - case Hexagon::HEXAGON_V6_vassignp: { + case Hexagon::V6_vassignp_128B: + case Hexagon::V6_vassignp: { unsigned SrcReg = MI.getOperand(1).getReg(); unsigned DstReg = MI.getOperand(0).getReg(); - if (SrcReg != DstReg) - copyPhysReg(MBB, MI, DL, DstReg, SrcReg, MI.getOperand(1).isKill()); + unsigned Kill = getKillRegState(MI.getOperand(1).isKill()); + BuildMI(MBB, MI, DL, get(Hexagon::V6_vcombine), DstReg) + .addReg(HRI.getSubReg(SrcReg, Hexagon::vsub_hi), Kill) + .addReg(HRI.getSubReg(SrcReg, Hexagon::vsub_lo), Kill); MBB.erase(MI); return true; } - case Hexagon::HEXAGON_V6_lo_128B: - case Hexagon::HEXAGON_V6_lo: { + case Hexagon::V6_lo_128B: + case Hexagon::V6_lo: { unsigned SrcReg = MI.getOperand(1).getReg(); unsigned DstReg = MI.getOperand(0).getReg(); - unsigned SrcSubLo = HRI.getSubReg(SrcReg, Hexagon::subreg_loreg); + unsigned SrcSubLo = HRI.getSubReg(SrcReg, Hexagon::vsub_lo); copyPhysReg(MBB, MI, DL, DstReg, SrcSubLo, MI.getOperand(1).isKill()); MBB.erase(MI); MRI.clearKillFlags(SrcSubLo); return true; } - case Hexagon::HEXAGON_V6_hi_128B: - case Hexagon::HEXAGON_V6_hi: { + case Hexagon::V6_hi_128B: + case Hexagon::V6_hi: { unsigned SrcReg = MI.getOperand(1).getReg(); unsigned DstReg = MI.getOperand(0).getReg(); - unsigned SrcSubHi = HRI.getSubReg(SrcReg, Hexagon::subreg_hireg); + unsigned SrcSubHi = HRI.getSubReg(SrcReg, Hexagon::vsub_hi); copyPhysReg(MBB, MI, DL, DstReg, SrcSubHi, MI.getOperand(1).isKill()); MBB.erase(MI); MRI.clearKillFlags(SrcSubHi); return true; } - case Hexagon::STrivv_indexed_128B: - Is128B = true; - case 
Hexagon::STrivv_indexed: { + case Hexagon::PS_vstorerw_ai: + case Hexagon::PS_vstorerwu_ai: + case Hexagon::PS_vstorerw_ai_128B: + case Hexagon::PS_vstorerwu_ai_128B: { + bool Is128B = (Opc == Hexagon::PS_vstorerw_ai_128B || + Opc == Hexagon::PS_vstorerwu_ai_128B); + bool Aligned = (Opc == Hexagon::PS_vstorerw_ai || + Opc == Hexagon::PS_vstorerw_ai_128B); unsigned SrcReg = MI.getOperand(2).getReg(); - unsigned SrcSubHi = HRI.getSubReg(SrcReg, Hexagon::subreg_hireg); - unsigned SrcSubLo = HRI.getSubReg(SrcReg, Hexagon::subreg_loreg); - unsigned NewOpcd = Is128B ? Hexagon::V6_vS32b_ai_128B - : Hexagon::V6_vS32b_ai; + unsigned SrcSubHi = HRI.getSubReg(SrcReg, Hexagon::vsub_hi); + unsigned SrcSubLo = HRI.getSubReg(SrcReg, Hexagon::vsub_lo); + unsigned NewOpc; + if (Aligned) + NewOpc = Is128B ? Hexagon::V6_vS32b_ai_128B + : Hexagon::V6_vS32b_ai; + else + NewOpc = Is128B ? Hexagon::V6_vS32Ub_ai_128B + : Hexagon::V6_vS32Ub_ai; + unsigned Offset = Is128B ? VecOffset << 7 : VecOffset << 6; MachineInstr *MI1New = - BuildMI(MBB, MI, DL, get(NewOpcd)) + BuildMI(MBB, MI, DL, get(NewOpc)) .addOperand(MI.getOperand(0)) .addImm(MI.getOperand(1).getImm()) .addReg(SrcSubLo) .setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); MI1New->getOperand(0).setIsKill(false); - BuildMI(MBB, MI, DL, get(NewOpcd)) + BuildMI(MBB, MI, DL, get(NewOpc)) .addOperand(MI.getOperand(0)) // The Vectors are indexed in multiples of vector size. .addImm(MI.getOperand(1).getImm() + Offset) @@ -986,23 +1088,32 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { MBB.erase(MI); return true; } - case Hexagon::LDrivv_pseudo_V6_128B: - case Hexagon::LDrivv_indexed_128B: - Is128B = true; - case Hexagon::LDrivv_pseudo_V6: - case Hexagon::LDrivv_indexed: { - unsigned NewOpcd = Is128B ? Hexagon::V6_vL32b_ai_128B - : Hexagon::V6_vL32b_ai; + case Hexagon::PS_vloadrw_ai: + case Hexagon::PS_vloadrwu_ai: + case Hexagon::PS_vloadrw_ai_128B: + case Hexagon::PS_vloadrwu_ai_128B: { + bool Is128B = (Opc == Hexagon::PS_vloadrw_ai_128B || + Opc == Hexagon::PS_vloadrwu_ai_128B); + bool Aligned = (Opc == Hexagon::PS_vloadrw_ai || + Opc == Hexagon::PS_vloadrw_ai_128B); + unsigned NewOpc; + if (Aligned) + NewOpc = Is128B ? Hexagon::V6_vL32b_ai_128B + : Hexagon::V6_vL32b_ai; + else + NewOpc = Is128B ? Hexagon::V6_vL32Ub_ai_128B + : Hexagon::V6_vL32Ub_ai; + unsigned DstReg = MI.getOperand(0).getReg(); unsigned Offset = Is128B ? VecOffset << 7 : VecOffset << 6; MachineInstr *MI1New = - BuildMI(MBB, MI, DL, get(NewOpcd), - HRI.getSubReg(DstReg, Hexagon::subreg_loreg)) + BuildMI(MBB, MI, DL, get(NewOpc), + HRI.getSubReg(DstReg, Hexagon::vsub_lo)) .addOperand(MI.getOperand(1)) .addImm(MI.getOperand(2).getImm()); MI1New->getOperand(1).setIsKill(false); - BuildMI(MBB, MI, DL, get(NewOpcd), - HRI.getSubReg(DstReg, Hexagon::subreg_hireg)) + BuildMI(MBB, MI, DL, get(NewOpc), + HRI.getSubReg(DstReg, Hexagon::vsub_hi)) .addOperand(MI.getOperand(1)) // The Vectors are indexed in multiples of vector size. .addImm(MI.getOperand(2).getImm() + Offset) @@ -1010,35 +1121,7 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { MBB.erase(MI); return true; } - case Hexagon::LDriv_pseudo_V6_128B: - Is128B = true; - case Hexagon::LDriv_pseudo_V6: { - unsigned DstReg = MI.getOperand(0).getReg(); - unsigned NewOpc = Is128B ? 
Hexagon::V6_vL32b_ai_128B - : Hexagon::V6_vL32b_ai; - int32_t Off = MI.getOperand(2).getImm(); - BuildMI(MBB, MI, DL, get(NewOpc), DstReg) - .addOperand(MI.getOperand(1)) - .addImm(Off) - .setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); - MBB.erase(MI); - return true; - } - case Hexagon::STriv_pseudo_V6_128B: - Is128B = true; - case Hexagon::STriv_pseudo_V6: { - unsigned NewOpc = Is128B ? Hexagon::V6_vS32b_ai_128B - : Hexagon::V6_vS32b_ai; - int32_t Off = MI.getOperand(1).getImm(); - BuildMI(MBB, MI, DL, get(NewOpc)) - .addOperand(MI.getOperand(0)) - .addImm(Off) - .addOperand(MI.getOperand(2)) - .setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); - MBB.erase(MI); - return true; - } - case Hexagon::TFR_PdTrue: { + case Hexagon::PS_true: { unsigned Reg = MI.getOperand(0).getReg(); BuildMI(MBB, MI, DL, get(Hexagon::C2_orn), Reg) .addReg(Reg, RegState::Undef) @@ -1046,7 +1129,7 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { MBB.erase(MI); return true; } - case Hexagon::TFR_PdFalse: { + case Hexagon::PS_false: { unsigned Reg = MI.getOperand(0).getReg(); BuildMI(MBB, MI, DL, get(Hexagon::C2_andn), Reg) .addReg(Reg, RegState::Undef) @@ -1054,21 +1137,21 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { MBB.erase(MI); return true; } - case Hexagon::VMULW: { + case Hexagon::PS_vmulw: { // Expand a 64-bit vector multiply into 2 32-bit scalar multiplies. unsigned DstReg = MI.getOperand(0).getReg(); unsigned Src1Reg = MI.getOperand(1).getReg(); unsigned Src2Reg = MI.getOperand(2).getReg(); - unsigned Src1SubHi = HRI.getSubReg(Src1Reg, Hexagon::subreg_hireg); - unsigned Src1SubLo = HRI.getSubReg(Src1Reg, Hexagon::subreg_loreg); - unsigned Src2SubHi = HRI.getSubReg(Src2Reg, Hexagon::subreg_hireg); - unsigned Src2SubLo = HRI.getSubReg(Src2Reg, Hexagon::subreg_loreg); + unsigned Src1SubHi = HRI.getSubReg(Src1Reg, Hexagon::isub_hi); + unsigned Src1SubLo = HRI.getSubReg(Src1Reg, Hexagon::isub_lo); + unsigned Src2SubHi = HRI.getSubReg(Src2Reg, Hexagon::isub_hi); + unsigned Src2SubLo = HRI.getSubReg(Src2Reg, Hexagon::isub_lo); BuildMI(MBB, MI, MI.getDebugLoc(), get(Hexagon::M2_mpyi), - HRI.getSubReg(DstReg, Hexagon::subreg_hireg)) + HRI.getSubReg(DstReg, Hexagon::isub_hi)) .addReg(Src1SubHi) .addReg(Src2SubHi); BuildMI(MBB, MI, MI.getDebugLoc(), get(Hexagon::M2_mpyi), - HRI.getSubReg(DstReg, Hexagon::subreg_loreg)) + HRI.getSubReg(DstReg, Hexagon::isub_lo)) .addReg(Src1SubLo) .addReg(Src2SubLo); MBB.erase(MI); @@ -1078,25 +1161,25 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { MRI.clearKillFlags(Src2SubLo); return true; } - case Hexagon::VMULW_ACC: { + case Hexagon::PS_vmulw_acc: { // Expand 64-bit vector multiply with addition into 2 scalar multiplies. 
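// (Sketch, not part of the patch.) PS_vmulw above splits a 64-bit
// 2x32 vector multiply across the integer subregister halves:
//   Dst.isub_hi = mpyi(Src1.isub_hi, Src2.isub_hi)   // M2_mpyi
//   Dst.isub_lo = mpyi(Src1.isub_lo, Src2.isub_lo)   // M2_mpyi
// PS_vmulw_acc below does the same with M2_maci, folding the extra
// operand in as an accumulator, roughly:
//   Dst.half = Src1.half + mpyi(Src2.half, Src3.half)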
unsigned DstReg = MI.getOperand(0).getReg(); unsigned Src1Reg = MI.getOperand(1).getReg(); unsigned Src2Reg = MI.getOperand(2).getReg(); unsigned Src3Reg = MI.getOperand(3).getReg(); - unsigned Src1SubHi = HRI.getSubReg(Src1Reg, Hexagon::subreg_hireg); - unsigned Src1SubLo = HRI.getSubReg(Src1Reg, Hexagon::subreg_loreg); - unsigned Src2SubHi = HRI.getSubReg(Src2Reg, Hexagon::subreg_hireg); - unsigned Src2SubLo = HRI.getSubReg(Src2Reg, Hexagon::subreg_loreg); - unsigned Src3SubHi = HRI.getSubReg(Src3Reg, Hexagon::subreg_hireg); - unsigned Src3SubLo = HRI.getSubReg(Src3Reg, Hexagon::subreg_loreg); + unsigned Src1SubHi = HRI.getSubReg(Src1Reg, Hexagon::isub_hi); + unsigned Src1SubLo = HRI.getSubReg(Src1Reg, Hexagon::isub_lo); + unsigned Src2SubHi = HRI.getSubReg(Src2Reg, Hexagon::isub_hi); + unsigned Src2SubLo = HRI.getSubReg(Src2Reg, Hexagon::isub_lo); + unsigned Src3SubHi = HRI.getSubReg(Src3Reg, Hexagon::isub_hi); + unsigned Src3SubLo = HRI.getSubReg(Src3Reg, Hexagon::isub_lo); BuildMI(MBB, MI, MI.getDebugLoc(), get(Hexagon::M2_maci), - HRI.getSubReg(DstReg, Hexagon::subreg_hireg)) + HRI.getSubReg(DstReg, Hexagon::isub_hi)) .addReg(Src1SubHi) .addReg(Src2SubHi) .addReg(Src3SubHi); BuildMI(MBB, MI, MI.getDebugLoc(), get(Hexagon::M2_maci), - HRI.getSubReg(DstReg, Hexagon::subreg_loreg)) + HRI.getSubReg(DstReg, Hexagon::isub_lo)) .addReg(Src1SubLo) .addReg(Src2SubLo) .addReg(Src3SubLo); @@ -1109,49 +1192,7 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { MRI.clearKillFlags(Src3SubLo); return true; } - case Hexagon::Insert4: { - unsigned DstReg = MI.getOperand(0).getReg(); - unsigned Src1Reg = MI.getOperand(1).getReg(); - unsigned Src2Reg = MI.getOperand(2).getReg(); - unsigned Src3Reg = MI.getOperand(3).getReg(); - unsigned Src4Reg = MI.getOperand(4).getReg(); - unsigned Src1RegIsKill = getKillRegState(MI.getOperand(1).isKill()); - unsigned Src2RegIsKill = getKillRegState(MI.getOperand(2).isKill()); - unsigned Src3RegIsKill = getKillRegState(MI.getOperand(3).isKill()); - unsigned Src4RegIsKill = getKillRegState(MI.getOperand(4).isKill()); - unsigned DstSubHi = HRI.getSubReg(DstReg, Hexagon::subreg_hireg); - unsigned DstSubLo = HRI.getSubReg(DstReg, Hexagon::subreg_loreg); - BuildMI(MBB, MI, MI.getDebugLoc(), get(Hexagon::S2_insert), - HRI.getSubReg(DstReg, Hexagon::subreg_loreg)) - .addReg(DstSubLo) - .addReg(Src1Reg, Src1RegIsKill) - .addImm(16) - .addImm(0); - BuildMI(MBB, MI, MI.getDebugLoc(), get(Hexagon::S2_insert), - HRI.getSubReg(DstReg, Hexagon::subreg_loreg)) - .addReg(DstSubLo) - .addReg(Src2Reg, Src2RegIsKill) - .addImm(16) - .addImm(16); - BuildMI(MBB, MI, MI.getDebugLoc(), get(Hexagon::S2_insert), - HRI.getSubReg(DstReg, Hexagon::subreg_hireg)) - .addReg(DstSubHi) - .addReg(Src3Reg, Src3RegIsKill) - .addImm(16) - .addImm(0); - BuildMI(MBB, MI, MI.getDebugLoc(), get(Hexagon::S2_insert), - HRI.getSubReg(DstReg, Hexagon::subreg_hireg)) - .addReg(DstSubHi) - .addReg(Src4Reg, Src4RegIsKill) - .addImm(16) - .addImm(16); - MBB.erase(MI); - MRI.clearKillFlags(DstReg); - MRI.clearKillFlags(DstSubHi); - MRI.clearKillFlags(DstSubLo); - return true; - } - case Hexagon::MUX64_rr: { + case Hexagon::PS_pselect: { const MachineOperand &Op0 = MI.getOperand(0); const MachineOperand &Op1 = MI.getOperand(1); const MachineOperand &Op2 = MI.getOperand(2); @@ -1175,73 +1216,104 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { MBB.erase(MI); return true; } - case Hexagon::VSelectPseudo_V6: { + case Hexagon::PS_vselect: + case Hexagon::PS_vselect_128B: { const 
MachineOperand &Op0 = MI.getOperand(0); const MachineOperand &Op1 = MI.getOperand(1); const MachineOperand &Op2 = MI.getOperand(2); const MachineOperand &Op3 = MI.getOperand(3); - BuildMI(MBB, MI, DL, get(Hexagon::V6_vcmov)) - .addOperand(Op0) - .addOperand(Op1) - .addOperand(Op2); - BuildMI(MBB, MI, DL, get(Hexagon::V6_vncmov)) - .addOperand(Op0) - .addOperand(Op1) - .addOperand(Op3); + LivePhysRegs LiveAtMI(&HRI); + getLiveRegsAt(LiveAtMI, MI); + bool IsDestLive = !LiveAtMI.available(MRI, Op0.getReg()); + if (Op0.getReg() != Op2.getReg()) { + auto T = BuildMI(MBB, MI, DL, get(Hexagon::V6_vcmov)) + .addOperand(Op0) + .addOperand(Op1) + .addOperand(Op2); + if (IsDestLive) + T.addReg(Op0.getReg(), RegState::Implicit); + IsDestLive = true; + } + if (Op0.getReg() != Op3.getReg()) { + auto T = BuildMI(MBB, MI, DL, get(Hexagon::V6_vncmov)) + .addOperand(Op0) + .addOperand(Op1) + .addOperand(Op3); + if (IsDestLive) + T.addReg(Op0.getReg(), RegState::Implicit); + } MBB.erase(MI); return true; } - case Hexagon::VSelectDblPseudo_V6: { + case Hexagon::PS_wselect: + case Hexagon::PS_wselect_128B: { MachineOperand &Op0 = MI.getOperand(0); MachineOperand &Op1 = MI.getOperand(1); MachineOperand &Op2 = MI.getOperand(2); MachineOperand &Op3 = MI.getOperand(3); - unsigned SrcLo = HRI.getSubReg(Op2.getReg(), Hexagon::subreg_loreg); - unsigned SrcHi = HRI.getSubReg(Op2.getReg(), Hexagon::subreg_hireg); - BuildMI(MBB, MI, DL, get(Hexagon::V6_vccombine)) - .addOperand(Op0) - .addOperand(Op1) - .addReg(SrcHi) - .addReg(SrcLo); - SrcLo = HRI.getSubReg(Op3.getReg(), Hexagon::subreg_loreg); - SrcHi = HRI.getSubReg(Op3.getReg(), Hexagon::subreg_hireg); - BuildMI(MBB, MI, DL, get(Hexagon::V6_vnccombine)) - .addOperand(Op0) - .addOperand(Op1) - .addReg(SrcHi) - .addReg(SrcLo); + LivePhysRegs LiveAtMI(&HRI); + getLiveRegsAt(LiveAtMI, MI); + bool IsDestLive = !LiveAtMI.available(MRI, Op0.getReg()); + + if (Op0.getReg() != Op2.getReg()) { + unsigned SrcLo = HRI.getSubReg(Op2.getReg(), Hexagon::vsub_lo); + unsigned SrcHi = HRI.getSubReg(Op2.getReg(), Hexagon::vsub_hi); + auto T = BuildMI(MBB, MI, DL, get(Hexagon::V6_vccombine)) + .addOperand(Op0) + .addOperand(Op1) + .addReg(SrcHi) + .addReg(SrcLo); + if (IsDestLive) + T.addReg(Op0.getReg(), RegState::Implicit); + IsDestLive = true; + } + if (Op0.getReg() != Op3.getReg()) { + unsigned SrcLo = HRI.getSubReg(Op3.getReg(), Hexagon::vsub_lo); + unsigned SrcHi = HRI.getSubReg(Op3.getReg(), Hexagon::vsub_hi); + auto T = BuildMI(MBB, MI, DL, get(Hexagon::V6_vnccombine)) + .addOperand(Op0) + .addOperand(Op1) + .addReg(SrcHi) + .addReg(SrcLo); + if (IsDestLive) + T.addReg(Op0.getReg(), RegState::Implicit); + } MBB.erase(MI); return true; } - case Hexagon::TCRETURNi: + case Hexagon::PS_tailcall_i: MI.setDesc(get(Hexagon::J2_jump)); return true; - case Hexagon::TCRETURNr: + case Hexagon::PS_tailcall_r: + case Hexagon::PS_jmpret: MI.setDesc(get(Hexagon::J2_jumpr)); return true; - case Hexagon::TFRI_f: - case Hexagon::TFRI_cPt_f: - case Hexagon::TFRI_cNotPt_f: { - unsigned Opx = (Opc == Hexagon::TFRI_f) ? 1 : 2; - APFloat FVal = MI.getOperand(Opx).getFPImm()->getValueAPF(); - APInt IVal = FVal.bitcastToAPInt(); - MI.RemoveOperand(Opx); - unsigned NewOpc = (Opc == Hexagon::TFRI_f) ? Hexagon::A2_tfrsi : - (Opc == Hexagon::TFRI_cPt_f) ? 
Hexagon::C2_cmoveit : - Hexagon::C2_cmoveif; - MI.setDesc(get(NewOpc)); - MI.addOperand(MachineOperand::CreateImm(IVal.getZExtValue())); + case Hexagon::PS_jmprett: + MI.setDesc(get(Hexagon::J2_jumprt)); + return true; + case Hexagon::PS_jmpretf: + MI.setDesc(get(Hexagon::J2_jumprf)); + return true; + case Hexagon::PS_jmprettnewpt: + MI.setDesc(get(Hexagon::J2_jumprtnewpt)); + return true; + case Hexagon::PS_jmpretfnewpt: + MI.setDesc(get(Hexagon::J2_jumprfnewpt)); + return true; + case Hexagon::PS_jmprettnew: + MI.setDesc(get(Hexagon::J2_jumprtnew)); + return true; + case Hexagon::PS_jmpretfnew: + MI.setDesc(get(Hexagon::J2_jumprfnew)); return true; - } } return false; } - // We indicate that we want to reverse the branch by // inserting the reversed branching opcode. -bool HexagonInstrInfo::ReverseBranchCondition( +bool HexagonInstrInfo::reverseBranchCondition( SmallVectorImpl<MachineOperand> &Cond) const { if (Cond.empty()) return true; @@ -1256,13 +1328,15 @@ bool HexagonInstrInfo::ReverseBranchCondition( return false; } - void HexagonInstrInfo::insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const { DebugLoc DL; BuildMI(MBB, MI, DL, get(Hexagon::A2_nop)); } +bool HexagonInstrInfo::isPostIncrement(const MachineInstr &MI) const { + return getAddrMode(MI) == HexagonII::PostInc; +} // Returns true if an instruction is predicated irrespective of the predicate // sense. For example, all of the following will return true. @@ -1277,7 +1351,6 @@ bool HexagonInstrInfo::isPredicated(const MachineInstr &MI) const { return (F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask; } - bool HexagonInstrInfo::PredicateInstruction( MachineInstr &MI, ArrayRef<MachineOperand> Cond) const { if (Cond.empty() || isNewValueJump(Cond[0].getImm()) || @@ -1329,14 +1402,12 @@ bool HexagonInstrInfo::PredicateInstruction( return true; } - bool HexagonInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1, ArrayRef<MachineOperand> Pred2) const { // TODO: Fix this return false; } - bool HexagonInstrInfo::DefinesPredicate( MachineInstr &MI, std::vector<MachineOperand> &Pred) const { auto &HRI = getRegisterInfo(); @@ -1353,7 +1424,6 @@ bool HexagonInstrInfo::DefinesPredicate( return false; } - bool HexagonInstrInfo::isPredicable(MachineInstr &MI) const { return MI.getDesc().isPredicable(); } @@ -1372,6 +1442,9 @@ bool HexagonInstrInfo::isSchedulingBoundary(const MachineInstr &MI, // Throwing call is a boundary. if (MI.isCall()) { + // Don't mess around with no return calls. + if (doesNotReturn(MI)) + return true; // If any of the block's successors is a landing pad, this could be a // throwing call. for (auto I : MBB->successors()) @@ -1379,10 +1452,6 @@ bool HexagonInstrInfo::isSchedulingBoundary(const MachineInstr &MI, return true; } - // Don't mess around with no return calls. - if (MI.getOpcode() == Hexagon::CALLv3nr) - return true; - // Terminators and labels can't be scheduled around. if (MI.getDesc().isTerminator() || MI.isPosition()) return true; @@ -1393,7 +1462,6 @@ bool HexagonInstrInfo::isSchedulingBoundary(const MachineInstr &MI, return false; } - /// Measure the specified inline asm to determine an approximation of its /// length. 
/// Comments (which run till the next SeparatorString or newline) do not @@ -1418,8 +1486,8 @@ unsigned HexagonInstrInfo::getInlineAsmLength(const char *Str, Length += MAI.getMaxInstLength(); atInsnStart = false; } - if (atInsnStart && strncmp(Str, MAI.getCommentString(), - strlen(MAI.getCommentString())) == 0) + if (atInsnStart && strncmp(Str, MAI.getCommentString().data(), + MAI.getCommentString().size()) == 0) atInsnStart = false; } @@ -1429,14 +1497,16 @@ unsigned HexagonInstrInfo::getInlineAsmLength(const char *Str, return Length; } - ScheduleHazardRecognizer* HexagonInstrInfo::CreateTargetPostRAHazardRecognizer( const InstrItineraryData *II, const ScheduleDAG *DAG) const { + if (UseDFAHazardRec) { + auto &HST = DAG->MF.getSubtarget<HexagonSubtarget>(); + return new HexagonHazardRecognizer(II, this, HST); + } return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG); } - /// \brief For a comparison instruction, return the source registers in /// \p SrcReg and \p SrcReg2 if having two register operands, and the value it /// compares against in CmpValue. Return true if the comparison instruction @@ -1529,17 +1599,15 @@ bool HexagonInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, unsigned HexagonInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost) const { - return getInstrTimingClassLatency(ItinData, &MI); + return getInstrTimingClassLatency(ItinData, MI); } - DFAPacketizer *HexagonInstrInfo::CreateTargetScheduleState( const TargetSubtargetInfo &STI) const { const InstrItineraryData *II = STI.getInstrItineraryData(); return static_cast<const HexagonSubtarget&>(STI).createDFAPacketizer(II); } - // Inspired by this pair: // %R13<def> = L2_loadri_io %R29, 136; mem:LD4[FixedStack0] // S2_storeri_io %R29, 132, %R1<kill>; flags: mem:ST4[FixedStack1] @@ -1555,16 +1623,16 @@ bool HexagonInstrInfo::areMemAccessesTriviallyDisjoint( // Instructions that are pure loads, not loads and stores like memops are not // dependent. - if (MIa.mayLoad() && !isMemOp(&MIa) && MIb.mayLoad() && !isMemOp(&MIb)) + if (MIa.mayLoad() && !isMemOp(MIa) && MIb.mayLoad() && !isMemOp(MIb)) return true; // Get base, offset, and access size in MIa. - unsigned BaseRegA = getBaseAndOffset(&MIa, OffsetA, SizeA); + unsigned BaseRegA = getBaseAndOffset(MIa, OffsetA, SizeA); if (!BaseRegA || !SizeA) return false; // Get base, offset, and access size in MIb. - unsigned BaseRegB = getBaseAndOffset(&MIb, OffsetB, SizeB); + unsigned BaseRegB = getBaseAndOffset(MIb, OffsetB, SizeB); if (!BaseRegB || !SizeB) return false; @@ -1584,8 +1652,22 @@ bool HexagonInstrInfo::areMemAccessesTriviallyDisjoint( return false; } +/// If the instruction is an increment of a constant value, return the amount. 
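// A behavioural sketch of the two cases handled below, using a hypothetical
// IncKind enum in place of the real opcode/addressing-mode tests: a
// post-increment access reports its auto-increment amount (recovered through
// getBaseAndOffset), A2_addi reports its immediate, and everything else has
// no constant increment.
#include <optional>
enum class IncKind { PostInc, AddImm, Other };
std::optional<int> incrementValueSketch(IncKind K, int AutoInc, int Imm) {
  switch (K) {
  case IncKind::PostInc: return AutoInc; // amount from getBaseAndOffset()
  case IncKind::AddImm:  return Imm;     // operand 2 of A2_addi
  default:               return std::nullopt;
  }
}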
+bool HexagonInstrInfo::getIncrementValue(const MachineInstr &MI, + int &Value) const { + if (isPostIncrement(MI)) { + unsigned AccessSize; + return getBaseAndOffset(MI, Value, AccessSize); + } + if (MI.getOpcode() == Hexagon::A2_addi) { + Value = MI.getOperand(2).getImm(); + return true; + } + + return false; +} -unsigned HexagonInstrInfo::createVR(MachineFunction* MF, MVT VT) const { +unsigned HexagonInstrInfo::createVR(MachineFunction *MF, MVT VT) const { MachineRegisterInfo &MRI = MF->getRegInfo(); const TargetRegisterClass *TRC; if (VT == MVT::i1) { @@ -1602,58 +1684,52 @@ unsigned HexagonInstrInfo::createVR(MachineFunction* MF, MVT VT) const { return NewReg; } - -bool HexagonInstrInfo::isAbsoluteSet(const MachineInstr* MI) const { +bool HexagonInstrInfo::isAbsoluteSet(const MachineInstr &MI) const { return (getAddrMode(MI) == HexagonII::AbsoluteSet); } - -bool HexagonInstrInfo::isAccumulator(const MachineInstr *MI) const { - const uint64_t F = MI->getDesc().TSFlags; +bool HexagonInstrInfo::isAccumulator(const MachineInstr &MI) const { + const uint64_t F = MI.getDesc().TSFlags; return((F >> HexagonII::AccumulatorPos) & HexagonII::AccumulatorMask); } - -bool HexagonInstrInfo::isComplex(const MachineInstr *MI) const { - const MachineFunction *MF = MI->getParent()->getParent(); +bool HexagonInstrInfo::isComplex(const MachineInstr &MI) const { + const MachineFunction *MF = MI.getParent()->getParent(); const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; if (!(isTC1(MI)) && !(QII->isTC2Early(MI)) - && !(MI->getDesc().mayLoad()) - && !(MI->getDesc().mayStore()) - && (MI->getDesc().getOpcode() != Hexagon::S2_allocframe) - && (MI->getDesc().getOpcode() != Hexagon::L2_deallocframe) + && !(MI.getDesc().mayLoad()) + && !(MI.getDesc().mayStore()) + && (MI.getDesc().getOpcode() != Hexagon::S2_allocframe) + && (MI.getDesc().getOpcode() != Hexagon::L2_deallocframe) && !(QII->isMemOp(MI)) - && !(MI->isBranch()) - && !(MI->isReturn()) - && !MI->isCall()) + && !(MI.isBranch()) + && !(MI.isReturn()) + && !MI.isCall()) return true; return false; } - // Return true if the instruction is a compund branch instruction. -bool HexagonInstrInfo::isCompoundBranchInstr(const MachineInstr *MI) const { - return (getType(MI) == HexagonII::TypeCOMPOUND && MI->isBranch()); +bool HexagonInstrInfo::isCompoundBranchInstr(const MachineInstr &MI) const { + return (getType(MI) == HexagonII::TypeCOMPOUND && MI.isBranch()); } - -bool HexagonInstrInfo::isCondInst(const MachineInstr *MI) const { - return (MI->isBranch() && isPredicated(*MI)) || +bool HexagonInstrInfo::isCondInst(const MachineInstr &MI) const { + return (MI.isBranch() && isPredicated(MI)) || isConditionalTransfer(MI) || isConditionalALU32(MI) || isConditionalLoad(MI) || // Predicated stores which don't have a .new on any operands. - (MI->mayStore() && isPredicated(*MI) && !isNewValueStore(MI) && - !isPredicatedNew(*MI)); + (MI.mayStore() && isPredicated(MI) && !isNewValueStore(MI) && + !isPredicatedNew(MI)); } - -bool HexagonInstrInfo::isConditionalALU32(const MachineInstr* MI) const { - switch (MI->getOpcode()) { +bool HexagonInstrInfo::isConditionalALU32(const MachineInstr &MI) const { + switch (MI.getOpcode()) { case Hexagon::A2_paddf: case Hexagon::A2_paddfnew: case Hexagon::A2_paddif: @@ -1709,25 +1785,23 @@ bool HexagonInstrInfo::isConditionalALU32(const MachineInstr* MI) const { return false; } - // FIXME - Function name and it's functionality don't match. 
// It should be renamed to hasPredNewOpcode() -bool HexagonInstrInfo::isConditionalLoad(const MachineInstr* MI) const { - if (!MI->getDesc().mayLoad() || !isPredicated(*MI)) +bool HexagonInstrInfo::isConditionalLoad(const MachineInstr &MI) const { + if (!MI.getDesc().mayLoad() || !isPredicated(MI)) return false; - int PNewOpcode = Hexagon::getPredNewOpcode(MI->getOpcode()); + int PNewOpcode = Hexagon::getPredNewOpcode(MI.getOpcode()); // Instruction with valid predicated-new opcode can be promoted to .new. return PNewOpcode >= 0; } - // Returns true if an instruction is a conditional store. // // Note: It doesn't include conditional new-value stores as they can't be // converted to .new predicate. -bool HexagonInstrInfo::isConditionalStore(const MachineInstr* MI) const { - switch (MI->getOpcode()) { +bool HexagonInstrInfo::isConditionalStore(const MachineInstr &MI) const { + switch (MI.getOpcode()) { default: return false; case Hexagon::S4_storeirbt_io: case Hexagon::S4_storeirbf_io: @@ -1779,9 +1853,8 @@ bool HexagonInstrInfo::isConditionalStore(const MachineInstr* MI) const { } } - -bool HexagonInstrInfo::isConditionalTransfer(const MachineInstr *MI) const { - switch (MI->getOpcode()) { +bool HexagonInstrInfo::isConditionalTransfer(const MachineInstr &MI) const { + switch (MI.getOpcode()) { case Hexagon::A2_tfrt: case Hexagon::A2_tfrf: case Hexagon::C2_cmoveit: @@ -1800,11 +1873,10 @@ bool HexagonInstrInfo::isConditionalTransfer(const MachineInstr *MI) const { return false; } - // TODO: In order to have isExtendable for fpimm/f32Ext, we need to handle // isFPImm and later getFPImm as well. -bool HexagonInstrInfo::isConstExtended(const MachineInstr *MI) const { - const uint64_t F = MI->getDesc().TSFlags; +bool HexagonInstrInfo::isConstExtended(const MachineInstr &MI) const { + const uint64_t F = MI.getDesc().TSFlags; unsigned isExtended = (F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask; if (isExtended) // Instruction must be extended. return true; @@ -1814,11 +1886,11 @@ bool HexagonInstrInfo::isConstExtended(const MachineInstr *MI) const { if (!isExtendable) return false; - if (MI->isCall()) + if (MI.isCall()) return false; short ExtOpNum = getCExtOpNum(MI); - const MachineOperand &MO = MI->getOperand(ExtOpNum); + const MachineOperand &MO = MI.getOperand(ExtOpNum); // Use MO operand flags to determine if MO // has the HMOTF_ConstExtended flag set. if (MO.getTargetFlags() && HexagonII::HMOTF_ConstExtended) @@ -1835,7 +1907,7 @@ bool HexagonInstrInfo::isConstExtended(const MachineInstr *MI) const { // object we are going to end up with here for now. // In the future we probably should add isSymbol(), etc. if (MO.isGlobal() || MO.isSymbol() || MO.isBlockAddress() || - MO.isJTI() || MO.isCPI()) + MO.isJTI() || MO.isCPI() || MO.isFPImm()) return true; // If the extendable operand is not 'Immediate' type, the instruction should @@ -1849,9 +1921,8 @@ bool HexagonInstrInfo::isConstExtended(const MachineInstr *MI) const { return (ImmValue < MinValue || ImmValue > MaxValue); } - -bool HexagonInstrInfo::isDeallocRet(const MachineInstr *MI) const { - switch (MI->getOpcode()) { +bool HexagonInstrInfo::isDeallocRet(const MachineInstr &MI) const { + switch (MI.getOpcode()) { case Hexagon::L4_return : case Hexagon::L4_return_t : case Hexagon::L4_return_f : @@ -1864,12 +1935,10 @@ bool HexagonInstrInfo::isDeallocRet(const MachineInstr *MI) const { return false; } - // Return true when ConsMI uses a register defined by ProdMI. 
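// A minimal sketch of the def->use scan that follows, over plain register
// ids (the real code also folds sub- and super-registers in via HRI):
#include <algorithm>
#include <vector>
bool isDependentSketch(const std::vector<unsigned> &ProdDefs,
                       const std::vector<unsigned> &ConsUses) {
  for (unsigned D : ProdDefs)   // every register the producer defines...
    if (std::find(ConsUses.begin(), ConsUses.end(), D) != ConsUses.end())
      return true;              // ...read by the consumer => dependent
  return false;
}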
-bool HexagonInstrInfo::isDependent(const MachineInstr *ProdMI, - const MachineInstr *ConsMI) const { - const MCInstrDesc &ProdMCID = ProdMI->getDesc(); - if (!ProdMCID.getNumDefs()) +bool HexagonInstrInfo::isDependent(const MachineInstr &ProdMI, + const MachineInstr &ConsMI) const { + if (!ProdMI.getDesc().getNumDefs()) return false; auto &HRI = getRegisterInfo(); @@ -1902,10 +1971,9 @@ bool HexagonInstrInfo::isDependent(const MachineInstr *ProdMI, return false; } - // Returns true if the instruction is alread a .cur. -bool HexagonInstrInfo::isDotCurInst(const MachineInstr* MI) const { - switch (MI->getOpcode()) { +bool HexagonInstrInfo::isDotCurInst(const MachineInstr &MI) const { + switch (MI.getOpcode()) { case Hexagon::V6_vL32b_cur_pi: case Hexagon::V6_vL32b_cur_ai: case Hexagon::V6_vL32b_cur_pi_128B: @@ -1915,47 +1983,39 @@ bool HexagonInstrInfo::isDotCurInst(const MachineInstr* MI) const { return false; } - // Returns true, if any one of the operands is a dot new // insn, whether it is predicated dot new or register dot new. -bool HexagonInstrInfo::isDotNewInst(const MachineInstr* MI) const { - if (isNewValueInst(MI) || (isPredicated(*MI) && isPredicatedNew(*MI))) +bool HexagonInstrInfo::isDotNewInst(const MachineInstr &MI) const { + if (isNewValueInst(MI) || (isPredicated(MI) && isPredicatedNew(MI))) return true; return false; } - /// Symmetrical. See if these two instructions are fit for duplex pair. -bool HexagonInstrInfo::isDuplexPair(const MachineInstr *MIa, - const MachineInstr *MIb) const { +bool HexagonInstrInfo::isDuplexPair(const MachineInstr &MIa, + const MachineInstr &MIb) const { HexagonII::SubInstructionGroup MIaG = getDuplexCandidateGroup(MIa); HexagonII::SubInstructionGroup MIbG = getDuplexCandidateGroup(MIb); return (isDuplexPairMatch(MIaG, MIbG) || isDuplexPairMatch(MIbG, MIaG)); } - -bool HexagonInstrInfo::isEarlySourceInstr(const MachineInstr *MI) const { - if (!MI) - return false; - - if (MI->mayLoad() || MI->mayStore() || MI->isCompare()) +bool HexagonInstrInfo::isEarlySourceInstr(const MachineInstr &MI) const { + if (MI.mayLoad() || MI.mayStore() || MI.isCompare()) return true; // Multiply - unsigned SchedClass = MI->getDesc().getSchedClass(); + unsigned SchedClass = MI.getDesc().getSchedClass(); if (SchedClass == Hexagon::Sched::M_tc_3or4x_SLOT23) return true; return false; } - bool HexagonInstrInfo::isEndLoopN(unsigned Opcode) const { return (Opcode == Hexagon::ENDLOOP0 || Opcode == Hexagon::ENDLOOP1); } - bool HexagonInstrInfo::isExpr(unsigned OpType) const { switch(OpType) { case MachineOperand::MO_MachineBasicBlock: @@ -1970,18 +2030,18 @@ bool HexagonInstrInfo::isExpr(unsigned OpType) const { } } - -bool HexagonInstrInfo::isExtendable(const MachineInstr *MI) const { - const MCInstrDesc &MID = MI->getDesc(); +bool HexagonInstrInfo::isExtendable(const MachineInstr &MI) const { + const MCInstrDesc &MID = MI.getDesc(); const uint64_t F = MID.TSFlags; if ((F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask) return true; // TODO: This is largely obsolete now. Will need to be removed // in consecutive patches. - switch(MI->getOpcode()) { - // TFR_FI Remains a special case. - case Hexagon::TFR_FI: + switch (MI.getOpcode()) { + // PS_fi and PS_fia remain special cases. 
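// (In sketch form, the whole test reduces to "TSFlags extendable bit set, or
// the operand is a frame-index pseudo whose final offset is only known after
// frame lowering" — i.e. TSFlagBit || IsFrameIndexPseudo.)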
+ case Hexagon::PS_fi: + case Hexagon::PS_fia: return true; default: return false; @@ -1989,57 +2049,53 @@ bool HexagonInstrInfo::isExtendable(const MachineInstr *MI) const { return false; } - // This returns true in two cases: // - The OP code itself indicates that this is an extended instruction. // - One of MOs has been marked with HMOTF_ConstExtended flag. -bool HexagonInstrInfo::isExtended(const MachineInstr *MI) const { +bool HexagonInstrInfo::isExtended(const MachineInstr &MI) const { // First check if this is permanently extended op code. - const uint64_t F = MI->getDesc().TSFlags; + const uint64_t F = MI.getDesc().TSFlags; if ((F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask) return true; // Use MO operand flags to determine if one of MI's operands // has HMOTF_ConstExtended flag set. - for (MachineInstr::const_mop_iterator I = MI->operands_begin(), - E = MI->operands_end(); I != E; ++I) { + for (MachineInstr::const_mop_iterator I = MI.operands_begin(), + E = MI.operands_end(); I != E; ++I) { if (I->getTargetFlags() && HexagonII::HMOTF_ConstExtended) return true; } return false; } - -bool HexagonInstrInfo::isFloat(const MachineInstr *MI) const { - unsigned Opcode = MI->getOpcode(); +bool HexagonInstrInfo::isFloat(const MachineInstr &MI) const { + unsigned Opcode = MI.getOpcode(); const uint64_t F = get(Opcode).TSFlags; return (F >> HexagonII::FPPos) & HexagonII::FPMask; } - // No V60 HVX VMEM with A_INDIRECT. -bool HexagonInstrInfo::isHVXMemWithAIndirect(const MachineInstr *I, - const MachineInstr *J) const { +bool HexagonInstrInfo::isHVXMemWithAIndirect(const MachineInstr &I, + const MachineInstr &J) const { if (!isV60VectorInstruction(I)) return false; - if (!I->mayLoad() && !I->mayStore()) + if (!I.mayLoad() && !I.mayStore()) return false; - return J->isIndirectBranch() || isIndirectCall(J) || isIndirectL4Return(J); + return J.isIndirectBranch() || isIndirectCall(J) || isIndirectL4Return(J); } - -bool HexagonInstrInfo::isIndirectCall(const MachineInstr *MI) const { - switch (MI->getOpcode()) { +bool HexagonInstrInfo::isIndirectCall(const MachineInstr &MI) const { + switch (MI.getOpcode()) { case Hexagon::J2_callr : case Hexagon::J2_callrf : case Hexagon::J2_callrt : + case Hexagon::PS_call_nr : return true; } return false; } - -bool HexagonInstrInfo::isIndirectL4Return(const MachineInstr *MI) const { - switch (MI->getOpcode()) { +bool HexagonInstrInfo::isIndirectL4Return(const MachineInstr &MI) const { + switch (MI.getOpcode()) { case Hexagon::L4_return : case Hexagon::L4_return_t : case Hexagon::L4_return_f : @@ -2052,9 +2108,8 @@ bool HexagonInstrInfo::isIndirectL4Return(const MachineInstr *MI) const { return false; } - -bool HexagonInstrInfo::isJumpR(const MachineInstr *MI) const { - switch (MI->getOpcode()) { +bool HexagonInstrInfo::isJumpR(const MachineInstr &MI) const { + switch (MI.getOpcode()) { case Hexagon::J2_jumpr : case Hexagon::J2_jumprt : case Hexagon::J2_jumprf : @@ -2067,25 +2122,24 @@ bool HexagonInstrInfo::isJumpR(const MachineInstr *MI) const { return false; } - -// Return true if a given MI can accomodate given offset. +// Return true if a given MI can accommodate given offset. // Use abs estimate as oppose to the exact number. // TODO: This will need to be changed to use MC level // definition of instruction extendable field size. 
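// The isInt<N> tests below encode branch reach: an N-bit signed displacement
// spans [-2^(N-1), 2^(N-1)-1]. A freestanding equivalent of the llvm::isInt
// template used here:
#include <cstdint>
template <unsigned N> bool fitsSignedSketch(int64_t x) {
  return x >= -(INT64_C(1) << (N - 1)) && x < (INT64_C(1) << (N - 1));
}
// e.g. J2_jump carries an r22:2 field, hence fitsSignedSketch<24>(offset)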
-bool HexagonInstrInfo::isJumpWithinBranchRange(const MachineInstr *MI, +bool HexagonInstrInfo::isJumpWithinBranchRange(const MachineInstr &MI, unsigned offset) const { // This selection of jump instructions matches to that what // AnalyzeBranch can parse, plus NVJ. if (isNewValueJump(MI)) // r9:2 return isInt<11>(offset); - switch (MI->getOpcode()) { + switch (MI.getOpcode()) { // Still missing Jump to address condition on register value. default: return false; case Hexagon::J2_jump: // bits<24> dst; // r22:2 case Hexagon::J2_call: - case Hexagon::CALLv3nr: + case Hexagon::PS_call_nr: return isInt<24>(offset); case Hexagon::J2_jumpt: //bits<17> dst; // r15:2 case Hexagon::J2_jumpf: @@ -2112,19 +2166,15 @@ bool HexagonInstrInfo::isJumpWithinBranchRange(const MachineInstr *MI, } } - -bool HexagonInstrInfo::isLateInstrFeedsEarlyInstr(const MachineInstr *LRMI, - const MachineInstr *ESMI) const { - if (!LRMI || !ESMI) - return false; - +bool HexagonInstrInfo::isLateInstrFeedsEarlyInstr(const MachineInstr &LRMI, + const MachineInstr &ESMI) const { bool isLate = isLateResultInstr(LRMI); bool isEarly = isEarlySourceInstr(ESMI); DEBUG(dbgs() << "V60" << (isLate ? "-LR " : " -- ")); - DEBUG(LRMI->dump()); + DEBUG(LRMI.dump()); DEBUG(dbgs() << "V60" << (isEarly ? "-ES " : " -- ")); - DEBUG(ESMI->dump()); + DEBUG(ESMI.dump()); if (isLate && isEarly) { DEBUG(dbgs() << "++Is Late Result feeding Early Source\n"); @@ -2134,12 +2184,8 @@ bool HexagonInstrInfo::isLateInstrFeedsEarlyInstr(const MachineInstr *LRMI, return false; } - -bool HexagonInstrInfo::isLateResultInstr(const MachineInstr *MI) const { - if (!MI) - return false; - - switch (MI->getOpcode()) { +bool HexagonInstrInfo::isLateResultInstr(const MachineInstr &MI) const { + switch (MI.getOpcode()) { case TargetOpcode::EXTRACT_SUBREG: case TargetOpcode::INSERT_SUBREG: case TargetOpcode::SUBREG_TO_REG: @@ -2153,7 +2199,7 @@ bool HexagonInstrInfo::isLateResultInstr(const MachineInstr *MI) const { break; } - unsigned SchedClass = MI->getDesc().getSchedClass(); + unsigned SchedClass = MI.getDesc().getSchedClass(); switch (SchedClass) { case Hexagon::Sched::ALU32_2op_tc_1_SLOT0123: @@ -2174,19 +2220,14 @@ bool HexagonInstrInfo::isLateResultInstr(const MachineInstr *MI) const { return true; } - -bool HexagonInstrInfo::isLateSourceInstr(const MachineInstr *MI) const { - if (!MI) - return false; - +bool HexagonInstrInfo::isLateSourceInstr(const MachineInstr &MI) const { // Instructions with iclass A_CVI_VX and attribute A_CVI_LATE uses a multiply // resource, but all operands can be received late like an ALU instruction. 
- return MI->getDesc().getSchedClass() == Hexagon::Sched::CVI_VX_LATE; + return MI.getDesc().getSchedClass() == Hexagon::Sched::CVI_VX_LATE; } - -bool HexagonInstrInfo::isLoopN(const MachineInstr *MI) const { - unsigned Opcode = MI->getOpcode(); +bool HexagonInstrInfo::isLoopN(const MachineInstr &MI) const { + unsigned Opcode = MI.getOpcode(); return Opcode == Hexagon::J2_loop0i || Opcode == Hexagon::J2_loop0r || Opcode == Hexagon::J2_loop0iext || @@ -2197,9 +2238,8 @@ bool HexagonInstrInfo::isLoopN(const MachineInstr *MI) const { Opcode == Hexagon::J2_loop1rext; } - -bool HexagonInstrInfo::isMemOp(const MachineInstr *MI) const { - switch (MI->getOpcode()) { +bool HexagonInstrInfo::isMemOp(const MachineInstr &MI) const { + switch (MI.getOpcode()) { default: return false; case Hexagon::L4_iadd_memopw_io : case Hexagon::L4_isub_memopw_io : @@ -2230,81 +2270,64 @@ bool HexagonInstrInfo::isMemOp(const MachineInstr *MI) const { return false; } - -bool HexagonInstrInfo::isNewValue(const MachineInstr* MI) const { - const uint64_t F = MI->getDesc().TSFlags; +bool HexagonInstrInfo::isNewValue(const MachineInstr &MI) const { + const uint64_t F = MI.getDesc().TSFlags; return (F >> HexagonII::NewValuePos) & HexagonII::NewValueMask; } - bool HexagonInstrInfo::isNewValue(unsigned Opcode) const { const uint64_t F = get(Opcode).TSFlags; return (F >> HexagonII::NewValuePos) & HexagonII::NewValueMask; } - -bool HexagonInstrInfo::isNewValueInst(const MachineInstr *MI) const { +bool HexagonInstrInfo::isNewValueInst(const MachineInstr &MI) const { return isNewValueJump(MI) || isNewValueStore(MI); } - -bool HexagonInstrInfo::isNewValueJump(const MachineInstr *MI) const { - return isNewValue(MI) && MI->isBranch(); +bool HexagonInstrInfo::isNewValueJump(const MachineInstr &MI) const { + return isNewValue(MI) && MI.isBranch(); } - bool HexagonInstrInfo::isNewValueJump(unsigned Opcode) const { return isNewValue(Opcode) && get(Opcode).isBranch() && isPredicated(Opcode); } - -bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const { - const uint64_t F = MI->getDesc().TSFlags; +bool HexagonInstrInfo::isNewValueStore(const MachineInstr &MI) const { + const uint64_t F = MI.getDesc().TSFlags; return (F >> HexagonII::NVStorePos) & HexagonII::NVStoreMask; } - bool HexagonInstrInfo::isNewValueStore(unsigned Opcode) const { const uint64_t F = get(Opcode).TSFlags; return (F >> HexagonII::NVStorePos) & HexagonII::NVStoreMask; } - // Returns true if a particular operand is extendable for an instruction. 
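// The accessor below, like most predicates in this file, is a TSFlags
// bit-field read; a freestanding sketch with illustrative (pos, mask) values
// rather than the real HexagonII constants:
#include <cstdint>
constexpr unsigned ExtOpPosSketch = 11;
constexpr uint64_t ExtOpMaskSketch = 0xF;
bool isOperandExtendedSketch(uint64_t TSFlags, unsigned OperandNum) {
  return ((TSFlags >> ExtOpPosSketch) & ExtOpMaskSketch) == OperandNum;
}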
-bool HexagonInstrInfo::isOperandExtended(const MachineInstr *MI, +bool HexagonInstrInfo::isOperandExtended(const MachineInstr &MI, unsigned OperandNum) const { - const uint64_t F = MI->getDesc().TSFlags; + const uint64_t F = MI.getDesc().TSFlags; return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask) == OperandNum; } - -bool HexagonInstrInfo::isPostIncrement(const MachineInstr* MI) const { - return getAddrMode(MI) == HexagonII::PostInc; -} - - bool HexagonInstrInfo::isPredicatedNew(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; assert(isPredicated(MI)); return (F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask; } - bool HexagonInstrInfo::isPredicatedNew(unsigned Opcode) const { const uint64_t F = get(Opcode).TSFlags; assert(isPredicated(Opcode)); return (F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask; } - bool HexagonInstrInfo::isPredicatedTrue(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; return !((F >> HexagonII::PredicatedFalsePos) & HexagonII::PredicatedFalseMask); } - bool HexagonInstrInfo::isPredicatedTrue(unsigned Opcode) const { const uint64_t F = get(Opcode).TSFlags; // Make sure that the instruction is predicated. @@ -2313,19 +2336,16 @@ bool HexagonInstrInfo::isPredicatedTrue(unsigned Opcode) const { HexagonII::PredicatedFalseMask); } - bool HexagonInstrInfo::isPredicated(unsigned Opcode) const { const uint64_t F = get(Opcode).TSFlags; return (F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask; } - bool HexagonInstrInfo::isPredicateLate(unsigned Opcode) const { const uint64_t F = get(Opcode).TSFlags; return ~(F >> HexagonII::PredicateLatePos) & HexagonII::PredicateLateMask; } - bool HexagonInstrInfo::isPredictedTaken(unsigned Opcode) const { const uint64_t F = get(Opcode).TSFlags; assert(get(Opcode).isBranch() && @@ -2333,12 +2353,11 @@ bool HexagonInstrInfo::isPredictedTaken(unsigned Opcode) const { return (F >> HexagonII::TakenPos) & HexagonII::TakenMask; } - -bool HexagonInstrInfo::isSaveCalleeSavedRegsCall(const MachineInstr *MI) const { - return MI->getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4 || - MI->getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4_EXT || - MI->getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4_PIC || - MI->getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4_EXT_PIC; +bool HexagonInstrInfo::isSaveCalleeSavedRegsCall(const MachineInstr &MI) const { + return MI.getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4 || + MI.getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4_EXT || + MI.getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4_PIC || + MI.getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4_EXT_PIC; } bool HexagonInstrInfo::isSignExtendingLoad(const MachineInstr &MI) const { @@ -2419,15 +2438,13 @@ bool HexagonInstrInfo::isSignExtendingLoad(const MachineInstr &MI) const { } } - -bool HexagonInstrInfo::isSolo(const MachineInstr* MI) const { - const uint64_t F = MI->getDesc().TSFlags; +bool HexagonInstrInfo::isSolo(const MachineInstr &MI) const { + const uint64_t F = MI.getDesc().TSFlags; return (F >> HexagonII::SoloPos) & HexagonII::SoloMask; } - -bool HexagonInstrInfo::isSpillPredRegOp(const MachineInstr *MI) const { - switch (MI->getOpcode()) { +bool HexagonInstrInfo::isSpillPredRegOp(const MachineInstr &MI) const { + switch (MI.getOpcode()) { case Hexagon::STriw_pred : case Hexagon::LDriw_pred : return true; @@ -2436,21 +2453,19 @@ bool HexagonInstrInfo::isSpillPredRegOp(const MachineInstr *MI) const { } } - -bool HexagonInstrInfo::isTailCall(const MachineInstr *MI) 
const { - if (!MI->isBranch()) +bool HexagonInstrInfo::isTailCall(const MachineInstr &MI) const { + if (!MI.isBranch()) return false; - for (auto &Op : MI->operands()) + for (auto &Op : MI.operands()) if (Op.isGlobal() || Op.isSymbol()) return true; return false; } - // Returns true when SU has a timing class TC1. -bool HexagonInstrInfo::isTC1(const MachineInstr *MI) const { - unsigned SchedClass = MI->getDesc().getSchedClass(); +bool HexagonInstrInfo::isTC1(const MachineInstr &MI) const { + unsigned SchedClass = MI.getDesc().getSchedClass(); switch (SchedClass) { case Hexagon::Sched::ALU32_2op_tc_1_SLOT0123: case Hexagon::Sched::ALU32_3op_tc_1_SLOT0123: @@ -2467,9 +2482,8 @@ bool HexagonInstrInfo::isTC1(const MachineInstr *MI) const { } } - -bool HexagonInstrInfo::isTC2(const MachineInstr *MI) const { - unsigned SchedClass = MI->getDesc().getSchedClass(); +bool HexagonInstrInfo::isTC2(const MachineInstr &MI) const { + unsigned SchedClass = MI.getDesc().getSchedClass(); switch (SchedClass) { case Hexagon::Sched::ALU32_3op_tc_2_SLOT0123: case Hexagon::Sched::ALU64_tc_2_SLOT23: @@ -2484,9 +2498,8 @@ bool HexagonInstrInfo::isTC2(const MachineInstr *MI) const { } } - -bool HexagonInstrInfo::isTC2Early(const MachineInstr *MI) const { - unsigned SchedClass = MI->getDesc().getSchedClass(); +bool HexagonInstrInfo::isTC2Early(const MachineInstr &MI) const { + unsigned SchedClass = MI.getDesc().getSchedClass(); switch (SchedClass) { case Hexagon::Sched::ALU32_2op_tc_2early_SLOT0123: case Hexagon::Sched::ALU32_3op_tc_2early_SLOT0123: @@ -2505,47 +2518,35 @@ bool HexagonInstrInfo::isTC2Early(const MachineInstr *MI) const { } } - -bool HexagonInstrInfo::isTC4x(const MachineInstr *MI) const { - if (!MI) - return false; - - unsigned SchedClass = MI->getDesc().getSchedClass(); +bool HexagonInstrInfo::isTC4x(const MachineInstr &MI) const { + unsigned SchedClass = MI.getDesc().getSchedClass(); return SchedClass == Hexagon::Sched::M_tc_3or4x_SLOT23; } - // Schedule this ASAP. -bool HexagonInstrInfo::isToBeScheduledASAP(const MachineInstr *MI1, - const MachineInstr *MI2) const { - if (!MI1 || !MI2) - return false; +bool HexagonInstrInfo::isToBeScheduledASAP(const MachineInstr &MI1, + const MachineInstr &MI2) const { if (mayBeCurLoad(MI1)) { // if (result of SU is used in Next) return true; - unsigned DstReg = MI1->getOperand(0).getReg(); - int N = MI2->getNumOperands(); + unsigned DstReg = MI1.getOperand(0).getReg(); + int N = MI2.getNumOperands(); for (int I = 0; I < N; I++) - if (MI2->getOperand(I).isReg() && DstReg == MI2->getOperand(I).getReg()) + if (MI2.getOperand(I).isReg() && DstReg == MI2.getOperand(I).getReg()) return true; } if (mayBeNewStore(MI2)) - if (MI2->getOpcode() == Hexagon::V6_vS32b_pi) - if (MI1->getOperand(0).isReg() && MI2->getOperand(3).isReg() && - MI1->getOperand(0).getReg() == MI2->getOperand(3).getReg()) + if (MI2.getOpcode() == Hexagon::V6_vS32b_pi) + if (MI1.getOperand(0).isReg() && MI2.getOperand(3).isReg() && + MI1.getOperand(0).getReg() == MI2.getOperand(3).getReg()) return true; return false; } - -bool HexagonInstrInfo::isV60VectorInstruction(const MachineInstr *MI) const { - if (!MI) - return false; - +bool HexagonInstrInfo::isV60VectorInstruction(const MachineInstr &MI) const { const uint64_t V = getType(MI); return HexagonII::TypeCVI_FIRST <= V && V <= HexagonII::TypeCVI_LAST; } - // Check if the Offset is a valid auto-inc imm by Load/Store Type. 
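// A sketch of the window this check enforces for the scalar cases: the
// increment is a 4-bit signed multiplier scaled by the access size (#s4:0
// through #s4:3), e.g. memw allows -32..28 in steps of 4.
bool validAutoIncSketch(int AccessBytes, int Offset) {
  return Offset % AccessBytes == 0 &&
         Offset / AccessBytes >= -8 && Offset / AccessBytes <= 7;
}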
// bool HexagonInstrInfo::isValidAutoIncImm(const EVT VT, const int Offset) const { @@ -2584,7 +2585,6 @@ bool HexagonInstrInfo::isValidAutoIncImm(const EVT VT, const int Offset) const { llvm_unreachable("Not an auto-inc opc!"); } - bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset, bool Extend) const { // This function is to check whether the "Offset" is in the correct range of @@ -2597,16 +2597,10 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset, // misaligns with respect to load size. switch (Opcode) { - case Hexagon::STriq_pred_V6: - case Hexagon::STriq_pred_vec_V6: - case Hexagon::STriv_pseudo_V6: - case Hexagon::STrivv_pseudo_V6: - case Hexagon::LDriq_pred_V6: - case Hexagon::LDriq_pred_vec_V6: - case Hexagon::LDriv_pseudo_V6: - case Hexagon::LDrivv_pseudo_V6: - case Hexagon::LDrivv_indexed: - case Hexagon::STrivv_indexed: + case Hexagon::PS_vstorerq_ai: + case Hexagon::PS_vstorerw_ai: + case Hexagon::PS_vloadrq_ai: + case Hexagon::PS_vloadrw_ai: case Hexagon::V6_vL32b_ai: case Hexagon::V6_vS32b_ai: case Hexagon::V6_vL32Ub_ai: @@ -2614,16 +2608,10 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset, return (Offset >= Hexagon_MEMV_OFFSET_MIN) && (Offset <= Hexagon_MEMV_OFFSET_MAX); - case Hexagon::STriq_pred_V6_128B: - case Hexagon::STriq_pred_vec_V6_128B: - case Hexagon::STriv_pseudo_V6_128B: - case Hexagon::STrivv_pseudo_V6_128B: - case Hexagon::LDriq_pred_V6_128B: - case Hexagon::LDriq_pred_vec_V6_128B: - case Hexagon::LDriv_pseudo_V6_128B: - case Hexagon::LDrivv_pseudo_V6_128B: - case Hexagon::LDrivv_indexed_128B: - case Hexagon::STrivv_indexed_128B: + case Hexagon::PS_vstorerq_ai_128B: + case Hexagon::PS_vstorerw_ai_128B: + case Hexagon::PS_vloadrq_ai_128B: + case Hexagon::PS_vloadrw_ai_128B: case Hexagon::V6_vL32b_ai_128B: case Hexagon::V6_vS32b_ai_128B: case Hexagon::V6_vL32Ub_ai_128B: @@ -2713,8 +2701,8 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset, case Hexagon::LDriw_mod: return true; - case Hexagon::TFR_FI: - case Hexagon::TFR_FIA: + case Hexagon::PS_fi: + case Hexagon::PS_fia: case Hexagon::INLINEASM: return true; @@ -2751,25 +2739,20 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset, "Please define it in the above switch statement!"); } - -bool HexagonInstrInfo::isVecAcc(const MachineInstr *MI) const { - return MI && isV60VectorInstruction(MI) && isAccumulator(MI); +bool HexagonInstrInfo::isVecAcc(const MachineInstr &MI) const { + return isV60VectorInstruction(MI) && isAccumulator(MI); } - -bool HexagonInstrInfo::isVecALU(const MachineInstr *MI) const { - if (!MI) - return false; - const uint64_t F = get(MI->getOpcode()).TSFlags; +bool HexagonInstrInfo::isVecALU(const MachineInstr &MI) const { + const uint64_t F = get(MI.getOpcode()).TSFlags; const uint64_t V = ((F >> HexagonII::TypePos) & HexagonII::TypeMask); return V == HexagonII::TypeCVI_VA || V == HexagonII::TypeCVI_VA_DV; } - -bool HexagonInstrInfo::isVecUsableNextPacket(const MachineInstr *ProdMI, - const MachineInstr *ConsMI) const { +bool HexagonInstrInfo::isVecUsableNextPacket(const MachineInstr &ProdMI, + const MachineInstr &ConsMI) const { if (EnableACCForwarding && isVecAcc(ProdMI) && isVecAcc(ConsMI)) return true; @@ -2860,31 +2843,40 @@ bool HexagonInstrInfo::isZeroExtendingLoad(const MachineInstr &MI) const { } } - // Add latency to instruction. 
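// The rule below in sketch form: extra latency is only inserted between two
// HVX vector instructions whose result cannot be forwarded into the very
// next packet.
bool addLatencySketch(bool BothHVX, bool UsableNextPacket) {
  return BothHVX && !UsableNextPacket;
}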
-bool HexagonInstrInfo::addLatencyToSchedule(const MachineInstr *MI1, - const MachineInstr *MI2) const { +bool HexagonInstrInfo::addLatencyToSchedule(const MachineInstr &MI1, + const MachineInstr &MI2) const { if (isV60VectorInstruction(MI1) && isV60VectorInstruction(MI2)) if (!isVecUsableNextPacket(MI1, MI2)) return true; return false; } +/// \brief Get the base register and byte offset of a load/store instr. +bool HexagonInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt, + unsigned &BaseReg, int64_t &Offset, const TargetRegisterInfo *TRI) + const { + unsigned AccessSize = 0; + int OffsetVal = 0; + BaseReg = getBaseAndOffset(LdSt, OffsetVal, AccessSize); + Offset = OffsetVal; + return BaseReg != 0; +} /// \brief Can these instructions execute at the same time in a bundle. -bool HexagonInstrInfo::canExecuteInBundle(const MachineInstr *First, - const MachineInstr *Second) const { +bool HexagonInstrInfo::canExecuteInBundle(const MachineInstr &First, + const MachineInstr &Second) const { if (DisableNVSchedule) return false; if (mayBeNewStore(Second)) { // Make sure the definition of the first instruction is the value being // stored. const MachineOperand &Stored = - Second->getOperand(Second->getNumOperands() - 1); + Second.getOperand(Second.getNumOperands() - 1); if (!Stored.isReg()) return false; - for (unsigned i = 0, e = First->getNumOperands(); i < e; ++i) { - const MachineOperand &Op = First->getOperand(i); + for (unsigned i = 0, e = First.getNumOperands(); i < e; ++i) { + const MachineOperand &Op = First.getOperand(i); if (Op.isReg() && Op.isDef() && Op.getReg() == Stored.getReg()) return true; } @@ -2892,6 +2884,10 @@ bool HexagonInstrInfo::canExecuteInBundle(const MachineInstr *First, return false; } +bool HexagonInstrInfo::doesNotReturn(const MachineInstr &CallMI) const { + unsigned Opc = CallMI.getOpcode(); + return Opc == Hexagon::PS_call_nr || Opc == Hexagon::PS_callr_nr; +} bool HexagonInstrInfo::hasEHLabel(const MachineBasicBlock *B) const { for (auto &I : *B) @@ -2900,33 +2896,32 @@ bool HexagonInstrInfo::hasEHLabel(const MachineBasicBlock *B) const { return false; } - // Returns true if an instruction can be converted into a non-extended // equivalent instruction. -bool HexagonInstrInfo::hasNonExtEquivalent(const MachineInstr *MI) const { +bool HexagonInstrInfo::hasNonExtEquivalent(const MachineInstr &MI) const { short NonExtOpcode; // Check if the instruction has a register form that uses register in place // of the extended operand, if so return that as the non-extended form. - if (Hexagon::getRegForm(MI->getOpcode()) >= 0) + if (Hexagon::getRegForm(MI.getOpcode()) >= 0) return true; - if (MI->getDesc().mayLoad() || MI->getDesc().mayStore()) { + if (MI.getDesc().mayLoad() || MI.getDesc().mayStore()) { // Check addressing mode and retrieve non-ext equivalent instruction. switch (getAddrMode(MI)) { case HexagonII::Absolute : // Load/store with absolute addressing mode can be converted into // base+offset mode. - NonExtOpcode = Hexagon::getBaseWithImmOffset(MI->getOpcode()); + NonExtOpcode = Hexagon::getBaseWithImmOffset(MI.getOpcode()); break; case HexagonII::BaseImmOffset : // Load/store with base+offset addressing mode can be converted into // base+register offset addressing mode. However left shift operand should // be set to 0. 
- NonExtOpcode = Hexagon::getBaseWithRegOffset(MI->getOpcode()); + NonExtOpcode = Hexagon::getBaseWithRegOffset(MI.getOpcode()); break; case HexagonII::BaseLongOffset: - NonExtOpcode = Hexagon::getRegShlForm(MI->getOpcode()); + NonExtOpcode = Hexagon::getRegShlForm(MI.getOpcode()); break; default: return false; @@ -2938,13 +2933,11 @@ bool HexagonInstrInfo::hasNonExtEquivalent(const MachineInstr *MI) const { return false; } - -bool HexagonInstrInfo::hasPseudoInstrPair(const MachineInstr *MI) const { - return Hexagon::getRealHWInstr(MI->getOpcode(), +bool HexagonInstrInfo::hasPseudoInstrPair(const MachineInstr &MI) const { + return Hexagon::getRealHWInstr(MI.getOpcode(), Hexagon::InstrType_Pseudo) >= 0; } - bool HexagonInstrInfo::hasUncondBranch(const MachineBasicBlock *B) const { MachineBasicBlock::const_iterator I = B->getFirstTerminator(), E = B->end(); @@ -2956,25 +2949,22 @@ bool HexagonInstrInfo::hasUncondBranch(const MachineBasicBlock *B) return false; } - // Returns true, if a LD insn can be promoted to a cur load. -bool HexagonInstrInfo::mayBeCurLoad(const MachineInstr *MI) const { - auto &HST = MI->getParent()->getParent()->getSubtarget<HexagonSubtarget>(); - const uint64_t F = MI->getDesc().TSFlags; +bool HexagonInstrInfo::mayBeCurLoad(const MachineInstr &MI) const { + auto &HST = MI.getParent()->getParent()->getSubtarget<HexagonSubtarget>(); + const uint64_t F = MI.getDesc().TSFlags; return ((F >> HexagonII::mayCVLoadPos) & HexagonII::mayCVLoadMask) && HST.hasV60TOps(); } - // Returns true, if a ST insn can be promoted to a new-value store. -bool HexagonInstrInfo::mayBeNewStore(const MachineInstr *MI) const { - const uint64_t F = MI->getDesc().TSFlags; +bool HexagonInstrInfo::mayBeNewStore(const MachineInstr &MI) const { + const uint64_t F = MI.getDesc().TSFlags; return (F >> HexagonII::mayNVStorePos) & HexagonII::mayNVStoreMask; } - -bool HexagonInstrInfo::producesStall(const MachineInstr *ProdMI, - const MachineInstr *ConsMI) const { +bool HexagonInstrInfo::producesStall(const MachineInstr &ProdMI, + const MachineInstr &ConsMI) const { // There is no stall when ProdMI is not a V60 vector. if (!isV60VectorInstruction(ProdMI)) return false; @@ -2991,8 +2981,7 @@ bool HexagonInstrInfo::producesStall(const MachineInstr *ProdMI, return true; } - -bool HexagonInstrInfo::producesStall(const MachineInstr *MI, +bool HexagonInstrInfo::producesStall(const MachineInstr &MI, MachineBasicBlock::const_instr_iterator BII) const { // There is no stall when I is not a V60 vector. 
if (!isV60VectorInstruction(MI)) @@ -3001,8 +2990,8 @@ bool HexagonInstrInfo::producesStall(const MachineInstr *MI, MachineBasicBlock::const_instr_iterator MII = BII; MachineBasicBlock::const_instr_iterator MIE = MII->getParent()->instr_end(); - if (!(*MII).isBundle()) { - const MachineInstr *J = &*MII; + if (!MII->isBundle()) { + const MachineInstr &J = *MII; if (!isV60VectorInstruction(J)) return false; else if (isVecUsableNextPacket(J, MI)) @@ -3011,18 +3000,17 @@ bool HexagonInstrInfo::producesStall(const MachineInstr *MI, } for (++MII; MII != MIE && MII->isInsideBundle(); ++MII) { - const MachineInstr *J = &*MII; + const MachineInstr &J = *MII; if (producesStall(J, MI)) return true; } return false; } - -bool HexagonInstrInfo::predCanBeUsedAsDotNew(const MachineInstr *MI, +bool HexagonInstrInfo::predCanBeUsedAsDotNew(const MachineInstr &MI, unsigned PredReg) const { - for (unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++) { - const MachineOperand &MO = MI->getOperand(opNum); + for (unsigned opNum = 0; opNum < MI.getNumOperands(); opNum++) { + const MachineOperand &MO = MI.getOperand(opNum); if (MO.isReg() && MO.isDef() && MO.isImplicit() && (MO.getReg() == PredReg)) return false; // Predicate register must be explicitly defined. } @@ -3030,10 +3018,9 @@ bool HexagonInstrInfo::predCanBeUsedAsDotNew(const MachineInstr *MI, // Hexagon Programmer's Reference says that decbin, memw_locked, and // memd_locked cannot be used as .new as well, // but we don't seem to have these instructions defined. - return MI->getOpcode() != Hexagon::A4_tlbmatch; + return MI.getOpcode() != Hexagon::A4_tlbmatch; } - bool HexagonInstrInfo::PredOpcodeHasJMP_c(unsigned Opcode) const { return (Opcode == Hexagon::J2_jumpt) || (Opcode == Hexagon::J2_jumpf) || @@ -3043,28 +3030,24 @@ bool HexagonInstrInfo::PredOpcodeHasJMP_c(unsigned Opcode) const { (Opcode == Hexagon::J2_jumpfnewpt); } - bool HexagonInstrInfo::predOpcodeHasNot(ArrayRef<MachineOperand> Cond) const { if (Cond.empty() || !isPredicated(Cond[0].getImm())) return false; return !isPredicatedTrue(Cond[0].getImm()); } - -short HexagonInstrInfo::getAbsoluteForm(const MachineInstr *MI) const { - return Hexagon::getAbsoluteForm(MI->getOpcode()); +short HexagonInstrInfo::getAbsoluteForm(const MachineInstr &MI) const { + return Hexagon::getAbsoluteForm(MI.getOpcode()); } - -unsigned HexagonInstrInfo::getAddrMode(const MachineInstr* MI) const { - const uint64_t F = MI->getDesc().TSFlags; +unsigned HexagonInstrInfo::getAddrMode(const MachineInstr &MI) const { + const uint64_t F = MI.getDesc().TSFlags; return (F >> HexagonII::AddrModePos) & HexagonII::AddrModeMask; } - // Returns the base register in a memory access (load/store). The offset is // returned in Offset and the access size is returned in AccessSize. -unsigned HexagonInstrInfo::getBaseAndOffset(const MachineInstr *MI, +unsigned HexagonInstrInfo::getBaseAndOffset(const MachineInstr &MI, int &Offset, unsigned &AccessSize) const { // Return if it is not a base+offset type instruction or a MemOp. if (getAddrMode(MI) != HexagonII::BaseImmOffset && @@ -3092,30 +3075,29 @@ unsigned HexagonInstrInfo::getBaseAndOffset(const MachineInstr *MI, if (isPostIncrement(MI)) Offset = 0; else { - Offset = MI->getOperand(offsetPos).getImm(); + Offset = MI.getOperand(offsetPos).getImm(); } - return MI->getOperand(basePos).getReg(); + return MI.getOperand(basePos).getReg(); } - /// Return the position of the base and offset operands for this instruction. 
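// The operand-position arithmetic used below, as a sketch: memops and stores
// carry the base at operand 0, loads at operand 1, and each predicate
// operand prepended by a predicated (or .new) form shifts both positions
// right by one.
void baseAndOffsetPosSketch(bool IsLoad, unsigned NumPredOps,
                            unsigned &BasePos, unsigned &OffsetPos) {
  BasePos   = IsLoad ? 1 : 0;
  OffsetPos = BasePos + 1;
  BasePos   += NumPredOps;
  OffsetPos += NumPredOps;
}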
-bool HexagonInstrInfo::getBaseAndOffsetPosition(const MachineInstr *MI, +bool HexagonInstrInfo::getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const { // Deal with memops first. if (isMemOp(MI)) { BasePos = 0; OffsetPos = 1; - } else if (MI->mayStore()) { + } else if (MI.mayStore()) { BasePos = 0; OffsetPos = 1; - } else if (MI->mayLoad()) { + } else if (MI.mayLoad()) { BasePos = 1; OffsetPos = 2; } else return false; - if (isPredicated(*MI)) { + if (isPredicated(MI)) { BasePos++; OffsetPos++; } @@ -3124,14 +3106,13 @@ bool HexagonInstrInfo::getBaseAndOffsetPosition(const MachineInstr *MI, OffsetPos++; } - if (!MI->getOperand(BasePos).isReg() || !MI->getOperand(OffsetPos).isImm()) + if (!MI.getOperand(BasePos).isReg() || !MI.getOperand(OffsetPos).isImm()) return false; return true; } - -// Inserts branching instructions in reverse order of their occurence. +// Inserts branching instructions in reverse order of their occurrence. // e.g. jump_t t1 (i1) // jump t2 (i2) // Jumpers = {i2, i1} @@ -3192,37 +3173,33 @@ SmallVector<MachineInstr*, 2> HexagonInstrInfo::getBranchingInstrs( return Jumpers; } - short HexagonInstrInfo::getBaseWithLongOffset(short Opcode) const { if (Opcode < 0) return -1; return Hexagon::getBaseWithLongOffset(Opcode); } - -short HexagonInstrInfo::getBaseWithLongOffset(const MachineInstr *MI) const { - return Hexagon::getBaseWithLongOffset(MI->getOpcode()); +short HexagonInstrInfo::getBaseWithLongOffset(const MachineInstr &MI) const { + return Hexagon::getBaseWithLongOffset(MI.getOpcode()); } - -short HexagonInstrInfo::getBaseWithRegOffset(const MachineInstr *MI) const { - return Hexagon::getBaseWithRegOffset(MI->getOpcode()); +short HexagonInstrInfo::getBaseWithRegOffset(const MachineInstr &MI) const { + return Hexagon::getBaseWithRegOffset(MI.getOpcode()); } - // Returns Operand Index for the constant extended instruction. -unsigned HexagonInstrInfo::getCExtOpNum(const MachineInstr *MI) const { - const uint64_t F = MI->getDesc().TSFlags; +unsigned HexagonInstrInfo::getCExtOpNum(const MachineInstr &MI) const { + const uint64_t F = MI.getDesc().TSFlags; return (F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask; } // See if instruction could potentially be a duplex candidate. // If so, return its group. Zero otherwise. 
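// The HCG_* groups assigned below pair up as follows (sketch): an HCG_A
// producer — a compare or transfer writing P0/P1 — feeds an HCG_B consumer,
// a .new conditional jump on that predicate; unconditional jumps are HCG_C.
enum HCGSketch { HCG_None_S, HCG_A_S, HCG_B_S, HCG_C_S };
bool canFormCompoundSketch(HCGSketch First, HCGSketch Second) {
  return First == HCG_A_S && Second == HCG_B_S; // cf. getCompoundOpcode()
}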
HexagonII::CompoundGroup HexagonInstrInfo::getCompoundCandidateGroup( - const MachineInstr *MI) const { + const MachineInstr &MI) const { unsigned DstReg, SrcReg, Src1Reg, Src2Reg; - switch (MI->getOpcode()) { + switch (MI.getOpcode()) { default: return HexagonII::HCG_None; // @@ -3234,9 +3211,9 @@ HexagonII::CompoundGroup HexagonInstrInfo::getCompoundCandidateGroup( case Hexagon::C2_cmpeq: case Hexagon::C2_cmpgt: case Hexagon::C2_cmpgtu: - DstReg = MI->getOperand(0).getReg(); - Src1Reg = MI->getOperand(1).getReg(); - Src2Reg = MI->getOperand(2).getReg(); + DstReg = MI.getOperand(0).getReg(); + Src1Reg = MI.getOperand(1).getReg(); + Src2Reg = MI.getOperand(2).getReg(); if (Hexagon::PredRegsRegClass.contains(DstReg) && (Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) && isIntRegForSubInst(Src1Reg) && isIntRegForSubInst(Src2Reg)) @@ -3246,19 +3223,19 @@ HexagonII::CompoundGroup HexagonInstrInfo::getCompoundCandidateGroup( case Hexagon::C2_cmpgti: case Hexagon::C2_cmpgtui: // P0 = cmp.eq(Rs,#u2) - DstReg = MI->getOperand(0).getReg(); - SrcReg = MI->getOperand(1).getReg(); + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(1).getReg(); if (Hexagon::PredRegsRegClass.contains(DstReg) && (Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) && - isIntRegForSubInst(SrcReg) && MI->getOperand(2).isImm() && - ((isUInt<5>(MI->getOperand(2).getImm())) || - (MI->getOperand(2).getImm() == -1))) + isIntRegForSubInst(SrcReg) && MI.getOperand(2).isImm() && + ((isUInt<5>(MI.getOperand(2).getImm())) || + (MI.getOperand(2).getImm() == -1))) return HexagonII::HCG_A; break; case Hexagon::A2_tfr: // Rd = Rs - DstReg = MI->getOperand(0).getReg(); - SrcReg = MI->getOperand(1).getReg(); + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(1).getReg(); if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg)) return HexagonII::HCG_A; break; @@ -3266,17 +3243,17 @@ HexagonII::CompoundGroup HexagonInstrInfo::getCompoundCandidateGroup( // Rd = #u6 // Do not test for #u6 size since the const is getting extended // regardless and compound could be formed. - DstReg = MI->getOperand(0).getReg(); + DstReg = MI.getOperand(0).getReg(); if (isIntRegForSubInst(DstReg)) return HexagonII::HCG_A; break; case Hexagon::S2_tstbit_i: - DstReg = MI->getOperand(0).getReg(); - Src1Reg = MI->getOperand(1).getReg(); + DstReg = MI.getOperand(0).getReg(); + Src1Reg = MI.getOperand(1).getReg(); if (Hexagon::PredRegsRegClass.contains(DstReg) && (Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) && - MI->getOperand(2).isImm() && - isIntRegForSubInst(Src1Reg) && (MI->getOperand(2).getImm() == 0)) + MI.getOperand(2).isImm() && + isIntRegForSubInst(Src1Reg) && (MI.getOperand(2).getImm() == 0)) return HexagonII::HCG_A; break; // The fact that .new form is used pretty much guarantees @@ -3287,7 +3264,7 @@ HexagonII::CompoundGroup HexagonInstrInfo::getCompoundCandidateGroup( case Hexagon::J2_jumpfnew: case Hexagon::J2_jumptnewpt: case Hexagon::J2_jumpfnewpt: - Src1Reg = MI->getOperand(0).getReg(); + Src1Reg = MI.getOperand(0).getReg(); if (Hexagon::PredRegsRegClass.contains(Src1Reg) && (Hexagon::P0 == Src1Reg || Hexagon::P1 == Src1Reg)) return HexagonII::HCG_B; @@ -3298,6 +3275,7 @@ HexagonII::CompoundGroup HexagonInstrInfo::getCompoundCandidateGroup( // Do not test for jump range here. 
case Hexagon::J2_jump: case Hexagon::RESTORE_DEALLOC_RET_JMP_V4: + case Hexagon::RESTORE_DEALLOC_RET_JMP_V4_PIC: return HexagonII::HCG_C; break; } @@ -3305,17 +3283,16 @@ HexagonII::CompoundGroup HexagonInstrInfo::getCompoundCandidateGroup( return HexagonII::HCG_None; } - // Returns -1 when there is no opcode found. -unsigned HexagonInstrInfo::getCompoundOpcode(const MachineInstr *GA, - const MachineInstr *GB) const { +unsigned HexagonInstrInfo::getCompoundOpcode(const MachineInstr &GA, + const MachineInstr &GB) const { assert(getCompoundCandidateGroup(GA) == HexagonII::HCG_A); assert(getCompoundCandidateGroup(GB) == HexagonII::HCG_B); - if ((GA->getOpcode() != Hexagon::C2_cmpeqi) || - (GB->getOpcode() != Hexagon::J2_jumptnew)) + if ((GA.getOpcode() != Hexagon::C2_cmpeqi) || + (GB.getOpcode() != Hexagon::J2_jumptnew)) return -1; - unsigned DestReg = GA->getOperand(0).getReg(); - if (!GB->readsRegister(DestReg)) + unsigned DestReg = GA.getOperand(0).getReg(); + if (!GB.readsRegister(DestReg)) return -1; if (DestReg == Hexagon::P0) return Hexagon::J4_cmpeqi_tp0_jump_nt; @@ -3324,7 +3301,6 @@ unsigned HexagonInstrInfo::getCompoundOpcode(const MachineInstr *GA, return -1; } - int HexagonInstrInfo::getCondOpcode(int Opc, bool invertPredicate) const { enum Hexagon::PredSense inPredSense; inPredSense = invertPredicate ? Hexagon::PredSense_false : @@ -3333,21 +3309,12 @@ int HexagonInstrInfo::getCondOpcode(int Opc, bool invertPredicate) const { if (CondOpcode >= 0) // Valid Conditional opcode/instruction return CondOpcode; - // This switch case will be removed once all the instructions have been - // modified to use relation maps. - switch(Opc) { - case Hexagon::TFRI_f: - return !invertPredicate ? Hexagon::TFRI_cPt_f : - Hexagon::TFRI_cNotPt_f; - } - llvm_unreachable("Unexpected predicable instruction"); } - // Return the cur value instruction for a given store. -int HexagonInstrInfo::getDotCurOp(const MachineInstr* MI) const { - switch (MI->getOpcode()) { +int HexagonInstrInfo::getDotCurOp(const MachineInstr &MI) const { + switch (MI.getOpcode()) { default: llvm_unreachable("Unknown .cur type"); case Hexagon::V6_vL32b_pi: return Hexagon::V6_vL32b_cur_pi; @@ -3362,8 +3329,6 @@ int HexagonInstrInfo::getDotCurOp(const MachineInstr* MI) const { return 0; } - - // The diagram below shows the steps involved in the conversion of a predicated // store instruction to its .new predicated new-value form. // @@ -3443,14 +3408,13 @@ int HexagonInstrInfo::getDotCurOp(const MachineInstr* MI) const { // promoted. Therefore, in case of dependence check failure (due to R5) during // next iteration, it should be converted back to its most basic form. - // Return the new value instruction for a given store. -int HexagonInstrInfo::getDotNewOp(const MachineInstr* MI) const { - int NVOpcode = Hexagon::getNewValueOpcode(MI->getOpcode()); +int HexagonInstrInfo::getDotNewOp(const MachineInstr &MI) const { + int NVOpcode = Hexagon::getNewValueOpcode(MI.getOpcode()); if (NVOpcode >= 0) // Valid new-value store instruction. return NVOpcode; - switch (MI->getOpcode()) { + switch (MI.getOpcode()) { default: llvm_unreachable("Unknown .new type"); case Hexagon::S4_storerb_ur: return Hexagon::S4_storerbnew_ur; @@ -3486,23 +3450,22 @@ int HexagonInstrInfo::getDotNewOp(const MachineInstr* MI) const { return 0; } - // Returns the opcode to use when converting MI, which is a conditional jump, // into a conditional instruction which uses the .new value of the predicate. // We also use branch probabilities to add a hint to the jump. 
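// The taken/not-taken hint below is a simple threshold; in sketch form over
// a raw numerator/denominator pair (BranchProbability(1,2) is the 50% mark):
#include <cstdint>
enum HintSketch { TakenPT, NotTakenNT };
HintSketch pickHintSketch(uint64_t Num, uint64_t Den) {
  return (2 * Num >= Den) ? TakenPT : NotTakenNT; // *newpt vs *new encodings
}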
-int HexagonInstrInfo::getDotNewPredJumpOp(const MachineInstr *MI, +int HexagonInstrInfo::getDotNewPredJumpOp(const MachineInstr &MI, const MachineBranchProbabilityInfo *MBPI) const { // We assume that block can have at most two successors. bool taken = false; - const MachineBasicBlock *Src = MI->getParent(); - const MachineOperand *BrTarget = &MI->getOperand(1); - const MachineBasicBlock *Dst = BrTarget->getMBB(); + const MachineBasicBlock *Src = MI.getParent(); + const MachineOperand &BrTarget = MI.getOperand(1); + const MachineBasicBlock *Dst = BrTarget.getMBB(); const BranchProbability Prediction = MBPI->getEdgeProbability(Src, Dst); if (Prediction >= BranchProbability(1,2)) taken = true; - switch (MI->getOpcode()) { + switch (MI.getOpcode()) { case Hexagon::J2_jumpt: return taken ? Hexagon::J2_jumptnewpt : Hexagon::J2_jumptnew; case Hexagon::J2_jumpf: @@ -3513,15 +3476,14 @@ int HexagonInstrInfo::getDotNewPredJumpOp(const MachineInstr *MI, } } - // Return .new predicate version for an instruction. -int HexagonInstrInfo::getDotNewPredOp(const MachineInstr *MI, +int HexagonInstrInfo::getDotNewPredOp(const MachineInstr &MI, const MachineBranchProbabilityInfo *MBPI) const { - int NewOpcode = Hexagon::getPredNewOpcode(MI->getOpcode()); + int NewOpcode = Hexagon::getPredNewOpcode(MI.getOpcode()); if (NewOpcode >= 0) // Valid predicate new instruction return NewOpcode; - switch (MI->getOpcode()) { + switch (MI.getOpcode()) { // Condtional Jumps case Hexagon::J2_jumpt: case Hexagon::J2_jumpf: @@ -3533,7 +3495,6 @@ int HexagonInstrInfo::getDotNewPredOp(const MachineInstr *MI, return 0; } - int HexagonInstrInfo::getDotOldOp(const int opc) const { int NewOp = opc; if (isPredicated(NewOp) && isPredicatedNew(NewOp)) { // Get predicate old form @@ -3549,15 +3510,14 @@ int HexagonInstrInfo::getDotOldOp(const int opc) const { return NewOp; } - // See if instruction could potentially be a duplex candidate. // If so, return its group. Zero otherwise. HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( - const MachineInstr *MI) const { + const MachineInstr &MI) const { unsigned DstReg, SrcReg, Src1Reg, Src2Reg; auto &HRI = getRegisterInfo(); - switch (MI->getOpcode()) { + switch (MI.getOpcode()) { default: return HexagonII::HSIG_None; // @@ -3566,29 +3526,29 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( // Rd = memw(Rs+#u4:2) // Rd = memub(Rs+#u4:0) case Hexagon::L2_loadri_io: - DstReg = MI->getOperand(0).getReg(); - SrcReg = MI->getOperand(1).getReg(); + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(1).getReg(); // Special case this one from Group L2. 
// Rd = memw(r29+#u5:2) if (isIntRegForSubInst(DstReg)) { if (Hexagon::IntRegsRegClass.contains(SrcReg) && HRI.getStackRegister() == SrcReg && - MI->getOperand(2).isImm() && - isShiftedUInt<5,2>(MI->getOperand(2).getImm())) + MI.getOperand(2).isImm() && + isShiftedUInt<5,2>(MI.getOperand(2).getImm())) return HexagonII::HSIG_L2; // Rd = memw(Rs+#u4:2) if (isIntRegForSubInst(SrcReg) && - (MI->getOperand(2).isImm() && - isShiftedUInt<4,2>(MI->getOperand(2).getImm()))) + (MI.getOperand(2).isImm() && + isShiftedUInt<4,2>(MI.getOperand(2).getImm()))) return HexagonII::HSIG_L1; } break; case Hexagon::L2_loadrub_io: // Rd = memub(Rs+#u4:0) - DstReg = MI->getOperand(0).getReg(); - SrcReg = MI->getOperand(1).getReg(); + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(1).getReg(); if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg) && - MI->getOperand(2).isImm() && isUInt<4>(MI->getOperand(2).getImm())) + MI.getOperand(2).isImm() && isUInt<4>(MI.getOperand(2).getImm())) return HexagonII::HSIG_L1; break; // @@ -3604,61 +3564,62 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( case Hexagon::L2_loadrh_io: case Hexagon::L2_loadruh_io: // Rd = memh/memuh(Rs+#u3:1) - DstReg = MI->getOperand(0).getReg(); - SrcReg = MI->getOperand(1).getReg(); + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(1).getReg(); if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg) && - MI->getOperand(2).isImm() && - isShiftedUInt<3,1>(MI->getOperand(2).getImm())) + MI.getOperand(2).isImm() && + isShiftedUInt<3,1>(MI.getOperand(2).getImm())) return HexagonII::HSIG_L2; break; case Hexagon::L2_loadrb_io: // Rd = memb(Rs+#u3:0) - DstReg = MI->getOperand(0).getReg(); - SrcReg = MI->getOperand(1).getReg(); + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(1).getReg(); if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg) && - MI->getOperand(2).isImm() && - isUInt<3>(MI->getOperand(2).getImm())) + MI.getOperand(2).isImm() && + isUInt<3>(MI.getOperand(2).getImm())) return HexagonII::HSIG_L2; break; case Hexagon::L2_loadrd_io: // Rdd = memd(r29+#u5:3) - DstReg = MI->getOperand(0).getReg(); - SrcReg = MI->getOperand(1).getReg(); + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(1).getReg(); if (isDblRegForSubInst(DstReg, HRI) && Hexagon::IntRegsRegClass.contains(SrcReg) && HRI.getStackRegister() == SrcReg && - MI->getOperand(2).isImm() && - isShiftedUInt<5,3>(MI->getOperand(2).getImm())) + MI.getOperand(2).isImm() && + isShiftedUInt<5,3>(MI.getOperand(2).getImm())) return HexagonII::HSIG_L2; break; // dealloc_return is not documented in Hexagon Manual, but marked // with A_SUBINSN attribute in iset_v4classic.py. case Hexagon::RESTORE_DEALLOC_RET_JMP_V4: + case Hexagon::RESTORE_DEALLOC_RET_JMP_V4_PIC: case Hexagon::L4_return: case Hexagon::L2_deallocframe: return HexagonII::HSIG_L2; case Hexagon::EH_RETURN_JMPR: - case Hexagon::JMPret : + case Hexagon::PS_jmpret: // jumpr r31 // Actual form JMPR %PC<imp-def>, %R31<imp-use>, %R0<imp-use,internal>. 
- DstReg = MI->getOperand(0).getReg(); + DstReg = MI.getOperand(0).getReg(); if (Hexagon::IntRegsRegClass.contains(DstReg) && (Hexagon::R31 == DstReg)) return HexagonII::HSIG_L2; break; - case Hexagon::JMPrett: - case Hexagon::JMPretf: - case Hexagon::JMPrettnewpt: - case Hexagon::JMPretfnewpt : - case Hexagon::JMPrettnew : - case Hexagon::JMPretfnew : - DstReg = MI->getOperand(1).getReg(); - SrcReg = MI->getOperand(0).getReg(); + case Hexagon::PS_jmprett: + case Hexagon::PS_jmpretf: + case Hexagon::PS_jmprettnewpt: + case Hexagon::PS_jmpretfnewpt: + case Hexagon::PS_jmprettnew: + case Hexagon::PS_jmpretfnew: + DstReg = MI.getOperand(1).getReg(); + SrcReg = MI.getOperand(0).getReg(); // [if ([!]p0[.new])] jumpr r31 if ((Hexagon::PredRegsRegClass.contains(SrcReg) && (Hexagon::P0 == SrcReg)) && (Hexagon::IntRegsRegClass.contains(DstReg) && (Hexagon::R31 == DstReg))) return HexagonII::HSIG_L2; - break; + break; case Hexagon::L4_return_t : case Hexagon::L4_return_f : case Hexagon::L4_return_tnew_pnt : @@ -3666,7 +3627,7 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( case Hexagon::L4_return_tnew_pt : case Hexagon::L4_return_fnew_pt : // [if ([!]p0[.new])] dealloc_return - SrcReg = MI->getOperand(0).getReg(); + SrcReg = MI.getOperand(0).getReg(); if (Hexagon::PredRegsRegClass.contains(SrcReg) && (Hexagon::P0 == SrcReg)) return HexagonII::HSIG_L2; break; @@ -3678,25 +3639,25 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( case Hexagon::S2_storeri_io: // Special case this one from Group S2. // memw(r29+#u5:2) = Rt - Src1Reg = MI->getOperand(0).getReg(); - Src2Reg = MI->getOperand(2).getReg(); + Src1Reg = MI.getOperand(0).getReg(); + Src2Reg = MI.getOperand(2).getReg(); if (Hexagon::IntRegsRegClass.contains(Src1Reg) && isIntRegForSubInst(Src2Reg) && - HRI.getStackRegister() == Src1Reg && MI->getOperand(1).isImm() && - isShiftedUInt<5,2>(MI->getOperand(1).getImm())) + HRI.getStackRegister() == Src1Reg && MI.getOperand(1).isImm() && + isShiftedUInt<5,2>(MI.getOperand(1).getImm())) return HexagonII::HSIG_S2; // memw(Rs+#u4:2) = Rt if (isIntRegForSubInst(Src1Reg) && isIntRegForSubInst(Src2Reg) && - MI->getOperand(1).isImm() && - isShiftedUInt<4,2>(MI->getOperand(1).getImm())) + MI.getOperand(1).isImm() && + isShiftedUInt<4,2>(MI.getOperand(1).getImm())) return HexagonII::HSIG_S1; break; case Hexagon::S2_storerb_io: // memb(Rs+#u4:0) = Rt - Src1Reg = MI->getOperand(0).getReg(); - Src2Reg = MI->getOperand(2).getReg(); + Src1Reg = MI.getOperand(0).getReg(); + Src2Reg = MI.getOperand(2).getReg(); if (isIntRegForSubInst(Src1Reg) && isIntRegForSubInst(Src2Reg) && - MI->getOperand(1).isImm() && isUInt<4>(MI->getOperand(1).getImm())) + MI.getOperand(1).isImm() && isUInt<4>(MI.getOperand(1).getImm())) return HexagonII::HSIG_S1; break; // @@ -3710,42 +3671,42 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( // allocframe(#u5:3) case Hexagon::S2_storerh_io: // memh(Rs+#u3:1) = Rt - Src1Reg = MI->getOperand(0).getReg(); - Src2Reg = MI->getOperand(2).getReg(); + Src1Reg = MI.getOperand(0).getReg(); + Src2Reg = MI.getOperand(2).getReg(); if (isIntRegForSubInst(Src1Reg) && isIntRegForSubInst(Src2Reg) && - MI->getOperand(1).isImm() && - isShiftedUInt<3,1>(MI->getOperand(1).getImm())) + MI.getOperand(1).isImm() && + isShiftedUInt<3,1>(MI.getOperand(1).getImm())) return HexagonII::HSIG_S1; break; case Hexagon::S2_storerd_io: // memd(r29+#s6:3) = Rtt - Src1Reg = MI->getOperand(0).getReg(); - Src2Reg = MI->getOperand(2).getReg(); + 
Src1Reg = MI.getOperand(0).getReg(); + Src2Reg = MI.getOperand(2).getReg(); if (isDblRegForSubInst(Src2Reg, HRI) && Hexagon::IntRegsRegClass.contains(Src1Reg) && - HRI.getStackRegister() == Src1Reg && MI->getOperand(1).isImm() && - isShiftedInt<6,3>(MI->getOperand(1).getImm())) + HRI.getStackRegister() == Src1Reg && MI.getOperand(1).isImm() && + isShiftedInt<6,3>(MI.getOperand(1).getImm())) return HexagonII::HSIG_S2; break; case Hexagon::S4_storeiri_io: // memw(Rs+#u4:2) = #U1 - Src1Reg = MI->getOperand(0).getReg(); - if (isIntRegForSubInst(Src1Reg) && MI->getOperand(1).isImm() && - isShiftedUInt<4,2>(MI->getOperand(1).getImm()) && - MI->getOperand(2).isImm() && isUInt<1>(MI->getOperand(2).getImm())) + Src1Reg = MI.getOperand(0).getReg(); + if (isIntRegForSubInst(Src1Reg) && MI.getOperand(1).isImm() && + isShiftedUInt<4,2>(MI.getOperand(1).getImm()) && + MI.getOperand(2).isImm() && isUInt<1>(MI.getOperand(2).getImm())) return HexagonII::HSIG_S2; break; case Hexagon::S4_storeirb_io: // memb(Rs+#u4) = #U1 - Src1Reg = MI->getOperand(0).getReg(); + Src1Reg = MI.getOperand(0).getReg(); if (isIntRegForSubInst(Src1Reg) && - MI->getOperand(1).isImm() && isUInt<4>(MI->getOperand(1).getImm()) && - MI->getOperand(2).isImm() && isUInt<1>(MI->getOperand(2).getImm())) + MI.getOperand(1).isImm() && isUInt<4>(MI.getOperand(1).getImm()) && + MI.getOperand(2).isImm() && isUInt<1>(MI.getOperand(2).getImm())) return HexagonII::HSIG_S2; break; case Hexagon::S2_allocframe: - if (MI->getOperand(0).isImm() && - isShiftedUInt<5,3>(MI->getOperand(0).getImm())) + if (MI.getOperand(0).isImm() && + isShiftedUInt<5,3>(MI.getOperand(0).getImm())) return HexagonII::HSIG_S1; break; // @@ -3767,31 +3728,31 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( // Rd = sxth/sxtb/zxtb/zxth(Rs) // Rd = and(Rs,#1) case Hexagon::A2_addi: - DstReg = MI->getOperand(0).getReg(); - SrcReg = MI->getOperand(1).getReg(); + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(1).getReg(); if (isIntRegForSubInst(DstReg)) { // Rd = add(r29,#u6:2) if (Hexagon::IntRegsRegClass.contains(SrcReg) && - HRI.getStackRegister() == SrcReg && MI->getOperand(2).isImm() && - isShiftedUInt<6,2>(MI->getOperand(2).getImm())) + HRI.getStackRegister() == SrcReg && MI.getOperand(2).isImm() && + isShiftedUInt<6,2>(MI.getOperand(2).getImm())) return HexagonII::HSIG_A; // Rx = add(Rx,#s7) - if ((DstReg == SrcReg) && MI->getOperand(2).isImm() && - isInt<7>(MI->getOperand(2).getImm())) + if ((DstReg == SrcReg) && MI.getOperand(2).isImm() && + isInt<7>(MI.getOperand(2).getImm())) return HexagonII::HSIG_A; // Rd = add(Rs,#1) // Rd = add(Rs,#-1) - if (isIntRegForSubInst(SrcReg) && MI->getOperand(2).isImm() && - ((MI->getOperand(2).getImm() == 1) || - (MI->getOperand(2).getImm() == -1))) + if (isIntRegForSubInst(SrcReg) && MI.getOperand(2).isImm() && + ((MI.getOperand(2).getImm() == 1) || + (MI.getOperand(2).getImm() == -1))) return HexagonII::HSIG_A; } break; case Hexagon::A2_add: // Rx = add(Rx,Rs) - DstReg = MI->getOperand(0).getReg(); - Src1Reg = MI->getOperand(1).getReg(); - Src2Reg = MI->getOperand(2).getReg(); + DstReg = MI.getOperand(0).getReg(); + Src1Reg = MI.getOperand(1).getReg(); + Src2Reg = MI.getOperand(2).getReg(); if (isIntRegForSubInst(DstReg) && (DstReg == Src1Reg) && isIntRegForSubInst(Src2Reg)) return HexagonII::HSIG_A; @@ -3800,18 +3761,18 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( // Same as zxtb. 
// Rd16=and(Rs16,#255) // Rd16=and(Rs16,#1) - DstReg = MI->getOperand(0).getReg(); - SrcReg = MI->getOperand(1).getReg(); + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(1).getReg(); if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg) && - MI->getOperand(2).isImm() && - ((MI->getOperand(2).getImm() == 1) || - (MI->getOperand(2).getImm() == 255))) + MI.getOperand(2).isImm() && + ((MI.getOperand(2).getImm() == 1) || + (MI.getOperand(2).getImm() == 255))) return HexagonII::HSIG_A; break; case Hexagon::A2_tfr: // Rd = Rs - DstReg = MI->getOperand(0).getReg(); - SrcReg = MI->getOperand(1).getReg(); + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(1).getReg(); if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg)) return HexagonII::HSIG_A; break; @@ -3820,7 +3781,7 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( // Do not test for #u6 size since the const is getting extended // regardless and compound could be formed. // Rd = #-1 - DstReg = MI->getOperand(0).getReg(); + DstReg = MI.getOperand(0).getReg(); if (isIntRegForSubInst(DstReg)) return HexagonII::HSIG_A; break; @@ -3831,51 +3792,51 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( // if ([!]P0[.new]) Rd = #0 // Actual form: // %R16<def> = C2_cmovenewit %P0<internal>, 0, %R16<imp-use,undef>; - DstReg = MI->getOperand(0).getReg(); - SrcReg = MI->getOperand(1).getReg(); + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(1).getReg(); if (isIntRegForSubInst(DstReg) && Hexagon::PredRegsRegClass.contains(SrcReg) && Hexagon::P0 == SrcReg && - MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) + MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) return HexagonII::HSIG_A; break; case Hexagon::C2_cmpeqi: // P0 = cmp.eq(Rs,#u2) - DstReg = MI->getOperand(0).getReg(); - SrcReg = MI->getOperand(1).getReg(); + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(1).getReg(); if (Hexagon::PredRegsRegClass.contains(DstReg) && Hexagon::P0 == DstReg && isIntRegForSubInst(SrcReg) && - MI->getOperand(2).isImm() && isUInt<2>(MI->getOperand(2).getImm())) + MI.getOperand(2).isImm() && isUInt<2>(MI.getOperand(2).getImm())) return HexagonII::HSIG_A; break; case Hexagon::A2_combineii: case Hexagon::A4_combineii: // Rdd = combine(#u2,#U2) - DstReg = MI->getOperand(0).getReg(); + DstReg = MI.getOperand(0).getReg(); if (isDblRegForSubInst(DstReg, HRI) && - ((MI->getOperand(1).isImm() && isUInt<2>(MI->getOperand(1).getImm())) || - (MI->getOperand(1).isGlobal() && - isUInt<2>(MI->getOperand(1).getOffset()))) && - ((MI->getOperand(2).isImm() && isUInt<2>(MI->getOperand(2).getImm())) || - (MI->getOperand(2).isGlobal() && - isUInt<2>(MI->getOperand(2).getOffset())))) + ((MI.getOperand(1).isImm() && isUInt<2>(MI.getOperand(1).getImm())) || + (MI.getOperand(1).isGlobal() && + isUInt<2>(MI.getOperand(1).getOffset()))) && + ((MI.getOperand(2).isImm() && isUInt<2>(MI.getOperand(2).getImm())) || + (MI.getOperand(2).isGlobal() && + isUInt<2>(MI.getOperand(2).getOffset())))) return HexagonII::HSIG_A; break; case Hexagon::A4_combineri: // Rdd = combine(Rs,#0) - DstReg = MI->getOperand(0).getReg(); - SrcReg = MI->getOperand(1).getReg(); + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(1).getReg(); if (isDblRegForSubInst(DstReg, HRI) && isIntRegForSubInst(SrcReg) && - ((MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) || - (MI->getOperand(2).isGlobal() && MI->getOperand(2).getOffset() == 0))) + 
((MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) || + (MI.getOperand(2).isGlobal() && MI.getOperand(2).getOffset() == 0))) return HexagonII::HSIG_A; break; case Hexagon::A4_combineir: // Rdd = combine(#0,Rs) - DstReg = MI->getOperand(0).getReg(); - SrcReg = MI->getOperand(2).getReg(); + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(2).getReg(); if (isDblRegForSubInst(DstReg, HRI) && isIntRegForSubInst(SrcReg) && - ((MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0) || - (MI->getOperand(1).isGlobal() && MI->getOperand(1).getOffset() == 0))) + ((MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) || + (MI.getOperand(1).isGlobal() && MI.getOperand(1).getOffset() == 0))) return HexagonII::HSIG_A; break; case Hexagon::A2_sxtb: @@ -3883,8 +3844,8 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( case Hexagon::A2_zxtb: case Hexagon::A2_zxth: // Rd = sxth/sxtb/zxtb/zxth(Rs) - DstReg = MI->getOperand(0).getReg(); - SrcReg = MI->getOperand(1).getReg(); + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(1).getReg(); if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg)) return HexagonII::HSIG_A; break; @@ -3893,47 +3854,43 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( return HexagonII::HSIG_None; } - -short HexagonInstrInfo::getEquivalentHWInstr(const MachineInstr *MI) const { - return Hexagon::getRealHWInstr(MI->getOpcode(), Hexagon::InstrType_Real); +short HexagonInstrInfo::getEquivalentHWInstr(const MachineInstr &MI) const { + return Hexagon::getRealHWInstr(MI.getOpcode(), Hexagon::InstrType_Real); } - // Return first non-debug instruction in the basic block. MachineInstr *HexagonInstrInfo::getFirstNonDbgInst(MachineBasicBlock *BB) const { for (auto MII = BB->instr_begin(), End = BB->instr_end(); MII != End; MII++) { - MachineInstr *MI = &*MII; - if (MI->isDebugValue()) + MachineInstr &MI = *MII; + if (MI.isDebugValue()) continue; - return MI; + return &MI; } return nullptr; } - unsigned HexagonInstrInfo::getInstrTimingClassLatency( - const InstrItineraryData *ItinData, const MachineInstr *MI) const { + const InstrItineraryData *ItinData, const MachineInstr &MI) const { // Default to one cycle for no itinerary. However, an "empty" itinerary may // still have a MinLatency property, which getStageLatency checks. if (!ItinData) - return getInstrLatency(ItinData, *MI); + return getInstrLatency(ItinData, MI); // Get the latency embedded in the itinerary. If we're not using timing class // latencies or if we using BSB scheduling, then restrict the maximum latency // to 1 (that is, either 0 or 1). - if (MI->isTransient()) + if (MI.isTransient()) return 0; - unsigned Latency = ItinData->getStageLatency(MI->getDesc().getSchedClass()); + unsigned Latency = ItinData->getStageLatency(MI.getDesc().getSchedClass()); if (!EnableTimingClassLatency || - MI->getParent()->getParent()->getSubtarget<HexagonSubtarget>(). + MI.getParent()->getParent()->getSubtarget<HexagonSubtarget>(). useBSBScheduling()) if (Latency > 1) Latency = 1; return Latency; } - // inverts the predication logic. // p -> NotP // NotP -> P @@ -3946,7 +3903,6 @@ bool HexagonInstrInfo::getInvertedPredSense( return true; } - unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const { int InvPredOpcode; InvPredOpcode = isPredicatedTrue(Opc) ? 
Hexagon::getFalsePredOpcode(Opc) @@ -3957,10 +3913,9 @@ unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const { llvm_unreachable("Unexpected predicated instruction"); } - // Returns the max value that doesn't need to be extended. -int HexagonInstrInfo::getMaxValue(const MachineInstr *MI) const { - const uint64_t F = MI->getDesc().TSFlags; +int HexagonInstrInfo::getMaxValue(const MachineInstr &MI) const { + const uint64_t F = MI.getDesc().TSFlags; unsigned isSigned = (F >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask; unsigned bits = (F >> HexagonII::ExtentBitsPos) @@ -3972,16 +3927,14 @@ int HexagonInstrInfo::getMaxValue(const MachineInstr *MI) const { return ~(-1U << bits); } - -unsigned HexagonInstrInfo::getMemAccessSize(const MachineInstr* MI) const { - const uint64_t F = MI->getDesc().TSFlags; +unsigned HexagonInstrInfo::getMemAccessSize(const MachineInstr &MI) const { + const uint64_t F = MI.getDesc().TSFlags; return (F >> HexagonII::MemAccessSizePos) & HexagonII::MemAccesSizeMask; } - // Returns the min value that doesn't need to be extended. -int HexagonInstrInfo::getMinValue(const MachineInstr *MI) const { - const uint64_t F = MI->getDesc().TSFlags; +int HexagonInstrInfo::getMinValue(const MachineInstr &MI) const { + const uint64_t F = MI.getDesc().TSFlags; unsigned isSigned = (F >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask; unsigned bits = (F >> HexagonII::ExtentBitsPos) @@ -3993,24 +3946,23 @@ int HexagonInstrInfo::getMinValue(const MachineInstr *MI) const { return 0; } - // Returns opcode of the non-extended equivalent instruction. -short HexagonInstrInfo::getNonExtOpcode(const MachineInstr *MI) const { +short HexagonInstrInfo::getNonExtOpcode(const MachineInstr &MI) const { // Check if the instruction has a register form that uses register in place // of the extended operand, if so return that as the non-extended form. - short NonExtOpcode = Hexagon::getRegForm(MI->getOpcode()); + short NonExtOpcode = Hexagon::getRegForm(MI.getOpcode()); if (NonExtOpcode >= 0) return NonExtOpcode; - if (MI->getDesc().mayLoad() || MI->getDesc().mayStore()) { + if (MI.getDesc().mayLoad() || MI.getDesc().mayStore()) { // Check addressing mode and retrieve non-ext equivalent instruction. 
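For getMinValue/getMaxValue above, the ranges recovered from the ExtentSigned/ExtentBits TSFlags fields reduce to ordinary two's-complement bounds on a bits-wide immediate. A standalone restatement of that arithmetic (the TSFlags bit-field decoding itself is omitted):

#include <cassert>
#include <cstdint>

// Range of an immediate field that is 'bits' wide, mirroring the
// getMinValue/getMaxValue computations above.
struct Range { int64_t Min, Max; };

inline Range extentRange(unsigned bits, bool isSigned) {
  if (isSigned)                                  // e.g. #s8 -> [-128, 127]
    return {-(int64_t(1) << (bits - 1)), (int64_t(1) << (bits - 1)) - 1};
  return {0, (int64_t(1) << bits) - 1};          // e.g. #u8 -> [0, 255]
}

int main() {
  assert(extentRange(8, true).Min == -128 && extentRange(8, true).Max == 127);
  assert(extentRange(8, false).Max == 255);
}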
switch (getAddrMode(MI)) { case HexagonII::Absolute : - return Hexagon::getBaseWithImmOffset(MI->getOpcode()); + return Hexagon::getBaseWithImmOffset(MI.getOpcode()); case HexagonII::BaseImmOffset : - return Hexagon::getBaseWithRegOffset(MI->getOpcode()); + return Hexagon::getBaseWithRegOffset(MI.getOpcode()); case HexagonII::BaseLongOffset: - return Hexagon::getRegShlForm(MI->getOpcode()); + return Hexagon::getRegShlForm(MI.getOpcode()); default: return -1; @@ -4019,15 +3971,14 @@ short HexagonInstrInfo::getNonExtOpcode(const MachineInstr *MI) const { return -1; } - bool HexagonInstrInfo::getPredReg(ArrayRef<MachineOperand> Cond, unsigned &PredReg, unsigned &PredRegPos, unsigned &PredRegFlags) const { if (Cond.empty()) return false; assert(Cond.size() == 2); if (isNewValueJump(Cond[0].getImm()) || Cond[1].isMBB()) { - DEBUG(dbgs() << "No predregs for new-value jumps/endloop"); - return false; + DEBUG(dbgs() << "No predregs for new-value jumps/endloop"); + return false; } PredReg = Cond[1].getReg(); PredRegPos = 1; @@ -4040,26 +3991,23 @@ bool HexagonInstrInfo::getPredReg(ArrayRef<MachineOperand> Cond, return true; } - -short HexagonInstrInfo::getPseudoInstrPair(const MachineInstr *MI) const { - return Hexagon::getRealHWInstr(MI->getOpcode(), Hexagon::InstrType_Pseudo); +short HexagonInstrInfo::getPseudoInstrPair(const MachineInstr &MI) const { + return Hexagon::getRealHWInstr(MI.getOpcode(), Hexagon::InstrType_Pseudo); } - -short HexagonInstrInfo::getRegForm(const MachineInstr *MI) const { - return Hexagon::getRegForm(MI->getOpcode()); +short HexagonInstrInfo::getRegForm(const MachineInstr &MI) const { + return Hexagon::getRegForm(MI.getOpcode()); } - // Return the number of bytes required to encode the instruction. // Hexagon instructions are fixed length, 4 bytes, unless they // use a constant extender, which requires another 4 bytes. // For debug instructions and prolog labels, return 0. -unsigned HexagonInstrInfo::getSize(const MachineInstr *MI) const { - if (MI->isDebugValue() || MI->isPosition()) +unsigned HexagonInstrInfo::getSize(const MachineInstr &MI) const { + if (MI.isDebugValue() || MI.isPosition()) return 0; - unsigned Size = MI->getDesc().getSize(); + unsigned Size = MI.getDesc().getSize(); if (!Size) // Assume the default insn size in case it cannot be determined // for whatever reason. @@ -4069,71 +4017,65 @@ unsigned HexagonInstrInfo::getSize(const MachineInstr *MI) const { Size += HEXAGON_INSTR_SIZE; // Try and compute number of instructions in asm. - if (BranchRelaxAsmLarge && MI->getOpcode() == Hexagon::INLINEASM) { - const MachineBasicBlock &MBB = *MI->getParent(); + if (BranchRelaxAsmLarge && MI.getOpcode() == Hexagon::INLINEASM) { + const MachineBasicBlock &MBB = *MI.getParent(); const MachineFunction *MF = MBB.getParent(); const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo(); // Count the number of register definitions to find the asm string. unsigned NumDefs = 0; - for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef(); + for (; MI.getOperand(NumDefs).isReg() && MI.getOperand(NumDefs).isDef(); ++NumDefs) - assert(NumDefs != MI->getNumOperands()-2 && "No asm string?"); + assert(NumDefs != MI.getNumOperands()-2 && "No asm string?"); - assert(MI->getOperand(NumDefs).isSymbol() && "No asm string?"); + assert(MI.getOperand(NumDefs).isSymbol() && "No asm string?"); // Disassemble the AsmStr and approximate number of instructions. 
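The comment block above getSize states the whole size rule: every Hexagon instruction is one 4-byte word, a constant extender adds a second word, and debug values and labels contribute nothing. A minimal sketch of that accounting, with kInstrSize standing in for HEXAGON_INSTR_SIZE and the inline-asm estimate via getInlineAsmLength left out:

#include <cassert>

// Sketch of the fixed-width size rule in getSize above: 4 bytes per
// instruction, plus 4 more when a constant extender is needed.
constexpr unsigned kInstrSize = 4; // HEXAGON_INSTR_SIZE

constexpr unsigned encodedSize(bool isDebugOrLabel, bool constExtended) {
  return isDebugOrLabel ? 0 : kInstrSize + (constExtended ? kInstrSize : 0);
}

int main() {
  assert(encodedSize(false, false) == 4); // plain insn
  assert(encodedSize(false, true) == 8);  // extended insn carries an extender word
  assert(encodedSize(true, false) == 0);  // DBG_VALUE / labels take no space
}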
- const char *AsmStr = MI->getOperand(NumDefs).getSymbolName(); + const char *AsmStr = MI.getOperand(NumDefs).getSymbolName(); Size = getInlineAsmLength(AsmStr, *MAI); } return Size; } - -uint64_t HexagonInstrInfo::getType(const MachineInstr* MI) const { - const uint64_t F = MI->getDesc().TSFlags; +uint64_t HexagonInstrInfo::getType(const MachineInstr &MI) const { + const uint64_t F = MI.getDesc().TSFlags; return (F >> HexagonII::TypePos) & HexagonII::TypeMask; } - -unsigned HexagonInstrInfo::getUnits(const MachineInstr* MI) const { - const TargetSubtargetInfo &ST = MI->getParent()->getParent()->getSubtarget(); +unsigned HexagonInstrInfo::getUnits(const MachineInstr &MI) const { + const TargetSubtargetInfo &ST = MI.getParent()->getParent()->getSubtarget(); const InstrItineraryData &II = *ST.getInstrItineraryData(); - const InstrStage &IS = *II.beginStage(MI->getDesc().getSchedClass()); + const InstrStage &IS = *II.beginStage(MI.getDesc().getSchedClass()); return IS.getUnits(); } - unsigned HexagonInstrInfo::getValidSubTargets(const unsigned Opcode) const { const uint64_t F = get(Opcode).TSFlags; return (F >> HexagonII::validSubTargetPos) & HexagonII::validSubTargetMask; } - // Calculate size of the basic block without debug instructions. unsigned HexagonInstrInfo::nonDbgBBSize(const MachineBasicBlock *BB) const { return nonDbgMICount(BB->instr_begin(), BB->instr_end()); } - unsigned HexagonInstrInfo::nonDbgBundleSize( MachineBasicBlock::const_iterator BundleHead) const { assert(BundleHead->isBundle() && "Not a bundle header"); auto MII = BundleHead.getInstrIterator(); // Skip the bundle header. - return nonDbgMICount(++MII, getBundleEnd(*BundleHead)); + return nonDbgMICount(++MII, getBundleEnd(BundleHead.getInstrIterator())); } - /// immediateExtend - Changes the instruction in place to one using an immediate /// extender. -void HexagonInstrInfo::immediateExtend(MachineInstr *MI) const { +void HexagonInstrInfo::immediateExtend(MachineInstr &MI) const { assert((isExtendable(MI)||isConstExtended(MI)) && "Instruction must be extendable"); // Find which operand is extendable. short ExtOpNum = getCExtOpNum(MI); - MachineOperand &MO = MI->getOperand(ExtOpNum); + MachineOperand &MO = MI.getOperand(ExtOpNum); // This needs to be something we understand. assert((MO.isMBB() || MO.isImm()) && "Branch with unknown extendable field type"); @@ -4141,40 +4083,37 @@ void HexagonInstrInfo::immediateExtend(MachineInstr *MI) const { MO.addTargetFlag(HexagonII::HMOTF_ConstExtended); } - bool HexagonInstrInfo::invertAndChangeJumpTarget( - MachineInstr* MI, MachineBasicBlock* NewTarget) const { + MachineInstr &MI, MachineBasicBlock *NewTarget) const { DEBUG(dbgs() << "\n[invertAndChangeJumpTarget] to BB#" - << NewTarget->getNumber(); MI->dump();); - assert(MI->isBranch()); - unsigned NewOpcode = getInvertedPredicatedOpcode(MI->getOpcode()); - int TargetPos = MI->getNumOperands() - 1; + << NewTarget->getNumber(); MI.dump();); + assert(MI.isBranch()); + unsigned NewOpcode = getInvertedPredicatedOpcode(MI.getOpcode()); + int TargetPos = MI.getNumOperands() - 1; // In general branch target is the last operand, // but some implicit defs added at the end might change it. 
- while ((TargetPos > -1) && !MI->getOperand(TargetPos).isMBB()) + while ((TargetPos > -1) && !MI.getOperand(TargetPos).isMBB()) --TargetPos; - assert((TargetPos >= 0) && MI->getOperand(TargetPos).isMBB()); - MI->getOperand(TargetPos).setMBB(NewTarget); - if (EnableBranchPrediction && isPredicatedNew(*MI)) { + assert((TargetPos >= 0) && MI.getOperand(TargetPos).isMBB()); + MI.getOperand(TargetPos).setMBB(NewTarget); + if (EnableBranchPrediction && isPredicatedNew(MI)) { NewOpcode = reversePrediction(NewOpcode); } - MI->setDesc(get(NewOpcode)); + MI.setDesc(get(NewOpcode)); return true; } - void HexagonInstrInfo::genAllInsnTimingClasses(MachineFunction &MF) const { /* +++ The code below is used to generate complete set of Hexagon Insn +++ */ MachineFunction::iterator A = MF.begin(); MachineBasicBlock &B = *A; MachineBasicBlock::iterator I = B.begin(); - MachineInstr *MI = &*I; - DebugLoc DL = MI->getDebugLoc(); + DebugLoc DL = I->getDebugLoc(); MachineInstr *NewMI; for (unsigned insn = TargetOpcode::GENERIC_OP_END+1; insn < Hexagon::INSTRUCTION_LIST_END; ++insn) { - NewMI = BuildMI(B, MI, DL, get(insn)); + NewMI = BuildMI(B, I, DL, get(insn)); DEBUG(dbgs() << "\n" << getName(NewMI->getOpcode()) << " Class: " << NewMI->getDesc().getSchedClass()); NewMI->eraseFromParent(); @@ -4182,17 +4121,15 @@ void HexagonInstrInfo::genAllInsnTimingClasses(MachineFunction &MF) const { /* --- The code above is used to generate complete set of Hexagon Insn --- */ } - // inverts the predication logic. // p -> NotP // NotP -> P -bool HexagonInstrInfo::reversePredSense(MachineInstr* MI) const { - DEBUG(dbgs() << "\nTrying to reverse pred. sense of:"; MI->dump()); - MI->setDesc(get(getInvertedPredicatedOpcode(MI->getOpcode()))); +bool HexagonInstrInfo::reversePredSense(MachineInstr &MI) const { + DEBUG(dbgs() << "\nTrying to reverse pred. sense of:"; MI.dump()); + MI.setDesc(get(getInvertedPredicatedOpcode(MI.getOpcode()))); return true; } - // Reverse the branch prediction. unsigned HexagonInstrInfo::reversePrediction(unsigned Opcode) const { int PredRevOpcode = -1; @@ -4204,14 +4141,12 @@ unsigned HexagonInstrInfo::reversePrediction(unsigned Opcode) const { return PredRevOpcode; } - // TODO: Add more rigorous validation. 
bool HexagonInstrInfo::validateBranchCond(const ArrayRef<MachineOperand> &Cond) const { return Cond.empty() || (Cond[0].isImm() && (Cond.size() != 1)); } - -short HexagonInstrInfo::xformRegToImmOffset(const MachineInstr *MI) const { - return Hexagon::xformRegToImmOffset(MI->getOpcode()); +short HexagonInstrInfo::xformRegToImmOffset(const MachineInstr &MI) const { + return Hexagon::xformRegToImmOffset(MI.getOpcode()); } diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h index 66b6883..2358d4b 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h @@ -16,9 +16,14 @@ #include "HexagonRegisterInfo.h" #include "MCTargetDesc/HexagonBaseInfo.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/MachineValueType.h" #include "llvm/Target/TargetInstrInfo.h" +#include <cstdint> +#include <vector> #define GET_INSTRINFO_HEADER #include "HexagonGenInstrInfo.inc" @@ -29,9 +34,10 @@ struct EVT; class HexagonSubtarget; class HexagonInstrInfo : public HexagonGenInstrInfo { - virtual void anchor(); const HexagonRegisterInfo RI; + virtual void anchor(); + public: explicit HexagonInstrInfo(HexagonSubtarget &ST); @@ -73,7 +79,7 @@ public: /// condition. These operands can be passed to other TargetInstrInfo /// methods to create new branches. /// - /// Note that RemoveBranch and InsertBranch must be implemented to support + /// Note that removeBranch and insertBranch must be implemented to support /// cases where this method returns success. /// /// If AllowModify is true, then this routine is allowed to modify the basic @@ -87,7 +93,8 @@ public: /// Remove the branching code at the end of the specific MBB. /// This is only invoked in cases where AnalyzeBranch returns success. It /// returns the number of instructions that were removed. - unsigned RemoveBranch(MachineBasicBlock &MBB) const override; + unsigned removeBranch(MachineBasicBlock &MBB, + int *BytesRemoved = nullptr) const override; /// Insert branch code into the end of the specified MachineBasicBlock. /// The operands to this method are the same as those @@ -99,9 +106,26 @@ public: /// cases where AnalyzeBranch doesn't apply because there was no original /// branch to analyze. At least this much must be implemented, else tail /// merging needs to be disabled. - unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond, - const DebugLoc &DL) const override; + const DebugLoc &DL, + int *BytesAdded = nullptr) const override; + + /// Analyze the loop code, return true if it cannot be understood. Upon + /// success, this function returns false and returns information about the + /// induction variable and compare instruction used at the end. + bool analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst, + MachineInstr *&CmpInst) const override; + + /// Generate code to reduce the loop iteration by one and check if the loop is + /// finished. Return the value/register of the new loop count. We need + /// this function when peeling off one or more iterations of a loop. This + /// function assumes the nth iteration is peeled first.
+ unsigned reduceLoopCount(MachineBasicBlock &MBB, + MachineInstr *IndVar, MachineInstr &Cmp, + SmallVectorImpl<MachineOperand> &Cond, + SmallVectorImpl<MachineInstr *> &PrevInsts, + unsigned Iter, unsigned MaxIter) const override; /// Return true if it's profitable to predicate /// instructions with accumulated instruction latency of "NumCycles" @@ -172,9 +196,14 @@ public: /// anything was changed. bool expandPostRAPseudo(MachineInstr &MI) const override; + /// \brief Get the base register and byte offset of a load/store instr. + bool getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg, + int64_t &Offset, + const TargetRegisterInfo *TRI) const override; + /// Reverses the branch condition of the specified condition list, /// returning false on success and true if it cannot be reversed. - bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) + bool reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override; /// Insert a noop into the instruction stream at the specified point. @@ -184,6 +213,9 @@ public: /// Returns true if the instruction is already predicated. bool isPredicated(const MachineInstr &MI) const override; + /// Return true for post-incremented instructions. + bool isPostIncrement(const MachineInstr &MI) const override; + /// Convert the instruction into a predicated instruction. /// It returns true if the operation was successful. bool PredicateInstruction(MachineInstr &MI, @@ -234,7 +266,7 @@ public: /// PredCost. unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, - unsigned *PredCost = 0) const override; + unsigned *PredCost = nullptr) const override; /// Create machine specific model for scheduling. DFAPacketizer * @@ -248,6 +280,16 @@ public: areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA = nullptr) const override; + /// For instructions with a base and offset, return the position of the + /// base register and offset operands. + bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, + unsigned &OffsetPos) const override; + + /// If the instruction is an increment of a constant value, return the amount. + bool getIncrementValue(const MachineInstr &MI, int &Value) const override; + + bool isTailCall(const MachineInstr &MI) const override; + /// HexagonInstrInfo specifics. 
/// @@ -255,49 +297,48 @@ public: unsigned createVR(MachineFunction* MF, MVT VT) const; - bool isAbsoluteSet(const MachineInstr* MI) const; - bool isAccumulator(const MachineInstr *MI) const; - bool isComplex(const MachineInstr *MI) const; - bool isCompoundBranchInstr(const MachineInstr *MI) const; - bool isCondInst(const MachineInstr *MI) const; - bool isConditionalALU32 (const MachineInstr* MI) const; - bool isConditionalLoad(const MachineInstr* MI) const; - bool isConditionalStore(const MachineInstr* MI) const; - bool isConditionalTransfer(const MachineInstr* MI) const; - bool isConstExtended(const MachineInstr *MI) const; - bool isDeallocRet(const MachineInstr *MI) const; - bool isDependent(const MachineInstr *ProdMI, - const MachineInstr *ConsMI) const; - bool isDotCurInst(const MachineInstr* MI) const; - bool isDotNewInst(const MachineInstr* MI) const; - bool isDuplexPair(const MachineInstr *MIa, const MachineInstr *MIb) const; - bool isEarlySourceInstr(const MachineInstr *MI) const; + bool isAbsoluteSet(const MachineInstr &MI) const; + bool isAccumulator(const MachineInstr &MI) const; + bool isComplex(const MachineInstr &MI) const; + bool isCompoundBranchInstr(const MachineInstr &MI) const; + bool isCondInst(const MachineInstr &MI) const; + bool isConditionalALU32 (const MachineInstr &MI) const; + bool isConditionalLoad(const MachineInstr &MI) const; + bool isConditionalStore(const MachineInstr &MI) const; + bool isConditionalTransfer(const MachineInstr &MI) const; + bool isConstExtended(const MachineInstr &MI) const; + bool isDeallocRet(const MachineInstr &MI) const; + bool isDependent(const MachineInstr &ProdMI, + const MachineInstr &ConsMI) const; + bool isDotCurInst(const MachineInstr &MI) const; + bool isDotNewInst(const MachineInstr &MI) const; + bool isDuplexPair(const MachineInstr &MIa, const MachineInstr &MIb) const; + bool isEarlySourceInstr(const MachineInstr &MI) const; bool isEndLoopN(unsigned Opcode) const; bool isExpr(unsigned OpType) const; - bool isExtendable(const MachineInstr* MI) const; - bool isExtended(const MachineInstr* MI) const; - bool isFloat(const MachineInstr *MI) const; - bool isHVXMemWithAIndirect(const MachineInstr *I, - const MachineInstr *J) const; - bool isIndirectCall(const MachineInstr *MI) const; - bool isIndirectL4Return(const MachineInstr *MI) const; - bool isJumpR(const MachineInstr *MI) const; - bool isJumpWithinBranchRange(const MachineInstr *MI, unsigned offset) const; - bool isLateInstrFeedsEarlyInstr(const MachineInstr *LRMI, - const MachineInstr *ESMI) const; - bool isLateResultInstr(const MachineInstr *MI) const; - bool isLateSourceInstr(const MachineInstr *MI) const; - bool isLoopN(const MachineInstr *MI) const; - bool isMemOp(const MachineInstr *MI) const; - bool isNewValue(const MachineInstr* MI) const; + bool isExtendable(const MachineInstr &MI) const; + bool isExtended(const MachineInstr &MI) const; + bool isFloat(const MachineInstr &MI) const; + bool isHVXMemWithAIndirect(const MachineInstr &I, + const MachineInstr &J) const; + bool isIndirectCall(const MachineInstr &MI) const; + bool isIndirectL4Return(const MachineInstr &MI) const; + bool isJumpR(const MachineInstr &MI) const; + bool isJumpWithinBranchRange(const MachineInstr &MI, unsigned offset) const; + bool isLateInstrFeedsEarlyInstr(const MachineInstr &LRMI, + const MachineInstr &ESMI) const; + bool isLateResultInstr(const MachineInstr &MI) const; + bool isLateSourceInstr(const MachineInstr &MI) const; + bool isLoopN(const MachineInstr &MI) const; + bool isMemOp(const 
MachineInstr &MI) const; + bool isNewValue(const MachineInstr &MI) const; bool isNewValue(unsigned Opcode) const; - bool isNewValueInst(const MachineInstr* MI) const; - bool isNewValueJump(const MachineInstr* MI) const; + bool isNewValueInst(const MachineInstr &MI) const; + bool isNewValueJump(const MachineInstr &MI) const; bool isNewValueJump(unsigned Opcode) const; - bool isNewValueStore(const MachineInstr* MI) const; + bool isNewValueStore(const MachineInstr &MI) const; bool isNewValueStore(unsigned Opcode) const; - bool isOperandExtended(const MachineInstr *MI, unsigned OperandNum) const; - bool isPostIncrement(const MachineInstr* MI) const; + bool isOperandExtended(const MachineInstr &MI, unsigned OperandNum) const; bool isPredicatedNew(const MachineInstr &MI) const; bool isPredicatedNew(unsigned Opcode) const; bool isPredicatedTrue(const MachineInstr &MI) const; @@ -305,106 +346,101 @@ public: bool isPredicated(unsigned Opcode) const; bool isPredicateLate(unsigned Opcode) const; bool isPredictedTaken(unsigned Opcode) const; - bool isSaveCalleeSavedRegsCall(const MachineInstr *MI) const; + bool isSaveCalleeSavedRegsCall(const MachineInstr &MI) const; bool isSignExtendingLoad(const MachineInstr &MI) const; - bool isSolo(const MachineInstr* MI) const; - bool isSpillPredRegOp(const MachineInstr *MI) const; - bool isTailCall(const MachineInstr *MI) const; - bool isTC1(const MachineInstr *MI) const; - bool isTC2(const MachineInstr *MI) const; - bool isTC2Early(const MachineInstr *MI) const; - bool isTC4x(const MachineInstr *MI) const; - bool isToBeScheduledASAP(const MachineInstr *MI1, - const MachineInstr *MI2) const; - bool isV60VectorInstruction(const MachineInstr *MI) const; + bool isSolo(const MachineInstr &MI) const; + bool isSpillPredRegOp(const MachineInstr &MI) const; + bool isTC1(const MachineInstr &MI) const; + bool isTC2(const MachineInstr &MI) const; + bool isTC2Early(const MachineInstr &MI) const; + bool isTC4x(const MachineInstr &MI) const; + bool isToBeScheduledASAP(const MachineInstr &MI1, + const MachineInstr &MI2) const; + bool isV60VectorInstruction(const MachineInstr &MI) const; bool isValidAutoIncImm(const EVT VT, const int Offset) const; bool isValidOffset(unsigned Opcode, int Offset, bool Extend = true) const; - bool isVecAcc(const MachineInstr *MI) const; - bool isVecALU(const MachineInstr *MI) const; - bool isVecUsableNextPacket(const MachineInstr *ProdMI, - const MachineInstr *ConsMI) const; + bool isVecAcc(const MachineInstr &MI) const; + bool isVecALU(const MachineInstr &MI) const; + bool isVecUsableNextPacket(const MachineInstr &ProdMI, + const MachineInstr &ConsMI) const; bool isZeroExtendingLoad(const MachineInstr &MI) const; - bool addLatencyToSchedule(const MachineInstr *MI1, - const MachineInstr *MI2) const; - bool canExecuteInBundle(const MachineInstr *First, - const MachineInstr *Second) const; + bool addLatencyToSchedule(const MachineInstr &MI1, + const MachineInstr &MI2) const; + bool canExecuteInBundle(const MachineInstr &First, + const MachineInstr &Second) const; + bool doesNotReturn(const MachineInstr &CallMI) const; bool hasEHLabel(const MachineBasicBlock *B) const; - bool hasNonExtEquivalent(const MachineInstr *MI) const; - bool hasPseudoInstrPair(const MachineInstr *MI) const; + bool hasNonExtEquivalent(const MachineInstr &MI) const; + bool hasPseudoInstrPair(const MachineInstr &MI) const; bool hasUncondBranch(const MachineBasicBlock *B) const; - bool mayBeCurLoad(const MachineInstr* MI) const; - bool mayBeNewStore(const MachineInstr* MI) 
const; - bool producesStall(const MachineInstr *ProdMI, - const MachineInstr *ConsMI) const; - bool producesStall(const MachineInstr *MI, + bool mayBeCurLoad(const MachineInstr &MI) const; + bool mayBeNewStore(const MachineInstr &MI) const; + bool producesStall(const MachineInstr &ProdMI, + const MachineInstr &ConsMI) const; + bool producesStall(const MachineInstr &MI, MachineBasicBlock::const_instr_iterator MII) const; - bool predCanBeUsedAsDotNew(const MachineInstr *MI, unsigned PredReg) const; + bool predCanBeUsedAsDotNew(const MachineInstr &MI, unsigned PredReg) const; bool PredOpcodeHasJMP_c(unsigned Opcode) const; bool predOpcodeHasNot(ArrayRef<MachineOperand> Cond) const; - - short getAbsoluteForm(const MachineInstr *MI) const; - unsigned getAddrMode(const MachineInstr* MI) const; - unsigned getBaseAndOffset(const MachineInstr *MI, int &Offset, + short getAbsoluteForm(const MachineInstr &MI) const; + unsigned getAddrMode(const MachineInstr &MI) const; + unsigned getBaseAndOffset(const MachineInstr &MI, int &Offset, unsigned &AccessSize) const; - bool getBaseAndOffsetPosition(const MachineInstr *MI, unsigned &BasePos, - unsigned &OffsetPos) const; short getBaseWithLongOffset(short Opcode) const; - short getBaseWithLongOffset(const MachineInstr *MI) const; - short getBaseWithRegOffset(const MachineInstr *MI) const; + short getBaseWithLongOffset(const MachineInstr &MI) const; + short getBaseWithRegOffset(const MachineInstr &MI) const; SmallVector<MachineInstr*,2> getBranchingInstrs(MachineBasicBlock& MBB) const; - unsigned getCExtOpNum(const MachineInstr *MI) const; + unsigned getCExtOpNum(const MachineInstr &MI) const; HexagonII::CompoundGroup - getCompoundCandidateGroup(const MachineInstr *MI) const; - unsigned getCompoundOpcode(const MachineInstr *GA, - const MachineInstr *GB) const; + getCompoundCandidateGroup(const MachineInstr &MI) const; + unsigned getCompoundOpcode(const MachineInstr &GA, + const MachineInstr &GB) const; int getCondOpcode(int Opc, bool sense) const; - int getDotCurOp(const MachineInstr* MI) const; - int getDotNewOp(const MachineInstr* MI) const; - int getDotNewPredJumpOp(const MachineInstr *MI, + int getDotCurOp(const MachineInstr &MI) const; + int getDotNewOp(const MachineInstr &MI) const; + int getDotNewPredJumpOp(const MachineInstr &MI, const MachineBranchProbabilityInfo *MBPI) const; - int getDotNewPredOp(const MachineInstr *MI, + int getDotNewPredOp(const MachineInstr &MI, const MachineBranchProbabilityInfo *MBPI) const; int getDotOldOp(const int opc) const; - HexagonII::SubInstructionGroup getDuplexCandidateGroup(const MachineInstr *MI) + HexagonII::SubInstructionGroup getDuplexCandidateGroup(const MachineInstr &MI) const; - short getEquivalentHWInstr(const MachineInstr *MI) const; + short getEquivalentHWInstr(const MachineInstr &MI) const; MachineInstr *getFirstNonDbgInst(MachineBasicBlock *BB) const; unsigned getInstrTimingClassLatency(const InstrItineraryData *ItinData, - const MachineInstr *MI) const; + const MachineInstr &MI) const; bool getInvertedPredSense(SmallVectorImpl<MachineOperand> &Cond) const; unsigned getInvertedPredicatedOpcode(const int Opc) const; - int getMaxValue(const MachineInstr *MI) const; - unsigned getMemAccessSize(const MachineInstr* MI) const; - int getMinValue(const MachineInstr *MI) const; - short getNonExtOpcode(const MachineInstr *MI) const; + int getMaxValue(const MachineInstr &MI) const; + unsigned getMemAccessSize(const MachineInstr &MI) const; + int getMinValue(const MachineInstr &MI) const; + short 
getNonExtOpcode(const MachineInstr &MI) const; bool getPredReg(ArrayRef<MachineOperand> Cond, unsigned &PredReg, unsigned &PredRegPos, unsigned &PredRegFlags) const; - short getPseudoInstrPair(const MachineInstr *MI) const; - short getRegForm(const MachineInstr *MI) const; - unsigned getSize(const MachineInstr *MI) const; - uint64_t getType(const MachineInstr* MI) const; - unsigned getUnits(const MachineInstr* MI) const; + short getPseudoInstrPair(const MachineInstr &MI) const; + short getRegForm(const MachineInstr &MI) const; + unsigned getSize(const MachineInstr &MI) const; + uint64_t getType(const MachineInstr &MI) const; + unsigned getUnits(const MachineInstr &MI) const; unsigned getValidSubTargets(const unsigned Opcode) const; - /// getInstrTimingClassLatency - Compute the instruction latency of a given /// instruction using Timing Class information, if available. unsigned nonDbgBBSize(const MachineBasicBlock *BB) const; unsigned nonDbgBundleSize(MachineBasicBlock::const_iterator BundleHead) const; - - void immediateExtend(MachineInstr *MI) const; - bool invertAndChangeJumpTarget(MachineInstr* MI, + void immediateExtend(MachineInstr &MI) const; + bool invertAndChangeJumpTarget(MachineInstr &MI, MachineBasicBlock* NewTarget) const; void genAllInsnTimingClasses(MachineFunction &MF) const; - bool reversePredSense(MachineInstr* MI) const; + bool reversePredSense(MachineInstr &MI) const; unsigned reversePrediction(unsigned Opcode) const; bool validateBranchCond(const ArrayRef<MachineOperand> &Cond) const; - short xformRegToImmOffset(const MachineInstr *MI) const; + short xformRegToImmOffset(const MachineInstr &MI) const; }; -} +} // end namespace llvm -#endif +#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONINSTRINFO_H diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td index 74dc5ac..c5719ad 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td @@ -14,54 +14,6 @@ include "HexagonInstrFormats.td" include "HexagonOperands.td" include "HexagonInstrEnc.td" -// Pattern fragment that combines the value type and the register class -// into a single parameter. -// The pat frags in the definitions below need to have a named register, -// otherwise i32 will be assumed regardless of the register class. The -// name of the register does not matter. -def I1 : PatLeaf<(i1 PredRegs:$R)>; -def I32 : PatLeaf<(i32 IntRegs:$R)>; -def I64 : PatLeaf<(i64 DoubleRegs:$R)>; -def F32 : PatLeaf<(f32 IntRegs:$R)>; -def F64 : PatLeaf<(f64 DoubleRegs:$R)>; - -// Pattern fragments to extract the low and high subregisters from a -// 64-bit value. -def LoReg: OutPatFrag<(ops node:$Rs), - (EXTRACT_SUBREG (i64 $Rs), subreg_loreg)>; -def HiReg: OutPatFrag<(ops node:$Rs), - (EXTRACT_SUBREG (i64 $Rs), subreg_hireg)>; - -def orisadd: PatFrag<(ops node:$Addr, node:$off), - (or node:$Addr, node:$off), [{ return orIsAdd(N); }]>; - -// SDNode for converting immediate C to C-1. -def DEC_CONST_SIGNED : SDNodeXForm<imm, [{ - // Return the byte immediate const-1 as an SDNode. - int32_t imm = N->getSExtValue(); - return XformSToSM1Imm(imm, SDLoc(N)); -}]>; - -// SDNode for converting immediate C to C-2. -def DEC2_CONST_SIGNED : SDNodeXForm<imm, [{ - // Return the byte immediate const-2 as an SDNode. - int32_t imm = N->getSExtValue(); - return XformSToSM2Imm(imm, SDLoc(N)); -}]>; - -// SDNode for converting immediate C to C-3. 
-def DEC3_CONST_SIGNED : SDNodeXForm<imm, [{ - // Return the byte immediate const-3 as an SDNode. - int32_t imm = N->getSExtValue(); - return XformSToSM3Imm(imm, SDLoc(N)); -}]>; - -// SDNode for converting immediate C to C-1. -def DEC_CONST_UNSIGNED : SDNodeXForm<imm, [{ - // Return the byte immediate const-1 as an SDNode. - uint32_t imm = N->getZExtValue(); - return XformUToUM1Imm(imm, SDLoc(N)); -}]>; //===----------------------------------------------------------------------===// // Compare @@ -92,32 +44,15 @@ class T_CMP <string mnemonic, bits<2> MajOp, bit isNot, Operand ImmOp> let Inst{1-0} = dst; } -def C2_cmpeqi : T_CMP <"cmp.eq", 0b00, 0, s10Ext>; -def C2_cmpgti : T_CMP <"cmp.gt", 0b01, 0, s10Ext>; -def C2_cmpgtui : T_CMP <"cmp.gtu", 0b10, 0, u9Ext>; - -class T_CMP_pat <InstHexagon MI, PatFrag OpNode, PatLeaf ImmPred> - : Pat<(i1 (OpNode (i32 IntRegs:$src1), ImmPred:$src2)), - (MI IntRegs:$src1, ImmPred:$src2)>; - -def : T_CMP_pat <C2_cmpeqi, seteq, s10ImmPred>; -def : T_CMP_pat <C2_cmpgti, setgt, s10ImmPred>; -def : T_CMP_pat <C2_cmpgtui, setugt, u9ImmPred>; +def C2_cmpeqi : T_CMP <"cmp.eq", 0b00, 0, s10_0Ext>; +def C2_cmpgti : T_CMP <"cmp.gt", 0b01, 0, s10_0Ext>; +def C2_cmpgtui : T_CMP <"cmp.gtu", 0b10, 0, u9_0Ext>; //===----------------------------------------------------------------------===// // ALU32/ALU + //===----------------------------------------------------------------------===// // Add. -def SDT_Int32Leaf : SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>; -def SDT_Int32Unary : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; - -def SDTHexagonI64I32I32 : SDTypeProfile<1, 2, - [SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>; - -def HexagonCOMBINE : SDNode<"HexagonISD::COMBINE", SDTHexagonI64I32I32>; -def HexagonPACKHL : SDNode<"HexagonISD::PACKHL", SDTHexagonI64I32I32>; - let hasSideEffects = 0, hasNewValue = 1, InputType = "reg" in class T_ALU32_3op<string mnemonic, bits<3> MajOp, bits<3> MinOp, bit OpsRev, bit IsComm> @@ -227,17 +162,6 @@ defm or : T_ALU32_3op_A2<"or", 0b001, 0b001, 0, 1>; defm sub : T_ALU32_3op_A2<"sub", 0b011, 0b001, 1, 0>; defm xor : T_ALU32_3op_A2<"xor", 0b001, 0b011, 0, 1>; -// Pats for instruction selection. -class BinOp32_pat<SDNode Op, InstHexagon MI, ValueType ResT> - : Pat<(ResT (Op (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), - (ResT (MI IntRegs:$Rs, IntRegs:$Rt))>; - -def: BinOp32_pat<add, A2_add, i32>; -def: BinOp32_pat<and, A2_and, i32>; -def: BinOp32_pat<or, A2_or, i32>; -def: BinOp32_pat<sub, A2_sub, i32>; -def: BinOp32_pat<xor, A2_xor, i32>; - // A few special cases producing register pairs: let OutOperandList = (outs DoubleRegs:$Rd), hasNewValue = 0 in { def S2_packhl : T_ALU32_3op <"packhl", 0b101, 0b100, 0, 0>; @@ -252,9 +176,6 @@ let OutOperandList = (outs DoubleRegs:$Rd), hasNewValue = 0 in { def C2_ccombinewnewf : T_ALU32_3op_pred<"combine", 0b101, 0b000, 0, 1, 1>; } -def: BinOp32_pat<HexagonCOMBINE, A2_combinew, i64>; -def: BinOp32_pat<HexagonPACKHL, S2_packhl, i64>; - let hasSideEffects = 0, hasNewValue = 1, isCompare = 1, InputType = "reg" in class T_ALU32_3op_cmp<string mnemonic, bits<2> MinOp, bit IsNeg, bit IsComm> : ALU32_rr<(outs PredRegs:$Pd), (ins IntRegs:$Rs, IntRegs:$Rt), @@ -282,23 +203,6 @@ let Itinerary = ALU32_3op_tc_2early_SLOT0123 in { def C2_cmpgtu : T_ALU32_3op_cmp< "cmp.gtu", 0b11, 0, 0>; } -// Patfrag to convert the usual comparison patfrags (e.g. setlt) to ones -// that reverse the order of the operands. 
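The RevCmp fragment deleted just below rests on a simple identity: a < b is the same predicate as b > a, so hardware that only provides cmp.gt can still select setlt by swapping the operands. A quick standalone check of that identity, written in C++ rather than TableGen since the pattern classes are being removed here:

#include <cassert>

// setlt(a, b) can be selected as cmp.gt(b, a): the operand order is reversed,
// which is all the deleted RevCmp<> pattern fragment encoded.
constexpr bool cmpGt(int a, int b) { return a > b; } // what C2_cmpgt computes
constexpr bool setLt(int a, int b) { return a < b; } // what ISel must match

int main() {
  for (int a = -2; a <= 2; ++a)
    for (int b = -2; b <= 2; ++b)
      assert(setLt(a, b) == cmpGt(b, a)); // swapped operands, same predicate
}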
-class RevCmp<PatFrag F> : PatFrag<(ops node:$rhs, node:$lhs), F.Fragment>; - -// Pats for compares. They use PatFrags as operands, not SDNodes, -// since seteq/setgt/etc. are defined as ParFrags. -class T_cmp32_rr_pat<InstHexagon MI, PatFrag Op, ValueType VT> - : Pat<(VT (Op (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), - (VT (MI IntRegs:$Rs, IntRegs:$Rt))>; - -def: T_cmp32_rr_pat<C2_cmpeq, seteq, i1>; -def: T_cmp32_rr_pat<C2_cmpgt, setgt, i1>; -def: T_cmp32_rr_pat<C2_cmpgtu, setugt, i1>; - -def: T_cmp32_rr_pat<C2_cmpgt, RevCmp<setlt>, i1>; -def: T_cmp32_rr_pat<C2_cmpgtu, RevCmp<setult>, i1>; - let CextOpcode = "MUX", InputType = "reg", hasNewValue = 1 in def C2_mux: ALU32_rr<(outs IntRegs:$Rd), (ins PredRegs:$Pu, IntRegs:$Rs, IntRegs:$Rt), @@ -320,9 +224,6 @@ def C2_mux: ALU32_rr<(outs IntRegs:$Rd), let Inst{4-0} = Rd; } -def: Pat<(i32 (select (i1 PredRegs:$Pu), (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), - (C2_mux PredRegs:$Pu, IntRegs:$Rs, IntRegs:$Rt)>; - // Combines the two immediates into a double register. // Increase complexity to make it greater than any complexity of a combine // that involves a register. @@ -330,10 +231,9 @@ def: Pat<(i32 (select (i1 PredRegs:$Pu), (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), let isReMaterializable = 1, isMoveImm = 1, isAsCheapAsAMove = 1, isExtentSigned = 1, isExtendable = 1, opExtentBits = 8, opExtendable = 1, AddedComplexity = 75 in -def A2_combineii: ALU32Inst <(outs DoubleRegs:$Rdd), (ins s8Ext:$s8, s8Imm:$S8), +def A2_combineii: ALU32Inst <(outs DoubleRegs:$Rdd), (ins s8_0Ext:$s8, s8_0Imm:$S8), "$Rdd = combine(#$s8, #$S8)", - [(set (i64 DoubleRegs:$Rdd), - (i64 (HexagonCOMBINE(i32 s32ImmPred:$s8), (i32 s8ImmPred:$S8))))]> { + []> { bits<5> Rdd; bits<8> s8; bits<8> S8; @@ -352,7 +252,7 @@ def A2_combineii: ALU32Inst <(outs DoubleRegs:$Rdd), (ins s8Ext:$s8, s8Imm:$S8), let hasNewValue = 1, hasSideEffects = 0 in class T_Addri_Pred <bit PredNot, bit PredNew> : ALU32_ri <(outs IntRegs:$Rd), - (ins PredRegs:$Pu, IntRegs:$Rs, s8Ext:$s8), + (ins PredRegs:$Pu, IntRegs:$Rs, s8_0Ext:$s8), !if(PredNot, "if (!$Pu", "if ($Pu")#!if(PredNew,".new) $Rd = ", ") $Rd = ")#"add($Rs, #$s8)"> { bits<5> Rd; @@ -406,8 +306,8 @@ multiclass Addri_Pred<string mnemonic, bit PredNot> { let isExtendable = 1, isExtentSigned = 1, InputType = "imm" in multiclass Addri_base<string mnemonic, SDNode OpNode> { let CextOpcode = mnemonic, BaseOpcode = mnemonic#_ri in { - let opExtendable = 2, opExtentBits = 16, isPredicable = 1 in - def A2_#NAME : T_Addri<s16Ext>; + let opExtendable = 2, opExtentBits = 16, isPredicable = 1, isAdd = 1 in + def A2_#NAME : T_Addri<s16_0Ext>; let opExtendable = 3, opExtentBits = 8, isPredicated = 1 in { defm A2_p#NAME#t : Addri_Pred<mnemonic, 0>; @@ -418,9 +318,6 @@ multiclass Addri_base<string mnemonic, SDNode OpNode> { defm addi : Addri_base<"add", add>, ImmRegRel, PredNewRel; -def: Pat<(i32 (add I32:$Rs, s32ImmPred:$s16)), - (i32 (A2_addi I32:$Rs, imm:$s16))>; - let hasNewValue = 1, hasSideEffects = 0, isPseudo = 1 in def A2_iconst : ALU32_ri <(outs IntRegs:$Rd), @@ -436,9 +333,9 @@ let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 10, InputType = "imm", hasNewValue = 1 in class T_ALU32ri_logical <string mnemonic, SDNode OpNode, bits<2> MinOp> : ALU32_ri <(outs IntRegs:$Rd), - (ins IntRegs:$Rs, s10Ext:$s10), + (ins IntRegs:$Rs, s10_0Ext:$s10), "$Rd = "#mnemonic#"($Rs, #$s10)" , - [(set (i32 IntRegs:$Rd), (OpNode (i32 IntRegs:$Rs), s32ImmPred:$s10))]> { + []> { bits<5> Rd; bits<5> Rs; bits<10> s10; @@ -461,7 +358,7 @@ def A2_andir : 
T_ALU32ri_logical<"and", and, 0b00>, ImmRegRel; // Rd32=sub(#s10,Rs32) let isExtendable = 1, CextOpcode = "sub", opExtendable = 1, isExtentSigned = 1, opExtentBits = 10, InputType = "imm", hasNewValue = 1, hasSideEffects = 0 in -def A2_subri: ALU32_ri <(outs IntRegs:$Rd), (ins s10Ext:$s10, IntRegs:$Rs), +def A2_subri: ALU32_ri <(outs IntRegs:$Rd), (ins s10_0Ext:$s10, IntRegs:$Rs), "$Rd = sub(#$s10, $Rs)", []>, ImmRegRel { bits<5> Rd; bits<10> s10; @@ -483,16 +380,9 @@ def A2_nop: ALU32Inst <(outs), (ins), "nop" > { let Inst{27-24} = 0b1111; } -def: Pat<(sub s32ImmPred:$s10, IntRegs:$Rs), - (A2_subri imm:$s10, IntRegs:$Rs)>; - -// Rd = not(Rs) gets mapped to Rd=sub(#-1, Rs). -def: Pat<(not (i32 IntRegs:$src1)), - (A2_subri -1, IntRegs:$src1)>; - let hasSideEffects = 0, hasNewValue = 1 in class T_tfr16<bit isHi> - : ALU32Inst <(outs IntRegs:$Rx), (ins IntRegs:$src1, u16Imm:$u16), + : ALU32Inst <(outs IntRegs:$Rx), (ins IntRegs:$src1, u16_0Imm:$u16), "$Rx"#!if(isHi, ".h", ".l")#" = #$u16", [], "$src1 = $Rx" > { bits<5> Rx; @@ -601,7 +491,7 @@ let InputType = "imm", isExtendable = 1, isExtentSigned = 1, opExtentBits = 12, isMoveImm = 1, opExtendable = 2, BaseOpcode = "TFRI", CextOpcode = "TFR", hasSideEffects = 0, isPredicated = 1, hasNewValue = 1 in class T_TFRI_Pred<bit PredNot, bit PredNew> - : ALU32_ri<(outs IntRegs:$Rd), (ins PredRegs:$Pu, s12Ext:$s12), + : ALU32_ri<(outs IntRegs:$Rd), (ins PredRegs:$Pu, s12_0Ext:$s12), "if ("#!if(PredNot,"!","")#"$Pu"#!if(PredNew,".new","")#") $Rd = #$s12", [], "", ALU32_2op_tc_1_SLOT0123>, ImmRegRel, PredNewRel { let isPredicatedFalse = PredNot; @@ -630,8 +520,8 @@ let InputType = "imm", isExtendable = 1, isExtentSigned = 1, CextOpcode = "TFR", BaseOpcode = "TFRI", hasNewValue = 1, opNewValue = 0, isAsCheapAsAMove = 1 , opExtendable = 1, opExtentBits = 16, isMoveImm = 1, isPredicated = 0, isPredicable = 1, isReMaterializable = 1 in -def A2_tfrsi : ALU32Inst<(outs IntRegs:$Rd), (ins s16Ext:$s16), "$Rd = #$s16", - [(set (i32 IntRegs:$Rd), s32ImmPred:$s16)], "", ALU32_2op_tc_1_SLOT0123>, +def A2_tfrsi : ALU32Inst<(outs IntRegs:$Rd), (ins s16_0Ext:$s16), "$Rd = #$s16", + [], "", ALU32_2op_tc_1_SLOT0123>, ImmRegRel, PredRel { bits<5> Rd; bits<16> s16; @@ -649,17 +539,17 @@ defm A2_tfrp : TFR64_base<"TFR64">, PredNewRel; // Assembler mapped let isReMaterializable = 1, isMoveImm = 1, isAsCheapAsAMove = 1, isAsmParserOnly = 1 in -def A2_tfrpi : ALU64_rr<(outs DoubleRegs:$dst), (ins s8Imm64:$src1), +def A2_tfrpi : ALU64_rr<(outs DoubleRegs:$dst), (ins s8_0Imm64:$src1), "$dst = #$src1", - [(set (i64 DoubleRegs:$dst), s8Imm64Pred:$src1)]>; + []>; // TODO: see if this instruction can be deleted.. 
let isExtendable = 1, opExtendable = 1, opExtentBits = 6, isAsmParserOnly = 1 in { -def TFRI64_V4 : ALU64_rr<(outs DoubleRegs:$dst), (ins u64Imm:$src1), +def TFRI64_V4 : ALU64_rr<(outs DoubleRegs:$dst), (ins u64_0Imm:$src1), "$dst = #$src1">; def TFRI64_V2_ext : ALU64_rr<(outs DoubleRegs:$dst), - (ins s8Ext:$src1, s8Imm:$src2), + (ins s8_0Ext:$src1, s8_0Imm:$src2), "$dst = combine(##$src1, #$src2)">; } @@ -692,27 +582,20 @@ class T_MUX1 <bit MajOp, dag ins, string AsmStr> } let opExtendable = 2 in -def C2_muxri : T_MUX1<0b1, (ins PredRegs:$Pu, s8Ext:$s8, IntRegs:$Rs), +def C2_muxri : T_MUX1<0b1, (ins PredRegs:$Pu, s8_0Ext:$s8, IntRegs:$Rs), "$Rd = mux($Pu, #$s8, $Rs)">; let opExtendable = 3 in -def C2_muxir : T_MUX1<0b0, (ins PredRegs:$Pu, IntRegs:$Rs, s8Ext:$s8), +def C2_muxir : T_MUX1<0b0, (ins PredRegs:$Pu, IntRegs:$Rs, s8_0Ext:$s8), "$Rd = mux($Pu, $Rs, #$s8)">; -def : Pat<(i32 (select I1:$Pu, s32ImmPred:$s8, I32:$Rs)), - (C2_muxri I1:$Pu, s32ImmPred:$s8, I32:$Rs)>; - -def : Pat<(i32 (select I1:$Pu, I32:$Rs, s32ImmPred:$s8)), - (C2_muxir I1:$Pu, I32:$Rs, s32ImmPred:$s8)>; - // C2_muxii: Scalar mux immediates. let isExtentSigned = 1, hasNewValue = 1, isExtendable = 1, opExtentBits = 8, opExtendable = 2 in def C2_muxii: ALU32Inst <(outs IntRegs:$Rd), - (ins PredRegs:$Pu, s8Ext:$s8, s8Imm:$S8), + (ins PredRegs:$Pu, s8_0Ext:$s8, s8_0Imm:$S8), "$Rd = mux($Pu, #$s8, #$S8)" , - [(set (i32 IntRegs:$Rd), - (i32 (select I1:$Pu, s32ImmPred:$s8, s8ImmPred:$S8)))] > { + []> { bits<5> Rd; bits<2> Pu; bits<8> s8; @@ -729,9 +612,9 @@ def C2_muxii: ALU32Inst <(outs IntRegs:$Rd), } let isCodeGenOnly = 1, isPseudo = 1 in -def MUX64_rr : ALU64_rr<(outs DoubleRegs:$Rd), - (ins PredRegs:$Pu, DoubleRegs:$Rs, DoubleRegs:$Rt), - ".error \"should not emit\" ", []>; +def PS_pselect : ALU64_rr<(outs DoubleRegs:$Rd), + (ins PredRegs:$Pu, DoubleRegs:$Rs, DoubleRegs:$Rt), + ".error \"should not emit\" ", []>; //===----------------------------------------------------------------------===// @@ -809,7 +692,7 @@ defm sxth : ALU32_2op_base<"sxth", 0b111>, PredNewRel; defm zxth : ALU32_2op_base<"zxth", 0b110>, PredNewRel; // Rd=zxtb(Rs): assembler mapped to Rd=and(Rs,#255). -// Compiler would want to generate 'zxtb' instead of 'and' becuase 'zxtb' has +// Compiler would want to generate 'zxtb' instead of 'and' because 'zxtb' has // predicated forms while 'and' doesn't. Since integrated assembler can't // handle 'mapped' instructions, we need to encode 'zxtb' same as 'and' where // immediate operand is set to '255'. 
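The zxtb-to-and mapping described in the comment above is a pure bit identity: zero-extending the low byte of a register and masking with #255 produce the same 32-bit value, which is why the assembler can encode zxtb as an and. A small standalone check:

#include <cassert>
#include <cstdint>
#include <initializer_list>

// zxtb(Rs) == and(Rs, #255): both keep the low 8 bits and clear the rest,
// which is why 'zxtb' can be encoded the same as 'and' with #255.
constexpr uint32_t zxtb(uint32_t rs) { return uint32_t(uint8_t(rs)); }

int main() {
  for (uint32_t r : {0x00000000u, 0x000000ffu, 0x12345678u, 0xffffffffu})
    assert(zxtb(r) == (r & 255u));
}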
@@ -845,11 +728,6 @@ multiclass ZXTB_base <string mnemonic, bits<3> minOp> { defm zxtb : ZXTB_base<"zxtb",0b100>, PredNewRel; -def: Pat<(shl I32:$src1, (i32 16)), (A2_aslh I32:$src1)>; -def: Pat<(sra I32:$src1, (i32 16)), (A2_asrh I32:$src1)>; -def: Pat<(sext_inreg I32:$src1, i8), (A2_sxtb I32:$src1)>; -def: Pat<(sext_inreg I32:$src1, i16), (A2_sxth I32:$src1)>; - //===----------------------------------------------------------------------===// // Template class for vector add and avg //===----------------------------------------------------------------------===// @@ -980,10 +858,6 @@ class T_vcmp <string Str, bits<4> minOp> let Inst{12-8} = Rtt; } -class T_vcmp_pat<InstHexagon MI, PatFrag Op, ValueType T> - : Pat<(i1 (Op (T DoubleRegs:$Rss), (T DoubleRegs:$Rtt))), - (i1 (MI DoubleRegs:$Rss, DoubleRegs:$Rtt))>; - // Vector compare bytes def A2_vcmpbeq : T_vcmp <"vcmpb.eq", 0b0110>; def A2_vcmpbgtu : T_vcmp <"vcmpb.gtu", 0b0111>; @@ -998,15 +872,6 @@ def A2_vcmpweq : T_vcmp <"vcmpw.eq", 0b0000>; def A2_vcmpwgt : T_vcmp <"vcmpw.gt", 0b0001>; def A2_vcmpwgtu : T_vcmp <"vcmpw.gtu", 0b0010>; -def: T_vcmp_pat<A2_vcmpbeq, seteq, v8i8>; -def: T_vcmp_pat<A2_vcmpbgtu, setugt, v8i8>; -def: T_vcmp_pat<A2_vcmpheq, seteq, v4i16>; -def: T_vcmp_pat<A2_vcmphgt, setgt, v4i16>; -def: T_vcmp_pat<A2_vcmphgtu, setugt, v4i16>; -def: T_vcmp_pat<A2_vcmpweq, seteq, v2i32>; -def: T_vcmp_pat<A2_vcmpwgt, setgt, v2i32>; -def: T_vcmp_pat<A2_vcmpwgtu, setugt, v2i32>; - //===----------------------------------------------------------------------===// // ALU32/PERM - //===----------------------------------------------------------------------===// @@ -1019,10 +884,10 @@ def: T_vcmp_pat<A2_vcmpwgtu, setugt, v2i32>; // transform it to cmp.gt subtracting 1 from the immediate. let isPseudo = 1 in { def C2_cmpgei: ALU32Inst < - (outs PredRegs:$Pd), (ins IntRegs:$Rs, s8Ext:$s8), + (outs PredRegs:$Pd), (ins IntRegs:$Rs, s8_0Ext:$s8), "$Pd = cmp.ge($Rs, #$s8)">; def C2_cmpgeui: ALU32Inst < - (outs PredRegs:$Pd), (ins IntRegs:$Rs, u8Ext:$s8), + (outs PredRegs:$Pd), (ins IntRegs:$Rs, u8_0Ext:$s8), "$Pd = cmp.geu($Rs, #$s8)">; } @@ -1112,23 +977,6 @@ let Itinerary = ALU64_tc_2_SLOT23, Defs = [USR_OVF] in { def A2_addh_h16_sat_hh : T_XTYPE_ADD_SUB <0b11, 1, 1, 0>; } -// Add halfword. -def: Pat<(sext_inreg (add I32:$src1, I32:$src2), i16), - (A2_addh_l16_ll I32:$src1, I32:$src2)>; - -def: Pat<(sra (add (shl I32:$src1, (i32 16)), I32:$src2), (i32 16)), - (A2_addh_l16_hl I32:$src1, I32:$src2)>; - -def: Pat<(shl (add I32:$src1, I32:$src2), (i32 16)), - (A2_addh_h16_ll I32:$src1, I32:$src2)>; - -// Subtract halfword. -def: Pat<(sext_inreg (sub I32:$src1, I32:$src2), i16), - (A2_subh_l16_ll I32:$src1, I32:$src2)>; - -def: Pat<(shl (sub I32:$src1, I32:$src2), (i32 16)), - (A2_subh_h16_ll I32:$src1, I32:$src2)>; - let hasSideEffects = 0, hasNewValue = 1 in def S2_parityp: ALU64Inst<(outs IntRegs:$Rd), (ins DoubleRegs:$Rs, DoubleRegs:$Rt), @@ -1168,52 +1016,6 @@ def A2_minu : T_XTYPE_MIN_MAX < 0, 1 >; def A2_max : T_XTYPE_MIN_MAX < 1, 0 >; def A2_maxu : T_XTYPE_MIN_MAX < 1, 1 >; -// Here, depending on the operand being selected, we'll either generate a -// min or max instruction. -// Ex: -// (a>b)?a:b --> max(a,b) => Here check performed is '>' and the value selected -// is the larger of two. So, the corresponding HexagonInst is passed in 'Inst'. -// (a>b)?b:a --> min(a,b) => Here check performed is '>' but the smaller value -// is selected and the corresponding HexagonInst is passed in 'SwapInst'. 
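The point of the Inst/SwapInst pair described above is that a single
comparison serves both selections; only the operand the select keeps
differs. The same distinction in C++ terms (illustrative only):

  #include <cassert>

  int main() {
    int a = 3, b = 7;
    assert(((a > b) ? a : b) == 7);  // select(gt(a,b), a, b) -> max(a,b): Inst
    assert(((a > b) ? b : a) == 3);  // select(gt(a,b), b, a) -> min(a,b): SwapInst
    return 0;
  }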
- -multiclass T_MinMax_pats <PatFrag Op, RegisterClass RC, ValueType VT, - InstHexagon Inst, InstHexagon SwapInst> { - def: Pat<(select (i1 (Op (VT RC:$src1), (VT RC:$src2))), - (VT RC:$src1), (VT RC:$src2)), - (Inst RC:$src1, RC:$src2)>; - def: Pat<(select (i1 (Op (VT RC:$src1), (VT RC:$src2))), - (VT RC:$src2), (VT RC:$src1)), - (SwapInst RC:$src1, RC:$src2)>; -} - - -multiclass MinMax_pats <PatFrag Op, InstHexagon Inst, InstHexagon SwapInst> { - defm: T_MinMax_pats<Op, IntRegs, i32, Inst, SwapInst>; - - def: Pat<(sext_inreg (i32 (select (i1 (Op (i32 PositiveHalfWord:$src1), - (i32 PositiveHalfWord:$src2))), - (i32 PositiveHalfWord:$src1), - (i32 PositiveHalfWord:$src2))), i16), - (Inst IntRegs:$src1, IntRegs:$src2)>; - - def: Pat<(sext_inreg (i32 (select (i1 (Op (i32 PositiveHalfWord:$src1), - (i32 PositiveHalfWord:$src2))), - (i32 PositiveHalfWord:$src2), - (i32 PositiveHalfWord:$src1))), i16), - (SwapInst IntRegs:$src1, IntRegs:$src2)>; -} - -let AddedComplexity = 200 in { - defm: MinMax_pats<setge, A2_max, A2_min>; - defm: MinMax_pats<setgt, A2_max, A2_min>; - defm: MinMax_pats<setle, A2_min, A2_max>; - defm: MinMax_pats<setlt, A2_min, A2_max>; - defm: MinMax_pats<setuge, A2_maxu, A2_minu>; - defm: MinMax_pats<setugt, A2_maxu, A2_minu>; - defm: MinMax_pats<setule, A2_minu, A2_maxu>; - defm: MinMax_pats<setult, A2_minu, A2_maxu>; -} - class T_cmp64_rr<string mnemonic, bits<3> MinOp, bit IsComm> : ALU64_rr<(outs PredRegs:$Pd), (ins DoubleRegs:$Rs, DoubleRegs:$Rt), "$Pd = "#mnemonic#"($Rs, $Rt)", [], "", ALU64_tc_2early_SLOT23> { @@ -1237,16 +1039,6 @@ def C2_cmpeqp : T_cmp64_rr<"cmp.eq", 0b000, 1>; def C2_cmpgtp : T_cmp64_rr<"cmp.gt", 0b010, 0>; def C2_cmpgtup : T_cmp64_rr<"cmp.gtu", 0b100, 0>; -class T_cmp64_rr_pat<InstHexagon MI, PatFrag CmpOp> - : Pat<(i1 (CmpOp (i64 DoubleRegs:$Rs), (i64 DoubleRegs:$Rt))), - (i1 (MI DoubleRegs:$Rs, DoubleRegs:$Rt))>; - -def: T_cmp64_rr_pat<C2_cmpeqp, seteq>; -def: T_cmp64_rr_pat<C2_cmpgtp, setgt>; -def: T_cmp64_rr_pat<C2_cmpgtup, setugt>; -def: T_cmp64_rr_pat<C2_cmpgtp, RevCmp<setlt>>; -def: T_cmp64_rr_pat<C2_cmpgtup, RevCmp<setult>>; - def C2_vmux : ALU64_rr<(outs DoubleRegs:$Rd), (ins PredRegs:$Pu, DoubleRegs:$Rs, DoubleRegs:$Rt), "$Rd = vmux($Pu, $Rs, $Rt)", [], "", ALU64_tc_1_SLOT23> { @@ -1292,12 +1084,10 @@ class T_ALU64_arith<string mnemonic, bits<3> MajOp, bits<3> MinOp, bit IsSat, : T_ALU64_rr<mnemonic, !if(IsSat,":sat",""), 0b0011, MajOp, MinOp, OpsRev, IsComm, "">; +let isAdd = 1 in def A2_addp : T_ALU64_arith<"add", 0b000, 0b111, 0, 0, 1>; def A2_subp : T_ALU64_arith<"sub", 0b001, 0b111, 0, 1, 0>; -def: Pat<(i64 (add I64:$Rs, I64:$Rt)), (A2_addp I64:$Rs, I64:$Rt)>; -def: Pat<(i64 (sub I64:$Rs, I64:$Rt)), (A2_subp I64:$Rs, I64:$Rt)>; - class T_ALU64_logical<string mnemonic, bits<3> MinOp, bit OpsRev, bit IsComm, bit IsNeg> : T_ALU64_rr<mnemonic, "", 0b0011, 0b111, MinOp, OpsRev, IsComm, @@ -1307,10 +1097,6 @@ def A2_andp : T_ALU64_logical<"and", 0b000, 0, 1, 0>; def A2_orp : T_ALU64_logical<"or", 0b010, 0, 1, 0>; def A2_xorp : T_ALU64_logical<"xor", 0b100, 0, 1, 0>; -def: Pat<(i64 (and I64:$Rs, I64:$Rt)), (A2_andp I64:$Rs, I64:$Rt)>; -def: Pat<(i64 (or I64:$Rs, I64:$Rt)), (A2_orp I64:$Rs, I64:$Rt)>; -def: Pat<(i64 (xor I64:$Rs, I64:$Rt)), (A2_xorp I64:$Rs, I64:$Rt)>; - //===----------------------------------------------------------------------===// // ALU64/ALU - //===----------------------------------------------------------------------===// @@ -1361,9 +1147,6 @@ def C2_any8 : T_LOGICAL_1OP<"any8", 0b00>; def C2_all8 : 
T_LOGICAL_1OP<"all8", 0b01>; def C2_not : T_LOGICAL_1OP<"not", 0b10>; -def: Pat<(i1 (not (i1 PredRegs:$Ps))), - (C2_not PredRegs:$Ps)>; - let hasSideEffects = 0 in class T_LOGICAL_2OP<string MnOp, bits<3> OpBits, bit IsNeg, bit Rev> : CRInst<(outs PredRegs:$Pd), (ins PredRegs:$Ps, PredRegs:$Pt), @@ -1389,12 +1172,6 @@ def C2_xor : T_LOGICAL_2OP<"xor", 0b010, 0, 0>; def C2_andn : T_LOGICAL_2OP<"and", 0b011, 1, 1>; def C2_orn : T_LOGICAL_2OP<"or", 0b111, 1, 1>; -def: Pat<(i1 (and I1:$Ps, I1:$Pt)), (C2_and I1:$Ps, I1:$Pt)>; -def: Pat<(i1 (or I1:$Ps, I1:$Pt)), (C2_or I1:$Ps, I1:$Pt)>; -def: Pat<(i1 (xor I1:$Ps, I1:$Pt)), (C2_xor I1:$Ps, I1:$Pt)>; -def: Pat<(i1 (and I1:$Ps, (not I1:$Pt))), (C2_andn I1:$Ps, I1:$Pt)>; -def: Pat<(i1 (or I1:$Ps, (not I1:$Pt))), (C2_orn I1:$Ps, I1:$Pt)>; - let hasSideEffects = 0, hasNewValue = 1 in def C2_vitpack : SInst<(outs IntRegs:$Rd), (ins PredRegs:$Ps, PredRegs:$Pt), "$Rd = vitpack($Ps, $Pt)", [], "", S_2op_tc_1_SLOT23> { @@ -1431,10 +1208,6 @@ def C2_mask : SInst<(outs DoubleRegs:$Rd), (ins PredRegs:$Pt), // JR + //===----------------------------------------------------------------------===// -def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone, - [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; -def eh_return: SDNode<"HexagonISD::EH_RETURN", SDTNone, [SDNPHasChain]>; - class CondStr<string CReg, bit True, bit New> { string S = "if (" # !if(True,"","!") # CReg # !if(New,".new","") # ") "; } @@ -1587,8 +1360,8 @@ let isTerminator = 1, hasSideEffects = 0 in { defm J2_jumpr : JMPR_base<"JMPr">, PredNewRel; - let isReturn = 1, isCodeGenOnly = 1 in - defm JMPret : JMPR_base<"JMPret">, PredNewRel; + let isReturn = 1, isPseudo = 1, isCodeGenOnly = 1 in + defm PS_jmpret : JMPR_base<"JMPret">, PredNewRel; } let validSubTargets = HasV60SubT in @@ -1610,23 +1383,11 @@ multiclass JMPRpt_base<string BaseOp> { defm J2_jumpr : JMPRpt_base<"JMPr">; defm J2_jump : JMPpt_base<"JMP">; -def: Pat<(br bb:$dst), - (J2_jump brtarget:$dst)>; -def: Pat<(retflag), - (JMPret (i32 R31))>; -def: Pat<(brcond (i1 PredRegs:$src1), bb:$offset), - (J2_jumpt PredRegs:$src1, bb:$offset)>; - // A return through builtin_eh_return. let isReturn = 1, isTerminator = 1, isBarrier = 1, hasSideEffects = 0, isCodeGenOnly = 1, Defs = [PC], Uses = [R28], isPredicable = 0 in def EH_RETURN_JMPR : T_JMPr; -def: Pat<(eh_return), - (EH_RETURN_JMPR (i32 R31))>; -def: Pat<(brind (i32 IntRegs:$dst)), - (J2_jumpr IntRegs:$dst)>; - //===----------------------------------------------------------------------===// // JR - //===----------------------------------------------------------------------===// @@ -1784,45 +1545,6 @@ def L2_loadalignh_io: T_loadalign_io <"memh_fifo", 0b0010, s11_1Ext>; let accessSize = ByteAccess, opExtentBits = 11 in def L2_loadalignb_io: T_loadalign_io <"memb_fifo", 0b0100, s11_0Ext>; -// Patterns to select load-indexed (i.e. load from base+offset). 
-multiclass Loadx_pat<PatFrag Load, ValueType VT, PatLeaf ImmPred, - InstHexagon MI> { - def: Pat<(VT (Load AddrFI:$fi)), (VT (MI AddrFI:$fi, 0))>; - def: Pat<(VT (Load (add (i32 AddrFI:$fi), ImmPred:$Off))), - (VT (MI AddrFI:$fi, imm:$Off))>; - def: Pat<(VT (Load (orisadd (i32 AddrFI:$fi), ImmPred:$Off))), - (VT (MI AddrFI:$fi, imm:$Off))>; - def: Pat<(VT (Load (add (i32 IntRegs:$Rs), ImmPred:$Off))), - (VT (MI IntRegs:$Rs, imm:$Off))>; - def: Pat<(VT (Load (i32 IntRegs:$Rs))), (VT (MI IntRegs:$Rs, 0))>; -} - -let AddedComplexity = 20 in { - defm: Loadx_pat<load, i32, s30_2ImmPred, L2_loadri_io>; - defm: Loadx_pat<load, i64, s29_3ImmPred, L2_loadrd_io>; - defm: Loadx_pat<atomic_load_8 , i32, s32_0ImmPred, L2_loadrub_io>; - defm: Loadx_pat<atomic_load_16, i32, s31_1ImmPred, L2_loadruh_io>; - defm: Loadx_pat<atomic_load_32, i32, s30_2ImmPred, L2_loadri_io>; - defm: Loadx_pat<atomic_load_64, i64, s29_3ImmPred, L2_loadrd_io>; - - defm: Loadx_pat<extloadi1, i32, s32_0ImmPred, L2_loadrub_io>; - defm: Loadx_pat<extloadi8, i32, s32_0ImmPred, L2_loadrub_io>; - defm: Loadx_pat<extloadi16, i32, s31_1ImmPred, L2_loadruh_io>; - defm: Loadx_pat<sextloadi8, i32, s32_0ImmPred, L2_loadrb_io>; - defm: Loadx_pat<sextloadi16, i32, s31_1ImmPred, L2_loadrh_io>; - defm: Loadx_pat<zextloadi1, i32, s32_0ImmPred, L2_loadrub_io>; - defm: Loadx_pat<zextloadi8, i32, s32_0ImmPred, L2_loadrub_io>; - defm: Loadx_pat<zextloadi16, i32, s31_1ImmPred, L2_loadruh_io>; - // No sextloadi1. -} - -// Sign-extending loads of i1 need to replicate the lowest bit throughout -// the 32-bit value. Since the loaded value can only be 0 or 1, 0-v should -// do the trick. -let AddedComplexity = 20 in -def: Pat<(i32 (sextloadi1 (i32 IntRegs:$Rs))), - (A2_subri 0, (L2_loadrub_io IntRegs:$Rs, 0))>; - //===----------------------------------------------------------------------===// // Post increment load //===----------------------------------------------------------------------===// @@ -2696,10 +2418,6 @@ def M2_mpy_up_s1_sat : T_MType_rr1 <"mpy", 0b111, 0b000, 1>; def M2_hmmpyh_s1 : T_MType_rr2 <"mpy", 0b101, 0b000, 1, 0, ".h">; def M2_hmmpyl_s1 : T_MType_rr2 <"mpy", 0b101, 0b001, 1, 0, ".l">; -def: Pat<(i32 (mul I32:$src1, I32:$src2)), (M2_mpyi I32:$src1, I32:$src2)>; -def: Pat<(i32 (mulhs I32:$src1, I32:$src2)), (M2_mpy_up I32:$src1, I32:$src2)>; -def: Pat<(i32 (mulhu I32:$src1, I32:$src2)), (M2_mpyu_up I32:$src1, I32:$src2)>; - let hasNewValue = 1, opNewValue = 0 in class T_MType_mpy_ri <bit isNeg, Operand ImmOp, list<dag> pattern> : MInst < (outs IntRegs:$Rd), (ins IntRegs:$Rs, ImmOp:$u8), @@ -2720,12 +2438,9 @@ class T_MType_mpy_ri <bit isNeg, Operand ImmOp, list<dag> pattern> } let isExtendable = 1, opExtentBits = 8, opExtendable = 2 in -def M2_mpysip : T_MType_mpy_ri <0, u8Ext, - [(set (i32 IntRegs:$Rd), (mul IntRegs:$Rs, u32ImmPred:$u8))]>; +def M2_mpysip : T_MType_mpy_ri <0, u8_0Ext, []>; -def M2_mpysin : T_MType_mpy_ri <1, u8Imm, - [(set (i32 IntRegs:$Rd), (ineg (mul IntRegs:$Rs, - u8ImmPred:$u8)))]>; +def M2_mpysin : T_MType_mpy_ri <1, u8_0Imm, []>; // Assember mapped to M2_mpyi let isAsmParserOnly = 1 in @@ -2740,10 +2455,8 @@ def M2_mpyui : MInst<(outs IntRegs:$dst), let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 9, CextOpcode = "mpyi", InputType = "imm", hasNewValue = 1, isAsmParserOnly = 1 in -def M2_mpysmi : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s9Ext:$src2), - "$dst = mpyi($src1, #$src2)", - [(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1), - s32ImmPred:$src2))]>, ImmRegRel; +def 
M2_mpysmi : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s9_0Ext:$src2), + "$dst = mpyi($src1, #$src2)", []>, ImmRegRel; let hasNewValue = 1, isExtendable = 1, opExtentBits = 8, opExtendable = 3, InputType = "imm" in @@ -2792,58 +2505,31 @@ class T_MType_acc_rr <string mnemonic, bits<3> MajOp, bits<3> MinOp, } let CextOpcode = "MPYI_acc", Itinerary = M_tc_3x_SLOT23 in { - def M2_macsip : T_MType_acc_ri <"+= mpyi", 0b010, u8Ext, - [(set (i32 IntRegs:$dst), - (add (mul IntRegs:$src2, u32ImmPred:$src3), - IntRegs:$src1))]>, ImmRegRel; - - def M2_maci : T_MType_acc_rr <"+= mpyi", 0b000, 0b000, 0, - [(set (i32 IntRegs:$dst), - (add (mul IntRegs:$src2, IntRegs:$src3), - IntRegs:$src1))]>, ImmRegRel; + def M2_macsip : T_MType_acc_ri <"+= mpyi", 0b010, u8_0Ext, []>, ImmRegRel; + + def M2_maci : T_MType_acc_rr <"+= mpyi", 0b000, 0b000, 0, []>, ImmRegRel; } let CextOpcode = "ADD_acc" in { let isExtentSigned = 1 in - def M2_accii : T_MType_acc_ri <"+= add", 0b100, s8Ext, - [(set (i32 IntRegs:$dst), - (add (add (i32 IntRegs:$src2), s32ImmPred:$src3), - (i32 IntRegs:$src1)))]>, ImmRegRel; - - def M2_acci : T_MType_acc_rr <"+= add", 0b000, 0b001, 0, - [(set (i32 IntRegs:$dst), - (add (add (i32 IntRegs:$src2), (i32 IntRegs:$src3)), - (i32 IntRegs:$src1)))]>, ImmRegRel; + def M2_accii : T_MType_acc_ri <"+= add", 0b100, s8_0Ext, []>, ImmRegRel; + + def M2_acci : T_MType_acc_rr <"+= add", 0b000, 0b001, 0, []>, ImmRegRel; } let CextOpcode = "SUB_acc" in { let isExtentSigned = 1 in - def M2_naccii : T_MType_acc_ri <"-= add", 0b101, s8Ext>, ImmRegRel; + def M2_naccii : T_MType_acc_ri <"-= add", 0b101, s8_0Ext>, ImmRegRel; def M2_nacci : T_MType_acc_rr <"-= add", 0b100, 0b001, 0>, ImmRegRel; } let Itinerary = M_tc_3x_SLOT23 in -def M2_macsin : T_MType_acc_ri <"-= mpyi", 0b011, u8Ext>; +def M2_macsin : T_MType_acc_ri <"-= mpyi", 0b011, u8_0Ext>; def M2_xor_xacc : T_MType_acc_rr < "^= xor", 0b100, 0b011, 0>; def M2_subacc : T_MType_acc_rr <"+= sub", 0b000, 0b011, 1>; -class T_MType_acc_pat1 <InstHexagon MI, SDNode firstOp, SDNode secOp, - PatLeaf ImmPred> - : Pat <(secOp IntRegs:$src1, (firstOp IntRegs:$src2, ImmPred:$src3)), - (MI IntRegs:$src1, IntRegs:$src2, ImmPred:$src3)>; - -class T_MType_acc_pat2 <InstHexagon MI, SDNode firstOp, SDNode secOp> - : Pat <(i32 (secOp IntRegs:$src1, (firstOp IntRegs:$src2, IntRegs:$src3))), - (MI IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; - -def : T_MType_acc_pat2 <M2_xor_xacc, xor, xor>; -def : T_MType_acc_pat1 <M2_macsin, mul, sub, u32ImmPred>; - -def : T_MType_acc_pat1 <M2_naccii, add, sub, s32ImmPred>; -def : T_MType_acc_pat2 <M2_nacci, add, sub>; - //===----------------------------------------------------------------------===// // Template Class -- XType Vector Instructions //===----------------------------------------------------------------------===// @@ -3189,51 +2875,6 @@ def M2_vmac2 : T_XTYPE_mpy64_acc < "vmpyh", "+", 0b001, 0b001, 0, 0, 0>; def M2_vmac2s_s1 : T_XTYPE_mpy64_acc < "vmpyh", "+", 0b100, 0b101, 1, 1, 0>; def M2_vmac2s_s0 : T_XTYPE_mpy64_acc < "vmpyh", "+", 0b000, 0b101, 1, 0, 0>; -def: Pat<(i64 (mul (i64 (anyext (i32 IntRegs:$src1))), - (i64 (anyext (i32 IntRegs:$src2))))), - (M2_dpmpyuu_s0 IntRegs:$src1, IntRegs:$src2)>; - -def: Pat<(i64 (mul (i64 (sext (i32 IntRegs:$src1))), - (i64 (sext (i32 IntRegs:$src2))))), - (M2_dpmpyss_s0 IntRegs:$src1, IntRegs:$src2)>; - -def: Pat<(i64 (mul (is_sext_i32:$src1), - (is_sext_i32:$src2))), - (M2_dpmpyss_s0 (LoReg DoubleRegs:$src1), (LoReg DoubleRegs:$src2))>; - -// Multiply and accumulate, use full result. 
-// Rxx[+-]=mpy(Rs,Rt) - -def: Pat<(i64 (add (i64 DoubleRegs:$src1), - (mul (i64 (sext (i32 IntRegs:$src2))), - (i64 (sext (i32 IntRegs:$src3)))))), - (M2_dpmpyss_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; - -def: Pat<(i64 (sub (i64 DoubleRegs:$src1), - (mul (i64 (sext (i32 IntRegs:$src2))), - (i64 (sext (i32 IntRegs:$src3)))))), - (M2_dpmpyss_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; - -def: Pat<(i64 (add (i64 DoubleRegs:$src1), - (mul (i64 (anyext (i32 IntRegs:$src2))), - (i64 (anyext (i32 IntRegs:$src3)))))), - (M2_dpmpyuu_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; - -def: Pat<(i64 (add (i64 DoubleRegs:$src1), - (mul (i64 (zext (i32 IntRegs:$src2))), - (i64 (zext (i32 IntRegs:$src3)))))), - (M2_dpmpyuu_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; - -def: Pat<(i64 (sub (i64 DoubleRegs:$src1), - (mul (i64 (anyext (i32 IntRegs:$src2))), - (i64 (anyext (i32 IntRegs:$src3)))))), - (M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; - -def: Pat<(i64 (sub (i64 DoubleRegs:$src1), - (mul (i64 (zext (i32 IntRegs:$src2))), - (i64 (zext (i32 IntRegs:$src3)))))), - (M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; - //===----------------------------------------------------------------------===// // MTYPE/MPYH - //===----------------------------------------------------------------------===// @@ -3375,16 +3016,6 @@ defm storerd: ST_PostInc <"memd", "STrid", DoubleRegs, s4_3Imm, 0b1110>; let accessSize = HalfWordAccess, isNVStorable = 0 in defm storerf: ST_PostInc <"memh", "STrih_H", IntRegs, s4_1Imm, 0b1011, 1>; -class Storepi_pat<PatFrag Store, PatFrag Value, PatFrag Offset, - InstHexagon MI> - : Pat<(Store Value:$src1, I32:$src2, Offset:$offset), - (MI I32:$src2, imm:$offset, Value:$src1)>; - -def: Storepi_pat<post_truncsti8, I32, s4_0ImmPred, S2_storerb_pi>; -def: Storepi_pat<post_truncsti16, I32, s4_1ImmPred, S2_storerh_pi>; -def: Storepi_pat<post_store, I32, s4_2ImmPred, S2_storeri_pi>; -def: Storepi_pat<post_store, I64, s4_3ImmPred, S2_storerd_pi>; - //===----------------------------------------------------------------------===// // Template class for post increment stores with register offset. //===----------------------------------------------------------------------===// @@ -3535,116 +3166,6 @@ let addrMode = BaseImmOffset, InputType = "imm" in { u6_1Ext, 0b011, 1>; } -// Patterns for generating stores, where the address takes different forms: -// - frameindex, -// - frameindex + offset, -// - base + offset, -// - simple (base address without offset). -// These would usually be used together (via Storex_pat defined below), but -// in some cases one may want to apply different properties (such as -// AddedComplexity) to the individual patterns. 
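In C terms, the four address shapes enumerated above correspond to stores
like the following (a sketch; the byte offsets in the comments assume a
4-byte int, and the names are the multiclasses from the block just below):

  void store_forms(int *base, int v) {
    volatile int frame[2]; // stack object, i.e. a frameindex
    frame[0] = v;          // Storex_fi_pat:     frameindex, offset 0
    frame[1] = v;          // Storex_fi_add_pat: frameindex + #4
    base[2]  = v;          // Storex_add_pat:    register base + #8
    *base    = v;          // Storex_simple_pat: plain register address
  }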
-class Storex_fi_pat<PatFrag Store, PatFrag Value, InstHexagon MI> - : Pat<(Store Value:$Rs, AddrFI:$fi), (MI AddrFI:$fi, 0, Value:$Rs)>; -multiclass Storex_fi_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred, - InstHexagon MI> { - def: Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)), - (MI AddrFI:$fi, imm:$Off, Value:$Rs)>; - def: Pat<(Store Value:$Rs, (orisadd (i32 AddrFI:$fi), ImmPred:$Off)), - (MI AddrFI:$fi, imm:$Off, Value:$Rs)>; -} -multiclass Storex_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred, - InstHexagon MI> { - def: Pat<(Store Value:$Rt, (add (i32 IntRegs:$Rs), ImmPred:$Off)), - (MI IntRegs:$Rs, imm:$Off, Value:$Rt)>; - def: Pat<(Store Value:$Rt, (orisadd (i32 IntRegs:$Rs), ImmPred:$Off)), - (MI IntRegs:$Rs, imm:$Off, Value:$Rt)>; -} -class Storex_simple_pat<PatFrag Store, PatFrag Value, InstHexagon MI> - : Pat<(Store Value:$Rt, (i32 IntRegs:$Rs)), - (MI IntRegs:$Rs, 0, Value:$Rt)>; - -// Patterns for generating stores, where the address takes different forms, -// and where the value being stored is transformed through the value modifier -// ValueMod. The address forms are same as above. -class Storexm_fi_pat<PatFrag Store, PatFrag Value, PatFrag ValueMod, - InstHexagon MI> - : Pat<(Store Value:$Rs, AddrFI:$fi), - (MI AddrFI:$fi, 0, (ValueMod Value:$Rs))>; -multiclass Storexm_fi_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred, - PatFrag ValueMod, InstHexagon MI> { - def: Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)), - (MI AddrFI:$fi, imm:$Off, (ValueMod Value:$Rs))>; - def: Pat<(Store Value:$Rs, (orisadd (i32 AddrFI:$fi), ImmPred:$Off)), - (MI AddrFI:$fi, imm:$Off, (ValueMod Value:$Rs))>; -} -multiclass Storexm_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred, - PatFrag ValueMod, InstHexagon MI> { - def: Pat<(Store Value:$Rt, (add (i32 IntRegs:$Rs), ImmPred:$Off)), - (MI IntRegs:$Rs, imm:$Off, (ValueMod Value:$Rt))>; - def: Pat<(Store Value:$Rt, (orisadd (i32 IntRegs:$Rs), ImmPred:$Off)), - (MI IntRegs:$Rs, imm:$Off, (ValueMod Value:$Rt))>; -} -class Storexm_simple_pat<PatFrag Store, PatFrag Value, PatFrag ValueMod, - InstHexagon MI> - : Pat<(Store Value:$Rt, (i32 IntRegs:$Rs)), - (MI IntRegs:$Rs, 0, (ValueMod Value:$Rt))>; - -multiclass Storex_pat<PatFrag Store, PatFrag Value, PatLeaf ImmPred, - InstHexagon MI> { - def: Storex_fi_pat <Store, Value, MI>; - defm: Storex_fi_add_pat <Store, Value, ImmPred, MI>; - defm: Storex_add_pat <Store, Value, ImmPred, MI>; -} - -multiclass Storexm_pat<PatFrag Store, PatFrag Value, PatLeaf ImmPred, - PatFrag ValueMod, InstHexagon MI> { - def: Storexm_fi_pat <Store, Value, ValueMod, MI>; - defm: Storexm_fi_add_pat <Store, Value, ImmPred, ValueMod, MI>; - defm: Storexm_add_pat <Store, Value, ImmPred, ValueMod, MI>; -} - -// Regular stores in the DAG have two operands: value and address. -// Atomic stores also have two, but they are reversed: address, value. -// To use atomic stores with the patterns, they need to have their operands -// swapped. This relies on the knowledge that the F.Fragment uses names -// "ptr" and "val". 
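The operand-order fix-up that SwapSt (defined just below) performs can be
pictured as a thin adapter in ordinary C++; the function names here are
hypothetical and only illustrate the reordering:

  template <typename T>
  void store_pat(T value, T *ptr) { *ptr = value; }        // DAG store: (value, address)

  template <typename T>
  void swap_st(T *ptr, T value) { store_pat(value, ptr); } // atomic order: (address, value)

  int main() {
    int x = 0;
    swap_st(&x, 42);
    return x == 42 ? 0 : 1;
  }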
-class SwapSt<PatFrag F> - : PatFrag<(ops node:$val, node:$ptr), F.Fragment, F.PredicateCode, - F.OperandTransform>; - -let AddedComplexity = 20 in { - defm: Storex_pat<truncstorei8, I32, s32_0ImmPred, S2_storerb_io>; - defm: Storex_pat<truncstorei16, I32, s31_1ImmPred, S2_storerh_io>; - defm: Storex_pat<store, I32, s30_2ImmPred, S2_storeri_io>; - defm: Storex_pat<store, I64, s29_3ImmPred, S2_storerd_io>; - - defm: Storex_pat<SwapSt<atomic_store_8>, I32, s32_0ImmPred, S2_storerb_io>; - defm: Storex_pat<SwapSt<atomic_store_16>, I32, s31_1ImmPred, S2_storerh_io>; - defm: Storex_pat<SwapSt<atomic_store_32>, I32, s30_2ImmPred, S2_storeri_io>; - defm: Storex_pat<SwapSt<atomic_store_64>, I64, s29_3ImmPred, S2_storerd_io>; -} - -// Simple patterns should be tried with the least priority. -def: Storex_simple_pat<truncstorei8, I32, S2_storerb_io>; -def: Storex_simple_pat<truncstorei16, I32, S2_storerh_io>; -def: Storex_simple_pat<store, I32, S2_storeri_io>; -def: Storex_simple_pat<store, I64, S2_storerd_io>; - -def: Storex_simple_pat<SwapSt<atomic_store_8>, I32, S2_storerb_io>; -def: Storex_simple_pat<SwapSt<atomic_store_16>, I32, S2_storerh_io>; -def: Storex_simple_pat<SwapSt<atomic_store_32>, I32, S2_storeri_io>; -def: Storex_simple_pat<SwapSt<atomic_store_64>, I64, S2_storerd_io>; - -let AddedComplexity = 20 in { - defm: Storexm_pat<truncstorei8, I64, s32_0ImmPred, LoReg, S2_storerb_io>; - defm: Storexm_pat<truncstorei16, I64, s31_1ImmPred, LoReg, S2_storerh_io>; - defm: Storexm_pat<truncstorei32, I64, s30_2ImmPred, LoReg, S2_storeri_io>; -} - -def: Storexm_simple_pat<truncstorei8, I64, LoReg, S2_storerb_io>; -def: Storexm_simple_pat<truncstorei16, I64, LoReg, S2_storerh_io>; -def: Storexm_simple_pat<truncstorei32, I64, LoReg, S2_storeri_io>; - // Store predicate. 
let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13, isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in @@ -3951,8 +3472,6 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1 in { // Sign extend word to doubleword def A2_sxtw : T_S2op_1_di <"sxtw", 0b01, 0b000>; -def: Pat <(i64 (sext I32:$src)), (A2_sxtw I32:$src)>; - // Vector saturate and pack let Defs = [USR_OVF] in { def S2_svsathb : T_S2op_1_ii <"vsathb", 0b10, 0b000>; @@ -4001,22 +3520,11 @@ let Itinerary = S_2op_tc_2_SLOT23 in { def A2_negsat : T_S2op_1_ii <"neg", 0b10, 0b110, 1>; } -def: Pat<(i32 (select (i1 (setlt (i32 IntRegs:$src), 0)), - (i32 (sub 0, (i32 IntRegs:$src))), - (i32 IntRegs:$src))), - (A2_abs IntRegs:$src)>; - -let AddedComplexity = 50 in -def: Pat<(i32 (xor (add (sra (i32 IntRegs:$src), (i32 31)), - (i32 IntRegs:$src)), - (sra (i32 IntRegs:$src), (i32 31)))), - (A2_abs IntRegs:$src)>; - class T_S2op_2 <string mnemonic, bits<4> RegTyBits, RegisterClass RCOut, RegisterClass RCIn, bits<3> MajOp, bits<3> MinOp, bit isSat, bit isRnd, list<dag> pattern = []> : SInst <(outs RCOut:$dst), - (ins RCIn:$src, u5Imm:$u5), + (ins RCIn:$src, u5_0Imm:$u5), "$dst = "#mnemonic#"($src, #$u5)"#!if(isSat, ":sat", "") #!if(isRnd, ":rnd", ""), pattern, "", S_2op_tc_2_SLOT23> { @@ -4049,9 +3557,7 @@ class T_S2op_2_ii <string mnemonic, bits<3> MajOp, bits<3> MinOp, isSat, isRnd, pattern>; class T_S2op_shift <string mnemonic, bits<3> MajOp, bits<3> MinOp, SDNode OpNd> - : T_S2op_2_ii <mnemonic, MajOp, MinOp, 0, 0, - [(set (i32 IntRegs:$dst), (OpNd (i32 IntRegs:$src), - (u5ImmPred:$u5)))]>; + : T_S2op_2_ii <mnemonic, MajOp, MinOp, 0, 0, []>; // Vector arithmetic shift right by immediate with truncate and pack def S2_asr_i_svw_trun : T_S2op_2_id <"vasrw", 0b110, 0b010>; @@ -4072,7 +3578,7 @@ def S2_asr_i_r_rnd : T_S2op_2_ii <"asr", 0b010, 0b000, 0, 1>; let isAsmParserOnly = 1 in def S2_asr_i_r_rnd_goodsyntax - : SInst <(outs IntRegs:$dst), (ins IntRegs:$src, u5Imm:$u5), + : SInst <(outs IntRegs:$dst), (ins IntRegs:$src, u5_0Imm:$u5), "$dst = asrrnd($src, #$u5)", [], "", S_2op_tc_1_SLOT23>; @@ -4080,11 +3586,6 @@ let isAsmParserOnly = 1 in def A2_not: ALU32_rr<(outs IntRegs:$dst),(ins IntRegs:$src), "$dst = not($src)">; -def: Pat<(i32 (sra (i32 (add (i32 (sra I32:$src1, u5ImmPred:$src2)), - (i32 1))), - (i32 1))), - (S2_asr_i_r_rnd IntRegs:$src1, u5ImmPred:$src2)>; - class T_S2op_3<string opc, bits<2>MajOp, bits<3>minOp, bits<1> sat = 0> : SInst<(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss), "$Rdd = "#opc#"($Rss)"#!if(!eq(sat, 1),":sat","")> { @@ -4124,9 +3625,6 @@ def A2_vabshsat : T_S2op_3 <"vabsh", 0b01, 0b101, 1>; def A2_vabsw : T_S2op_3 <"vabsw", 0b01, 0b110>; def A2_vabswsat : T_S2op_3 <"vabsw", 0b01, 0b111, 1>; -def : Pat<(not (i64 DoubleRegs:$src1)), - (A2_notp DoubleRegs:$src1)>; - //===----------------------------------------------------------------------===// // STYPE/BIT + //===----------------------------------------------------------------------===// @@ -4166,27 +3664,13 @@ def S2_clb : T_COUNT_LEADING_32<"clb", 0b000, 0b100>; def S2_clbp : T_COUNT_LEADING_64<"clb", 0b010, 0b000>; def S2_clbnorm : T_COUNT_LEADING_32<"normamt", 0b000, 0b111>; -// Count leading zeros. -def: Pat<(i32 (ctlz I32:$Rs)), (S2_cl0 I32:$Rs)>; -def: Pat<(i32 (trunc (ctlz I64:$Rss))), (S2_cl0p I64:$Rss)>; - -// Count trailing zeros: 32-bit. -def: Pat<(i32 (cttz I32:$Rs)), (S2_ct0 I32:$Rs)>; - -// Count leading ones. 
-def: Pat<(i32 (ctlz (not I32:$Rs))), (S2_cl1 I32:$Rs)>; -def: Pat<(i32 (trunc (ctlz (not I64:$Rss)))), (S2_cl1p I64:$Rss)>; - -// Count trailing ones: 32-bit. -def: Pat<(i32 (cttz (not I32:$Rs))), (S2_ct1 I32:$Rs)>; - // The 64-bit counts leading/trailing are defined in HexagonInstrInfoV4.td. // Bit set/clear/toggle let hasSideEffects = 0, hasNewValue = 1 in class T_SCT_BIT_IMM<string MnOp, bits<3> MinOp> - : SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, u5Imm:$u5), + : SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, u5_0Imm:$u5), "$Rd = "#MnOp#"($Rs, #$u5)", [], "", S_2op_tc_1_SLOT23> { bits<5> Rd; bits<5> Rs; @@ -4222,24 +3706,11 @@ def S2_clrbit_r : T_SCT_BIT_REG<"clrbit", 0b01>; def S2_setbit_r : T_SCT_BIT_REG<"setbit", 0b00>; def S2_togglebit_r : T_SCT_BIT_REG<"togglebit", 0b10>; -def: Pat<(i32 (and (i32 IntRegs:$Rs), (not (shl 1, u5ImmPred:$u5)))), - (S2_clrbit_i IntRegs:$Rs, u5ImmPred:$u5)>; -def: Pat<(i32 (or (i32 IntRegs:$Rs), (shl 1, u5ImmPred:$u5))), - (S2_setbit_i IntRegs:$Rs, u5ImmPred:$u5)>; -def: Pat<(i32 (xor (i32 IntRegs:$Rs), (shl 1, u5ImmPred:$u5))), - (S2_togglebit_i IntRegs:$Rs, u5ImmPred:$u5)>; -def: Pat<(i32 (and (i32 IntRegs:$Rs), (not (shl 1, (i32 IntRegs:$Rt))))), - (S2_clrbit_r IntRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(i32 (or (i32 IntRegs:$Rs), (shl 1, (i32 IntRegs:$Rt)))), - (S2_setbit_r IntRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(i32 (xor (i32 IntRegs:$Rs), (shl 1, (i32 IntRegs:$Rt)))), - (S2_togglebit_r IntRegs:$Rs, IntRegs:$Rt)>; - // Bit test let hasSideEffects = 0 in class T_TEST_BIT_IMM<string MnOp, bits<3> MajOp> - : SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, u5Imm:$u5), + : SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, u5_0Imm:$u5), "$Pd = "#MnOp#"($Rs, #$u5)", [], "", S_2op_tc_2early_SLOT23> { bits<2> Pd; @@ -4273,20 +3744,9 @@ class T_TEST_BIT_REG<string MnOp, bit IsNeg> def S2_tstbit_i : T_TEST_BIT_IMM<"tstbit", 0b000>; def S2_tstbit_r : T_TEST_BIT_REG<"tstbit", 0>; -let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm. - def: Pat<(i1 (setne (and (shl 1, u5ImmPred:$u5), (i32 IntRegs:$Rs)), 0)), - (S2_tstbit_i IntRegs:$Rs, u5ImmPred:$u5)>; - def: Pat<(i1 (setne (and (shl 1, (i32 IntRegs:$Rt)), (i32 IntRegs:$Rs)), 0)), - (S2_tstbit_r IntRegs:$Rs, IntRegs:$Rt)>; - def: Pat<(i1 (trunc (i32 IntRegs:$Rs))), - (S2_tstbit_i IntRegs:$Rs, 0)>; - def: Pat<(i1 (trunc (i64 DoubleRegs:$Rs))), - (S2_tstbit_i (LoReg DoubleRegs:$Rs), 0)>; -} - let hasSideEffects = 0 in class T_TEST_BITS_IMM<string MnOp, bits<2> MajOp, bit IsNeg> - : SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, u6Imm:$u6), + : SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, u6_0Imm:$u6), "$Pd = "#MnOp#"($Rs, #$u6)", [], "", S_2op_tc_2early_SLOT23> { bits<2> Pd; @@ -4322,17 +3782,6 @@ def C2_bitsclri : T_TEST_BITS_IMM<"bitsclr", 0b10, 0>; def C2_bitsclr : T_TEST_BITS_REG<"bitsclr", 0b10, 0>; def C2_bitsset : T_TEST_BITS_REG<"bitsset", 0b01, 0>; -let AddedComplexity = 20 in { // Complexity greater than compare reg-imm. - def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), u6ImmPred:$u6), 0)), - (C2_bitsclri IntRegs:$Rs, u6ImmPred:$u6)>; - def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), 0)), - (C2_bitsclr IntRegs:$Rs, IntRegs:$Rt)>; -} - -let AddedComplexity = 10 in // Complexity greater than compare reg-reg. 
-def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), IntRegs:$Rt)), - (C2_bitsset IntRegs:$Rs, IntRegs:$Rt)>; - //===----------------------------------------------------------------------===// // STYPE/BIT - //===----------------------------------------------------------------------===// @@ -4348,14 +3797,6 @@ def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), IntRegs:$Rt)), // XTYPE/PERM + //===----------------------------------------------------------------------===// -def: Pat<(or (or (shl (or (shl (i32 (extloadi8 (add (i32 IntRegs:$b), 3))), - (i32 8)), - (i32 (zextloadi8 (add (i32 IntRegs:$b), 2)))), - (i32 16)), - (shl (i32 (zextloadi8 (add (i32 IntRegs:$b), 1))), (i32 8))), - (zextloadi8 (i32 IntRegs:$b))), - (A2_swiz (L2_loadri_io IntRegs:$b, 0))>; - //===----------------------------------------------------------------------===// // XTYPE/PERM - //===----------------------------------------------------------------------===// @@ -4395,24 +3836,6 @@ let hasSideEffects = 0, isCodeGenOnly = 1 in def C2_pxfer_map: SInst<(outs PredRegs:$dst), (ins PredRegs:$src), "$dst = $src">; - -// Patterns for loads of i1: -def: Pat<(i1 (load AddrFI:$fi)), - (C2_tfrrp (L2_loadrub_io AddrFI:$fi, 0))>; -def: Pat<(i1 (load (add (i32 IntRegs:$Rs), s32ImmPred:$Off))), - (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, imm:$Off))>; -def: Pat<(i1 (load (i32 IntRegs:$Rs))), - (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, 0))>; - -def I1toI32: OutPatFrag<(ops node:$Rs), - (C2_muxii (i1 $Rs), 1, 0)>; - -def I32toI1: OutPatFrag<(ops node:$Rs), - (i1 (C2_tfrrp (i32 $Rs)))>; - -defm: Storexm_pat<store, I1, s32ImmPred, I1toI32, S2_storerb_io>; -def: Storexm_simple_pat<store, I1, I1toI32, S2_storerb_io>; - //===----------------------------------------------------------------------===// // STYPE/PRED - //===----------------------------------------------------------------------===// @@ -4436,9 +3859,7 @@ class S_2OpInstImm<string Mnemonic, bits<3>MajOp, bits<3>MinOp, } class S_2OpInstImmI6<string Mnemonic, SDNode OpNode, bits<3>MinOp> - : S_2OpInstImm<Mnemonic, 0b000, MinOp, u6Imm, - [(set (i64 DoubleRegs:$dst), (OpNode (i64 DoubleRegs:$src1), - u6ImmPred:$src2))]> { + : S_2OpInstImm<Mnemonic, 0b000, MinOp, u6_0Imm, []> { bits<6> src2; let Inst{13-8} = src2; } @@ -4451,10 +3872,8 @@ def S2_lsr_i_p : S_2OpInstImmI6<"lsr", srl, 0b001>; // Shift left by small amount and add. 
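The addasl instruction defined next computes a shift-then-add,
Rd = Rt + (Rs << #u3); that is exactly the DAG the selection pattern being
removed here used to match. A self-contained C++ model of the semantics:

  #include <cassert>
  #include <cstdint>

  // Rd = addasl(Rt, Rs, #u3), with u3 in [0, 7].
  uint32_t addasl(uint32_t rt, uint32_t rs, unsigned u3) {
    return rt + (rs << u3);
  }

  int main() {
    assert(addasl(100, 3, 2) == 112);  // 100 + (3 << 2)
    return 0;
  }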
let AddedComplexity = 100, hasNewValue = 1, hasSideEffects = 0 in def S2_addasl_rrri: SInst <(outs IntRegs:$Rd), - (ins IntRegs:$Rt, IntRegs:$Rs, u3Imm:$u3), - "$Rd = addasl($Rt, $Rs, #$u3)" , - [(set (i32 IntRegs:$Rd), (add (i32 IntRegs:$Rt), - (shl (i32 IntRegs:$Rs), u3ImmPred:$u3)))], + (ins IntRegs:$Rt, IntRegs:$Rs, u3_0Imm:$u3), + "$Rd = addasl($Rt, $Rs, #$u3)" , [], "", S_3op_tc_2_SLOT23> { bits<5> Rd; bits<5> Rt; @@ -4496,12 +3915,8 @@ def S2_addasl_rrri: SInst <(outs IntRegs:$Rd), //===----------------------------------------------------------------------===// // SYSTEM/USER + //===----------------------------------------------------------------------===// -def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDTNone, [SDNPHasChain]>; - let hasSideEffects = 1, isSoloAX = 1 in -def Y2_barrier : SYSInst<(outs), (ins), - "barrier", - [(HexagonBARRIER)],"",ST_tc_st_SLOT0> { +def Y2_barrier : SYSInst<(outs), (ins), "barrier", [],"",ST_tc_st_SLOT0> { let Inst{31-28} = 0b1010; let Inst{27-21} = 0b1000000; } @@ -4517,15 +3932,12 @@ def Y2_barrier : SYSInst<(outs), (ins), // let isMoveImm = 1, isAsCheapAsAMove = 1, isReMaterializable = 1, isPseudo = 1, isCodeGenOnly = 1, hasSideEffects = 0 in { - def TFR_FI : ALU32_ri<(outs IntRegs:$Rd), - (ins IntRegs:$fi, s32Imm:$off), "">; - def TFR_FIA : ALU32_ri<(outs IntRegs:$Rd), - (ins IntRegs:$Rs, IntRegs:$fi, s32Imm:$off), "">; + def PS_fi : ALU32_ri<(outs IntRegs:$Rd), + (ins IntRegs:$fi, s32_0Imm:$off), "">; + def PS_fia : ALU32_ri<(outs IntRegs:$Rd), + (ins IntRegs:$Rs, IntRegs:$fi, s32_0Imm:$off), "">; } -def: Pat<(i32 (orisadd (i32 AddrFI:$Rs), s32ImmPred:$off)), - (i32 (TFR_FI (i32 AddrFI:$Rs), s32ImmPred:$off))>; - //===----------------------------------------------------------------------===// // CRUSER - Type. //===----------------------------------------------------------------------===// @@ -4533,7 +3945,7 @@ def: Pat<(i32 (orisadd (i32 AddrFI:$Rs), s32ImmPred:$off)), let isExtendable = 1, isExtentSigned = 1, opExtentBits = 9, opExtentAlign = 2, opExtendable = 0, hasSideEffects = 0 in class LOOP_iBase<string mnemonic, Operand brOp, bit mustExtend = 0> - : CRInst<(outs), (ins brOp:$offset, u10Imm:$src2), + : CRInst<(outs), (ins brOp:$offset, u10_0Imm:$src2), #mnemonic#"($offset, #$src2)", [], "" , CR_tc_3x_SLOT3> { bits<9> offset; @@ -4605,7 +4017,7 @@ let Defs = [LC0, SA0, P3, USR], hasSideEffects = 0, isExtentSigned = 1, isExtendable = 1, opExtentBits = 9, opExtentAlign = 2, opExtendable = 0, isPredicateLate = 1 in class SPLOOP_iBase<string SP, bits<2> op> - : CRInst <(outs), (ins brtarget:$r7_2, u10Imm:$U10), + : CRInst <(outs), (ins brtarget:$r7_2, u10_0Imm:$U10), "p3 = sp"#SP#"loop0($r7_2, #$U10)" > { bits<9> r7_2; bits<10> U10; @@ -4733,20 +4145,12 @@ def Y4_trace: CRInst <(outs), (ins IntRegs:$Rs), let Inst{20-16} = Rs; } -// Support for generating global address. -// Taken from X86InstrInfo.td. 
-def SDTHexagonCONST32 : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, - SDTCisVT<1, i32>, - SDTCisPtrTy<0>]>; -def HexagonCONST32 : SDNode<"HexagonISD::CONST32", SDTHexagonCONST32>; -def HexagonCONST32_GP : SDNode<"HexagonISD::CONST32_GP", SDTHexagonCONST32>; - // HI/LO Instructions let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in class REG_IMMED<string RegHalf, bit Rs, bits<3> MajOp, bit MinOp> : ALU32_ri<(outs IntRegs:$dst), - (ins u16Imm:$imm_value), + (ins u16_0Imm:$imm_value), "$dst"#RegHalf#" = $imm_value", []> { bits<5> dst; bits<32> imm_value; @@ -4765,100 +4169,28 @@ let isAsmParserOnly = 1 in { def HI : REG_IMMED<".h", 0b0, 0b010, 0b1>; } -let isMoveImm = 1, isCodeGenOnly = 1 in -def LO_PIC : ALU32_ri<(outs IntRegs:$dst), (ins bblabel:$label), - "$dst.l = #LO($label@GOTREL)", - []>; - -let isMoveImm = 1, isCodeGenOnly = 1 in -def HI_PIC : ALU32_ri<(outs IntRegs:$dst), (ins bblabel:$label), - "$dst.h = #HI($label@GOTREL)", - []>; - -let isReMaterializable = 1, isMoveImm = 1, - isCodeGenOnly = 1, hasSideEffects = 0 in -def HI_GOT : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global), - "$dst.h = #HI($global@GOT)", - []>; - -let isReMaterializable = 1, isMoveImm = 1, - isCodeGenOnly = 1, hasSideEffects = 0 in -def LO_GOT : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global), - "$dst.l = #LO($global@GOT)", - []>; - -let isReMaterializable = 1, isMoveImm = 1, - isCodeGenOnly = 1, hasSideEffects = 0 in -def HI_GOTREL : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global), - "$dst.h = #HI($global@GOTREL)", - []>; - -let isReMaterializable = 1, isMoveImm = 1, - isCodeGenOnly = 1, hasSideEffects = 0 in -def LO_GOTREL : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global), - "$dst.l = #LO($global@GOTREL)", - []>; - -// This pattern is incorrect. When we add small data, we should change -// this pattern to use memw(#foo). -// This is for sdata. -let isMoveImm = 1, isAsmParserOnly = 1 in -def CONST32 : CONSTLDInst<(outs IntRegs:$dst), (ins globaladdress:$global), - "$dst = CONST32(#$global)", - [(set (i32 IntRegs:$dst), - (load (HexagonCONST32 tglobaltlsaddr:$global)))]>; - -let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in -def CONST32_Int_Real : CONSTLDInst<(outs IntRegs:$dst), (ins i32imm:$global), - "$dst = CONST32(#$global)", - [(set (i32 IntRegs:$dst), imm:$global) ]>; - -// Map TLS addressses to a CONST32 instruction -def: Pat<(HexagonCONST32 tglobaltlsaddr:$addr), (A2_tfrsi s16Ext:$addr)>; -def: Pat<(HexagonCONST32 bbl:$label), (A2_tfrsi s16Ext:$label)>; - -let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in -def CONST64_Int_Real : CONSTLDInst<(outs DoubleRegs:$dst), (ins i64imm:$global), - "$dst = CONST64(#$global)", - [(set (i64 DoubleRegs:$dst), imm:$global)]>; +let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in { + def CONST32 : CONSTLDInst<(outs IntRegs:$Rd), (ins i32imm:$v), + "$Rd = CONST32(#$v)", []>; + def CONST64 : CONSTLDInst<(outs DoubleRegs:$Rd), (ins i64imm:$v), + "$Rd = CONST64(#$v)", []>; +} let hasSideEffects = 0, isReMaterializable = 1, isPseudo = 1, isCodeGenOnly = 1 in -def TFR_PdTrue : SInst<(outs PredRegs:$dst), (ins), "", - [(set (i1 PredRegs:$dst), 1)]>; +def PS_true : SInst<(outs PredRegs:$dst), (ins), "", []>; let hasSideEffects = 0, isReMaterializable = 1, isPseudo = 1, isCodeGenOnly = 1 in -def TFR_PdFalse : SInst<(outs PredRegs:$dst), (ins), "", - [(set (i1 PredRegs:$dst), 0)]>; - -// Pseudo instructions. 
-def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>; -def SDT_SPCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, - SDTCisVT<1, i32> ]>; - -def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPCallSeqStart, - [SDNPHasChain, SDNPOutGlue]>; -def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPCallSeqEnd, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; - -def SDT_SPCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; - -// For tailcalls a HexagonTCRet SDNode has 3 SDNode Properties - a chain, -// Optional Flag and Variable Arguments. -// Its 1 Operand has pointer type. -def HexagonTCRet : SDNode<"HexagonISD::TC_RETURN", SDT_SPCall, - [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +def PS_false : SInst<(outs PredRegs:$dst), (ins), "", []>; let Defs = [R29, R30], Uses = [R31, R30, R29], isPseudo = 1 in def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt), - ".error \"should not emit\" ", - [(callseq_start timm:$amt)]>; + ".error \"should not emit\" ", []>; let Defs = [R29, R30, R31], Uses = [R29], isPseudo = 1 in def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), - ".error \"should not emit\" ", - [(callseq_end timm:$amt1, timm:$amt2)]>; + ".error \"should not emit\" ", []>; // Call subroutine indirectly. let Defs = VolatileV3.Regs in @@ -4867,260 +4199,21 @@ def J2_callr : JUMPR_MISC_CALLR<0, 1>; // Indirect tail-call. let isPseudo = 1, isCall = 1, isReturn = 1, isBarrier = 1, isPredicable = 0, isTerminator = 1, isCodeGenOnly = 1 in -def TCRETURNr : T_JMPr; +def PS_tailcall_r : T_JMPr; // Direct tail-calls. let isPseudo = 1, isCall = 1, isReturn = 1, isBarrier = 1, isPredicable = 0, isTerminator = 1, isCodeGenOnly = 1 in -def TCRETURNi : JInst<(outs), (ins calltarget:$dst), "", []>; - -//Tail calls. -def: Pat<(HexagonTCRet tglobaladdr:$dst), - (TCRETURNi tglobaladdr:$dst)>; -def: Pat<(HexagonTCRet texternalsym:$dst), - (TCRETURNi texternalsym:$dst)>; -def: Pat<(HexagonTCRet (i32 IntRegs:$dst)), - (TCRETURNr IntRegs:$dst)>; - -// Map from r0 = and(r1, 65535) to r0 = zxth(r1) -def: Pat<(and (i32 IntRegs:$src1), 65535), - (A2_zxth IntRegs:$src1)>; - -// Map from r0 = and(r1, 255) to r0 = zxtb(r1). -def: Pat<(and (i32 IntRegs:$src1), 255), - (A2_zxtb IntRegs:$src1)>; - -// Map Add(p1, true) to p1 = not(p1). -// Add(p1, false) should never be produced, -// if it does, it got to be mapped to NOOP. -def: Pat<(add (i1 PredRegs:$src1), -1), - (C2_not PredRegs:$src1)>; - -// Map from p0 = pnot(p0); r0 = mux(p0, #i, #j) => r0 = mux(p0, #j, #i). -def: Pat<(select (not (i1 PredRegs:$src1)), s8ImmPred:$src2, s32ImmPred:$src3), - (C2_muxii PredRegs:$src1, s32ImmPred:$src3, s8ImmPred:$src2)>; - -// Map from p0 = pnot(p0); r0 = select(p0, #i, r1) -// => r0 = C2_muxir(p0, r1, #i) -def: Pat<(select (not (i1 PredRegs:$src1)), s32ImmPred:$src2, - (i32 IntRegs:$src3)), - (C2_muxir PredRegs:$src1, IntRegs:$src3, s32ImmPred:$src2)>; - -// Map from p0 = pnot(p0); r0 = mux(p0, r1, #i) -// => r0 = C2_muxri (p0, #i, r1) -def: Pat<(select (not (i1 PredRegs:$src1)), IntRegs:$src2, s32ImmPred:$src3), - (C2_muxri PredRegs:$src1, s32ImmPred:$src3, IntRegs:$src2)>; - -// Map from p0 = pnot(p0); if (p0) jump => if (!p0) jump. -def: Pat<(brcond (not (i1 PredRegs:$src1)), bb:$offset), - (J2_jumpf PredRegs:$src1, bb:$offset)>; - -// Map from Rdd = sign_extend_inreg(Rss, i32) -> Rdd = A2_sxtw(Rss.lo). -def: Pat<(i64 (sext_inreg (i64 DoubleRegs:$src1), i32)), - (A2_sxtw (LoReg DoubleRegs:$src1))>; - -// Map from Rdd = sign_extend_inreg(Rss, i16) -> Rdd = A2_sxtw(A2_sxth(Rss.lo)). 
-def: Pat<(i64 (sext_inreg (i64 DoubleRegs:$src1), i16)), - (A2_sxtw (A2_sxth (LoReg DoubleRegs:$src1)))>; - -// Map from Rdd = sign_extend_inreg(Rss, i8) -> Rdd = A2_sxtw(A2_sxtb(Rss.lo)). -def: Pat<(i64 (sext_inreg (i64 DoubleRegs:$src1), i8)), - (A2_sxtw (A2_sxtb (LoReg DoubleRegs:$src1)))>; - -// We want to prevent emitting pnot's as much as possible. -// Map brcond with an unsupported setcc to a J2_jumpf. -def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), (i32 IntRegs:$src2))), - bb:$offset), - (J2_jumpf (C2_cmpeq (i32 IntRegs:$src1), (i32 IntRegs:$src2)), - bb:$offset)>; - -def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), s10ImmPred:$src2)), - bb:$offset), - (J2_jumpf (C2_cmpeqi (i32 IntRegs:$src1), s10ImmPred:$src2), bb:$offset)>; - -def: Pat<(brcond (i1 (setne (i1 PredRegs:$src1), (i1 -1))), bb:$offset), - (J2_jumpf PredRegs:$src1, bb:$offset)>; - -def: Pat<(brcond (i1 (setne (i1 PredRegs:$src1), (i1 0))), bb:$offset), - (J2_jumpt PredRegs:$src1, bb:$offset)>; - -// cmp.lt(Rs, Imm) -> !cmp.ge(Rs, Imm) -> !cmp.gt(Rs, Imm-1) -def: Pat<(brcond (i1 (setlt (i32 IntRegs:$src1), s8ImmPred:$src2)), bb:$offset), - (J2_jumpf (C2_cmpgti IntRegs:$src1, (DEC_CONST_SIGNED s8ImmPred:$src2)), - bb:$offset)>; - -// Map from a 64-bit select to an emulated 64-bit mux. -// Hexagon does not support 64-bit MUXes; so emulate with combines. -def: Pat<(select (i1 PredRegs:$src1), (i64 DoubleRegs:$src2), - (i64 DoubleRegs:$src3)), - (A2_combinew (C2_mux PredRegs:$src1, (HiReg DoubleRegs:$src2), - (HiReg DoubleRegs:$src3)), - (C2_mux PredRegs:$src1, (LoReg DoubleRegs:$src2), - (LoReg DoubleRegs:$src3)))>; - -// Map from a 1-bit select to logical ops. -// From LegalizeDAG.cpp: (B1 ? B2 : B3) <=> (B1 & B2)|(!B1&B3). -def: Pat<(select (i1 PredRegs:$src1), (i1 PredRegs:$src2), (i1 PredRegs:$src3)), - (C2_or (C2_and PredRegs:$src1, PredRegs:$src2), - (C2_and (C2_not PredRegs:$src1), PredRegs:$src3))>; - -// Map for truncating from 64 immediates to 32 bit immediates. -def: Pat<(i32 (trunc (i64 DoubleRegs:$src))), - (LoReg DoubleRegs:$src)>; - -// Map for truncating from i64 immediates to i1 bit immediates. -def: Pat<(i1 (trunc (i64 DoubleRegs:$src))), - (C2_tfrrp (LoReg DoubleRegs:$src))>; - -// rs <= rt -> !(rs > rt). -let AddedComplexity = 30 in -def: Pat<(i1 (setle (i32 IntRegs:$src1), s32ImmPred:$src2)), - (C2_not (C2_cmpgti IntRegs:$src1, s32ImmPred:$src2))>; - -// rs <= rt -> !(rs > rt). -def : Pat<(i1 (setle (i32 IntRegs:$src1), (i32 IntRegs:$src2))), - (i1 (C2_not (C2_cmpgt (i32 IntRegs:$src1), (i32 IntRegs:$src2))))>; - -// Rss <= Rtt -> !(Rss > Rtt). -def: Pat<(i1 (setle (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), - (C2_not (C2_cmpgtp DoubleRegs:$src1, DoubleRegs:$src2))>; - -// Map cmpne -> cmpeq. -// Hexagon_TODO: We should improve on this. -// rs != rt -> !(rs == rt). -let AddedComplexity = 30 in -def: Pat<(i1 (setne (i32 IntRegs:$src1), s32ImmPred:$src2)), - (C2_not (C2_cmpeqi IntRegs:$src1, s32ImmPred:$src2))>; - -// Convert setne back to xor for hexagon since we compute w/ pred registers. -def: Pat<(i1 (setne (i1 PredRegs:$src1), (i1 PredRegs:$src2))), - (C2_xor PredRegs:$src1, PredRegs:$src2)>; - -// Map cmpne(Rss) -> !cmpew(Rss). -// rs != rt -> !(rs == rt). -def: Pat<(i1 (setne (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), - (C2_not (C2_cmpeqp DoubleRegs:$src1, DoubleRegs:$src2))>; - -// Map cmpge(Rs, Rt) -> !(cmpgt(Rs, Rt). -// rs >= rt -> !(rt > rs). 
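The comparison rewrites in this block all lean on simple complement
identities; for the one stated above, setge(a, b) == not(setgt(b, a)) holds
for every operand pair. A quick check over a few representative values:

  #include <cassert>

  int main() {
    const int vals[] = {-2, 0, 5};
    for (int a : vals)
      for (int b : vals)
        assert((a >= b) == !(b > a));  // cmpge(Rs, Rt) == !cmpgt(Rt, Rs)
    return 0;
  }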
-def : Pat <(i1 (setge (i32 IntRegs:$src1), (i32 IntRegs:$src2))), - (i1 (C2_not (i1 (C2_cmpgt (i32 IntRegs:$src2), (i32 IntRegs:$src1)))))>; - -// cmpge(Rs, Imm) -> cmpgt(Rs, Imm-1) -let AddedComplexity = 30 in -def: Pat<(i1 (setge (i32 IntRegs:$src1), s32ImmPred:$src2)), - (C2_cmpgti IntRegs:$src1, (DEC_CONST_SIGNED s32ImmPred:$src2))>; - -// Map cmpge(Rss, Rtt) -> !cmpgt(Rtt, Rss). -// rss >= rtt -> !(rtt > rss). -def: Pat<(i1 (setge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), - (C2_not (C2_cmpgtp DoubleRegs:$src2, DoubleRegs:$src1))>; - -// Map cmplt(Rs, Imm) -> !cmpge(Rs, Imm). -// !cmpge(Rs, Imm) -> !cmpgt(Rs, Imm-1). -// rs < rt -> !(rs >= rt). -let AddedComplexity = 30 in -def: Pat<(i1 (setlt (i32 IntRegs:$src1), s32ImmPred:$src2)), - (C2_not (C2_cmpgti IntRegs:$src1, - (DEC_CONST_SIGNED s32ImmPred:$src2)))>; - -// Generate cmpgeu(Rs, #0) -> cmpeq(Rs, Rs) -def: Pat<(i1 (setuge (i32 IntRegs:$src1), 0)), - (C2_cmpeq IntRegs:$src1, IntRegs:$src1)>; - -// Generate cmpgeu(Rs, #u8) -> cmpgtu(Rs, #u8 -1) -def: Pat<(i1 (setuge (i32 IntRegs:$src1), u32ImmPred:$src2)), - (C2_cmpgtui IntRegs:$src1, (DEC_CONST_UNSIGNED u32ImmPred:$src2))>; - -// Generate cmpgtu(Rs, #u9) -def: Pat<(i1 (setugt (i32 IntRegs:$src1), u32ImmPred:$src2)), - (C2_cmpgtui IntRegs:$src1, u32ImmPred:$src2)>; - -// Map from Rs >= Rt -> !(Rt > Rs). -// rs >= rt -> !(rt > rs). -def: Pat<(i1 (setuge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), - (C2_not (C2_cmpgtup DoubleRegs:$src2, DoubleRegs:$src1))>; - -// Map from cmpleu(Rss, Rtt) -> !cmpgtu(Rss, Rtt-1). -// Map from (Rs <= Rt) -> !(Rs > Rt). -def: Pat<(i1 (setule (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), - (C2_not (C2_cmpgtup DoubleRegs:$src1, DoubleRegs:$src2))>; - -// Sign extends. -// i1 -> i32 -def: Pat<(i32 (sext (i1 PredRegs:$src1))), - (C2_muxii PredRegs:$src1, -1, 0)>; - -// i1 -> i64 -def: Pat<(i64 (sext (i1 PredRegs:$src1))), - (A2_combinew (A2_tfrsi -1), (C2_muxii PredRegs:$src1, -1, 0))>; - -// Zero extends. -// i1 -> i32 -def: Pat<(i32 (zext (i1 PredRegs:$src1))), - (C2_muxii PredRegs:$src1, 1, 0)>; - -// Map from Rs = Pd to Pd = mux(Pd, #1, #0) -def: Pat<(i32 (anyext (i1 PredRegs:$src1))), - (C2_muxii PredRegs:$src1, 1, 0)>; - -// Map from Rss = Pd to Rdd = sxtw (mux(Pd, #1, #0)) -def: Pat<(i64 (anyext (i1 PredRegs:$src1))), - (A2_sxtw (C2_muxii PredRegs:$src1, 1, 0))>; - -// Multiply 64-bit unsigned and use upper result. -def : Pat <(mulhu (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)), - (A2_addp - (M2_dpmpyuu_acc_s0 - (S2_lsr_i_p - (A2_addp - (M2_dpmpyuu_acc_s0 - (S2_lsr_i_p (M2_dpmpyuu_s0 (LoReg $src1), (LoReg $src2)), 32), - (HiReg $src1), - (LoReg $src2)), - (A2_combinew (A2_tfrsi 0), - (LoReg (M2_dpmpyuu_s0 (LoReg $src1), (HiReg $src2))))), - 32), - (HiReg $src1), - (HiReg $src2)), - (S2_lsr_i_p (M2_dpmpyuu_s0 (LoReg $src1), (HiReg $src2)), 32) -)>; - -// Hexagon specific ISD nodes. -def SDTHexagonALLOCA : SDTypeProfile<1, 2, - [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; -def HexagonALLOCA : SDNode<"HexagonISD::ALLOCA", SDTHexagonALLOCA, - [SDNPHasChain]>; +def PS_tailcall_i : JInst<(outs), (ins calltarget:$dst), "", []>; // The reason for the custom inserter is to record all ALLOCA instructions // in MachineFunctionInfo. 
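Among the patterns removed a few lines up is the expansion of 64-bit mulhu
into four 32x32->64 partial products. The arithmetic that pattern encodes,
modeled in plain C++ (illustrative only; assumes a compiler with
__uint128_t, such as GCC or Clang):

  #include <cassert>
  #include <cstdint>

  // High 64 bits of an unsigned 64x64-bit product from 32-bit pieces.
  uint64_t mulhu64(uint64_t a, uint64_t b) {
    uint64_t al = a & 0xffffffffu, ah = a >> 32;
    uint64_t bl = b & 0xffffffffu, bh = b >> 32;
    uint64_t ll = al * bl, lh = al * bh, hl = ah * bl, hh = ah * bh;
    // Middle-column carry; the TableGen pattern accumulates this with
    // M2_dpmpyuu_acc_s0 and 32-bit right shifts of the partial sums.
    uint64_t mid = (ll >> 32) + (lh & 0xffffffffu) + (hl & 0xffffffffu);
    return hh + (lh >> 32) + (hl >> 32) + (mid >> 32);
  }

  int main() {
    uint64_t a = 0xdeadbeefcafebabeULL, b = 0x0123456789abcdefULL;
    assert(mulhu64(a, b) == uint64_t((__uint128_t(a) * b) >> 64));
    return 0;
  }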
-let Defs = [R29], isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 1, - usesCustomInserter = 1 in -def ALLOCA: ALU32Inst<(outs IntRegs:$Rd), - (ins IntRegs:$Rs, u32Imm:$A), "", - [(set (i32 IntRegs:$Rd), - (HexagonALLOCA (i32 IntRegs:$Rs), (i32 imm:$A)))]>; +let Defs = [R29], isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 1 in +def PS_alloca: ALU32Inst<(outs IntRegs:$Rd), + (ins IntRegs:$Rs, u32_0Imm:$A), "", []>; let isCodeGenOnly = 1, isPseudo = 1, Uses = [R30], hasSideEffects = 0 in -def ALIGNA : ALU32Inst<(outs IntRegs:$Rd), (ins u32Imm:$A), "", []>; - -def SDTHexagonARGEXTEND : SDTypeProfile<1, 1, [SDTCisVT<0, i32>]>; -def Hexagon_ARGEXTEND : SDNode<"HexagonISD::ARGEXTEND", SDTHexagonARGEXTEND>; -let isCodeGenOnly = 1 in -def ARGEXTEND : ALU32_rr <(outs IntRegs:$dst), (ins IntRegs:$src1), - "$dst = $src1", - [(set (i32 IntRegs:$dst), - (Hexagon_ARGEXTEND (i32 IntRegs:$src1)))]>; - -let AddedComplexity = 100 in -def: Pat<(i32 (sext_inreg (Hexagon_ARGEXTEND (i32 IntRegs:$src1)), i16)), - (i32 IntRegs:$src1)>; - -def HexagonJT: SDNode<"HexagonISD::JT", SDTIntUnaryOp>; -def HexagonCP: SDNode<"HexagonISD::CP", SDTIntUnaryOp>; - -def: Pat<(HexagonJT tjumptable:$dst), (A2_tfrsi s16Ext:$dst)>; -def: Pat<(HexagonCP tconstpool:$dst), (A2_tfrsi s16Ext:$dst)>; +def PS_aligna : ALU32Inst<(outs IntRegs:$Rd), (ins u32_0Imm:$A), "", []>; // XTYPE/SHIFT // @@ -5137,11 +4230,8 @@ let hasNewValue = 1, opNewValue = 0 in class T_shift_imm_acc_r <string opc1, string opc2, SDNode OpNode1, SDNode OpNode2, bits<3> majOp, bits<2> minOp> : SInst_acc<(outs IntRegs:$Rx), - (ins IntRegs:$src1, IntRegs:$Rs, u5Imm:$u5), - "$Rx "#opc2#opc1#"($Rs, #$u5)", - [(set (i32 IntRegs:$Rx), - (OpNode2 (i32 IntRegs:$src1), - (OpNode1 (i32 IntRegs:$Rs), u5ImmPred:$u5)))], + (ins IntRegs:$src1, IntRegs:$Rs, u5_0Imm:$u5), + "$Rx "#opc2#opc1#"($Rs, #$u5)", [], "$src1 = $Rx", S_2op_tc_2_SLOT23> { bits<5> Rx; bits<5> Rs; @@ -5168,10 +4258,7 @@ class T_shift_reg_acc_r <string opc1, string opc2, SDNode OpNode1, SDNode OpNode2, bits<2> majOp, bits<2> minOp> : SInst_acc<(outs IntRegs:$Rx), (ins IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt), - "$Rx "#opc2#opc1#"($Rs, $Rt)", - [(set (i32 IntRegs:$Rx), - (OpNode2 (i32 IntRegs:$src1), - (OpNode1 (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))], + "$Rx "#opc2#opc1#"($Rs, $Rt)", [], "$src1 = $Rx", S_3op_tc_2_SLOT23 > { bits<5> Rx; bits<5> Rs; @@ -5194,11 +4281,8 @@ class T_shift_reg_acc_r <string opc1, string opc2, SDNode OpNode1, class T_shift_imm_acc_p <string opc1, string opc2, SDNode OpNode1, SDNode OpNode2, bits<3> majOp, bits<2> minOp> : SInst_acc<(outs DoubleRegs:$Rxx), - (ins DoubleRegs:$src1, DoubleRegs:$Rss, u6Imm:$u6), - "$Rxx "#opc2#opc1#"($Rss, #$u6)", - [(set (i64 DoubleRegs:$Rxx), - (OpNode2 (i64 DoubleRegs:$src1), - (OpNode1 (i64 DoubleRegs:$Rss), u6ImmPred:$u6)))], + (ins DoubleRegs:$src1, DoubleRegs:$Rss, u6_0Imm:$u6), + "$Rxx "#opc2#opc1#"($Rss, #$u6)", [], "$src1 = $Rxx", S_2op_tc_2_SLOT23> { bits<5> Rxx; bits<5> Rss; @@ -5225,10 +4309,7 @@ class T_shift_reg_acc_p <string opc1, string opc2, SDNode OpNode1, SDNode OpNode2, bits<3> majOp, bits<2> minOp> : SInst_acc<(outs DoubleRegs:$Rxx), (ins DoubleRegs:$src1, DoubleRegs:$Rss, IntRegs:$Rt), - "$Rxx "#opc2#opc1#"($Rss, $Rt)", - [(set (i64 DoubleRegs:$Rxx), - (OpNode2 (i64 DoubleRegs:$src1), - (OpNode1 (i64 DoubleRegs:$Rss), (i32 IntRegs:$Rt))))], + "$Rxx "#opc2#opc1#"($Rss, $Rt)", [], "$src1 = $Rxx", S_3op_tc_2_SLOT23> { bits<5> Rxx; bits<5> Rss; @@ -5400,9 +4481,7 @@ class T_S3op_3 <string mnemonic, RegisterClass RC, bits<2> MajOp, 
let hasNewValue = 1 in class T_S3op_shift32 <string mnemonic, SDNode OpNode, bits<2> MinOp> - : T_S3op_3 <mnemonic, IntRegs, 0b01, MinOp, 0, - [(set (i32 IntRegs:$dst), (OpNode (i32 IntRegs:$src1), - (i32 IntRegs:$src2)))]>; + : T_S3op_3 <mnemonic, IntRegs, 0b01, MinOp, 0, []>; let hasNewValue = 1, Itinerary = S_3op_tc_2_SLOT23 in class T_S3op_shift32_Sat <string mnemonic, bits<2> MinOp> @@ -5410,9 +4489,7 @@ class T_S3op_shift32_Sat <string mnemonic, bits<2> MinOp> class T_S3op_shift64 <string mnemonic, SDNode OpNode, bits<2> MinOp> - : T_S3op_3 <mnemonic, DoubleRegs, 0b10, MinOp, 0, - [(set (i64 DoubleRegs:$dst), (OpNode (i64 DoubleRegs:$src1), - (i32 IntRegs:$src2)))]>; + : T_S3op_3 <mnemonic, DoubleRegs, 0b10, MinOp, 0, []>; class T_S3op_shiftVect <string mnemonic, bits<2> MajOp, bits<2> MinOp> @@ -5473,7 +4550,7 @@ def S2_vcrotate : T_S3op_shiftVect < "vcrotate", 0b11, 0b00>; let hasSideEffects = 0 in class T_S3op_7 <string mnemonic, bit MajOp > : SInst <(outs DoubleRegs:$Rdd), - (ins DoubleRegs:$Rss, DoubleRegs:$Rtt, u3Imm:$u3), + (ins DoubleRegs:$Rss, DoubleRegs:$Rtt, u3_0Imm:$u3), "$Rdd = "#mnemonic#"($Rss, $Rtt, #$u3)" , [], "", S_3op_tc_1_SLOT23 > { bits<5> Rdd; @@ -5530,8 +4607,8 @@ class T_S2op_insert <bits<4> RegTyBits, RegisterClass RC, Operand ImmOp> bit bit13; string ImmOpStr = !cast<string>(ImmOp); - let bit23 = !if (!eq(ImmOpStr, "u6Imm"), src3{5}, 0); - let bit13 = !if (!eq(ImmOpStr, "u6Imm"), src2{5}, 0); + let bit23 = !if (!eq(ImmOpStr, "u6_0Imm"), src3{5}, 0); + let bit13 = !if (!eq(ImmOpStr, "u6_0Imm"), src2{5}, 0); let IClass = 0b1000; @@ -5549,42 +4626,13 @@ class T_S2op_insert <bits<4> RegTyBits, RegisterClass RC, Operand ImmOp> // Rx=insert(Rs,#u5,#U5) let hasNewValue = 1 in { def S2_insert_rp : T_S3op_insert <"insert", IntRegs>; - def S2_insert : T_S2op_insert <0b1111, IntRegs, u5Imm>; + def S2_insert : T_S2op_insert <0b1111, IntRegs, u5_0Imm>; } // Rxx=insert(Rss,Rtt) // Rxx=insert(Rss,#u6,#U6) def S2_insertp_rp : T_S3op_insert<"insert", DoubleRegs>; -def S2_insertp : T_S2op_insert <0b0011, DoubleRegs, u6Imm>; - - -def SDTHexagonINSERT: - SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, - SDTCisInt<0>, SDTCisVT<3, i32>, SDTCisVT<4, i32>]>; -def SDTHexagonINSERTRP: - SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, - SDTCisInt<0>, SDTCisVT<3, i64>]>; - -def HexagonINSERT : SDNode<"HexagonISD::INSERT", SDTHexagonINSERT>; -def HexagonINSERTRP : SDNode<"HexagonISD::INSERTRP", SDTHexagonINSERTRP>; - -def: Pat<(HexagonINSERT I32:$Rs, I32:$Rt, u5ImmPred:$u1, u5ImmPred:$u2), - (S2_insert I32:$Rs, I32:$Rt, u5ImmPred:$u1, u5ImmPred:$u2)>; -def: Pat<(HexagonINSERT I64:$Rs, I64:$Rt, u6ImmPred:$u1, u6ImmPred:$u2), - (S2_insertp I64:$Rs, I64:$Rt, u6ImmPred:$u1, u6ImmPred:$u2)>; -def: Pat<(HexagonINSERTRP I32:$Rs, I32:$Rt, I64:$Ru), - (S2_insert_rp I32:$Rs, I32:$Rt, I64:$Ru)>; -def: Pat<(HexagonINSERTRP I64:$Rs, I64:$Rt, I64:$Ru), - (S2_insertp_rp I64:$Rs, I64:$Rt, I64:$Ru)>; - -let AddedComplexity = 100 in -def: Pat<(or (or (shl (HexagonINSERT (i32 (zextloadi8 (add I32:$b, 2))), - (i32 (extloadi8 (add I32:$b, 3))), - 24, 8), - (i32 16)), - (shl (i32 (zextloadi8 (add I32:$b, 1))), (i32 8))), - (zextloadi8 I32:$b)), - (A2_swiz (L2_loadri_io I32:$b, 0))>; +def S2_insertp : T_S2op_insert <0b0011, DoubleRegs, u6_0Imm>; //===----------------------------------------------------------------------===// @@ -5622,10 +4670,10 @@ class T_S2op_extract <string mnemonic, bits<4> RegTyBits, bit bit13; string ImmOpStr = !cast<string>(ImmOp); - let bit23 = !if 
(!eq(ImmOpStr, "u6Imm"), src3{5}, + let bit23 = !if (!eq(ImmOpStr, "u6_0Imm"), src3{5}, !if (!eq(mnemonic, "extractu"), 0, 1)); - let bit13 = !if (!eq(ImmOpStr, "u6Imm"), src2{5}, 0); + let bit13 = !if (!eq(ImmOpStr, "u6_0Imm"), src2{5}, 0); let IClass = 0b1000; @@ -5644,38 +4692,15 @@ class T_S2op_extract <string mnemonic, bits<4> RegTyBits, // Rdd=extractu(Rss,Rtt) // Rdd=extractu(Rss,#u6,#U6) def S2_extractup_rp : T_S3op_64 < "extractu", 0b00, 0b000, 0>; -def S2_extractup : T_S2op_extract <"extractu", 0b0001, DoubleRegs, u6Imm>; +def S2_extractup : T_S2op_extract <"extractu", 0b0001, DoubleRegs, u6_0Imm>; // Rd=extractu(Rs,Rtt) // Rd=extractu(Rs,#u5,#U5) let hasNewValue = 1 in { def S2_extractu_rp : T_S3op_extract<"extractu", 0b00>; - def S2_extractu : T_S2op_extract <"extractu", 0b1101, IntRegs, u5Imm>; + def S2_extractu : T_S2op_extract <"extractu", 0b1101, IntRegs, u5_0Imm>; } -def SDTHexagonEXTRACTU: - SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<1>, - SDTCisVT<2, i32>, SDTCisVT<3, i32>]>; -def SDTHexagonEXTRACTURP: - SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<1>, - SDTCisVT<2, i64>]>; - -def HexagonEXTRACTU : SDNode<"HexagonISD::EXTRACTU", SDTHexagonEXTRACTU>; -def HexagonEXTRACTURP : SDNode<"HexagonISD::EXTRACTURP", SDTHexagonEXTRACTURP>; - -def: Pat<(HexagonEXTRACTU I32:$src1, u5ImmPred:$src2, u5ImmPred:$src3), - (S2_extractu I32:$src1, u5ImmPred:$src2, u5ImmPred:$src3)>; -def: Pat<(HexagonEXTRACTU I64:$src1, u6ImmPred:$src2, u6ImmPred:$src3), - (S2_extractup I64:$src1, u6ImmPred:$src2, u6ImmPred:$src3)>; -def: Pat<(HexagonEXTRACTURP I32:$src1, I64:$src2), - (S2_extractu_rp I32:$src1, I64:$src2)>; -def: Pat<(HexagonEXTRACTURP I64:$src1, I64:$src2), - (S2_extractup_rp I64:$src1, I64:$src2)>; - -// Change the sign of the immediate for Rd=-mpyi(Rs,#u8) -def: Pat<(mul (i32 IntRegs:$src1), (ineg n8ImmPred:$src2)), - (M2_mpysin IntRegs:$src1, u8ImmPred:$src2)>; - //===----------------------------------------------------------------------===// // :raw for of tableindx[bdhw] insns //===----------------------------------------------------------------------===// @@ -5683,7 +4708,7 @@ def: Pat<(mul (i32 IntRegs:$src1), (ineg n8ImmPred:$src2)), let hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in class tableidxRaw<string OpStr, bits<2>MinOp> : SInst <(outs IntRegs:$Rx), - (ins IntRegs:$_dst_, IntRegs:$Rs, u4Imm:$u4, s6Imm:$S6), + (ins IntRegs:$_dst_, IntRegs:$Rs, u4_0Imm:$u4, s6_0Imm:$S6), "$Rx = "#OpStr#"($Rs, #$u4, #$S6):raw", [], "$Rx = $_dst_" > { bits<5> Rx; @@ -5714,7 +4739,7 @@ def S2_tableidxd : tableidxRaw<"tableidxd", 0b11>; let isPseudo = 1 in class tableidx_goodsyntax <string mnemonic> : SInst <(outs IntRegs:$Rx), - (ins IntRegs:$_dst_, IntRegs:$Rs, u4Imm:$u4, u5Imm:$u5), + (ins IntRegs:$_dst_, IntRegs:$Rs, u4_0Imm:$u4, u5_0Imm:$u5), "$Rx = "#mnemonic#"($Rs, #$u4, #$u5)", [], "$Rx = $_dst_" >; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV3.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV3.td index 9024a43..225f944 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV3.td +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV3.td @@ -11,12 +11,6 @@ // //===----------------------------------------------------------------------===// -def callv3 : SDNode<"HexagonISD::CALLv3", SDT_SPCall, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; - -def callv3nr : SDNode<"HexagonISD::CALLv3nr", SDT_SPCall, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; - 
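// (Editor's note; illustrative sketch, not part of this patch.) The rename
// that runs through all of these hunks follows one convention: immediate
// operand names now carry both width and scale, u<bits>_<shift>Imm and
// s<bits>_<shift>Ext, so u6Imm becomes u6_0Imm (6 bits, no scaling) alongside
// the pre-existing u6_2Imm (6 bits, value shifted left by 2). Hypothetical
// operand defs, shown only to illustrate the naming:
//
//   def u6_0Imm : Operand<i32>;   // matches #u6
//   def u6_2Imm : Operand<i32>;   // matches #u6:2, i.e. field << 2
//
// Encoding classes that key off the operand's name string, such as
// T_S2op_insert and T_S2op_extract above, therefore compare against the new
// spelling:
//
//   let bit23 = !if (!eq(ImmOpStr, "u6_0Imm"), src3{5}, 0);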
//===----------------------------------------------------------------------===// // J + //===----------------------------------------------------------------------===// @@ -66,11 +60,13 @@ multiclass T_Calls<bit CSR, string ExtStr> { defm J2_call: T_Calls<1, "">, PredRel; -let isCodeGenOnly = 1, isCall = 1, hasSideEffects = 1, Defs = VolatileV3.Regs in -def CALLv3nr : T_Call<1, "">, PredRel; +let isCodeGenOnly = 1, isCall = 1, hasSideEffects = 1, + Defs = VolatileV3.Regs in +def PS_call_nr : T_Call<1, "">, PredRel; -let isCodeGenOnly = 1, isCall = 1, hasSideEffects = 1, Defs = [PC, R31, R6, R7, P0] in -def CALLstk : T_Call<0, "">, PredRel; +let isCodeGenOnly = 1, isCall = 1, hasSideEffects = 1, + Defs = [PC, R31, R6, R7, P0] in +def PS_call_stk : T_Call<0, "">, PredRel; //===----------------------------------------------------------------------===// // J - @@ -83,7 +79,7 @@ def CALLstk : T_Call<0, "">, PredRel; // Call subroutine from register. let isCodeGenOnly = 1, Defs = VolatileV3.Regs in { - def CALLRv3nr : JUMPR_MISC_CALLR<0, 1>; // Call, no return. + def PS_callr_nr : JUMPR_MISC_CALLR<0, 1>; // Call, no return. } //===----------------------------------------------------------------------===// @@ -105,9 +101,7 @@ def A2_addsph : T_ALU64_addsp_hl<":raw:hi", 0b111>; let hasSideEffects = 0, isAsmParserOnly = 1 in def A2_addsp : ALU64_rr<(outs DoubleRegs:$Rd), - (ins IntRegs:$Rs, DoubleRegs:$Rt), "$Rd = add($Rs, $Rt)", - [(set (i64 DoubleRegs:$Rd), (i64 (add (i64 (sext (i32 IntRegs:$Rs))), - (i64 DoubleRegs:$Rt))))], + (ins IntRegs:$Rs, DoubleRegs:$Rt), "$Rd = add($Rs, $Rt)", [], "", ALU64_tc_1_SLOT23>; @@ -137,60 +131,10 @@ def A2_minup : T_XTYPE_MIN_MAX_P<0, 1>; def A2_maxp : T_XTYPE_MIN_MAX_P<1, 0>; def A2_maxup : T_XTYPE_MIN_MAX_P<1, 1>; -multiclass MinMax_pats_p<PatFrag Op, InstHexagon Inst, InstHexagon SwapInst> { - defm: T_MinMax_pats<Op, DoubleRegs, i64, Inst, SwapInst>; -} - -let AddedComplexity = 200 in { - defm: MinMax_pats_p<setge, A2_maxp, A2_minp>; - defm: MinMax_pats_p<setgt, A2_maxp, A2_minp>; - defm: MinMax_pats_p<setle, A2_minp, A2_maxp>; - defm: MinMax_pats_p<setlt, A2_minp, A2_maxp>; - defm: MinMax_pats_p<setuge, A2_maxup, A2_minup>; - defm: MinMax_pats_p<setugt, A2_maxup, A2_minup>; - defm: MinMax_pats_p<setule, A2_minup, A2_maxup>; - defm: MinMax_pats_p<setult, A2_minup, A2_maxup>; -} - //===----------------------------------------------------------------------===// // ALU64/ALU - //===----------------------------------------------------------------------===// - - - -//def : Pat <(brcond (i1 (seteq (i32 IntRegs:$src1), 0)), bb:$offset), -// (JMP_RegEzt (i32 IntRegs:$src1), bb:$offset)>; - -//def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), 0)), bb:$offset), -// (JMP_RegNzt (i32 IntRegs:$src1), bb:$offset)>; - -//def : Pat <(brcond (i1 (setle (i32 IntRegs:$src1), 0)), bb:$offset), -// (JMP_RegLezt (i32 IntRegs:$src1), bb:$offset)>; - -//def : Pat <(brcond (i1 (setge (i32 IntRegs:$src1), 0)), bb:$offset), -// (JMP_RegGezt (i32 IntRegs:$src1), bb:$offset)>; - -//def : Pat <(brcond (i1 (setgt (i32 IntRegs:$src1), -1)), bb:$offset), -// (JMP_RegGezt (i32 IntRegs:$src1), bb:$offset)>; - -// Map call instruction -def : Pat<(callv3 (i32 IntRegs:$dst)), - (J2_callr (i32 IntRegs:$dst))>; -def : Pat<(callv3 tglobaladdr:$dst), - (J2_call tglobaladdr:$dst)>; -def : Pat<(callv3 texternalsym:$dst), - (J2_call texternalsym:$dst)>; -def : Pat<(callv3 tglobaltlsaddr:$dst), - (J2_call tglobaltlsaddr:$dst)>; - -def : Pat<(callv3nr (i32 IntRegs:$dst)), - (CALLRv3nr (i32 
IntRegs:$dst))>; -def : Pat<(callv3nr tglobaladdr:$dst), - (CALLv3nr tglobaladdr:$dst)>; -def : Pat<(callv3nr texternalsym:$dst), - (CALLv3nr texternalsym:$dst)>; - //===----------------------------------------------------------------------===// // :raw form of vrcmpys:hi/lo insns //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td index 398d2d3..18943a0 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td @@ -30,9 +30,6 @@ def DuplexIClassD: InstDuplex < 0xD >; def DuplexIClassE: InstDuplex < 0xE >; def DuplexIClassF: InstDuplex < 0xF >; -def addrga: PatLeaf<(i32 AddrGA:$Addr)>; -def addrgp: PatLeaf<(i32 AddrGP:$Addr)>; - let hasSideEffects = 0 in class T_Immext<Operand ImmType> : EXTENDERInst<(outs), (ins ImmType:$imm), @@ -53,14 +50,6 @@ let isCodeGenOnly = 1 in { def A4_ext_g : T_Immext<globaladdress>; } -def BITPOS32 : SDNodeXForm<imm, [{ - // Return the bit position we will set [0-31]. - // As an SDNode. - int32_t imm = N->getSExtValue(); - return XformMskToBitPosU5Imm(imm, SDLoc(N)); -}]>; - - // Hexagon V4 Architecture spec defines 8 instruction classes: // LD ST ALU32 XTYPE J JR MEMOP NV CR SYSTEM(system is not implemented in the // compiler) @@ -145,22 +134,6 @@ def C4_cmpneq : T_ALU32_3op_cmp<"!cmp.eq", 0b00, 1, 1>; def C4_cmplte : T_ALU32_3op_cmp<"!cmp.gt", 0b10, 1, 0>; def C4_cmplteu : T_ALU32_3op_cmp<"!cmp.gtu", 0b11, 1, 0>; -// Pats for instruction selection. - -// A class to embed the usual comparison patfrags within a zext to i32. -// The seteq/setne frags use "lhs" and "rhs" as operands, so use the same -// names, or else the frag's "body" won't match the operands. 
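// (Editor's illustration, not in the patch.) Since seteq is itself a PatFrag
// whose operands are named $lhs and $rhs, splicing Op.Fragment into the body
// makes CmpInReg<seteq> expand to
//
//   (i32 (zext (i1 (seteq node:$lhs, node:$rhs))))
//
// which is why the outer fragment must reuse the exact names $lhs/$rhs.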
-class CmpInReg<PatFrag Op> - : PatFrag<(ops node:$lhs, node:$rhs),(i32 (zext (i1 Op.Fragment)))>; - -def: T_cmp32_rr_pat<A4_rcmpeq, CmpInReg<seteq>, i32>; -def: T_cmp32_rr_pat<A4_rcmpneq, CmpInReg<setne>, i32>; - -def: T_cmp32_rr_pat<C4_cmpneq, setne, i1>; -def: T_cmp32_rr_pat<C4_cmplteu, setule, i1>; - -def: T_cmp32_rr_pat<C4_cmplteu, RevCmp<setuge>, i1>; - class T_CMP_rrbh<string mnemonic, bits<3> MinOp, bit IsComm> : SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, IntRegs:$Rt), "$Pd = "#mnemonic#"($Rs, $Rt)", [], "", S_3op_tc_2early_SLOT23>, @@ -190,21 +163,6 @@ def A4_cmpheq : T_CMP_rrbh<"cmph.eq", 0b011, 1>; def A4_cmphgt : T_CMP_rrbh<"cmph.gt", 0b100, 0>; def A4_cmphgtu : T_CMP_rrbh<"cmph.gtu", 0b101, 0>; -let AddedComplexity = 100 in { - def: Pat<(i1 (seteq (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), - 255), 0)), - (A4_cmpbeq IntRegs:$Rs, IntRegs:$Rt)>; - def: Pat<(i1 (setne (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), - 255), 0)), - (C2_not (A4_cmpbeq IntRegs:$Rs, IntRegs:$Rt))>; - def: Pat<(i1 (seteq (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), - 65535), 0)), - (A4_cmpheq IntRegs:$Rs, IntRegs:$Rt)>; - def: Pat<(i1 (setne (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), - 65535), 0)), - (C2_not (A4_cmpheq IntRegs:$Rs, IntRegs:$Rt))>; -} - class T_CMP_ribh<string mnemonic, bits<2> MajOp, bit IsHalf, bit IsComm, Operand ImmType, bit IsImmExt, bit IsImmSigned, int ImmBits> : ALU64Inst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, ImmType:$Imm), @@ -234,15 +192,15 @@ class T_CMP_ribh<string mnemonic, bits<2> MajOp, bit IsHalf, bit IsComm, let Inst{1-0} = Pd; } -def A4_cmpbeqi : T_CMP_ribh<"cmpb.eq", 0b00, 0, 1, u8Imm, 0, 0, 8>; -def A4_cmpbgti : T_CMP_ribh<"cmpb.gt", 0b01, 0, 0, s8Imm, 0, 1, 8>; -def A4_cmpbgtui : T_CMP_ribh<"cmpb.gtu", 0b10, 0, 0, u7Ext, 1, 0, 7>; -def A4_cmpheqi : T_CMP_ribh<"cmph.eq", 0b00, 1, 1, s8Ext, 1, 1, 8>; -def A4_cmphgti : T_CMP_ribh<"cmph.gt", 0b01, 1, 0, s8Ext, 1, 1, 8>; -def A4_cmphgtui : T_CMP_ribh<"cmph.gtu", 0b10, 1, 0, u7Ext, 1, 0, 7>; +def A4_cmpbeqi : T_CMP_ribh<"cmpb.eq", 0b00, 0, 1, u8_0Imm, 0, 0, 8>; +def A4_cmpbgti : T_CMP_ribh<"cmpb.gt", 0b01, 0, 0, s8_0Imm, 0, 1, 8>; +def A4_cmpbgtui : T_CMP_ribh<"cmpb.gtu", 0b10, 0, 0, u7_0Ext, 1, 0, 7>; +def A4_cmpheqi : T_CMP_ribh<"cmph.eq", 0b00, 1, 1, s8_0Ext, 1, 1, 8>; +def A4_cmphgti : T_CMP_ribh<"cmph.gt", 0b01, 1, 0, s8_0Ext, 1, 1, 8>; +def A4_cmphgtui : T_CMP_ribh<"cmph.gtu", 0b10, 1, 0, u7_0Ext, 1, 0, 7>; class T_RCMP_EQ_ri<string mnemonic, bit IsNeg> - : ALU32_ri<(outs IntRegs:$Rd), (ins IntRegs:$Rs, s8Ext:$s8), + : ALU32_ri<(outs IntRegs:$Rd), (ins IntRegs:$Rs, s8_0Ext:$s8), "$Rd = "#mnemonic#"($Rs, #$s8)", [], "", ALU32_2op_tc_1_SLOT0123>, ImmRegRel { let InputType = "imm"; @@ -270,16 +228,6 @@ class T_RCMP_EQ_ri<string mnemonic, bit IsNeg> def A4_rcmpeqi : T_RCMP_EQ_ri<"cmp.eq", 0>; def A4_rcmpneqi : T_RCMP_EQ_ri<"!cmp.eq", 1>; -def: Pat<(i32 (zext (i1 (seteq (i32 IntRegs:$Rs), s32ImmPred:$s8)))), - (A4_rcmpeqi IntRegs:$Rs, s32ImmPred:$s8)>; -def: Pat<(i32 (zext (i1 (setne (i32 IntRegs:$Rs), s32ImmPred:$s8)))), - (A4_rcmpneqi IntRegs:$Rs, s32ImmPred:$s8)>; - -// Preserve the S2_tstbit_r generation -def: Pat<(i32 (zext (i1 (setne (i32 (and (i32 (shl 1, (i32 IntRegs:$src2))), - (i32 IntRegs:$src1))), 0)))), - (C2_muxii (S2_tstbit_r IntRegs:$src1, IntRegs:$src2), 1, 0)>; - //===----------------------------------------------------------------------===// // ALU32 - //===----------------------------------------------------------------------===// @@ -308,26 +256,16 @@ class T_Combine1 <bits<2> 
MajOp, dag ins, string AsmStr> } let opExtendable = 2 in -def A4_combineri : T_Combine1<0b00, (ins IntRegs:$Rs, s8Ext:$s8), +def A4_combineri : T_Combine1<0b00, (ins IntRegs:$Rs, s8_0Ext:$s8), "$Rdd = combine($Rs, #$s8)">; let opExtendable = 1 in -def A4_combineir : T_Combine1<0b01, (ins s8Ext:$s8, IntRegs:$Rs), +def A4_combineir : T_Combine1<0b01, (ins s8_0Ext:$s8, IntRegs:$Rs), "$Rdd = combine(#$s8, $Rs)">; -// The complexity of the combines involving immediates should be greater -// than the complexity of the combine with two registers. -let AddedComplexity = 50 in { -def: Pat<(HexagonCOMBINE IntRegs:$r, s32ImmPred:$i), - (A4_combineri IntRegs:$r, s32ImmPred:$i)>; - -def: Pat<(HexagonCOMBINE s32ImmPred:$i, IntRegs:$r), - (A4_combineir s32ImmPred:$i, IntRegs:$r)>; -} - // A4_combineii: Set two small immediates. let hasSideEffects = 0, isExtendable = 1, opExtentBits = 6, opExtendable = 2 in -def A4_combineii: ALU32Inst<(outs DoubleRegs:$Rdd), (ins s8Imm:$s8, u6Ext:$U6), +def A4_combineii: ALU32Inst<(outs DoubleRegs:$Rdd), (ins s8_0Imm:$s8, u6_0Ext:$U6), "$Rdd = combine(#$s8, #$U6)"> { bits<5> Rdd; bits<8> s8; @@ -341,12 +279,6 @@ def A4_combineii: ALU32Inst<(outs DoubleRegs:$Rdd), (ins s8Imm:$s8, u6Ext:$U6), let Inst{4-0} = Rdd; } -// The complexity of the combine with two immediates should be greater than -// the complexity of a combine involving a register. -let AddedComplexity = 75 in -def: Pat<(HexagonCOMBINE s8ImmPred:$s8, u32ImmPred:$u6), - (A4_combineii imm:$s8, imm:$u6)>; - //===----------------------------------------------------------------------===// // ALU32/PERM - //===----------------------------------------------------------------------===// @@ -355,39 +287,6 @@ def: Pat<(HexagonCOMBINE s8ImmPred:$s8, u32ImmPred:$u6), // LD + //===----------------------------------------------------------------------===// -def Zext64: OutPatFrag<(ops node:$Rs), - (i64 (A4_combineir 0, (i32 $Rs)))>; -def Sext64: OutPatFrag<(ops node:$Rs), - (i64 (A2_sxtw (i32 $Rs)))>; - -// Patterns to generate indexed loads with different forms of the address: -// - frameindex, -// - base + offset, -// - base (without offset). -multiclass Loadxm_pat<PatFrag Load, ValueType VT, PatFrag ValueMod, - PatLeaf ImmPred, InstHexagon MI> { - def: Pat<(VT (Load AddrFI:$fi)), - (VT (ValueMod (MI AddrFI:$fi, 0)))>; - def: Pat<(VT (Load (add AddrFI:$fi, ImmPred:$Off))), - (VT (ValueMod (MI AddrFI:$fi, imm:$Off)))>; - def: Pat<(VT (Load (add IntRegs:$Rs, ImmPred:$Off))), - (VT (ValueMod (MI IntRegs:$Rs, imm:$Off)))>; - def: Pat<(VT (Load (i32 IntRegs:$Rs))), - (VT (ValueMod (MI IntRegs:$Rs, 0)))>; -} - -defm: Loadxm_pat<extloadi1, i64, Zext64, s32_0ImmPred, L2_loadrub_io>; -defm: Loadxm_pat<extloadi8, i64, Zext64, s32_0ImmPred, L2_loadrub_io>; -defm: Loadxm_pat<extloadi16, i64, Zext64, s31_1ImmPred, L2_loadruh_io>; -defm: Loadxm_pat<zextloadi1, i64, Zext64, s32_0ImmPred, L2_loadrub_io>; -defm: Loadxm_pat<zextloadi8, i64, Zext64, s32_0ImmPred, L2_loadrub_io>; -defm: Loadxm_pat<zextloadi16, i64, Zext64, s31_1ImmPred, L2_loadruh_io>; -defm: Loadxm_pat<sextloadi8, i64, Sext64, s32_0ImmPred, L2_loadrb_io>; -defm: Loadxm_pat<sextloadi16, i64, Sext64, s31_1ImmPred, L2_loadrh_io>; - -// Map Rdd = anyext(Rs) -> Rdd = combine(#0, Rs). -def: Pat<(i64 (anyext (i32 IntRegs:$src1))), (Zext64 IntRegs:$src1)>; - //===----------------------------------------------------------------------===// // Template class for load instructions with Absolute set addressing mode. 
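// (Editor's sketch; an instantiation written out for illustration, not in
// the patch.) The Loadxm_pat multiclass deleted above widened 32-bit loads
// to i64 by post-processing the selected instruction with an OutPatFrag. For
// example, the zextloadi8 base+offset case instantiates to the equivalent of:
//
//   def: Pat<(i64 (zextloadi8 (add IntRegs:$Rs, s32_0ImmPred:$Off))),
//            (i64 (A4_combineir 0, (L2_loadrub_io IntRegs:$Rs, imm:$Off)))>;
//
// i.e. Zext64 pairs the 32-bit load result with a zero upper word via
// combine, while Sext64 goes through A2_sxtw instead.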
//===----------------------------------------------------------------------===// @@ -395,7 +294,7 @@ let isExtended = 1, opExtendable = 2, opExtentBits = 6, addrMode = AbsoluteSet, hasSideEffects = 0 in class T_LD_abs_set<string mnemonic, RegisterClass RC, bits<4>MajOp>: LDInst<(outs RC:$dst1, IntRegs:$dst2), - (ins u6Ext:$addr), + (ins u6_0Ext:$addr), "$dst1 = "#mnemonic#"($dst2 = #$addr)", []> { bits<7> name; @@ -447,7 +346,7 @@ let InputType = "imm", addrMode = BaseLongOffset, isExtended = 1, opExtentBits = 6, opExtendable = 3 in class T_LoadAbsReg <string mnemonic, string CextOp, RegisterClass RC, bits<4> MajOp> - : LDInst <(outs RC:$dst), (ins IntRegs:$src1, u2Imm:$src2, u6Ext:$src3), + : LDInst <(outs RC:$dst), (ins IntRegs:$src1, u2_0Imm:$src2, u6_0Ext:$src3), "$dst = "#mnemonic#"($src1<<#$src2 + #$src3)", [] >, ImmRegShl { bits<5> dst; @@ -495,48 +394,12 @@ let accessSize = DoubleWordAccess in def L4_loadrd_ur : T_LoadAbsReg<"memd", "LDrid", DoubleRegs, 0b1110>; -multiclass T_LoadAbsReg_Pat <PatFrag ldOp, InstHexagon MI, ValueType VT = i32> { - def : Pat <(VT (ldOp (add (shl IntRegs:$src1, u2ImmPred:$src2), - (HexagonCONST32 tglobaladdr:$src3)))), - (MI IntRegs:$src1, u2ImmPred:$src2, tglobaladdr:$src3)>; - def : Pat <(VT (ldOp (add IntRegs:$src1, - (HexagonCONST32 tglobaladdr:$src2)))), - (MI IntRegs:$src1, 0, tglobaladdr:$src2)>; - - def : Pat <(VT (ldOp (add (shl IntRegs:$src1, u2ImmPred:$src2), - (HexagonCONST32 tconstpool:$src3)))), - (MI IntRegs:$src1, u2ImmPred:$src2, tconstpool:$src3)>; - def : Pat <(VT (ldOp (add IntRegs:$src1, - (HexagonCONST32 tconstpool:$src2)))), - (MI IntRegs:$src1, 0, tconstpool:$src2)>; - - def : Pat <(VT (ldOp (add (shl IntRegs:$src1, u2ImmPred:$src2), - (HexagonCONST32 tjumptable:$src3)))), - (MI IntRegs:$src1, u2ImmPred:$src2, tjumptable:$src3)>; - def : Pat <(VT (ldOp (add IntRegs:$src1, - (HexagonCONST32 tjumptable:$src2)))), - (MI IntRegs:$src1, 0, tjumptable:$src2)>; -} - -let AddedComplexity = 60 in { -defm : T_LoadAbsReg_Pat <sextloadi8, L4_loadrb_ur>; -defm : T_LoadAbsReg_Pat <zextloadi8, L4_loadrub_ur>; -defm : T_LoadAbsReg_Pat <extloadi8, L4_loadrub_ur>; - -defm : T_LoadAbsReg_Pat <sextloadi16, L4_loadrh_ur>; -defm : T_LoadAbsReg_Pat <zextloadi16, L4_loadruh_ur>; -defm : T_LoadAbsReg_Pat <extloadi16, L4_loadruh_ur>; - -defm : T_LoadAbsReg_Pat <load, L4_loadri_ur>; -defm : T_LoadAbsReg_Pat <load, L4_loadrd_ur, i64>; -} - //===----------------------------------------------------------------------===// // Template classes for the non-predicated load instructions with // base + register offset addressing mode //===----------------------------------------------------------------------===// class T_load_rr <string mnemonic, RegisterClass RC, bits<3> MajOp>: - LDInst<(outs RC:$dst), (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$u2), + LDInst<(outs RC:$dst), (ins IntRegs:$src1, IntRegs:$src2, u2_0Imm:$u2), "$dst = "#mnemonic#"($src1 + $src2<<#$u2)", [], "", V4LDST_tc_ld_SLOT01>, ImmRegShl, AddrModeRel { bits<5> dst; @@ -563,7 +426,7 @@ let isPredicated = 1 in class T_pload_rr <string mnemonic, RegisterClass RC, bits<3> MajOp, bit isNot, bit isPredNew>: LDInst <(outs RC:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$u2), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2_0Imm:$u2), !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", ") ")#"$dst = "#mnemonic#"($src2+$src3<<#$u2)", [], "", V4LDST_tc_ld_SLOT01>, AddrModeRel { @@ -628,50 +491,6 @@ defm loadri : ld_idxd_shl<"memw", "LDriw", IntRegs, 0b100>; let 
accessSize = DoubleWordAccess in defm loadrd : ld_idxd_shl<"memd", "LDrid", DoubleRegs, 0b110>; -// 'def pats' for load instructions with base + register offset and non-zero -// immediate value. Immediate value is used to left-shift the second -// register operand. -class Loadxs_pat<PatFrag Load, ValueType VT, InstHexagon MI> - : Pat<(VT (Load (add (i32 IntRegs:$Rs), - (i32 (shl (i32 IntRegs:$Rt), u2ImmPred:$u2))))), - (VT (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2))>; - -let AddedComplexity = 40 in { - def: Loadxs_pat<extloadi8, i32, L4_loadrub_rr>; - def: Loadxs_pat<zextloadi8, i32, L4_loadrub_rr>; - def: Loadxs_pat<sextloadi8, i32, L4_loadrb_rr>; - def: Loadxs_pat<extloadi16, i32, L4_loadruh_rr>; - def: Loadxs_pat<zextloadi16, i32, L4_loadruh_rr>; - def: Loadxs_pat<sextloadi16, i32, L4_loadrh_rr>; - def: Loadxs_pat<load, i32, L4_loadri_rr>; - def: Loadxs_pat<load, i64, L4_loadrd_rr>; -} - -// 'def pats' for load instruction base + register offset and -// zero immediate value. -class Loadxs_simple_pat<PatFrag Load, ValueType VT, InstHexagon MI> - : Pat<(VT (Load (add (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)))), - (VT (MI IntRegs:$Rs, IntRegs:$Rt, 0))>; - -let AddedComplexity = 20 in { - def: Loadxs_simple_pat<extloadi8, i32, L4_loadrub_rr>; - def: Loadxs_simple_pat<zextloadi8, i32, L4_loadrub_rr>; - def: Loadxs_simple_pat<sextloadi8, i32, L4_loadrb_rr>; - def: Loadxs_simple_pat<extloadi16, i32, L4_loadruh_rr>; - def: Loadxs_simple_pat<zextloadi16, i32, L4_loadruh_rr>; - def: Loadxs_simple_pat<sextloadi16, i32, L4_loadrh_rr>; - def: Loadxs_simple_pat<load, i32, L4_loadri_rr>; - def: Loadxs_simple_pat<load, i64, L4_loadrd_rr>; -} - -// zext i1->i64 -def: Pat<(i64 (zext (i1 PredRegs:$src1))), - (Zext64 (C2_muxii PredRegs:$src1, 1, 0))>; - -// zext i32->i64 -def: Pat<(i64 (zext (i32 IntRegs:$src1))), - (Zext64 IntRegs:$src1)>; - //===----------------------------------------------------------------------===// // LD - //===----------------------------------------------------------------------===// @@ -688,7 +507,7 @@ let isExtended = 1, opExtendable = 1, opExtentBits = 6, class T_ST_absset <string mnemonic, string BaseOp, RegisterClass RC, bits<3> MajOp, MemAccessSize AccessSz, bit isHalf = 0> : STInst<(outs IntRegs:$dst), - (ins u6Ext:$addr, RC:$src), + (ins u6_0Ext:$addr, RC:$src), mnemonic#"($dst = #$addr) = $src"#!if(isHalf, ".h","")>, NewValueRel { bits<5> dst; bits<6> addr; @@ -727,7 +546,7 @@ isExtended = 1, opExtentBits= 6 in class T_ST_absset_nv <string mnemonic, string BaseOp, bits<2> MajOp, MemAccessSize AccessSz > : NVInst <(outs IntRegs:$dst), - (ins u6Ext:$addr, IntRegs:$src), + (ins u6_0Ext:$addr, IntRegs:$src), mnemonic#"($dst = #$addr) = $src.new">, NewValueRel { bits<5> dst; bits<6> addr; @@ -757,7 +576,7 @@ let isExtended = 1, opExtendable = 2, opExtentBits = 6, InputType = "imm", class T_StoreAbsReg <string mnemonic, string CextOp, RegisterClass RC, bits<3> MajOp, MemAccessSize AccessSz, bit isHalf = 0> : STInst<(outs), - (ins IntRegs:$src1, u2Imm:$src2, u6Ext:$src3, RC:$src4), + (ins IntRegs:$src1, u2_0Imm:$src2, u6_0Ext:$src3, RC:$src4), mnemonic#"($src1<<#$src2 + #$src3) = $src4"#!if(isHalf, ".h",""), []>, ImmRegShl, NewValueRel { @@ -794,35 +613,12 @@ def S4_storeri_ur : T_StoreAbsReg <"memw", "STriw", IntRegs, 0b100, WordAccess>; def S4_storerd_ur : T_StoreAbsReg <"memd", "STrid", DoubleRegs, 0b110, DoubleWordAccess>; -let AddedComplexity = 40 in -multiclass T_StoreAbsReg_Pats <InstHexagon MI, RegisterClass RC, ValueType VT, - PatFrag stOp> { - def : Pat<(stOp (VT RC:$src4), - 
(add (shl (i32 IntRegs:$src1), u2ImmPred:$src2), - u32ImmPred:$src3)), - (MI IntRegs:$src1, u2ImmPred:$src2, u32ImmPred:$src3, RC:$src4)>; - - def : Pat<(stOp (VT RC:$src4), - (add (shl IntRegs:$src1, u2ImmPred:$src2), - (HexagonCONST32 tglobaladdr:$src3))), - (MI IntRegs:$src1, u2ImmPred:$src2, tglobaladdr:$src3, RC:$src4)>; - - def : Pat<(stOp (VT RC:$src4), - (add IntRegs:$src1, (HexagonCONST32 tglobaladdr:$src3))), - (MI IntRegs:$src1, 0, tglobaladdr:$src3, RC:$src4)>; -} - -defm : T_StoreAbsReg_Pats <S4_storerd_ur, DoubleRegs, i64, store>; -defm : T_StoreAbsReg_Pats <S4_storeri_ur, IntRegs, i32, store>; -defm : T_StoreAbsReg_Pats <S4_storerb_ur, IntRegs, i32, truncstorei8>; -defm : T_StoreAbsReg_Pats <S4_storerh_ur, IntRegs, i32, truncstorei16>; - let mayStore = 1, isNVStore = 1, isExtended = 1, addrMode = BaseLongOffset, opExtentBits = 6, isNewValue = 1, opNewValue = 3, opExtendable = 2 in class T_StoreAbsRegNV <string mnemonic, string CextOp, bits<2> MajOp, MemAccessSize AccessSz> : NVInst <(outs ), - (ins IntRegs:$src1, u2Imm:$src2, u6Ext:$src3, IntRegs:$src4), + (ins IntRegs:$src1, u2_0Imm:$src2, u6_0Ext:$src3, IntRegs:$src4), mnemonic#"($src1<<#$src2 + #$src3) = $src4.new">, NewValueRel { bits<5> src1; bits<2> src2; @@ -854,7 +650,7 @@ def S4_storerinew_ur : T_StoreAbsRegNV <"memw", "STriw", 0b10, WordAccess>; //===----------------------------------------------------------------------===// let isPredicable = 1 in class T_store_rr <string mnemonic, RegisterClass RC, bits<3> MajOp, bit isH> - : STInst < (outs ), (ins IntRegs:$Rs, IntRegs:$Ru, u2Imm:$u2, RC:$Rt), + : STInst < (outs ), (ins IntRegs:$Rs, IntRegs:$Ru, u2_0Imm:$u2, RC:$Rt), mnemonic#"($Rs + $Ru<<#$u2) = $Rt"#!if(isH, ".h",""), [],"",V4LDST_tc_st_SLOT01>, ImmRegShl, AddrModeRel { @@ -885,7 +681,7 @@ let isPredicated = 1 in class T_pstore_rr <string mnemonic, RegisterClass RC, bits<3> MajOp, bit isNot, bit isPredNew, bit isH> : STInst <(outs), - (ins PredRegs:$Pv, IntRegs:$Rs, IntRegs:$Ru, u2Imm:$u2, RC:$Rt), + (ins PredRegs:$Pv, IntRegs:$Rs, IntRegs:$Ru, u2_0Imm:$u2, RC:$Rt), !if(isNot, "if (!$Pv", "if ($Pv")#!if(isPredNew, ".new) ", ") ")#mnemonic#"($Rs+$Ru<<#$u2) = $Rt"#!if(isH, ".h",""), @@ -921,7 +717,7 @@ class T_pstore_rr <string mnemonic, RegisterClass RC, bits<3> MajOp, //===----------------------------------------------------------------------===// let isPredicable = 1, isNewValue = 1, opNewValue = 3 in class T_store_new_rr <string mnemonic, bits<2> MajOp> : - NVInst < (outs ), (ins IntRegs:$Rs, IntRegs:$Ru, u2Imm:$u2, IntRegs:$Nt), + NVInst < (outs ), (ins IntRegs:$Rs, IntRegs:$Ru, u2_0Imm:$u2, IntRegs:$Nt), mnemonic#"($Rs + $Ru<<#$u2) = $Nt.new", [],"",V4LDST_tc_st_SLOT0>, ImmRegShl, AddrModeRel { @@ -948,7 +744,7 @@ class T_store_new_rr <string mnemonic, bits<2> MajOp> : let isPredicated = 1, isNewValue = 1, opNewValue = 4 in class T_pstore_new_rr <string mnemonic, bits<2> MajOp, bit isNot, bit isPredNew> : NVInst<(outs), - (ins PredRegs:$Pv, IntRegs:$Rs, IntRegs:$Ru, u2Imm:$u2, IntRegs:$Nt), + (ins PredRegs:$Pv, IntRegs:$Rs, IntRegs:$Ru, u2_0Imm:$u2, IntRegs:$Nt), !if(isNot, "if (!$Pv", "if ($Pv")#!if(isPredNew, ".new) ", ") ")#mnemonic#"($Rs+$Ru<<#$u2) = $Nt.new", [], "", V4LDST_tc_st_SLOT0>, AddrModeRel { @@ -1035,48 +831,13 @@ let addrMode = BaseRegOffset, InputType = "reg", hasSideEffects = 0 in { defm storerf: ST_Idxd_shl<"memh", "STrif", IntRegs, 0b011, 1>; } -class Storexs_pat<PatFrag Store, PatFrag Value, InstHexagon MI> - : Pat<(Store Value:$Ru, (add (i32 IntRegs:$Rs), - (i32 (shl (i32 IntRegs:$Rt), 
u2ImmPred:$u2)))), - (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2, Value:$Ru)>; - -let AddedComplexity = 40 in { - def: Storexs_pat<truncstorei8, I32, S4_storerb_rr>; - def: Storexs_pat<truncstorei16, I32, S4_storerh_rr>; - def: Storexs_pat<store, I32, S4_storeri_rr>; - def: Storexs_pat<store, I64, S4_storerd_rr>; -} - -class Store_rr_pat<PatFrag Store, PatFrag Value, InstHexagon MI> - : Pat<(Store Value:$Ru, (add I32:$Rs, I32:$Rt)), - (MI IntRegs:$Rs, IntRegs:$Rt, 0, Value:$Ru)>; - -let AddedComplexity = 20 in { - def: Store_rr_pat<truncstorei8, I32, S4_storerb_rr>; - def: Store_rr_pat<truncstorei16, I32, S4_storerh_rr>; - def: Store_rr_pat<store, I32, S4_storeri_rr>; - def: Store_rr_pat<store, I64, S4_storerd_rr>; -} - - -// memd(Rx++#s4:3)=Rtt -// memd(Rx++#s4:3:circ(Mu))=Rtt -// memd(Rx++I:circ(Mu))=Rtt -// memd(Rx++Mu)=Rtt -// memd(Rx++Mu:brev)=Rtt -// memd(gp+#u16:3)=Rtt - -// Store doubleword conditionally. -// if ([!]Pv[.new]) memd(#u6)=Rtt -// TODO: needs to be implemented. - //===----------------------------------------------------------------------===// // Template class //===----------------------------------------------------------------------===// let isPredicable = 1, isExtendable = 1, isExtentSigned = 1, opExtentBits = 8, opExtendable = 2 in class T_StoreImm <string mnemonic, Operand OffsetOp, bits<2> MajOp > - : STInst <(outs ), (ins IntRegs:$Rs, OffsetOp:$offset, s8Ext:$S8), + : STInst <(outs ), (ins IntRegs:$Rs, OffsetOp:$offset, s8_0Ext:$S8), mnemonic#"($Rs+#$offset)=#$S8", [], "", V4LDST_tc_st_SLOT01>, ImmRegRel, PredNewRel { @@ -1105,7 +866,7 @@ let isPredicated = 1, isExtendable = 1, isExtentSigned = 1, opExtentBits = 6, class T_StoreImm_pred <string mnemonic, Operand OffsetOp, bits<2> MajOp, bit isPredNot, bit isPredNew > : STInst <(outs ), - (ins PredRegs:$Pv, IntRegs:$Rs, OffsetOp:$offset, s6Ext:$S6), + (ins PredRegs:$Pv, IntRegs:$Rs, OffsetOp:$offset, s6_0Ext:$S6), !if(isPredNot, "if (!$Pv", "if ($Pv")#!if(isPredNew, ".new) ", ") ")#mnemonic#"($Rs+#$offset)=#$S6", [], "", V4LDST_tc_st_SLOT01>, @@ -1173,126 +934,6 @@ let hasSideEffects = 0, addrMode = BaseImmOffset, defm S4_storeiri : ST_Imm<"memw", "STriw", u6_2Imm, 0b10>; } -def IMM_BYTE : SDNodeXForm<imm, [{ - // -1 etc is represented as 255 etc - // assigning to a byte restores our desired signed value. - int8_t imm = N->getSExtValue(); - return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32); -}]>; - -def IMM_HALF : SDNodeXForm<imm, [{ - // -1 etc is represented as 65535 etc - // assigning to a short restores our desired signed value. - int16_t imm = N->getSExtValue(); - return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32); -}]>; - -def IMM_WORD : SDNodeXForm<imm, [{ - // -1 etc can be represented as 4294967295 etc - // Currently, it's not doing this. But some optimization - // might convert -1 to a large +ve number. - // assigning to a word restores our desired signed value. - int32_t imm = N->getSExtValue(); - return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32); -}]>; - -def ToImmByte : OutPatFrag<(ops node:$R), (IMM_BYTE $R)>; -def ToImmHalf : OutPatFrag<(ops node:$R), (IMM_HALF $R)>; -def ToImmWord : OutPatFrag<(ops node:$R), (IMM_WORD $R)>; - -// Emit store-immediate, but only when the stored value will not be constant- -// extended. The reason for that is that there is no pass that can optimize -// constant extenders in store-immediate instructions. In some cases we can -// end up will a number of such stores, all of which store the same extended -// value (e.g. 
after unrolling a loop that initializes floating point array). - -// Predicates to determine if the 16-bit immediate is expressible as a sign- -// extended 8-bit immediate. Store-immediate-halfword will ignore any bits -// beyond 0..15, so we don't care what is in there. - -def i16in8ImmPred: PatLeaf<(i32 imm), [{ - int64_t v = (int16_t)N->getSExtValue(); - return v == (int64_t)(int8_t)v; -}]>; - -// Predicates to determine if the 32-bit immediate is expressible as a sign- -// extended 8-bit immediate. -def i32in8ImmPred: PatLeaf<(i32 imm), [{ - int64_t v = (int32_t)N->getSExtValue(); - return v == (int64_t)(int8_t)v; -}]>; - - -let AddedComplexity = 40 in { - // Even though the offset is not extendable in the store-immediate, we - // can still generate the fi# in the base address. If the final offset - // is not valid for the instruction, we will replace it with a scratch - // register. -// def: Storexm_fi_pat <truncstorei8, s32ImmPred, ToImmByte, S4_storeirb_io>; -// def: Storexm_fi_pat <truncstorei16, i16in8ImmPred, ToImmHalf, -// S4_storeirh_io>; -// def: Storexm_fi_pat <store, i32in8ImmPred, ToImmWord, S4_storeiri_io>; - -// defm: Storexm_fi_add_pat <truncstorei8, s32ImmPred, u6_0ImmPred, ToImmByte, -// S4_storeirb_io>; -// defm: Storexm_fi_add_pat <truncstorei16, i16in8ImmPred, u6_1ImmPred, -// ToImmHalf, S4_storeirh_io>; -// defm: Storexm_fi_add_pat <store, i32in8ImmPred, u6_2ImmPred, ToImmWord, -// S4_storeiri_io>; - - defm: Storexm_add_pat<truncstorei8, s32ImmPred, u6_0ImmPred, ToImmByte, - S4_storeirb_io>; - defm: Storexm_add_pat<truncstorei16, i16in8ImmPred, u6_1ImmPred, ToImmHalf, - S4_storeirh_io>; - defm: Storexm_add_pat<store, i32in8ImmPred, u6_2ImmPred, ToImmWord, - S4_storeiri_io>; -} - -def: Storexm_simple_pat<truncstorei8, s32ImmPred, ToImmByte, S4_storeirb_io>; -def: Storexm_simple_pat<truncstorei16, s32ImmPred, ToImmHalf, S4_storeirh_io>; -def: Storexm_simple_pat<store, s32ImmPred, ToImmWord, S4_storeiri_io>; - -// memb(Rx++#s4:0:circ(Mu))=Rt -// memb(Rx++I:circ(Mu))=Rt -// memb(Rx++Mu)=Rt -// memb(Rx++Mu:brev)=Rt -// memb(gp+#u16:0)=Rt - -// Store halfword. -// TODO: needs to be implemented -// memh(Re=#U6)=Rt.H -// memh(Rs+#s11:1)=Rt.H -// memh(Rs+Ru<<#u2)=Rt.H -// TODO: needs to be implemented. - -// memh(Ru<<#u2+#U6)=Rt.H -// memh(Rx++#s4:1:circ(Mu))=Rt.H -// memh(Rx++#s4:1:circ(Mu))=Rt -// memh(Rx++I:circ(Mu))=Rt.H -// memh(Rx++I:circ(Mu))=Rt -// memh(Rx++Mu)=Rt.H -// memh(Rx++Mu)=Rt -// memh(Rx++Mu:brev)=Rt.H -// memh(Rx++Mu:brev)=Rt -// memh(gp+#u16:1)=Rt -// if ([!]Pv[.new]) memh(#u6)=Rt.H -// if ([!]Pv[.new]) memh(#u6)=Rt - -// if ([!]Pv[.new]) memh(Rs+#u6:1)=Rt.H -// TODO: needs to be implemented. - -// if ([!]Pv[.new]) memh(Rx++#s4:1)=Rt.H -// TODO: Needs to be implemented. - -// Store word. -// memw(Re=#U6)=Rt -// TODO: Needs to be implemented. 
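// (Editor's worked example, not part of the patch.) The i16in8ImmPred test
// above,
//
//   int64_t v = (int16_t)N->getSExtValue();
//   return v == (int64_t)(int8_t)v;
//
// accepts exactly those 16-bit values whose low byte sign-extends back to
// the same value, i.e. -128..127: 0xFFFF (-1) passes since (int8_t)0xFF is
// -1, while 0x0080 (128) fails since (int8_t)0x80 is -128.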
-// memw(Rx++#s4:2)=Rt -// memw(Rx++#s4:2:circ(Mu))=Rt -// memw(Rx++I:circ(Mu))=Rt -// memw(Rx++Mu)=Rt -// memw(Rx++Mu:brev)=Rt - //===----------------------------------------------------------------------=== // ST - //===----------------------------------------------------------------------=== @@ -1685,7 +1326,7 @@ let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 11, class NVJri_template<string mnemonic, bits<3> majOp, bit isNegCond, bit isTak> : NVInst_V4<(outs), - (ins IntRegs:$src1, u5Imm:$src2, brtarget:$offset), + (ins IntRegs:$src1, u5_0Imm:$src2, brtarget:$offset), "if ("#!if(isNegCond, "!","")#mnemonic#"($src1.new, #$src2)) jump:" #!if(isTak, "t","nt")#" $offset", []> { @@ -1738,19 +1379,22 @@ let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator = 1, // with a register and an hardcoded 0/-1 immediate value. //===----------------------------------------------------------------------===// -let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 11, +let isExtendable = 1, isExtentSigned = 1, opExtentBits = 11, opExtentAlign = 2 in class NVJ_ConstImm_template<string mnemonic, bits<3> majOp, string ImmVal, bit isNegCond, bit isTak> : NVInst_V4<(outs), - (ins IntRegs:$src1, brtarget:$offset), + !if(!eq(ImmVal, "{-1}"), + (ins IntRegs:$src1, n1Const:$n1, brtarget:$offset), + (ins IntRegs:$src1, brtarget:$offset)), "if ("#!if(isNegCond, "!","")#mnemonic - #"($src1.new, #" # ImmVal # ")) jump:" + #"($src1.new, #" # !if(!eq(ImmVal, "{-1}"), "$n1", ImmVal) # ")) jump:" #!if(isTak, "t","nt")#" $offset", []> { let isTaken = isTak; let isPredicatedFalse = isNegCond; let isTaken = isTak; + let opExtendable = !if(!eq(ImmVal, "{-1}"), 2, 1); bits<3> src1; bits<11> offset; @@ -1787,8 +1431,8 @@ multiclass NVJ_ConstImm_base<string mnemonic, string BaseOp, bits<3> majOp, let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator=1, Defs = [PC], hasSideEffects = 0 in { defm J4_tstbit0 : NVJ_ConstImm_base<"tstbit", "TSTBIT", 0b011, "0">, PredRel; - defm J4_cmpeqn1 : NVJ_ConstImm_base<"cmp.eq", "CMPEQ", 0b100, "-1">, PredRel; - defm J4_cmpgtn1 : NVJ_ConstImm_base<"cmp.gt", "CMPGT", 0b101, "-1">, PredRel; + defm J4_cmpeqn1 : NVJ_ConstImm_base<"cmp.eq", "CMPEQ", 0b100, "{-1}">, PredRel; + defm J4_cmpgtn1 : NVJ_ConstImm_base<"cmp.gt", "CMPGT", 0b101, "{-1}">, PredRel; } // J4_hintjumpr: Hint indirect conditional jump. 
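// (Editor's note on the NVJ_ConstImm change above; restating the hunk.) The
// "{-1}" marker string is resolved at TableGen time to pick an alternate
// operand list:
//
//   !if(!eq(ImmVal, "{-1}"),
//       (ins IntRegs:$src1, n1Const:$n1, brtarget:$offset),
//       (ins IntRegs:$src1, brtarget:$offset))
//
// so the -1 becomes a real n1Const operand ($n1) rather than hardcoded asm
// text, and opExtendable moves from 1 to 2 so it still names the $offset
// operand.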
@@ -1814,7 +1458,7 @@ def J4_hintjumpr: JRInst < // PC-relative add let hasNewValue = 1, isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 6, hasSideEffects = 0, Uses = [PC] in -def C4_addipc : CRInst <(outs IntRegs:$Rd), (ins u6Ext:$u6), +def C4_addipc : CRInst <(outs IntRegs:$Rd), (ins u6_0Ext:$u6), "$Rd = add(pc, #$u6)", [], "", CR_tc_2_SLOT3 > { bits<5> Rd; bits<6> u6; @@ -1860,48 +1504,6 @@ def C4_and_orn : T_LOGICAL_3OP<"and", "or", 0b01, 1>; def C4_or_andn : T_LOGICAL_3OP<"or", "and", 0b10, 1>; def C4_or_orn : T_LOGICAL_3OP<"or", "or", 0b11, 1>; -// op(Ps, op(Pt, Pu)) -class LogLog_pat<SDNode Op1, SDNode Op2, InstHexagon MI> - : Pat<(i1 (Op1 I1:$Ps, (Op2 I1:$Pt, I1:$Pu))), - (MI I1:$Ps, I1:$Pt, I1:$Pu)>; - -// op(Ps, op(Pt, ~Pu)) -class LogLogNot_pat<SDNode Op1, SDNode Op2, InstHexagon MI> - : Pat<(i1 (Op1 I1:$Ps, (Op2 I1:$Pt, (not I1:$Pu)))), - (MI I1:$Ps, I1:$Pt, I1:$Pu)>; - -def: LogLog_pat<and, and, C4_and_and>; -def: LogLog_pat<and, or, C4_and_or>; -def: LogLog_pat<or, and, C4_or_and>; -def: LogLog_pat<or, or, C4_or_or>; - -def: LogLogNot_pat<and, and, C4_and_andn>; -def: LogLogNot_pat<and, or, C4_and_orn>; -def: LogLogNot_pat<or, and, C4_or_andn>; -def: LogLogNot_pat<or, or, C4_or_orn>; - -//===----------------------------------------------------------------------===// -// PIC: Support for PIC compilations. The patterns and SD nodes defined -// below are needed to support code generation for PIC -//===----------------------------------------------------------------------===// - -def SDT_HexagonAtGot - : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>; -def SDT_HexagonAtPcrel - : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; - -// AT_GOT address-of-GOT, address-of-global, offset-in-global -def HexagonAtGot : SDNode<"HexagonISD::AT_GOT", SDT_HexagonAtGot>; -// AT_PCREL address-of-global -def HexagonAtPcrel : SDNode<"HexagonISD::AT_PCREL", SDT_HexagonAtPcrel>; - -def: Pat<(HexagonAtGot I32:$got, I32:$addr, (i32 0)), - (L2_loadri_io I32:$got, imm:$addr)>; -def: Pat<(HexagonAtGot I32:$got, I32:$addr, s30_2ImmPred:$off), - (A2_addi (L2_loadri_io I32:$got, imm:$addr), imm:$off)>; -def: Pat<(HexagonAtPcrel I32:$addr), - (C4_addipc imm:$addr)>; - //===----------------------------------------------------------------------===// // CR - //===----------------------------------------------------------------------===// @@ -1914,11 +1516,6 @@ def: Pat<(HexagonAtPcrel I32:$addr), def A4_andnp : T_ALU64_logical<"and", 0b001, 1, 0, 1>; def A4_ornp : T_ALU64_logical<"or", 0b011, 1, 0, 1>; -def: Pat<(i64 (and (i64 DoubleRegs:$Rs), (i64 (not (i64 DoubleRegs:$Rt))))), - (A4_andnp DoubleRegs:$Rs, DoubleRegs:$Rt)>; -def: Pat<(i64 (or (i64 DoubleRegs:$Rs), (i64 (not (i64 DoubleRegs:$Rt))))), - (A4_ornp DoubleRegs:$Rs, DoubleRegs:$Rt)>; - let hasNewValue = 1, hasSideEffects = 0 in def S4_parity: ALU64Inst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt), "$Rd = parity($Rs, $Rt)", [], "", ALU64_tc_2_SLOT23> { @@ -1938,10 +1535,8 @@ def S4_parity: ALU64Inst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt), let isExtentSigned = 1, hasNewValue = 1, isExtendable = 1, opExtentBits = 6, opExtendable = 3 in def S4_addaddi : ALU64Inst <(outs IntRegs:$Rd), - (ins IntRegs:$Rs, IntRegs:$Ru, s6Ext:$s6), - "$Rd = add($Rs, add($Ru, #$s6))" , - [(set (i32 IntRegs:$Rd), (add (i32 IntRegs:$Rs), - (add (i32 IntRegs:$Ru), s32ImmPred:$s6)))], + (ins IntRegs:$Rs, IntRegs:$Ru, s6_0Ext:$s6), + "$Rd = add($Rs, add($Ru, #$s6))" , [], "", ALU64_tc_2_SLOT23> { 
bits<5> Rd; bits<5> Rs; @@ -1962,7 +1557,7 @@ def S4_addaddi : ALU64Inst <(outs IntRegs:$Rd), let isExtentSigned = 1, hasSideEffects = 0, hasNewValue = 1, isExtendable = 1, opExtentBits = 6, opExtendable = 2 in def S4_subaddi: ALU64Inst <(outs IntRegs:$Rd), - (ins IntRegs:$Rs, s6Ext:$s6, IntRegs:$Ru), + (ins IntRegs:$Rs, s6_0Ext:$s6, IntRegs:$Ru), "$Rd = add($Rs, sub(#$s6, $Ru))", [], "", ALU64_tc_2_SLOT23> { bits<5> Rd; @@ -1981,40 +1576,12 @@ def S4_subaddi: ALU64Inst <(outs IntRegs:$Rd), let Inst{4-0} = Ru; } -// Rd=add(Rs,sub(#s6,Ru)) -def: Pat<(add (i32 IntRegs:$src1), (sub s32ImmPred:$src2, - (i32 IntRegs:$src3))), - (S4_subaddi IntRegs:$src1, s32ImmPred:$src2, IntRegs:$src3)>; - -// Rd=sub(add(Rs,#s6),Ru) -def: Pat<(sub (add (i32 IntRegs:$src1), s32ImmPred:$src2), - (i32 IntRegs:$src3)), - (S4_subaddi IntRegs:$src1, s32ImmPred:$src2, IntRegs:$src3)>; - -// Rd=add(sub(Rs,Ru),#s6) -def: Pat<(add (sub (i32 IntRegs:$src1), (i32 IntRegs:$src3)), - (s32ImmPred:$src2)), - (S4_subaddi IntRegs:$src1, s32ImmPred:$src2, IntRegs:$src3)>; - - -// Add or subtract doublewords with carry. -//TODO: -// Rdd=add(Rss,Rtt,Px):carry -//TODO: -// Rdd=sub(Rss,Rtt,Px):carry - -// Extract bitfield -// Rdd=extract(Rss,#u6,#U6) -// Rdd=extract(Rss,Rtt) -// Rd=extract(Rs,Rtt) -// Rd=extract(Rs,#u5,#U5) - def S4_extractp_rp : T_S3op_64 < "extract", 0b11, 0b100, 0>; -def S4_extractp : T_S2op_extract <"extract", 0b1010, DoubleRegs, u6Imm>; +def S4_extractp : T_S2op_extract <"extract", 0b1010, DoubleRegs, u6_0Imm>; let hasNewValue = 1 in { def S4_extract_rp : T_S3op_extract<"extract", 0b01>; - def S4_extract : T_S2op_extract <"extract", 0b1101, IntRegs, u5Imm>; + def S4_extract : T_S2op_extract <"extract", 0b1101, IntRegs, u5_0Imm>; } // Complex add/sub halfwords/words @@ -2041,10 +1608,7 @@ let hasSideEffects = 0 in def M4_xor_xacc : SInst <(outs DoubleRegs:$Rxx), (ins DoubleRegs:$dst2, DoubleRegs:$Rss, DoubleRegs:$Rtt), - "$Rxx ^= xor($Rss, $Rtt)", - [(set (i64 DoubleRegs:$Rxx), - (xor (i64 DoubleRegs:$dst2), (xor (i64 DoubleRegs:$Rss), - (i64 DoubleRegs:$Rtt))))], + "$Rxx ^= xor($Rss, $Rtt)", [], "$dst2 = $Rxx", S_3op_tc_1_SLOT23> { bits<5> Rxx; bits<5> Rss; @@ -2064,7 +1628,7 @@ def M4_xor_xacc let hasSideEffects = 0 in def S4_vrcrotate : SInst <(outs DoubleRegs:$Rdd), - (ins DoubleRegs:$Rss, IntRegs:$Rt, u2Imm:$u2), + (ins DoubleRegs:$Rss, IntRegs:$Rt, u2_0Imm:$u2), "$Rdd = vrcrotate($Rss, $Rt, #$u2)", [], "", S_3op_tc_3x_SLOT23> { bits<5> Rdd; @@ -2088,7 +1652,7 @@ def S4_vrcrotate let hasSideEffects = 0 in def S4_vrcrotate_acc : SInst <(outs DoubleRegs:$Rxx), - (ins DoubleRegs:$dst2, DoubleRegs:$Rss, IntRegs:$Rt, u2Imm:$u2), + (ins DoubleRegs:$dst2, DoubleRegs:$Rss, IntRegs:$Rt, u2_0Imm:$u2), "$Rxx += vrcrotate($Rss, $Rt, #$u2)", [], "$dst2 = $Rxx", S_3op_tc_3x_SLOT23> { bits<5> Rxx; @@ -2144,10 +1708,8 @@ let isExtentSigned = 1, hasNewValue = 1, isExtendable = 1, opExtentBits = 10, opExtendable = 3 in def S4_or_andix: ALU64Inst<(outs IntRegs:$Rx), - (ins IntRegs:$Ru, IntRegs:$_src_, s10Ext:$s10), - "$Rx = or($Ru, and($_src_, #$s10))" , - [(set (i32 IntRegs:$Rx), - (or (i32 IntRegs:$Ru), (and (i32 IntRegs:$_src_), s32ImmPred:$s10)))] , + (ins IntRegs:$Ru, IntRegs:$_src_, s10_0Ext:$s10), + "$Rx = or($Ru, and($_src_, #$s10))" , [] , "$_src_ = $Rx", ALU64_tc_2_SLOT23> { bits<5> Rx; bits<5> Ru; @@ -2266,33 +1828,13 @@ def M4_xor_andn : T_MType_acc_rr < "^= and", 0b001, 0b010, 0, [], 1>; def M4_or_andn : T_MType_acc_rr < "|= and", 0b001, 0b000, 0, [], 1>; def M4_and_andn : T_MType_acc_rr < "&= and", 0b001, 0b001, 
0, [], 1>; -def: T_MType_acc_pat2 <M4_or_xor, xor, or>; -def: T_MType_acc_pat2 <M4_and_xor, xor, and>; -def: T_MType_acc_pat2 <M4_or_and, and, or>; -def: T_MType_acc_pat2 <M4_and_and, and, and>; -def: T_MType_acc_pat2 <M4_xor_and, and, xor>; -def: T_MType_acc_pat2 <M4_or_or, or, or>; -def: T_MType_acc_pat2 <M4_and_or, or, and>; -def: T_MType_acc_pat2 <M4_xor_or, or, xor>; - -class T_MType_acc_pat3 <InstHexagon MI, SDNode firstOp, SDNode secOp> - : Pat <(i32 (secOp IntRegs:$src1, (firstOp IntRegs:$src2, - (not IntRegs:$src3)))), - (i32 (MI IntRegs:$src1, IntRegs:$src2, IntRegs:$src3))>; - -def: T_MType_acc_pat3 <M4_or_andn, and, or>; -def: T_MType_acc_pat3 <M4_and_andn, and, and>; -def: T_MType_acc_pat3 <M4_xor_andn, and, xor>; - // Compound or-or and or-and let isExtentSigned = 1, InputType = "imm", hasNewValue = 1, isExtendable = 1, opExtentBits = 10, opExtendable = 3 in class T_CompOR <string mnemonic, bits<2> MajOp, SDNode OpNode> : MInst_acc <(outs IntRegs:$Rx), - (ins IntRegs:$src1, IntRegs:$Rs, s10Ext:$s10), - "$Rx |= "#mnemonic#"($Rs, #$s10)", - [(set (i32 IntRegs:$Rx), (or (i32 IntRegs:$src1), - (OpNode (i32 IntRegs:$Rs), s32ImmPred:$s10)))], + (ins IntRegs:$src1, IntRegs:$Rs, s10_0Ext:$s10), + "$Rx |= "#mnemonic#"($Rs, #$s10)", [], "$src1 = $Rx", ALU64_tc_2_SLOT23>, ImmRegRel { bits<5> Rx; bits<5> Rs; @@ -2363,21 +1905,8 @@ def S2_ct0p : T_COUNT_LEADING_64<"ct0", 0b111, 0b010>; def S2_ct1p : T_COUNT_LEADING_64<"ct1", 0b111, 0b100>; def S4_clbpnorm : T_COUNT_LEADING_64<"normamt", 0b011, 0b000>; -// Count trailing zeros: 64-bit. -def: Pat<(i32 (trunc (cttz I64:$Rss))), (S2_ct0p I64:$Rss)>; - -// Count trailing ones: 64-bit. -def: Pat<(i32 (trunc (cttz (not I64:$Rss)))), (S2_ct1p I64:$Rss)>; - -// Define leading/trailing patterns that require zero-extensions to 64 bits. -def: Pat<(i64 (ctlz I64:$Rss)), (Zext64 (S2_cl0p I64:$Rss))>; -def: Pat<(i64 (cttz I64:$Rss)), (Zext64 (S2_ct0p I64:$Rss))>; -def: Pat<(i64 (ctlz (not I64:$Rss))), (Zext64 (S2_cl1p I64:$Rss))>; -def: Pat<(i64 (cttz (not I64:$Rss))), (Zext64 (S2_ct1p I64:$Rss))>; - - let hasSideEffects = 0, hasNewValue = 1 in -def S4_clbaddi : SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, s6Imm:$s6), +def S4_clbaddi : SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, s6_0Imm:$s6), "$Rd = add(clb($Rs), #$s6)", [], "", S_2op_tc_2_SLOT23> { bits<5> Rs; bits<5> Rd; @@ -2392,7 +1921,7 @@ def S4_clbaddi : SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, s6Imm:$s6), } let hasSideEffects = 0, hasNewValue = 1 in -def S4_clbpaddi : SInst<(outs IntRegs:$Rd), (ins DoubleRegs:$Rs, s6Imm:$s6), +def S4_clbpaddi : SInst<(outs IntRegs:$Rd), (ins DoubleRegs:$Rs, s6_0Imm:$s6), "$Rd = add(clb($Rs), #$s6)", [], "", S_2op_tc_2_SLOT23> { bits<5> Rs; bits<5> Rd; @@ -2411,41 +1940,10 @@ def S4_clbpaddi : SInst<(outs IntRegs:$Rd), (ins DoubleRegs:$Rs, s6Imm:$s6), def S4_ntstbit_i : T_TEST_BIT_IMM<"!tstbit", 0b001>; def S4_ntstbit_r : T_TEST_BIT_REG<"!tstbit", 1>; -let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm. - def: Pat<(i1 (seteq (and (shl 1, u5ImmPred:$u5), (i32 IntRegs:$Rs)), 0)), - (S4_ntstbit_i (i32 IntRegs:$Rs), u5ImmPred:$u5)>; - def: Pat<(i1 (seteq (and (shl 1, (i32 IntRegs:$Rt)), (i32 IntRegs:$Rs)), 0)), - (S4_ntstbit_r (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))>; -} - -// Add extra complexity to prefer these instructions over bitsset/bitsclr. -// The reason is that tstbit/ntstbit can be folded into a compound instruction: -// if ([!]tstbit(...)) jump ... 
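// (Editor's example, not in the patch.) With Set5ImmPred matching masks that
// have a single bit set and BITPOS32 converting the mask to a bit index, the
// removed pattern below rewrote, e.g.,
//
//   (setne (and $Rs, 32), 0)      // $Rs & (1 << 5)
//
// into S2_tstbit_i($Rs, #5), which a compound-instruction pass can then fold
// into "if (tstbit(...)) jump ...".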
-let AddedComplexity = 100 in -def: Pat<(i1 (setne (and (i32 IntRegs:$Rs), (i32 Set5ImmPred:$u5)), (i32 0))), - (S2_tstbit_i (i32 IntRegs:$Rs), (BITPOS32 Set5ImmPred:$u5))>; - -let AddedComplexity = 100 in -def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), (i32 Set5ImmPred:$u5)), (i32 0))), - (S4_ntstbit_i (i32 IntRegs:$Rs), (BITPOS32 Set5ImmPred:$u5))>; - def C4_nbitsset : T_TEST_BITS_REG<"!bitsset", 0b01, 1>; def C4_nbitsclr : T_TEST_BITS_REG<"!bitsclr", 0b10, 1>; def C4_nbitsclri : T_TEST_BITS_IMM<"!bitsclr", 0b10, 1>; -// Do not increase complexity of these patterns. In the DAG, "cmp i8" may be -// represented as a compare against "value & 0xFF", which is an exact match -// for cmpb (same for cmph). The patterns below do not contain any additional -// complexity that would make them preferable, and if they were actually used -// instead of cmpb/cmph, they would result in a compare against register that -// is loaded with the byte/half mask (i.e. 0xFF or 0xFFFF). -def: Pat<(i1 (setne (and I32:$Rs, u6ImmPred:$u6), 0)), - (C4_nbitsclri I32:$Rs, u6ImmPred:$u6)>; -def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), 0)), - (C4_nbitsclr I32:$Rs, I32:$Rt)>; -def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), I32:$Rt)), - (C4_nbitsset I32:$Rs, I32:$Rt)>; - //===----------------------------------------------------------------------===// // XTYPE/BIT - //===----------------------------------------------------------------------===// @@ -2458,11 +1956,8 @@ def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), I32:$Rt)), let hasNewValue = 1, isExtendable = 1, opExtentBits = 6, opExtendable = 1 in def M4_mpyri_addi : MInst<(outs IntRegs:$Rd), - (ins u6Ext:$u6, IntRegs:$Rs, u6Imm:$U6), - "$Rd = add(#$u6, mpyi($Rs, #$U6))" , - [(set (i32 IntRegs:$Rd), - (add (mul (i32 IntRegs:$Rs), u6ImmPred:$U6), - u32ImmPred:$u6))] ,"",ALU64_tc_3x_SLOT23> { + (ins u6_0Ext:$u6, IntRegs:$Rs, u6_0Imm:$U6), + "$Rd = add(#$u6, mpyi($Rs, #$U6))" , [],"",ALU64_tc_3x_SLOT23> { bits<5> Rd; bits<6> u6; bits<5> Rs; @@ -2484,11 +1979,8 @@ def M4_mpyri_addi : MInst<(outs IntRegs:$Rd), let CextOpcode = "ADD_MPY", InputType = "imm", hasNewValue = 1, isExtendable = 1, opExtentBits = 6, opExtendable = 1 in def M4_mpyrr_addi : MInst <(outs IntRegs:$Rd), - (ins u6Ext:$u6, IntRegs:$Rs, IntRegs:$Rt), - "$Rd = add(#$u6, mpyi($Rs, $Rt))" , - [(set (i32 IntRegs:$Rd), - (add (mul (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), u32ImmPred:$u6))], - "", ALU64_tc_3x_SLOT23>, ImmRegRel { + (ins u6_0Ext:$u6, IntRegs:$Rs, IntRegs:$Rt), + "$Rd = add(#$u6, mpyi($Rs, $Rt))" , [], "", ALU64_tc_3x_SLOT23>, ImmRegRel { bits<5> Rd; bits<6> u6; bits<5> Rs; @@ -2509,9 +2001,7 @@ let hasNewValue = 1 in class T_AddMpy <bit MajOp, PatLeaf ImmPred, dag ins> : ALU64Inst <(outs IntRegs:$dst), ins, "$dst = add($src1, mpyi("#!if(MajOp,"$src3, #$src2))", - "#$src2, $src3))"), - [(set (i32 IntRegs:$dst), - (add (i32 IntRegs:$src1), (mul (i32 IntRegs:$src3), ImmPred:$src2)))], + "#$src2, $src3))"), [], "", ALU64_tc_3x_SLOT23> { bits<5> dst; bits<5> src1; @@ -2537,16 +2027,14 @@ def M4_mpyri_addr_u2 : T_AddMpy<0b0, u6_2ImmPred, let isExtendable = 1, opExtentBits = 6, opExtendable = 3, CextOpcode = "ADD_MPY", InputType = "imm" in -def M4_mpyri_addr : T_AddMpy<0b1, u32ImmPred, - (ins IntRegs:$src1, IntRegs:$src3, u6Ext:$src2)>, ImmRegRel; +def M4_mpyri_addr : T_AddMpy<0b1, u32_0ImmPred, + (ins IntRegs:$src1, IntRegs:$src3, u6_0Ext:$src2)>, ImmRegRel; // Rx=add(Ru,mpyi(Rx,Rs)) let CextOpcode = "ADD_MPY", InputType = "reg", hasNewValue = 1 in def M4_mpyrr_addr: MInst_acc <(outs IntRegs:$Rx), (ins IntRegs:$Ru, 
IntRegs:$_src_, IntRegs:$Rs), - "$Rx = add($Ru, mpyi($_src_, $Rs))", - [(set (i32 IntRegs:$Rx), (add (i32 IntRegs:$Ru), - (mul (i32 IntRegs:$_src_), (i32 IntRegs:$Rs))))], + "$Rx = add($Ru, mpyi($_src_, $Rs))", [], "$_src_ = $Rx", M_tc_3x_SLOT23>, ImmRegRel { bits<5> Rx; bits<5> Ru; @@ -2637,24 +2125,23 @@ class T_vcmpImm <string Str, bits<2> cmpOp, bits<2> minOp, Operand ImmOprnd> // Vector compare bytes def A4_vcmpbgt : T_vcmp <"vcmpb.gt", 0b1010>; -def: T_vcmp_pat<A4_vcmpbgt, setgt, v8i8>; let AsmString = "$Pd = any8(vcmpb.eq($Rss, $Rtt))" in def A4_vcmpbeq_any : T_vcmp <"any8(vcmpb.gt", 0b1000>; -def A4_vcmpbeqi : T_vcmpImm <"vcmpb.eq", 0b00, 0b00, u8Imm>; -def A4_vcmpbgti : T_vcmpImm <"vcmpb.gt", 0b01, 0b00, s8Imm>; -def A4_vcmpbgtui : T_vcmpImm <"vcmpb.gtu", 0b10, 0b00, u7Imm>; +def A4_vcmpbeqi : T_vcmpImm <"vcmpb.eq", 0b00, 0b00, u8_0Imm>; +def A4_vcmpbgti : T_vcmpImm <"vcmpb.gt", 0b01, 0b00, s8_0Imm>; +def A4_vcmpbgtui : T_vcmpImm <"vcmpb.gtu", 0b10, 0b00, u7_0Imm>; // Vector compare halfwords -def A4_vcmpheqi : T_vcmpImm <"vcmph.eq", 0b00, 0b01, s8Imm>; -def A4_vcmphgti : T_vcmpImm <"vcmph.gt", 0b01, 0b01, s8Imm>; -def A4_vcmphgtui : T_vcmpImm <"vcmph.gtu", 0b10, 0b01, u7Imm>; +def A4_vcmpheqi : T_vcmpImm <"vcmph.eq", 0b00, 0b01, s8_0Imm>; +def A4_vcmphgti : T_vcmpImm <"vcmph.gt", 0b01, 0b01, s8_0Imm>; +def A4_vcmphgtui : T_vcmpImm <"vcmph.gtu", 0b10, 0b01, u7_0Imm>; // Vector compare words -def A4_vcmpweqi : T_vcmpImm <"vcmpw.eq", 0b00, 0b10, s8Imm>; -def A4_vcmpwgti : T_vcmpImm <"vcmpw.gt", 0b01, 0b10, s8Imm>; -def A4_vcmpwgtui : T_vcmpImm <"vcmpw.gtu", 0b10, 0b10, u7Imm>; +def A4_vcmpweqi : T_vcmpImm <"vcmpw.eq", 0b00, 0b10, s8_0Imm>; +def A4_vcmpwgti : T_vcmpImm <"vcmpw.gt", 0b01, 0b10, s8_0Imm>; +def A4_vcmpwgtui : T_vcmpImm <"vcmpw.gtu", 0b10, 0b10, u7_0Imm>; //===----------------------------------------------------------------------===// // XTYPE/SHIFT + @@ -2666,13 +2153,11 @@ def A4_vcmpwgtui : T_vcmpImm <"vcmpw.gtu", 0b10, 0b10, u7Imm>; // Rx=or(#u8,asl(Rx,#U5)) Rx=or(#u8,lsr(Rx,#U5)) let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8, hasNewValue = 1, opNewValue = 0 in -class T_S4_ShiftOperate<string MnOp, string MnSh, SDNode Op, SDNode Sh, - bit asl_lsr, bits<2> MajOp, InstrItinClass Itin> - : MInst_acc<(outs IntRegs:$Rd), (ins u8Ext:$u8, IntRegs:$Rx, u5Imm:$U5), +class T_S4_ShiftOperate<string MnOp, string MnSh, bit asl_lsr, + bits<2> MajOp, InstrItinClass Itin> + : MInst_acc<(outs IntRegs:$Rd), (ins u8_0Ext:$u8, IntRegs:$Rx, u5_0Imm:$U5), "$Rd = "#MnOp#"(#$u8, "#MnSh#"($Rx, #$U5))", - [(set (i32 IntRegs:$Rd), - (Op (Sh I32:$Rx, u5ImmPred:$U5), u32ImmPred:$u8))], - "$Rd = $Rx", Itin> { + [], "$Rd = $Rx", Itin> { bits<5> Rd; bits<8> u8; @@ -2691,32 +2176,15 @@ class T_S4_ShiftOperate<string MnOp, string MnSh, SDNode Op, SDNode Sh, let Inst{2-1} = MajOp; } -multiclass T_ShiftOperate<string mnemonic, SDNode Op, bits<2> MajOp, - InstrItinClass Itin> { - def _asl_ri : T_S4_ShiftOperate<mnemonic, "asl", Op, shl, 0, MajOp, Itin>; - def _lsr_ri : T_S4_ShiftOperate<mnemonic, "lsr", Op, srl, 1, MajOp, Itin>; -} - -let AddedComplexity = 200 in { - defm S4_addi : T_ShiftOperate<"add", add, 0b10, ALU64_tc_2_SLOT23>; - defm S4_andi : T_ShiftOperate<"and", and, 0b00, ALU64_tc_2_SLOT23>; +multiclass T_ShiftOperate<string mnemonic, bits<2> MajOp, InstrItinClass Itin> { + def _asl_ri : T_S4_ShiftOperate<mnemonic, "asl", 0, MajOp, Itin>; + def _lsr_ri : T_S4_ShiftOperate<mnemonic, "lsr", 1, MajOp, Itin>; } -let AddedComplexity = 30 in -defm S4_ori : 
T_ShiftOperate<"or", or, 0b01, ALU64_tc_1_SLOT23>; - -defm S4_subi : T_ShiftOperate<"sub", sub, 0b11, ALU64_tc_1_SLOT23>; - -let AddedComplexity = 200 in { - def: Pat<(add addrga:$addr, (shl I32:$src2, u5ImmPred:$src3)), - (S4_addi_asl_ri addrga:$addr, IntRegs:$src2, u5ImmPred:$src3)>; - def: Pat<(add addrga:$addr, (srl I32:$src2, u5ImmPred:$src3)), - (S4_addi_lsr_ri addrga:$addr, IntRegs:$src2, u5ImmPred:$src3)>; - def: Pat<(sub addrga:$addr, (shl I32:$src2, u5ImmPred:$src3)), - (S4_subi_asl_ri addrga:$addr, IntRegs:$src2, u5ImmPred:$src3)>; - def: Pat<(sub addrga:$addr, (srl I32:$src2, u5ImmPred:$src3)), - (S4_subi_lsr_ri addrga:$addr, IntRegs:$src2, u5ImmPred:$src3)>; -} +defm S4_addi : T_ShiftOperate<"add", 0b10, ALU64_tc_2_SLOT23>; +defm S4_andi : T_ShiftOperate<"and", 0b00, ALU64_tc_2_SLOT23>; +defm S4_ori : T_ShiftOperate<"or", 0b01, ALU64_tc_1_SLOT23>; +defm S4_subi : T_ShiftOperate<"sub", 0b11, ALU64_tc_1_SLOT23>; // Vector conditional negate // Rdd=vcnegh(Rss,Rt) @@ -2806,11 +2274,8 @@ def A4_vrminuw : T_S3op_6 < "vrminuw", 0b110, 1>; // Shift an immediate left by register amount. let hasNewValue = 1, hasSideEffects = 0 in -def S4_lsli: SInst <(outs IntRegs:$Rd), (ins s6Imm:$s6, IntRegs:$Rt), - "$Rd = lsl(#$s6, $Rt)" , - [(set (i32 IntRegs:$Rd), (shl s6ImmPred:$s6, - (i32 IntRegs:$Rt)))], - "", S_3op_tc_1_SLOT23> { +def S4_lsli: SInst <(outs IntRegs:$Rd), (ins s6_0Imm:$s6, IntRegs:$Rt), + "$Rd = lsl(#$s6, $Rt)" , [], "", S_3op_tc_1_SLOT23> { bits<5> Rd; bits<6> s6; bits<5> Rt; @@ -2833,71 +2298,6 @@ def S4_lsli: SInst <(outs IntRegs:$Rd), (ins s6Imm:$s6, IntRegs:$Rt), // MEMOP //===----------------------------------------------------------------------===// -def m5Imm8Pred : PatLeaf<(i32 imm), [{ - int8_t v = (int8_t)N->getSExtValue(); - return v > -32 && v <= -1; -}]>; - -def m5Imm16Pred : PatLeaf<(i32 imm), [{ - int16_t v = (int16_t)N->getSExtValue(); - return v > -32 && v <= -1; -}]>; - -def Clr5Imm8Pred : PatLeaf<(i32 imm), [{ - uint32_t v = (uint8_t)~N->getZExtValue(); - return ImmIsSingleBit(v); -}]>; - -def Clr5Imm16Pred : PatLeaf<(i32 imm), [{ - uint32_t v = (uint16_t)~N->getZExtValue(); - return ImmIsSingleBit(v); -}]>; - -def Set5Imm8 : SDNodeXForm<imm, [{ - uint32_t imm = (uint8_t)N->getZExtValue(); - return XformMskToBitPosU5Imm(imm, SDLoc(N)); -}]>; - -def Set5Imm16 : SDNodeXForm<imm, [{ - uint32_t imm = (uint16_t)N->getZExtValue(); - return XformMskToBitPosU5Imm(imm, SDLoc(N)); -}]>; - -def Set5Imm32 : SDNodeXForm<imm, [{ - uint32_t imm = (uint32_t)N->getZExtValue(); - return XformMskToBitPosU5Imm(imm, SDLoc(N)); -}]>; - -def Clr5Imm8 : SDNodeXForm<imm, [{ - uint32_t imm = (uint8_t)~N->getZExtValue(); - return XformMskToBitPosU5Imm(imm, SDLoc(N)); -}]>; - -def Clr5Imm16 : SDNodeXForm<imm, [{ - uint32_t imm = (uint16_t)~N->getZExtValue(); - return XformMskToBitPosU5Imm(imm, SDLoc(N)); -}]>; - -def Clr5Imm32 : SDNodeXForm<imm, [{ - int32_t imm = (int32_t)~N->getZExtValue(); - return XformMskToBitPosU5Imm(imm, SDLoc(N)); -}]>; - -def NegImm8 : SDNodeXForm<imm, [{ - int8_t V = N->getSExtValue(); - return CurDAG->getTargetConstant(-V, SDLoc(N), MVT::i32); -}]>; - -def NegImm16 : SDNodeXForm<imm, [{ - int16_t V = N->getSExtValue(); - return CurDAG->getTargetConstant(-V, SDLoc(N), MVT::i32); -}]>; - -def NegImm32 : SDNodeXForm<imm, [{ - return CurDAG->getTargetConstant(-N->getSExtValue(), SDLoc(N), MVT::i32); -}]>; - -def IdImm : SDNodeXForm<imm, [{ return SDValue(N, 0); }]>; //===----------------------------------------------------------------------===// // Template class 
for MemOp instructions with the register value. @@ -2936,7 +2336,7 @@ class MemOp_rr_base <string opc, bits<2> opcBits, Operand ImmOp, class MemOp_ri_base <string opc, bits<2> opcBits, Operand ImmOp, string memOp, bits<2> memOpBits> : MEMInst_V4 <(outs), - (ins IntRegs:$base, ImmOp:$offset, u5Imm:$delta), + (ins IntRegs:$base, ImmOp:$offset, u5_0Imm:$delta), opc#"($base+#$offset)"#memOp#"#$delta" #!if(memOpBits{1},")", ""), // clrbit, setbit - include ')' []>, @@ -2996,235 +2396,6 @@ let isExtendable = 1, opExtendable = 1, isExtentSigned = 0 in { } -multiclass Memopxr_simple_pat<PatFrag Load, PatFrag Store, SDNode Oper, - InstHexagon MI> { - // Addr: i32 - def: Pat<(Store (Oper (Load I32:$Rs), I32:$A), I32:$Rs), - (MI I32:$Rs, 0, I32:$A)>; - // Addr: fi - def: Pat<(Store (Oper (Load AddrFI:$Rs), I32:$A), AddrFI:$Rs), - (MI AddrFI:$Rs, 0, I32:$A)>; -} - -multiclass Memopxr_add_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred, - SDNode Oper, InstHexagon MI> { - // Addr: i32 - def: Pat<(Store (Oper (Load (add I32:$Rs, ImmPred:$Off)), I32:$A), - (add I32:$Rs, ImmPred:$Off)), - (MI I32:$Rs, imm:$Off, I32:$A)>; - def: Pat<(Store (Oper (Load (orisadd I32:$Rs, ImmPred:$Off)), I32:$A), - (orisadd I32:$Rs, ImmPred:$Off)), - (MI I32:$Rs, imm:$Off, I32:$A)>; - // Addr: fi - def: Pat<(Store (Oper (Load (add AddrFI:$Rs, ImmPred:$Off)), I32:$A), - (add AddrFI:$Rs, ImmPred:$Off)), - (MI AddrFI:$Rs, imm:$Off, I32:$A)>; - def: Pat<(Store (Oper (Load (orisadd AddrFI:$Rs, ImmPred:$Off)), I32:$A), - (orisadd AddrFI:$Rs, ImmPred:$Off)), - (MI AddrFI:$Rs, imm:$Off, I32:$A)>; -} - -multiclass Memopxr_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred, - SDNode Oper, InstHexagon MI> { - defm: Memopxr_simple_pat <Load, Store, Oper, MI>; - defm: Memopxr_add_pat <Load, Store, ImmPred, Oper, MI>; -} - -let AddedComplexity = 180 in { - // add reg - defm: Memopxr_pat<extloadi8, truncstorei8, u6_0ImmPred, add, - /*anyext*/ L4_add_memopb_io>; - defm: Memopxr_pat<sextloadi8, truncstorei8, u6_0ImmPred, add, - /*sext*/ L4_add_memopb_io>; - defm: Memopxr_pat<zextloadi8, truncstorei8, u6_0ImmPred, add, - /*zext*/ L4_add_memopb_io>; - defm: Memopxr_pat<extloadi16, truncstorei16, u6_1ImmPred, add, - /*anyext*/ L4_add_memoph_io>; - defm: Memopxr_pat<sextloadi16, truncstorei16, u6_1ImmPred, add, - /*sext*/ L4_add_memoph_io>; - defm: Memopxr_pat<zextloadi16, truncstorei16, u6_1ImmPred, add, - /*zext*/ L4_add_memoph_io>; - defm: Memopxr_pat<load, store, u6_2ImmPred, add, L4_add_memopw_io>; - - // sub reg - defm: Memopxr_pat<extloadi8, truncstorei8, u6_0ImmPred, sub, - /*anyext*/ L4_sub_memopb_io>; - defm: Memopxr_pat<sextloadi8, truncstorei8, u6_0ImmPred, sub, - /*sext*/ L4_sub_memopb_io>; - defm: Memopxr_pat<zextloadi8, truncstorei8, u6_0ImmPred, sub, - /*zext*/ L4_sub_memopb_io>; - defm: Memopxr_pat<extloadi16, truncstorei16, u6_1ImmPred, sub, - /*anyext*/ L4_sub_memoph_io>; - defm: Memopxr_pat<sextloadi16, truncstorei16, u6_1ImmPred, sub, - /*sext*/ L4_sub_memoph_io>; - defm: Memopxr_pat<zextloadi16, truncstorei16, u6_1ImmPred, sub, - /*zext*/ L4_sub_memoph_io>; - defm: Memopxr_pat<load, store, u6_2ImmPred, sub, L4_sub_memopw_io>; - - // and reg - defm: Memopxr_pat<extloadi8, truncstorei8, u6_0ImmPred, and, - /*anyext*/ L4_and_memopb_io>; - defm: Memopxr_pat<sextloadi8, truncstorei8, u6_0ImmPred, and, - /*sext*/ L4_and_memopb_io>; - defm: Memopxr_pat<zextloadi8, truncstorei8, u6_0ImmPred, and, - /*zext*/ L4_and_memopb_io>; - defm: Memopxr_pat<extloadi16, truncstorei16, u6_1ImmPred, and, - /*anyext*/ L4_and_memoph_io>; - defm: 
Memopxr_pat<sextloadi16, truncstorei16, u6_1ImmPred, and, - /*sext*/ L4_and_memoph_io>; - defm: Memopxr_pat<zextloadi16, truncstorei16, u6_1ImmPred, and, - /*zext*/ L4_and_memoph_io>; - defm: Memopxr_pat<load, store, u6_2ImmPred, and, L4_and_memopw_io>; - - // or reg - defm: Memopxr_pat<extloadi8, truncstorei8, u6_0ImmPred, or, - /*anyext*/ L4_or_memopb_io>; - defm: Memopxr_pat<sextloadi8, truncstorei8, u6_0ImmPred, or, - /*sext*/ L4_or_memopb_io>; - defm: Memopxr_pat<zextloadi8, truncstorei8, u6_0ImmPred, or, - /*zext*/ L4_or_memopb_io>; - defm: Memopxr_pat<extloadi16, truncstorei16, u6_1ImmPred, or, - /*anyext*/ L4_or_memoph_io>; - defm: Memopxr_pat<sextloadi16, truncstorei16, u6_1ImmPred, or, - /*sext*/ L4_or_memoph_io>; - defm: Memopxr_pat<zextloadi16, truncstorei16, u6_1ImmPred, or, - /*zext*/ L4_or_memoph_io>; - defm: Memopxr_pat<load, store, u6_2ImmPred, or, L4_or_memopw_io>; -} - - -multiclass Memopxi_simple_pat<PatFrag Load, PatFrag Store, SDNode Oper, - PatFrag Arg, SDNodeXForm ArgMod, - InstHexagon MI> { - // Addr: i32 - def: Pat<(Store (Oper (Load I32:$Rs), Arg:$A), I32:$Rs), - (MI I32:$Rs, 0, (ArgMod Arg:$A))>; - // Addr: fi - def: Pat<(Store (Oper (Load AddrFI:$Rs), Arg:$A), AddrFI:$Rs), - (MI AddrFI:$Rs, 0, (ArgMod Arg:$A))>; -} - -multiclass Memopxi_add_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred, - SDNode Oper, PatFrag Arg, SDNodeXForm ArgMod, - InstHexagon MI> { - // Addr: i32 - def: Pat<(Store (Oper (Load (add I32:$Rs, ImmPred:$Off)), Arg:$A), - (add I32:$Rs, ImmPred:$Off)), - (MI I32:$Rs, imm:$Off, (ArgMod Arg:$A))>; - def: Pat<(Store (Oper (Load (orisadd I32:$Rs, ImmPred:$Off)), Arg:$A), - (orisadd I32:$Rs, ImmPred:$Off)), - (MI I32:$Rs, imm:$Off, (ArgMod Arg:$A))>; - // Addr: fi - def: Pat<(Store (Oper (Load (add AddrFI:$Rs, ImmPred:$Off)), Arg:$A), - (add AddrFI:$Rs, ImmPred:$Off)), - (MI AddrFI:$Rs, imm:$Off, (ArgMod Arg:$A))>; - def: Pat<(Store (Oper (Load (orisadd AddrFI:$Rs, ImmPred:$Off)), Arg:$A), - (orisadd AddrFI:$Rs, ImmPred:$Off)), - (MI AddrFI:$Rs, imm:$Off, (ArgMod Arg:$A))>; -} - -multiclass Memopxi_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred, - SDNode Oper, PatFrag Arg, SDNodeXForm ArgMod, - InstHexagon MI> { - defm: Memopxi_simple_pat <Load, Store, Oper, Arg, ArgMod, MI>; - defm: Memopxi_add_pat <Load, Store, ImmPred, Oper, Arg, ArgMod, MI>; -} - - -let AddedComplexity = 200 in { - // add imm - defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, add, u5ImmPred, - /*anyext*/ IdImm, L4_iadd_memopb_io>; - defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, add, u5ImmPred, - /*sext*/ IdImm, L4_iadd_memopb_io>; - defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, add, u5ImmPred, - /*zext*/ IdImm, L4_iadd_memopb_io>; - defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, add, u5ImmPred, - /*anyext*/ IdImm, L4_iadd_memoph_io>; - defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, add, u5ImmPred, - /*sext*/ IdImm, L4_iadd_memoph_io>; - defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, add, u5ImmPred, - /*zext*/ IdImm, L4_iadd_memoph_io>; - defm: Memopxi_pat<load, store, u6_2ImmPred, add, u5ImmPred, IdImm, - L4_iadd_memopw_io>; - defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, sub, m5Imm8Pred, - /*anyext*/ NegImm8, L4_iadd_memopb_io>; - defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, sub, m5Imm8Pred, - /*sext*/ NegImm8, L4_iadd_memopb_io>; - defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, sub, m5Imm8Pred, - /*zext*/ NegImm8, L4_iadd_memopb_io>; - defm: Memopxi_pat<extloadi16, truncstorei16, 
u6_1ImmPred, sub, m5Imm16Pred, - /*anyext*/ NegImm16, L4_iadd_memoph_io>; - defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, sub, m5Imm16Pred, - /*sext*/ NegImm16, L4_iadd_memoph_io>; - defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, sub, m5Imm16Pred, - /*zext*/ NegImm16, L4_iadd_memoph_io>; - defm: Memopxi_pat<load, store, u6_2ImmPred, sub, m5ImmPred, NegImm32, - L4_iadd_memopw_io>; - - // sub imm - defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, sub, u5ImmPred, - /*anyext*/ IdImm, L4_isub_memopb_io>; - defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, sub, u5ImmPred, - /*sext*/ IdImm, L4_isub_memopb_io>; - defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, sub, u5ImmPred, - /*zext*/ IdImm, L4_isub_memopb_io>; - defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, sub, u5ImmPred, - /*anyext*/ IdImm, L4_isub_memoph_io>; - defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, sub, u5ImmPred, - /*sext*/ IdImm, L4_isub_memoph_io>; - defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, sub, u5ImmPred, - /*zext*/ IdImm, L4_isub_memoph_io>; - defm: Memopxi_pat<load, store, u6_2ImmPred, sub, u5ImmPred, IdImm, - L4_isub_memopw_io>; - defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, add, m5Imm8Pred, - /*anyext*/ NegImm8, L4_isub_memopb_io>; - defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, add, m5Imm8Pred, - /*sext*/ NegImm8, L4_isub_memopb_io>; - defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, add, m5Imm8Pred, - /*zext*/ NegImm8, L4_isub_memopb_io>; - defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, add, m5Imm16Pred, - /*anyext*/ NegImm16, L4_isub_memoph_io>; - defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, add, m5Imm16Pred, - /*sext*/ NegImm16, L4_isub_memoph_io>; - defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, add, m5Imm16Pred, - /*zext*/ NegImm16, L4_isub_memoph_io>; - defm: Memopxi_pat<load, store, u6_2ImmPred, add, m5ImmPred, NegImm32, - L4_isub_memopw_io>; - - // clrbit imm - defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, and, Clr5Imm8Pred, - /*anyext*/ Clr5Imm8, L4_iand_memopb_io>; - defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, and, Clr5Imm8Pred, - /*sext*/ Clr5Imm8, L4_iand_memopb_io>; - defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, and, Clr5Imm8Pred, - /*zext*/ Clr5Imm8, L4_iand_memopb_io>; - defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, and, Clr5Imm16Pred, - /*anyext*/ Clr5Imm16, L4_iand_memoph_io>; - defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, and, Clr5Imm16Pred, - /*sext*/ Clr5Imm16, L4_iand_memoph_io>; - defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, and, Clr5Imm16Pred, - /*zext*/ Clr5Imm16, L4_iand_memoph_io>; - defm: Memopxi_pat<load, store, u6_2ImmPred, and, Clr5ImmPred, Clr5Imm32, - L4_iand_memopw_io>; - - // setbit imm - defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, or, Set5ImmPred, - /*anyext*/ Set5Imm8, L4_ior_memopb_io>; - defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, or, Set5ImmPred, - /*sext*/ Set5Imm8, L4_ior_memopb_io>; - defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, or, Set5ImmPred, - /*zext*/ Set5Imm8, L4_ior_memopb_io>; - defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, or, Set5ImmPred, - /*anyext*/ Set5Imm16, L4_ior_memoph_io>; - defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, or, Set5ImmPred, - /*sext*/ Set5Imm16, L4_ior_memoph_io>; - defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, or, Set5ImmPred, - /*zext*/ Set5Imm16, 
L4_ior_memoph_io>; - defm: Memopxi_pat<load, store, u6_2ImmPred, or, Set5ImmPred, Set5Imm32, - L4_ior_memopw_io>; -} - //===----------------------------------------------------------------------===// // XTYPE/PRED + //===----------------------------------------------------------------------===// @@ -3241,57 +2412,9 @@ let AddedComplexity = 200 in { // Pd=cmpb.eq(Rs,#u8) // p=!cmp.eq(r1,#s10) -def C4_cmpneqi : T_CMP <"cmp.eq", 0b00, 1, s10Ext>; -def C4_cmpltei : T_CMP <"cmp.gt", 0b01, 1, s10Ext>; -def C4_cmplteui : T_CMP <"cmp.gtu", 0b10, 1, u9Ext>; - -def : T_CMP_pat <C4_cmpneqi, setne, s32ImmPred>; -def : T_CMP_pat <C4_cmpltei, setle, s32ImmPred>; -def : T_CMP_pat <C4_cmplteui, setule, u9ImmPred>; - -// rs <= rt -> !(rs > rt). -/* -def: Pat<(i1 (setle (i32 IntRegs:$src1), s32ImmPred:$src2)), - (C2_not (C2_cmpgti IntRegs:$src1, s32ImmPred:$src2))>; -// (C4_cmpltei IntRegs:$src1, s32ImmPred:$src2)>; -*/ -// Map cmplt(Rs, Imm) -> !cmpgt(Rs, Imm-1). -def: Pat<(i1 (setlt (i32 IntRegs:$src1), s32ImmPred:$src2)), - (C4_cmpltei IntRegs:$src1, (DEC_CONST_SIGNED s32ImmPred:$src2))>; - -// rs != rt -> !(rs == rt). -def: Pat<(i1 (setne (i32 IntRegs:$src1), s32ImmPred:$src2)), - (C4_cmpneqi IntRegs:$src1, s32ImmPred:$src2)>; - -// SDNode for converting immediate C to C-1. -def DEC_CONST_BYTE : SDNodeXForm<imm, [{ - // Return the byte immediate const-1 as an SDNode. - int32_t imm = N->getSExtValue(); - return XformU7ToU7M1Imm(imm, SDLoc(N)); -}]>; - -// For the sequence -// zext( setult ( and(Rs, 255), u8)) -// Use the isdigit transformation below - -// Generate code of the form 'C2_muxii(cmpbgtui(Rdd, C-1),0,1)' -// for C code of the form r = ((c>='0') & (c<='9')) ? 1 : 0;. -// The isdigit transformation relies on two 'clever' aspects: -// 1) The data type is unsigned which allows us to eliminate a zero test after -// biasing the expression by 48. We are depending on the representation of -// the unsigned types, and semantics. -// 2) The front end has converted <= 9 into < 10 on entry to LLVM -// -// For the C code: -// retval = ((c>='0') & (c<='9')) ? 1 : 0; -// The code is transformed upstream of llvm into -// retval = (c-48) < 10 ? 
1 : 0; -let AddedComplexity = 139 in -def: Pat<(i32 (zext (i1 (setult (i32 (and (i32 IntRegs:$src1), 255)), - u7StrictPosImmPred:$src2)))), - (C2_muxii (A4_cmpbgtui IntRegs:$src1, - (DEC_CONST_BYTE u7StrictPosImmPred:$src2)), - 0, 1)>; +def C4_cmpneqi : T_CMP <"cmp.eq", 0b00, 1, s10_0Ext>; +def C4_cmpltei : T_CMP <"cmp.gt", 0b01, 1, s10_0Ext>; +def C4_cmplteui : T_CMP <"cmp.gtu", 0b10, 1, u9_0Ext>; //===----------------------------------------------------------------------===// // XTYPE/PRED - @@ -3450,7 +2573,7 @@ class T_StoreAbsGP <string mnemonic, RegisterClass RC, Operand ImmOp, let hasSideEffects = 0, isPredicated = 1, opExtentBits = 6, opExtendable = 1 in class T_StoreAbs_Pred <string mnemonic, RegisterClass RC, bits<2> MajOp, bit isHalf, bit isNot, bit isNew> - : STInst<(outs), (ins PredRegs:$src1, u32MustExt:$absaddr, RC: $src2), + : STInst<(outs), (ins PredRegs:$src1, u32_0MustExt:$absaddr, RC: $src2), !if(isNot, "if (!$src1", "if ($src1")#!if(isNew, ".new) ", ") ")#mnemonic#"(#$absaddr) = $src2"#!if(isHalf, ".h",""), [], "", ST_tc_st_SLOT01>, AddrModeRel { @@ -3482,7 +2605,7 @@ class T_StoreAbs_Pred <string mnemonic, RegisterClass RC, bits<2> MajOp, //===----------------------------------------------------------------------===// class T_StoreAbs <string mnemonic, RegisterClass RC, Operand ImmOp, bits<2> MajOp, bit isHalf> - : T_StoreAbsGP <mnemonic, RC, u32MustExt, MajOp, 1, isHalf>, + : T_StoreAbsGP <mnemonic, RC, u32_0MustExt, MajOp, 1, isHalf>, AddrModeRel { string ImmOpStr = !cast<string>(ImmOp); let opExtentBits = !if (!eq(ImmOpStr, "u16_3Imm"), 19, @@ -3504,7 +2627,7 @@ multiclass ST_Abs<string mnemonic, string CextOp, RegisterClass RC, Operand ImmOp, bits<2> MajOp, bit isHalf = 0> { let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in { let opExtendable = 0, isPredicable = 1 in - def S2_#NAME#abs : T_StoreAbs <mnemonic, RC, ImmOp, MajOp, isHalf>; + def PS_#NAME#abs : T_StoreAbs <mnemonic, RC, ImmOp, MajOp, isHalf>; // Predicated def S4_p#NAME#t_abs : T_StoreAbs_Pred<mnemonic, RC, MajOp, isHalf, 0, 0>; @@ -3554,7 +2677,7 @@ class T_StoreAbsGP_NV <string mnemonic, Operand ImmOp, bits<2>MajOp> let hasSideEffects = 0, isPredicated = 1, mayStore = 1, isNVStore = 1, isNewValue = 1, opNewValue = 2, opExtentBits = 6, opExtendable = 1 in class T_StoreAbs_NV_Pred <string mnemonic, bits<2> MajOp, bit isNot, bit isNew> - : NVInst_V4<(outs), (ins PredRegs:$src1, u32MustExt:$absaddr, IntRegs:$src2), + : NVInst_V4<(outs), (ins PredRegs:$src1, u32_0MustExt:$absaddr, IntRegs:$src2), !if(isNot, "if (!$src1", "if ($src1")#!if(isNew, ".new) ", ") ")#mnemonic#"(#$absaddr) = $src2.new", [], "", ST_tc_st_SLOT0>, AddrModeRel { @@ -3584,7 +2707,7 @@ class T_StoreAbs_NV_Pred <string mnemonic, bits<2> MajOp, bit isNot, bit isNew> // absolute addressing. 
//===----------------------------------------------------------------------===// class T_StoreAbs_NV <string mnemonic, Operand ImmOp, bits<2> MajOp> - : T_StoreAbsGP_NV <mnemonic, u32MustExt, MajOp>, AddrModeRel { + : T_StoreAbsGP_NV <mnemonic, u32_0MustExt, MajOp>, AddrModeRel { string ImmOpStr = !cast<string>(ImmOp); let opExtentBits = !if (!eq(ImmOpStr, "u16_3Imm"), 19, @@ -3606,7 +2729,7 @@ multiclass ST_Abs_NV <string mnemonic, string CextOp, Operand ImmOp, bits<2> MajOp> { let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in { let opExtendable = 0, isPredicable = 1 in - def S2_#NAME#newabs : T_StoreAbs_NV <mnemonic, ImmOp, MajOp>; + def PS_#NAME#newabs : T_StoreAbs_NV <mnemonic, ImmOp, MajOp>; // Predicated def S4_p#NAME#newt_abs : T_StoreAbs_NV_Pred <mnemonic, MajOp, 0, 0>; @@ -3687,50 +2810,6 @@ let isNVStorable = 0, accessSize = HalfWordAccess in def S2_storerfgp : T_StoreGP <"memh", "STrif", IntRegs, u16_1Imm, 0b01, 1>, PredNewRel; -class Loada_pat<PatFrag Load, ValueType VT, PatFrag Addr, InstHexagon MI> - : Pat<(VT (Load Addr:$addr)), (MI Addr:$addr)>; - -class Loadam_pat<PatFrag Load, ValueType VT, PatFrag Addr, PatFrag ValueMod, - InstHexagon MI> - : Pat<(VT (Load Addr:$addr)), (ValueMod (MI Addr:$addr))>; - -class Storea_pat<PatFrag Store, PatFrag Value, PatFrag Addr, InstHexagon MI> - : Pat<(Store Value:$val, Addr:$addr), (MI Addr:$addr, Value:$val)>; - -class Stoream_pat<PatFrag Store, PatFrag Value, PatFrag Addr, PatFrag ValueMod, - InstHexagon MI> - : Pat<(Store Value:$val, Addr:$addr), - (MI Addr:$addr, (ValueMod Value:$val))>; - -let AddedComplexity = 30 in { - def: Storea_pat<truncstorei8, I32, addrga, S2_storerbabs>; - def: Storea_pat<truncstorei16, I32, addrga, S2_storerhabs>; - def: Storea_pat<store, I32, addrga, S2_storeriabs>; - def: Storea_pat<store, I64, addrga, S2_storerdabs>; - - def: Stoream_pat<truncstorei8, I64, addrga, LoReg, S2_storerbabs>; - def: Stoream_pat<truncstorei16, I64, addrga, LoReg, S2_storerhabs>; - def: Stoream_pat<truncstorei32, I64, addrga, LoReg, S2_storeriabs>; -} - -def: Storea_pat<SwapSt<atomic_store_8>, I32, addrgp, S2_storerbgp>; -def: Storea_pat<SwapSt<atomic_store_16>, I32, addrgp, S2_storerhgp>; -def: Storea_pat<SwapSt<atomic_store_32>, I32, addrgp, S2_storerigp>; -def: Storea_pat<SwapSt<atomic_store_64>, I64, addrgp, S2_storerdgp>; - -let AddedComplexity = 100 in { - def: Storea_pat<truncstorei8, I32, addrgp, S2_storerbgp>; - def: Storea_pat<truncstorei16, I32, addrgp, S2_storerhgp>; - def: Storea_pat<store, I32, addrgp, S2_storerigp>; - def: Storea_pat<store, I64, addrgp, S2_storerdgp>; - - // Map from "i1 = constant<-1>; memw(CONST32(#foo)) = i1" - // to "r0 = 1; memw(#foo) = r0" - let AddedComplexity = 100 in - def: Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)), - (S2_storerbgp tglobaladdr:$global, (A2_tfrsi 1))>; -} - //===----------------------------------------------------------------------===// // Template class for non predicated load instructions with // absolute addressing mode. 
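// For reference: the Loada_pat/Storea_pat/Stoream_pat helpers removed above
// are thin wrappers around single anonymous selection patterns. For example,
// Storea_pat<truncstorei8, I32, addrga, S2_storerbabs> is equivalent to the
// hand-written pattern below (a sketch only, assuming the truncstorei8,
// addrga and I32 fragments defined elsewhere in the Hexagon .td files):
def: Pat<(truncstorei8 I32:$val, addrga:$addr),
         (S2_storerbabs addrga:$addr, I32:$val)>;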
@@ -3764,7 +2843,7 @@ class T_LoadAbsGP <string mnemonic, RegisterClass RC, Operand ImmOp, class T_LoadAbs <string mnemonic, RegisterClass RC, Operand ImmOp, bits<3> MajOp> - : T_LoadAbsGP <mnemonic, RC, u32MustExt, MajOp>, AddrModeRel { + : T_LoadAbsGP <mnemonic, RC, u32_0MustExt, MajOp>, AddrModeRel { string ImmOpStr = !cast<string>(ImmOp); let opExtentBits = !if (!eq(ImmOpStr, "u16_3Imm"), 19, @@ -3786,7 +2865,7 @@ let isPredicated = 1, hasSideEffects = 0, hasNewValue = 1, opExtentBits = 6, opExtendable = 2 in class T_LoadAbs_Pred <string mnemonic, RegisterClass RC, bits<3> MajOp, bit isPredNot, bit isPredNew> - : LDInst <(outs RC:$dst), (ins PredRegs:$src1, u32MustExt:$absaddr), + : LDInst <(outs RC:$dst), (ins PredRegs:$src1, u32_0MustExt:$absaddr), !if(isPredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", ") ")#"$dst = "#mnemonic#"(#$absaddr)">, AddrModeRel { bits<5> dst; @@ -3826,7 +2905,7 @@ multiclass LD_Abs<string mnemonic, string CextOp, RegisterClass RC, Operand ImmOp, bits<3> MajOp> { let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in { let opExtendable = 1, isPredicable = 1 in - def L4_#NAME#_abs: T_LoadAbs <mnemonic, RC, ImmOp, MajOp>; + def PS_#NAME#abs: T_LoadAbs <mnemonic, RC, ImmOp, MajOp>; // Predicated defm L4_p#NAME#t : LD_Abs_Pred<mnemonic, RC, MajOp, 0>; @@ -3850,26 +2929,6 @@ defm loadri : LD_Abs<"memw", "LDriw", IntRegs, u16_2Imm, 0b100>; let accessSize = DoubleWordAccess in defm loadrd : LD_Abs<"memd", "LDrid", DoubleRegs, u16_3Imm, 0b110>; -class LoadAbs_pats <PatFrag ldOp, InstHexagon MI, ValueType VT = i32> - : Pat <(VT (ldOp (HexagonCONST32 tglobaladdr:$absaddr))), - (VT (MI tglobaladdr:$absaddr))>; - -let AddedComplexity = 30 in { - def: LoadAbs_pats <load, L4_loadri_abs>; - def: LoadAbs_pats <zextloadi1, L4_loadrub_abs>; - def: LoadAbs_pats <sextloadi8, L4_loadrb_abs>; - def: LoadAbs_pats <extloadi8, L4_loadrub_abs>; - def: LoadAbs_pats <zextloadi8, L4_loadrub_abs>; - def: LoadAbs_pats <sextloadi16, L4_loadrh_abs>; - def: LoadAbs_pats <extloadi16, L4_loadruh_abs>; - def: LoadAbs_pats <zextloadi16, L4_loadruh_abs>; - def: LoadAbs_pats <load, L4_loadrd_abs, i64>; -} - -let AddedComplexity = 30 in -def: Pat<(i64 (zextloadi1 (HexagonCONST32 tglobaladdr:$absaddr))), - (Zext64 (L4_loadrub_abs tglobaladdr:$absaddr))>; - //===----------------------------------------------------------------------===// // multiclass for load instructions with GP-relative addressing mode. 
// Rx=mem[bhwd](##global) @@ -3900,149 +2959,6 @@ def L2_loadrigp : T_LoadGP<"memw", "LDriw", IntRegs, u16_2Imm, 0b100>; let accessSize = DoubleWordAccess in def L2_loadrdgp : T_LoadGP<"memd", "LDrid", DoubleRegs, u16_3Imm, 0b110>; -def: Loada_pat<atomic_load_8, i32, addrgp, L2_loadrubgp>; -def: Loada_pat<atomic_load_16, i32, addrgp, L2_loadruhgp>; -def: Loada_pat<atomic_load_32, i32, addrgp, L2_loadrigp>; -def: Loada_pat<atomic_load_64, i64, addrgp, L2_loadrdgp>; - -// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress), Pd = Rd -def: Loadam_pat<load, i1, addrga, I32toI1, L4_loadrub_abs>; -def: Loadam_pat<load, i1, addrgp, I32toI1, L2_loadrubgp>; - -def: Stoream_pat<store, I1, addrga, I1toI32, S2_storerbabs>; -def: Stoream_pat<store, I1, addrgp, I1toI32, S2_storerbgp>; - -// Map from load(globaladdress) -> mem[u][bhwd](#foo) -class LoadGP_pats <PatFrag ldOp, InstHexagon MI, ValueType VT = i32> - : Pat <(VT (ldOp (HexagonCONST32_GP tglobaladdr:$global))), - (VT (MI tglobaladdr:$global))>; - -let AddedComplexity = 100 in { - def: LoadGP_pats <extloadi8, L2_loadrubgp>; - def: LoadGP_pats <sextloadi8, L2_loadrbgp>; - def: LoadGP_pats <zextloadi8, L2_loadrubgp>; - def: LoadGP_pats <extloadi16, L2_loadruhgp>; - def: LoadGP_pats <sextloadi16, L2_loadrhgp>; - def: LoadGP_pats <zextloadi16, L2_loadruhgp>; - def: LoadGP_pats <load, L2_loadrigp>; - def: LoadGP_pats <load, L2_loadrdgp, i64>; -} - -// When the Interprocedural Global Variable optimizer realizes that a certain -// global variable takes only two constant values, it shrinks the global to -// a boolean. Catch those loads here in the following 3 patterns. -let AddedComplexity = 100 in { - def: LoadGP_pats <extloadi1, L2_loadrubgp>; - def: LoadGP_pats <zextloadi1, L2_loadrubgp>; -} - -// Transfer global address into a register -def: Pat<(HexagonCONST32 tglobaladdr:$Rs), (A2_tfrsi s16Ext:$Rs)>; -def: Pat<(HexagonCONST32_GP tblockaddress:$Rs), (A2_tfrsi s16Ext:$Rs)>; -def: Pat<(HexagonCONST32_GP tglobaladdr:$Rs), (A2_tfrsi s16Ext:$Rs)>; - -let AddedComplexity = 30 in { - def: Storea_pat<truncstorei8, I32, u32ImmPred, S2_storerbabs>; - def: Storea_pat<truncstorei16, I32, u32ImmPred, S2_storerhabs>; - def: Storea_pat<store, I32, u32ImmPred, S2_storeriabs>; -} - -let AddedComplexity = 30 in { - def: Loada_pat<load, i32, u32ImmPred, L4_loadri_abs>; - def: Loada_pat<sextloadi8, i32, u32ImmPred, L4_loadrb_abs>; - def: Loada_pat<zextloadi8, i32, u32ImmPred, L4_loadrub_abs>; - def: Loada_pat<sextloadi16, i32, u32ImmPred, L4_loadrh_abs>; - def: Loada_pat<zextloadi16, i32, u32ImmPred, L4_loadruh_abs>; -} - -// Indexed store word - global address. -// memw(Rs+#u6:2)=#S8 -let AddedComplexity = 100 in -defm: Storex_add_pat<store, addrga, u6_2ImmPred, S4_storeiri_io>; - -// Load from a global address that has only one use in the current basic block. -let AddedComplexity = 100 in { - def: Loada_pat<extloadi8, i32, addrga, L4_loadrub_abs>; - def: Loada_pat<sextloadi8, i32, addrga, L4_loadrb_abs>; - def: Loada_pat<zextloadi8, i32, addrga, L4_loadrub_abs>; - - def: Loada_pat<extloadi16, i32, addrga, L4_loadruh_abs>; - def: Loada_pat<sextloadi16, i32, addrga, L4_loadrh_abs>; - def: Loada_pat<zextloadi16, i32, addrga, L4_loadruh_abs>; - - def: Loada_pat<load, i32, addrga, L4_loadri_abs>; - def: Loada_pat<load, i64, addrga, L4_loadrd_abs>; -} - -// Store to a global address that has only one use in the current basic block. 
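// Each LoadGP_pats instance above is one anonymous pattern that folds a
// GP-relative global access into the load itself. For example,
// LoadGP_pats<sextloadi8, L2_loadrbgp> expands to the following (a sketch,
// assuming HexagonCONST32_GP and L2_loadrbgp from the surrounding files):
def: Pat<(i32 (sextloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
         (i32 (L2_loadrbgp tglobaladdr:$global))>;
// The AddedComplexity = 100 block below covers the corresponding stores.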
-let AddedComplexity = 100 in { - def: Storea_pat<truncstorei8, I32, addrga, S2_storerbabs>; - def: Storea_pat<truncstorei16, I32, addrga, S2_storerhabs>; - def: Storea_pat<store, I32, addrga, S2_storeriabs>; - def: Storea_pat<store, I64, addrga, S2_storerdabs>; - - def: Stoream_pat<truncstorei32, I64, addrga, LoReg, S2_storeriabs>; -} - -// i8/i16/i32 -> i64 loads -// We need a complexity of 120 here to override preceding handling of -// zextload. -let AddedComplexity = 120 in { - def: Loadam_pat<extloadi8, i64, addrga, Zext64, L4_loadrub_abs>; - def: Loadam_pat<sextloadi8, i64, addrga, Sext64, L4_loadrb_abs>; - def: Loadam_pat<zextloadi8, i64, addrga, Zext64, L4_loadrub_abs>; - - def: Loadam_pat<extloadi16, i64, addrga, Zext64, L4_loadruh_abs>; - def: Loadam_pat<sextloadi16, i64, addrga, Sext64, L4_loadrh_abs>; - def: Loadam_pat<zextloadi16, i64, addrga, Zext64, L4_loadruh_abs>; - - def: Loadam_pat<extloadi32, i64, addrga, Zext64, L4_loadri_abs>; - def: Loadam_pat<sextloadi32, i64, addrga, Sext64, L4_loadri_abs>; - def: Loadam_pat<zextloadi32, i64, addrga, Zext64, L4_loadri_abs>; -} - -let AddedComplexity = 100 in { - def: Loada_pat<extloadi8, i32, addrgp, L4_loadrub_abs>; - def: Loada_pat<sextloadi8, i32, addrgp, L4_loadrb_abs>; - def: Loada_pat<zextloadi8, i32, addrgp, L4_loadrub_abs>; - - def: Loada_pat<extloadi16, i32, addrgp, L4_loadruh_abs>; - def: Loada_pat<sextloadi16, i32, addrgp, L4_loadrh_abs>; - def: Loada_pat<zextloadi16, i32, addrgp, L4_loadruh_abs>; - - def: Loada_pat<load, i32, addrgp, L4_loadri_abs>; - def: Loada_pat<load, i64, addrgp, L4_loadrd_abs>; -} - -let AddedComplexity = 100 in { - def: Storea_pat<truncstorei8, I32, addrgp, S2_storerbabs>; - def: Storea_pat<truncstorei16, I32, addrgp, S2_storerhabs>; - def: Storea_pat<store, I32, addrgp, S2_storeriabs>; - def: Storea_pat<store, I64, addrgp, S2_storerdabs>; -} - -def: Loada_pat<atomic_load_8, i32, addrgp, L4_loadrub_abs>; -def: Loada_pat<atomic_load_16, i32, addrgp, L4_loadruh_abs>; -def: Loada_pat<atomic_load_32, i32, addrgp, L4_loadri_abs>; -def: Loada_pat<atomic_load_64, i64, addrgp, L4_loadrd_abs>; - -def: Storea_pat<SwapSt<atomic_store_8>, I32, addrgp, S2_storerbabs>; -def: Storea_pat<SwapSt<atomic_store_16>, I32, addrgp, S2_storerhabs>; -def: Storea_pat<SwapSt<atomic_store_32>, I32, addrgp, S2_storeriabs>; -def: Storea_pat<SwapSt<atomic_store_64>, I64, addrgp, S2_storerdabs>; - -let Constraints = "@earlyclobber $dst" in -def Insert4 : PseudoM<(outs DoubleRegs:$dst), (ins IntRegs:$a, IntRegs:$b, - IntRegs:$c, IntRegs:$d), - ".error \"Should never try to emit Insert4\"", - [(set (i64 DoubleRegs:$dst), - (or (or (or (shl (i64 (zext (i32 (and (i32 IntRegs:$b), (i32 65535))))), - (i32 16)), - (i64 (zext (i32 (and (i32 IntRegs:$a), (i32 65535)))))), - (shl (i64 (anyext (i32 (and (i32 IntRegs:$c), (i32 65535))))), - (i32 32))), - (shl (i64 (anyext (i32 IntRegs:$d))), (i32 48))))]>; - //===----------------------------------------------------------------------===// // :raw for of boundscheck:hi:lo insns //===----------------------------------------------------------------------===// @@ -4111,20 +3027,12 @@ def A4_tlbmatch : ALU64Inst<(outs PredRegs:$Pd), let Inst{1-0} = Pd; } -// We need custom lowering of ISD::PREFETCH into HexagonISD::DCFETCH -// because the SDNode ISD::PREFETCH has properties MayLoad and MayStore. -// We don't really want either one here. 
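// Making the dcfetch remark above concrete: if ISD::PREFETCH had acceptable
// properties, the instruction could have been matched with a plain pattern,
// e.g. the purely hypothetical sketch below (prefetch operands are address,
// rw, locality and cache type). Avoiding the node's MayLoad/MayStore flags
// is precisely why the custom HexagonISD::DCFETCH node exists instead:
def: Pat<(prefetch IntRegs:$Rs, (i32 0), (i32 3), (i32 1)),
         (Y2_dcfetchbo IntRegs:$Rs, 0)>;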
-def SDTHexagonDCFETCH : SDTypeProfile<0, 2, [SDTCisPtrTy<0>,SDTCisInt<1>]>; -def HexagonDCFETCH : SDNode<"HexagonISD::DCFETCH", SDTHexagonDCFETCH, - [SDNPHasChain]>; - // Use LD0Inst for dcfetch, but set "mayLoad" to 0 because this doesn't // really do a load. let hasSideEffects = 1, mayLoad = 0 in def Y2_dcfetchbo : LD0Inst<(outs), (ins IntRegs:$Rs, u11_3Imm:$u11_3), "dcfetch($Rs + #$u11_3)", - [(HexagonDCFETCH IntRegs:$Rs, u11_3ImmPred:$u11_3)], - "", LD_tc_ld_SLOT0> { + [], "", LD_tc_ld_SLOT0> { bits<5> Rs; bits<14> u11_3; @@ -4136,9 +3044,6 @@ def Y2_dcfetchbo : LD0Inst<(outs), (ins IntRegs:$Rs, u11_3Imm:$u11_3), } -def: Pat<(HexagonDCFETCH (i32 (add IntRegs:$Rs, u11_3ImmPred:$u11_3)), (i32 0)), - (Y2_dcfetchbo IntRegs:$Rs, u11_3ImmPred:$u11_3)>; - //===----------------------------------------------------------------------===// // Compound instructions //===----------------------------------------------------------------------===// @@ -4248,7 +3153,7 @@ let isBranch = 1, hasSideEffects = 0, isExtentSigned = 1, isPredicated = 1, isPredicatedNew = 1, isExtendable = 1, opExtentBits = 11, opExtentAlign = 2, opExtendable = 2, isTerminator = 1 in class CJInst_RU5<string px, string op, bit np, string tnt> - : InstHexagon<(outs), (ins IntRegs:$Rs, u5Imm:$U5, brtarget:$r9_2), + : InstHexagon<(outs), (ins IntRegs:$Rs, u5_0Imm:$U5, brtarget:$r9_2), ""#px#" = cmp."#op#"($Rs, #$U5); if (" #!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2", [], "", COMPOUND_CJ_ARCHDEPSLOT, TypeCOMPOUND>, OpcodeHexagon { @@ -4300,11 +3205,11 @@ defm gtu : T_pnp_CJInst_RU5<"gtu">; let isBranch = 1, hasSideEffects = 0, isExtentSigned = 1, isPredicated = 1, isPredicatedFalse = 1, isPredicatedNew = 1, - isExtendable = 1, opExtentBits = 11, opExtentAlign = 2, opExtendable = 1, + isExtendable = 1, opExtentBits = 11, opExtentAlign = 2, opExtendable = 2, isTerminator = 1 in class CJInst_Rn1<string px, string op, bit np, string tnt> - : InstHexagon<(outs), (ins IntRegs:$Rs, brtarget:$r9_2), - ""#px#" = cmp."#op#"($Rs,#-1); if (" + : InstHexagon<(outs), (ins IntRegs:$Rs, n1Const:$n1, brtarget:$r9_2), + ""#px#" = cmp."#op#"($Rs,#$n1); if (" #!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2", [], "", COMPOUND_CJ_ARCHDEPSLOT, TypeCOMPOUND>, OpcodeHexagon { bits<4> Rs; @@ -4357,7 +3262,7 @@ let Defs = [PC], isBranch = 1, hasSideEffects = 0, hasNewValue = 1, opExtentAlign = 2, opExtendable = 2 in def J4_jumpseti: CJInst_JMPSET < (outs IntRegs:$Rd), - (ins u6Imm:$U6, brtarget:$r9_2), + (ins u6_0Imm:$U6, brtarget:$r9_2), "$Rd = #$U6 ; jump $r9_2"> { bits<4> Rd; bits<6> U6; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV5.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV5.td index 823961f..cd19b69 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV5.td +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV5.td @@ -43,10 +43,7 @@ let Predicates = [HasV5T] in { def A5_vaddhubs: T_S3op_1 <"vaddhub", IntRegs, 0b01, 0b001, 0, 1>; } -def S2_asr_i_p_rnd : S_2OpInstImm<"asr", 0b110, 0b111, u6Imm, - [(set I64:$dst, - (sra (i64 (add (i64 (sra I64:$src1, u6ImmPred:$src2)), 1)), - (i32 1)))], 1>, +def S2_asr_i_p_rnd : S_2OpInstImm<"asr", 0b110, 0b111, u6_0Imm, [], 1>, Requires<[HasV5T]> { bits<6> src2; let Inst{13-8} = src2; @@ -54,7 +51,7 @@ def S2_asr_i_p_rnd : S_2OpInstImm<"asr", 0b110, 0b111, u6Imm, let isAsmParserOnly = 1 in def S2_asr_i_p_rnd_goodsyntax - : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2), + : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6_0Imm:$src2), "$dst = 
asrrnd($src1, #$src2)">; def C4_fastcorner9 : T_LOGICAL_2OP<"fastcorner9", 0b000, 0, 0>, @@ -67,66 +64,9 @@ def C4_fastcorner9_not : T_LOGICAL_2OP<"!fastcorner9", 0b000, 0, 0>, let Inst{20,13,7,4} = 0b1111; } -def SDTHexagonFCONST32 : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, - SDTCisPtrTy<1>]>; -def HexagonFCONST32 : SDNode<"HexagonISD::FCONST32", SDTHexagonFCONST32>; - -let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in -def FCONST32_nsdata : LDInst<(outs IntRegs:$dst), (ins globaladdress:$global), - "$dst = CONST32(#$global)", - [(set F32:$dst, - (HexagonFCONST32 tglobaladdr:$global))]>, - Requires<[HasV5T]>; - -let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in -def CONST64_Float_Real : LDInst<(outs DoubleRegs:$dst), (ins f64imm:$src1), - "$dst = CONST64(#$src1)", - [(set F64:$dst, fpimm:$src1)]>, - Requires<[HasV5T]>; - -let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in -def CONST32_Float_Real : LDInst<(outs IntRegs:$dst), (ins f32imm:$src1), - "$dst = CONST32(#$src1)", - [(set F32:$dst, fpimm:$src1)]>, - Requires<[HasV5T]>; - -// Transfer immediate float. -// Only works with single precision fp value. -// For double precision, use CONST64_float_real, as 64bit transfer -// can only hold 40-bit values - 32 from const ext + 8 bit immediate. -// Make sure that complexity is more than the CONST32 pattern in -// HexagonInstrInfo.td patterns. -let isExtended = 1, opExtendable = 1, isMoveImm = 1, isReMaterializable = 1, - isPredicable = 1, AddedComplexity = 30, validSubTargets = HasV5SubT, - isCodeGenOnly = 1, isPseudo = 1 in -def TFRI_f : ALU32_ri<(outs IntRegs:$dst), (ins f32Ext:$src1), - "$dst = #$src1", - [(set F32:$dst, fpimm:$src1)]>, - Requires<[HasV5T]>; - -let isExtended = 1, opExtendable = 2, isPredicated = 1, hasSideEffects = 0, - validSubTargets = HasV5SubT, isCodeGenOnly = 1, isPseudo = 1 in -def TFRI_cPt_f : ALU32_ri<(outs IntRegs:$dst), - (ins PredRegs:$src1, f32Ext:$src2), - "if ($src1) $dst = #$src2", []>, - Requires<[HasV5T]>; - -let isExtended = 1, opExtendable = 2, isPredicated = 1, isPredicatedFalse = 1, - hasSideEffects = 0, validSubTargets = HasV5SubT, isPseudo = 1 in -def TFRI_cNotPt_f : ALU32_ri<(outs IntRegs:$dst), - (ins PredRegs:$src1, f32Ext:$src2), - "if (!$src1) $dst = #$src2", []>, - Requires<[HasV5T]>; - -def SDTHexagonI32I64: SDTypeProfile<1, 1, [SDTCisVT<0, i32>, - SDTCisVT<1, i64>]>; - -def HexagonPOPCOUNT: SDNode<"HexagonISD::POPCOUNT", SDTHexagonI32I64>; - let hasNewValue = 1, validSubTargets = HasV5SubT in def S5_popcountp : ALU64_rr<(outs IntRegs:$Rd), (ins DoubleRegs:$Rss), - "$Rd = popcount($Rss)", - [(set I32:$Rd, (HexagonPOPCOUNT I64:$Rss))], "", S_2op_tc_2_SLOT23>, + "$Rd = popcount($Rss)", [], "", S_2op_tc_2_SLOT23>, Requires<[HasV5T]> { bits<5> Rd; bits<5> Rss; @@ -139,14 +79,6 @@ def S5_popcountp : ALU64_rr<(outs IntRegs:$Rd), (ins DoubleRegs:$Rss), let Inst{20-16} = Rss; } -defm: Loadx_pat<load, f32, s30_2ImmPred, L2_loadri_io>; -defm: Loadx_pat<load, f64, s29_3ImmPred, L2_loadrd_io>; - -defm: Storex_pat<store, F32, s30_2ImmPred, S2_storeri_io>; -defm: Storex_pat<store, F64, s29_3ImmPred, S2_storerd_io>; -def: Storex_simple_pat<store, F32, S2_storeri_io>; -def: Storex_simple_pat<store, F64, S2_storerd_io>; - let isFP = 1, hasNewValue = 1, opNewValue = 0 in class T_MInstFloat <string mnemonic, bits<3> MajOp, bits<3> MinOp> : MInst<(outs IntRegs:$Rd), @@ -176,44 +108,19 @@ let isCommutable = 1 in { def F2_sfsub : T_MInstFloat < "sfsub", 0b000, 0b001>; -def: Pat<(f32 (fadd F32:$src1, F32:$src2)), - 
(F2_sfadd F32:$src1, F32:$src2)>; - -def: Pat<(f32 (fsub F32:$src1, F32:$src2)), - (F2_sfsub F32:$src1, F32:$src2)>; - -def: Pat<(f32 (fmul F32:$src1, F32:$src2)), - (F2_sfmpy F32:$src1, F32:$src2)>; - let Itinerary = M_tc_3x_SLOT23 in { def F2_sfmax : T_MInstFloat < "sfmax", 0b100, 0b000>; def F2_sfmin : T_MInstFloat < "sfmin", 0b100, 0b001>; } -let AddedComplexity = 100, Predicates = [HasV5T] in { - def: Pat<(f32 (select (i1 (setolt F32:$src1, F32:$src2)), - F32:$src1, F32:$src2)), - (F2_sfmin F32:$src1, F32:$src2)>; - - def: Pat<(f32 (select (i1 (setogt F32:$src1, F32:$src2)), - F32:$src2, F32:$src1)), - (F2_sfmin F32:$src1, F32:$src2)>; - - def: Pat<(f32 (select (i1 (setogt F32:$src1, F32:$src2)), - F32:$src1, F32:$src2)), - (F2_sfmax F32:$src1, F32:$src2)>; - - def: Pat<(f32 (select (i1 (setolt F32:$src1, F32:$src2)), - F32:$src2, F32:$src1)), - (F2_sfmax F32:$src1, F32:$src2)>; -} - +let Itinerary = M_tc_3or4x_SLOT23 in { def F2_sffixupn : T_MInstFloat < "sffixupn", 0b110, 0b000>; def F2_sffixupd : T_MInstFloat < "sffixupd", 0b110, 0b001>; +} // F2_sfrecipa: Reciprocal approximation for division. -let isPredicateLate = 1, isFP = 1, -hasSideEffects = 0, hasNewValue = 1 in +let Uses = [USR], isPredicateLate = 1, isFP = 1, + hasSideEffects = 0, hasNewValue = 1, Itinerary = M_tc_3or4x_SLOT23 in def F2_sfrecipa: MInst < (outs IntRegs:$Rd, PredRegs:$Pe), (ins IntRegs:$Rs, IntRegs:$Rt), @@ -235,7 +142,7 @@ def F2_sfrecipa: MInst < } // F2_dfcmpeq: Floating point compare for equal. -let isCompare = 1, isFP = 1 in +let Uses = [USR], isCompare = 1, isFP = 1 in class T_fcmp <string mnemonic, RegisterClass RC, bits<3> MinOp, list<dag> pattern = [] > : ALU64Inst <(outs PredRegs:$dst), (ins RC:$src1, RC:$src2), @@ -256,15 +163,13 @@ class T_fcmp <string mnemonic, RegisterClass RC, bits<3> MinOp, } class T_fcmp64 <string mnemonic, PatFrag OpNode, bits<3> MinOp> - : T_fcmp <mnemonic, DoubleRegs, MinOp, - [(set I1:$dst, (OpNode F64:$src1, F64:$src2))]> { + : T_fcmp <mnemonic, DoubleRegs, MinOp, []> { let IClass = 0b1101; let Inst{27-21} = 0b0010111; } class T_fcmp32 <string mnemonic, PatFrag OpNode, bits<3> MinOp> - : T_fcmp <mnemonic, IntRegs, MinOp, - [(set I1:$dst, (OpNode F32:$src1, F32:$src2))]> { + : T_fcmp <mnemonic, IntRegs, MinOp, []> { let IClass = 0b1100; let Inst{27-21} = 0b0111111; } @@ -279,259 +184,12 @@ def F2_sfcmpuo : T_fcmp32<"sfcmp.uo", setuo, 0b001>; def F2_sfcmpeq : T_fcmp32<"sfcmp.eq", setoeq, 0b011>; def F2_sfcmpgt : T_fcmp32<"sfcmp.gt", setogt, 0b100>; -//===----------------------------------------------------------------------===// -// Multiclass to define 'Def Pats' for ordered gt, ge, eq operations. -//===----------------------------------------------------------------------===// - -let Predicates = [HasV5T] in -multiclass T_fcmp_pats<PatFrag cmpOp, InstHexagon IntMI, InstHexagon DoubleMI> { - // IntRegs - def: Pat<(i1 (cmpOp F32:$src1, F32:$src2)), - (IntMI F32:$src1, F32:$src2)>; - // DoubleRegs - def: Pat<(i1 (cmpOp F64:$src1, F64:$src2)), - (DoubleMI F64:$src1, F64:$src2)>; -} - -defm : T_fcmp_pats <seteq, F2_sfcmpeq, F2_dfcmpeq>; -defm : T_fcmp_pats <setgt, F2_sfcmpgt, F2_dfcmpgt>; -defm : T_fcmp_pats <setge, F2_sfcmpge, F2_dfcmpge>; - -//===----------------------------------------------------------------------===// -// Multiclass to define 'Def Pats' for unordered gt, ge, eq operations. 
-//===----------------------------------------------------------------------===// -let Predicates = [HasV5T] in -multiclass unord_Pats <PatFrag cmpOp, InstHexagon IntMI, InstHexagon DoubleMI> { - // IntRegs - def: Pat<(i1 (cmpOp F32:$src1, F32:$src2)), - (C2_or (F2_sfcmpuo F32:$src1, F32:$src2), - (IntMI F32:$src1, F32:$src2))>; - - // DoubleRegs - def: Pat<(i1 (cmpOp F64:$src1, F64:$src2)), - (C2_or (F2_dfcmpuo F64:$src1, F64:$src2), - (DoubleMI F64:$src1, F64:$src2))>; -} - -defm : unord_Pats <setuge, F2_sfcmpge, F2_dfcmpge>; -defm : unord_Pats <setugt, F2_sfcmpgt, F2_dfcmpgt>; -defm : unord_Pats <setueq, F2_sfcmpeq, F2_dfcmpeq>; - -//===----------------------------------------------------------------------===// -// Multiclass to define 'Def Pats' for the following dags: -// seteq(setoeq(op1, op2), 0) -> not(setoeq(op1, op2)) -// seteq(setoeq(op1, op2), 1) -> setoeq(op1, op2) -// setne(setoeq(op1, op2), 0) -> setoeq(op1, op2) -// setne(setoeq(op1, op2), 1) -> not(setoeq(op1, op2)) -//===----------------------------------------------------------------------===// -let Predicates = [HasV5T] in -multiclass eq_ordgePats <PatFrag cmpOp, InstHexagon IntMI, - InstHexagon DoubleMI> { - // IntRegs - def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 0)), - (C2_not (IntMI F32:$src1, F32:$src2))>; - def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 1)), - (IntMI F32:$src1, F32:$src2)>; - def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 0)), - (IntMI F32:$src1, F32:$src2)>; - def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 1)), - (C2_not (IntMI F32:$src1, F32:$src2))>; - - // DoubleRegs - def : Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 0)), - (C2_not (DoubleMI F64:$src1, F64:$src2))>; - def : Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 1)), - (DoubleMI F64:$src1, F64:$src2)>; - def : Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 0)), - (DoubleMI F64:$src1, F64:$src2)>; - def : Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 1)), - (C2_not (DoubleMI F64:$src1, F64:$src2))>; -} - -defm : eq_ordgePats<setoeq, F2_sfcmpeq, F2_dfcmpeq>; -defm : eq_ordgePats<setoge, F2_sfcmpge, F2_dfcmpge>; -defm : eq_ordgePats<setogt, F2_sfcmpgt, F2_dfcmpgt>; - -//===----------------------------------------------------------------------===// -// Multiclass to define 'Def Pats' for the following dags: -// seteq(setolt(op1, op2), 0) -> not(setogt(op2, op1)) -// seteq(setolt(op1, op2), 1) -> setogt(op2, op1) -// setne(setolt(op1, op2), 0) -> setogt(op2, op1) -// setne(setolt(op1, op2), 1) -> not(setogt(op2, op1)) -//===----------------------------------------------------------------------===// -let Predicates = [HasV5T] in -multiclass eq_ordltPats <PatFrag cmpOp, InstHexagon IntMI, - InstHexagon DoubleMI> { - // IntRegs - def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 0)), - (C2_not (IntMI F32:$src2, F32:$src1))>; - def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 1)), - (IntMI F32:$src2, F32:$src1)>; - def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 0)), - (IntMI F32:$src2, F32:$src1)>; - def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 1)), - (C2_not (IntMI F32:$src2, F32:$src1))>; - - // DoubleRegs - def: Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 0)), - (C2_not (DoubleMI F64:$src2, F64:$src1))>; - def: Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 1)), - (DoubleMI F64:$src2, F64:$src1)>; - def: Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 0)), - (DoubleMI F64:$src2, F64:$src1)>; - def: Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 
1)), - (C2_not (DoubleMI F64:$src2, F64:$src1))>; -} - -defm : eq_ordltPats<setole, F2_sfcmpge, F2_dfcmpge>; -defm : eq_ordltPats<setolt, F2_sfcmpgt, F2_dfcmpgt>; - - -// seto is the inverse of setuo; see http://llvm.org/docs/LangRef.html#i_fcmp -let Predicates = [HasV5T] in { - def: Pat<(i1 (seto F32:$src1, F32:$src2)), - (C2_not (F2_sfcmpuo F32:$src2, F32:$src1))>; - def: Pat<(i1 (seto F32:$src1, fpimm:$src2)), - (C2_not (F2_sfcmpuo (TFRI_f fpimm:$src2), F32:$src1))>; - def: Pat<(i1 (seto F64:$src1, F64:$src2)), - (C2_not (F2_dfcmpuo F64:$src2, F64:$src1))>; - def: Pat<(i1 (seto F64:$src1, fpimm:$src2)), - (C2_not (F2_dfcmpuo (CONST64_Float_Real fpimm:$src2), F64:$src1))>; -} - -// Ordered lt. -let Predicates = [HasV5T] in { - def: Pat<(i1 (setolt F32:$src1, F32:$src2)), - (F2_sfcmpgt F32:$src2, F32:$src1)>; - def: Pat<(i1 (setolt F32:$src1, fpimm:$src2)), - (F2_sfcmpgt (f32 (TFRI_f fpimm:$src2)), F32:$src1)>; - def: Pat<(i1 (setolt F64:$src1, F64:$src2)), - (F2_dfcmpgt F64:$src2, F64:$src1)>; - def: Pat<(i1 (setolt F64:$src1, fpimm:$src2)), - (F2_dfcmpgt (CONST64_Float_Real fpimm:$src2), F64:$src1)>; -} - -// Unordered lt. -let Predicates = [HasV5T] in { - def: Pat<(i1 (setult F32:$src1, F32:$src2)), - (C2_or (F2_sfcmpuo F32:$src1, F32:$src2), - (F2_sfcmpgt F32:$src2, F32:$src1))>; - def: Pat<(i1 (setult F32:$src1, fpimm:$src2)), - (C2_or (F2_sfcmpuo F32:$src1, (TFRI_f fpimm:$src2)), - (F2_sfcmpgt (TFRI_f fpimm:$src2), F32:$src1))>; - def: Pat<(i1 (setult F64:$src1, F64:$src2)), - (C2_or (F2_dfcmpuo F64:$src1, F64:$src2), - (F2_dfcmpgt F64:$src2, F64:$src1))>; - def: Pat<(i1 (setult F64:$src1, fpimm:$src2)), - (C2_or (F2_dfcmpuo F64:$src1, (CONST64_Float_Real fpimm:$src2)), - (F2_dfcmpgt (CONST64_Float_Real fpimm:$src2), F64:$src1))>; -} - -// Ordered le. -let Predicates = [HasV5T] in { - // rs <= rt -> rt >= rs. - def: Pat<(i1 (setole F32:$src1, F32:$src2)), - (F2_sfcmpge F32:$src2, F32:$src1)>; - def: Pat<(i1 (setole F32:$src1, fpimm:$src2)), - (F2_sfcmpge (TFRI_f fpimm:$src2), F32:$src1)>; - - // Rss <= Rtt -> Rtt >= Rss. - def: Pat<(i1 (setole F64:$src1, F64:$src2)), - (F2_dfcmpge F64:$src2, F64:$src1)>; - def: Pat<(i1 (setole F64:$src1, fpimm:$src2)), - (F2_dfcmpge (CONST64_Float_Real fpimm:$src2), F64:$src1)>; -} - -// Unordered le. -let Predicates = [HasV5T] in { -// rs <= rt -> rt >= rs. - def: Pat<(i1 (setule F32:$src1, F32:$src2)), - (C2_or (F2_sfcmpuo F32:$src1, F32:$src2), - (F2_sfcmpge F32:$src2, F32:$src1))>; - def: Pat<(i1 (setule F32:$src1, fpimm:$src2)), - (C2_or (F2_sfcmpuo F32:$src1, (TFRI_f fpimm:$src2)), - (F2_sfcmpge (TFRI_f fpimm:$src2), F32:$src1))>; - def: Pat<(i1 (setule F64:$src1, F64:$src2)), - (C2_or (F2_dfcmpuo F64:$src1, F64:$src2), - (F2_dfcmpge F64:$src2, F64:$src1))>; - def: Pat<(i1 (setule F64:$src1, fpimm:$src2)), - (C2_or (F2_dfcmpuo F64:$src1, (CONST64_Float_Real fpimm:$src2)), - (F2_dfcmpge (CONST64_Float_Real fpimm:$src2), F64:$src1))>; -} - -// Ordered ne. -let Predicates = [HasV5T] in { - def: Pat<(i1 (setone F32:$src1, F32:$src2)), - (C2_not (F2_sfcmpeq F32:$src1, F32:$src2))>; - def: Pat<(i1 (setone F64:$src1, F64:$src2)), - (C2_not (F2_dfcmpeq F64:$src1, F64:$src2))>; - def: Pat<(i1 (setone F32:$src1, fpimm:$src2)), - (C2_not (F2_sfcmpeq F32:$src1, (TFRI_f fpimm:$src2)))>; - def: Pat<(i1 (setone F64:$src1, fpimm:$src2)), - (C2_not (F2_dfcmpeq F64:$src1, (CONST64_Float_Real fpimm:$src2)))>; -} - -// Unordered ne.
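// The recipe above for the unordered comparisons is uniform: Hexagon has no
// unordered FP compare, so each setu<cc> is selected as "compare-unordered
// ORed with the ordered result". As a worked example, the F32 half of
// unord_Pats<setugt, F2_sfcmpgt, F2_dfcmpgt> expands to the sketch below
// (assuming C2_or and F2_sfcmpuo from the surrounding files):
def: Pat<(i1 (setugt F32:$src1, F32:$src2)),
         (C2_or (F2_sfcmpuo F32:$src1, F32:$src2),
                (F2_sfcmpgt F32:$src1, F32:$src2))>;
// setune, next, composes the same C2_or with a negated sfcmp.eq.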
-let Predicates = [HasV5T] in { - def: Pat<(i1 (setune F32:$src1, F32:$src2)), - (C2_or (F2_sfcmpuo F32:$src1, F32:$src2), - (C2_not (F2_sfcmpeq F32:$src1, F32:$src2)))>; - def: Pat<(i1 (setune F64:$src1, F64:$src2)), - (C2_or (F2_dfcmpuo F64:$src1, F64:$src2), - (C2_not (F2_dfcmpeq F64:$src1, F64:$src2)))>; - def: Pat<(i1 (setune F32:$src1, fpimm:$src2)), - (C2_or (F2_sfcmpuo F32:$src1, (TFRI_f fpimm:$src2)), - (C2_not (F2_sfcmpeq F32:$src1, (TFRI_f fpimm:$src2))))>; - def: Pat<(i1 (setune F64:$src1, fpimm:$src2)), - (C2_or (F2_dfcmpuo F64:$src1, (CONST64_Float_Real fpimm:$src2)), - (C2_not (F2_dfcmpeq F64:$src1, - (CONST64_Float_Real fpimm:$src2))))>; -} - -// Besides set[o|u][comparions], we also need set[comparisons]. -let Predicates = [HasV5T] in { - // lt. - def: Pat<(i1 (setlt F32:$src1, F32:$src2)), - (F2_sfcmpgt F32:$src2, F32:$src1)>; - def: Pat<(i1 (setlt F32:$src1, fpimm:$src2)), - (F2_sfcmpgt (TFRI_f fpimm:$src2), F32:$src1)>; - def: Pat<(i1 (setlt F64:$src1, F64:$src2)), - (F2_dfcmpgt F64:$src2, F64:$src1)>; - def: Pat<(i1 (setlt F64:$src1, fpimm:$src2)), - (F2_dfcmpgt (CONST64_Float_Real fpimm:$src2), F64:$src1)>; - - // le. - // rs <= rt -> rt >= rs. - def: Pat<(i1 (setle F32:$src1, F32:$src2)), - (F2_sfcmpge F32:$src2, F32:$src1)>; - def: Pat<(i1 (setle F32:$src1, fpimm:$src2)), - (F2_sfcmpge (TFRI_f fpimm:$src2), F32:$src1)>; - - // Rss <= Rtt -> Rtt >= Rss. - def: Pat<(i1 (setle F64:$src1, F64:$src2)), - (F2_dfcmpge F64:$src2, F64:$src1)>; - def: Pat<(i1 (setle F64:$src1, fpimm:$src2)), - (F2_dfcmpge (CONST64_Float_Real fpimm:$src2), F64:$src1)>; - - // ne. - def: Pat<(i1 (setne F32:$src1, F32:$src2)), - (C2_not (F2_sfcmpeq F32:$src1, F32:$src2))>; - def: Pat<(i1 (setne F64:$src1, F64:$src2)), - (C2_not (F2_dfcmpeq F64:$src1, F64:$src2))>; - def: Pat<(i1 (setne F32:$src1, fpimm:$src2)), - (C2_not (F2_sfcmpeq F32:$src1, (TFRI_f fpimm:$src2)))>; - def: Pat<(i1 (setne F64:$src1, fpimm:$src2)), - (C2_not (F2_dfcmpeq F64:$src1, (CONST64_Float_Real fpimm:$src2)))>; -} - // F2 convert template classes: -let isFP = 1 in +let Uses = [USR], isFP = 1 in class F2_RDD_RSS_CONVERT<string mnemonic, bits<3> MinOp, - SDNode Op, PatLeaf RCOut, PatLeaf RCIn, string chop =""> : SInst <(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss), - "$Rdd = "#mnemonic#"($Rss)"#chop, - [(set RCOut:$Rdd, (Op RCIn:$Rss))], "", + "$Rdd = "#mnemonic#"($Rss)"#chop, [], "", S_2op_tc_3or4x_SLOT23> { bits<5> Rdd; bits<5> Rss; @@ -544,13 +202,11 @@ class F2_RDD_RSS_CONVERT<string mnemonic, bits<3> MinOp, let Inst{4-0} = Rdd; } -let isFP = 1 in +let Uses = [USR], isFP = 1 in class F2_RDD_RS_CONVERT<string mnemonic, bits<3> MinOp, - SDNode Op, PatLeaf RCOut, PatLeaf RCIn, string chop =""> : SInst <(outs DoubleRegs:$Rdd), (ins IntRegs:$Rs), - "$Rdd = "#mnemonic#"($Rs)"#chop, - [(set RCOut:$Rdd, (Op RCIn:$Rs))], "", + "$Rdd = "#mnemonic#"($Rs)"#chop, [], "", S_2op_tc_3or4x_SLOT23> { bits<5> Rdd; bits<5> Rs; @@ -563,13 +219,11 @@ class F2_RDD_RS_CONVERT<string mnemonic, bits<3> MinOp, let Inst{4-0} = Rdd; } -let isFP = 1, hasNewValue = 1 in +let Uses = [USR], isFP = 1, hasNewValue = 1 in class F2_RD_RSS_CONVERT<string mnemonic, bits<3> MinOp, - SDNode Op, PatLeaf RCOut, PatLeaf RCIn, string chop =""> : SInst <(outs IntRegs:$Rd), (ins DoubleRegs:$Rss), - "$Rd = "#mnemonic#"($Rss)"#chop, - [(set RCOut:$Rd, (Op RCIn:$Rss))], "", + "$Rd = "#mnemonic#"($Rss)"#chop, [], "", S_2op_tc_3or4x_SLOT23> { bits<5> Rd; bits<5> Rss; @@ -583,13 +237,11 @@ class F2_RD_RSS_CONVERT<string mnemonic, bits<3> MinOp, let Inst{4-0} = Rd; } -let 
isFP = 1, hasNewValue = 1 in +let Uses = [USR], isFP = 1, hasNewValue = 1 in class F2_RD_RS_CONVERT<string mnemonic, bits<3> MajOp, bits<3> MinOp, - SDNode Op, PatLeaf RCOut, PatLeaf RCIn, string chop =""> : SInst <(outs IntRegs:$Rd), (ins IntRegs:$Rs), - "$Rd = "#mnemonic#"($Rs)"#chop, - [(set RCOut:$Rd, (Op RCIn:$Rs))], "", + "$Rd = "#mnemonic#"($Rs)"#chop, [], "", S_2op_tc_3or4x_SLOT23> { bits<5> Rd; bits<5> Rs; @@ -604,70 +256,45 @@ class F2_RD_RS_CONVERT<string mnemonic, bits<3> MajOp, bits<3> MinOp, } // Convert single precision to double precision and vice-versa. -def F2_conv_sf2df : F2_RDD_RS_CONVERT <"convert_sf2df", 0b000, - fextend, F64, F32>; - -def F2_conv_df2sf : F2_RD_RSS_CONVERT <"convert_df2sf", 0b000, - fround, F32, F64>; +def F2_conv_sf2df : F2_RDD_RS_CONVERT <"convert_sf2df", 0b000>; +def F2_conv_df2sf : F2_RD_RSS_CONVERT <"convert_df2sf", 0b000>; // Convert Integer to Floating Point. -def F2_conv_d2sf : F2_RD_RSS_CONVERT <"convert_d2sf", 0b010, - sint_to_fp, F32, I64>; -def F2_conv_ud2sf : F2_RD_RSS_CONVERT <"convert_ud2sf", 0b001, - uint_to_fp, F32, I64>; -def F2_conv_uw2sf : F2_RD_RS_CONVERT <"convert_uw2sf", 0b001, 0b000, - uint_to_fp, F32, I32>; -def F2_conv_w2sf : F2_RD_RS_CONVERT <"convert_w2sf", 0b010, 0b000, - sint_to_fp, F32, I32>; -def F2_conv_d2df : F2_RDD_RSS_CONVERT <"convert_d2df", 0b011, - sint_to_fp, F64, I64>; -def F2_conv_ud2df : F2_RDD_RSS_CONVERT <"convert_ud2df", 0b010, - uint_to_fp, F64, I64>; -def F2_conv_uw2df : F2_RDD_RS_CONVERT <"convert_uw2df", 0b001, - uint_to_fp, F64, I32>; -def F2_conv_w2df : F2_RDD_RS_CONVERT <"convert_w2df", 0b010, - sint_to_fp, F64, I32>; - -// Convert Floating Point to Integer - default. -def F2_conv_df2uw_chop : F2_RD_RSS_CONVERT <"convert_df2uw", 0b101, - fp_to_uint, I32, F64, ":chop">; -def F2_conv_df2w_chop : F2_RD_RSS_CONVERT <"convert_df2w", 0b111, - fp_to_sint, I32, F64, ":chop">; +def F2_conv_d2sf : F2_RD_RSS_CONVERT <"convert_d2sf", 0b010>; +def F2_conv_ud2sf : F2_RD_RSS_CONVERT <"convert_ud2sf", 0b001>; +def F2_conv_uw2sf : F2_RD_RS_CONVERT <"convert_uw2sf", 0b001, 0b000>; +def F2_conv_w2sf : F2_RD_RS_CONVERT <"convert_w2sf", 0b010, 0b000>; +def F2_conv_d2df : F2_RDD_RSS_CONVERT <"convert_d2df", 0b011>; +def F2_conv_ud2df : F2_RDD_RSS_CONVERT <"convert_ud2df", 0b010>; +def F2_conv_uw2df : F2_RDD_RS_CONVERT <"convert_uw2df", 0b001>; +def F2_conv_w2df : F2_RDD_RS_CONVERT <"convert_w2df", 0b010>; + +// Convert Floating Point to Integer. 
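// Now that the conversion classes carry no SDNode parameter, instruction
// selection for them has to be written (or generated) separately. The
// pattern the old F2_RD_RS_CONVERT class produced for F2_conv_sf2w_chop,
// for instance, is equivalent to this standalone form (a sketch, assuming
// fp_to_sint selection is still wanted for the :chop variant):
def: Pat<(i32 (fp_to_sint F32:$Rs)), (F2_conv_sf2w_chop F32:$Rs)>;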
+def F2_conv_df2uw_chop : F2_RD_RSS_CONVERT <"convert_df2uw", 0b101, ":chop">; +def F2_conv_df2w_chop : F2_RD_RSS_CONVERT <"convert_df2w", 0b111, ":chop">; def F2_conv_sf2uw_chop : F2_RD_RS_CONVERT <"convert_sf2uw", 0b011, 0b001, - fp_to_uint, I32, F32, ":chop">; + ":chop">; def F2_conv_sf2w_chop : F2_RD_RS_CONVERT <"convert_sf2w", 0b100, 0b001, - fp_to_sint, I32, F32, ":chop">; -def F2_conv_df2d_chop : F2_RDD_RSS_CONVERT <"convert_df2d", 0b110, - fp_to_sint, I64, F64, ":chop">; -def F2_conv_df2ud_chop : F2_RDD_RSS_CONVERT <"convert_df2ud", 0b111, - fp_to_uint, I64, F64, ":chop">; -def F2_conv_sf2d_chop : F2_RDD_RS_CONVERT <"convert_sf2d", 0b110, - fp_to_sint, I64, F32, ":chop">; -def F2_conv_sf2ud_chop : F2_RDD_RS_CONVERT <"convert_sf2ud", 0b101, - fp_to_uint, I64, F32, ":chop">; + ":chop">; +def F2_conv_df2d_chop : F2_RDD_RSS_CONVERT <"convert_df2d", 0b110, ":chop">; +def F2_conv_df2ud_chop : F2_RDD_RSS_CONVERT <"convert_df2ud", 0b111, ":chop">; +def F2_conv_sf2d_chop : F2_RDD_RS_CONVERT <"convert_sf2d", 0b110, ":chop">; +def F2_conv_sf2ud_chop : F2_RDD_RS_CONVERT <"convert_sf2ud", 0b101, ":chop">; // Convert Floating Point to Integer: non-chopped. -let AddedComplexity = 20, Predicates = [HasV5T, IEEERndNearV5T] in { - def F2_conv_df2d : F2_RDD_RSS_CONVERT <"convert_df2d", 0b000, - fp_to_sint, I64, F64>; - def F2_conv_df2ud : F2_RDD_RSS_CONVERT <"convert_df2ud", 0b001, - fp_to_uint, I64, F64>; - def F2_conv_sf2ud : F2_RDD_RS_CONVERT <"convert_sf2ud", 0b011, - fp_to_uint, I64, F32>; - def F2_conv_sf2d : F2_RDD_RS_CONVERT <"convert_sf2d", 0b100, - fp_to_sint, I64, F32>; - def F2_conv_df2uw : F2_RD_RSS_CONVERT <"convert_df2uw", 0b011, - fp_to_uint, I32, F64>; - def F2_conv_df2w : F2_RD_RSS_CONVERT <"convert_df2w", 0b100, - fp_to_sint, I32, F64>; - def F2_conv_sf2uw : F2_RD_RS_CONVERT <"convert_sf2uw", 0b011, 0b000, - fp_to_uint, I32, F32>; - def F2_conv_sf2w : F2_RD_RS_CONVERT <"convert_sf2w", 0b100, 0b000, - fp_to_sint, I32, F32>; +let AddedComplexity = 20, Predicates = [HasV5T] in { + def F2_conv_df2d : F2_RDD_RSS_CONVERT <"convert_df2d", 0b000>; + def F2_conv_df2ud : F2_RDD_RSS_CONVERT <"convert_df2ud", 0b001>; + def F2_conv_sf2ud : F2_RDD_RS_CONVERT <"convert_sf2ud", 0b011>; + def F2_conv_sf2d : F2_RDD_RS_CONVERT <"convert_sf2d", 0b100>; + def F2_conv_df2uw : F2_RD_RSS_CONVERT <"convert_df2uw", 0b011>; + def F2_conv_df2w : F2_RD_RSS_CONVERT <"convert_df2w", 0b100>; + def F2_conv_sf2uw : F2_RD_RS_CONVERT <"convert_sf2uw", 0b011, 0b000>; + def F2_conv_sf2w : F2_RD_RS_CONVERT <"convert_sf2w", 0b100, 0b000>; } // Fix up radicand. -let isFP = 1, hasNewValue = 1 in +let Uses = [USR], isFP = 1, hasNewValue = 1 in def F2_sffixupr: SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs), "$Rd = sffixupr($Rs)", [], "" , S_2op_tc_3or4x_SLOT23>, Requires<[HasV5T]> { @@ -682,21 +309,13 @@ def F2_sffixupr: SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs), let Inst{4-0} = Rd; } -// Bitcast is different than [fp|sint|uint]_to_[sint|uint|fp]. -let Predicates = [HasV5T] in { - def: Pat <(i32 (bitconvert F32:$src)), (I32:$src)>; - def: Pat <(f32 (bitconvert I32:$src)), (F32:$src)>; - def: Pat <(i64 (bitconvert F64:$src)), (I64:$src)>; - def: Pat <(f64 (bitconvert I64:$src)), (F64:$src)>; -} - // F2_sffma: Floating-point fused multiply add. 
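// A note on operand order for the fused multiply-add below: ISD's
// (fma a, b, c) computes a * b + c, so the addend c must be routed to the
// tied accumulator input ($dst2/$Rx) while a and b feed the sfmpy. The
// in-tree pattern this hunk removes is equivalent to the sketch:
def: Pat<(f32 (fma F32:$Rs, F32:$Rt, F32:$Rx)),
         (F2_sffma F32:$Rx, F32:$Rs, F32:$Rt)>;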
-let isFP = 1, hasNewValue = 1 in +let Uses = [USR], isFP = 1, hasNewValue = 1 in class T_sfmpy_acc <bit isSub, bit isLib> : MInst<(outs IntRegs:$Rx), (ins IntRegs:$dst2, IntRegs:$Rs, IntRegs:$Rt), "$Rx "#!if(isSub, "-=","+=")#" sfmpy($Rs, $Rt)"#!if(isLib, ":lib",""), - [], "$dst2 = $Rx" , M_tc_3_SLOT23 > , + [], "$dst2 = $Rx" , M_tc_3or4x_SLOT23 > , Requires<[HasV5T]> { bits<5> Rx; bits<5> Rs; @@ -719,16 +338,13 @@ def F2_sffms: T_sfmpy_acc <1, 0>; def F2_sffma_lib: T_sfmpy_acc <0, 1>; def F2_sffms_lib: T_sfmpy_acc <1, 1>; -def : Pat <(f32 (fma F32:$src2, F32:$src3, F32:$src1)), - (F2_sffma F32:$src1, F32:$src2, F32:$src3)>; - // Floating-point fused multiply add w/ additional scaling (2**pu). -let isFP = 1, hasNewValue = 1 in +let Uses = [USR], isFP = 1, hasNewValue = 1 in def F2_sffma_sc: MInst < (outs IntRegs:$Rx), (ins IntRegs:$dst2, IntRegs:$Rs, IntRegs:$Rt, PredRegs:$Pu), "$Rx += sfmpy($Rs, $Rt, $Pu):scale" , - [], "$dst2 = $Rx" , M_tc_3_SLOT23 > , + [], "$dst2 = $Rx" , M_tc_3or4x_SLOT23 > , Requires<[HasV5T]> { bits<5> Rx; bits<5> Rs; @@ -746,54 +362,6 @@ def F2_sffma_sc: MInst < let Inst{4-0} = Rx; } -let isExtended = 1, isExtentSigned = 1, opExtentBits = 8, opExtendable = 3, - isPseudo = 1, InputType = "imm" in -def MUX_ir_f : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, f32Ext:$src3), - "$dst = mux($src1, $src2, #$src3)", - [(set F32:$dst, (f32 (select I1:$src1, F32:$src2, fpimm:$src3)))]>, - Requires<[HasV5T]>; - -let isExtended = 1, isExtentSigned = 1, opExtentBits = 8, opExtendable = 2, - isPseudo = 1, InputType = "imm" in -def MUX_ri_f : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, f32Ext:$src2, IntRegs:$src3), - "$dst = mux($src1, #$src2, $src3)", - [(set F32:$dst, (f32 (select I1:$src1, fpimm:$src2, F32:$src3)))]>, - Requires<[HasV5T]>; - -def: Pat<(select I1:$src1, F32:$src2, F32:$src3), - (C2_mux I1:$src1, F32:$src2, F32:$src3)>, - Requires<[HasV5T]>; - -def: Pat<(select (i1 (setult F32:$src1, F32:$src2)), F32:$src3, F32:$src4), - (C2_mux (F2_sfcmpgt F32:$src2, F32:$src1), F32:$src4, F32:$src3)>, - Requires<[HasV5T]>; - -def: Pat<(select I1:$src1, F64:$src2, F64:$src3), - (C2_vmux I1:$src1, F64:$src2, F64:$src3)>, - Requires<[HasV5T]>; - -def: Pat<(select (i1 (setult F64:$src1, F64:$src2)), F64:$src3, F64:$src4), - (C2_vmux (F2_dfcmpgt F64:$src2, F64:$src1), F64:$src3, F64:$src4)>, - Requires<[HasV5T]>; - -// Map from p0 = pnot(p0); r0 = select(p0, #i, r1) -// => r0 = MUX_ir_f(p0, #i, r1) -def: Pat<(select (not I1:$src1), fpimm:$src2, F32:$src3), - (MUX_ir_f I1:$src1, F32:$src3, fpimm:$src2)>, - Requires<[HasV5T]>; - -// Map from p0 = pnot(p0); r0 = mux(p0, r1, #i) -// => r0 = MUX_ri_f(p0, r1, #i) -def: Pat<(select (not I1:$src1), F32:$src2, fpimm:$src3), - (MUX_ri_f I1:$src1, fpimm:$src3, F32:$src2)>, - Requires<[HasV5T]>; - -def: Pat<(i32 (fp_to_sint F64:$src1)), - (LoReg (F2_conv_df2d_chop F64:$src1))>, - Requires<[HasV5T]>; - //===----------------------------------------------------------------------===// // :natural forms of vasrh and vasrhub insns //===----------------------------------------------------------------------===// @@ -802,7 +370,7 @@ def: Pat<(i32 (fp_to_sint F64:$src1)), let Defs = [USR_OVF], hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in class T_ASRHUB<bit isSat> : SInst <(outs IntRegs:$Rd), - (ins DoubleRegs:$Rss, u4Imm:$u4), + (ins DoubleRegs:$Rss, u4_0Imm:$u4), "$Rd = vasrhub($Rss, #$u4):"#!if(isSat, "sat", "raw"), [], "", S_2op_tc_2_SLOT23>, Requires<[HasV5T]> { @@ -826,13 +394,13 @@ def S5_asrhub_sat : 
T_ASRHUB <1>; let isAsmParserOnly = 1 in def S5_asrhub_rnd_sat_goodsyntax - : SInst <(outs IntRegs:$Rd), (ins DoubleRegs:$Rss, u4Imm:$u4), + : SInst <(outs IntRegs:$Rd), (ins DoubleRegs:$Rss, u4_0Imm:$u4), "$Rd = vasrhub($Rss, #$u4):rnd:sat">, Requires<[HasV5T]>; // S5_vasrhrnd: Vector arithmetic shift right by immediate with round. let hasSideEffects = 0 in def S5_vasrhrnd : SInst <(outs DoubleRegs:$Rdd), - (ins DoubleRegs:$Rss, u4Imm:$u4), + (ins DoubleRegs:$Rss, u4_0Imm:$u4), "$Rdd = vasrh($Rss, #$u4):raw">, Requires<[HasV5T]> { bits<5> Rdd; @@ -851,7 +419,7 @@ def S5_vasrhrnd : SInst <(outs DoubleRegs:$Rdd), let isAsmParserOnly = 1 in def S5_vasrhrnd_goodsyntax - : SInst <(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss, u4Imm:$u4), + : SInst <(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss, u4_0Imm:$u4), "$Rdd = vasrh($Rss,#$u4):rnd">, Requires<[HasV5T]>; // Floating point reciprocal square root approximation @@ -883,11 +451,11 @@ let Defs = [USR_OVF], Itinerary = S_3op_tc_3x_SLOT23 in { } // Classify floating-point value -let isFP = 1 in - def F2_sfclass : T_TEST_BIT_IMM<"sfclass", 0b111>; +let Uses = [USR], isFP = 1 in +def F2_sfclass : T_TEST_BIT_IMM<"sfclass", 0b111>, Requires<[HasV5T]>; -let isFP = 1 in -def F2_dfclass: ALU64Inst<(outs PredRegs:$Pd), (ins DoubleRegs:$Rss, u5Imm:$u5), +let Uses = [USR], isFP = 1 in +def F2_dfclass: ALU64Inst<(outs PredRegs:$Pd), (ins DoubleRegs:$Rss, u5_0Imm:$u5), "$Pd = dfclass($Rss, #$u5)", [], "" , ALU64_tc_2early_SLOT23 > , Requires<[HasV5T]> { bits<2> Pd; @@ -905,9 +473,9 @@ def F2_dfclass: ALU64Inst<(outs PredRegs:$Pd), (ins DoubleRegs:$Rss, u5Imm:$u5), // Instructions to create floating point constant class T_fimm <string mnemonic, RegisterClass RC, bits<4> RegType, bit isNeg> - : ALU64Inst<(outs RC:$dst), (ins u10Imm:$src), + : ALU64Inst<(outs RC:$dst), (ins u10_0Imm:$src), "$dst = "#mnemonic#"(#$src)"#!if(isNeg, ":neg", ":pos"), - [], "", ALU64_tc_3x_SLOT23>, Requires<[HasV5T]> { + [], "", ALU64_tc_2_SLOT23>, Requires<[HasV5T]> { bits<5> dst; bits<10> src; @@ -921,17 +489,9 @@ class T_fimm <string mnemonic, RegisterClass RC, bits<4> RegType, bit isNeg> } let hasNewValue = 1, opNewValue = 0 in { -def F2_sfimm_p : T_fimm <"sfmake", IntRegs, 0b0110, 0>; -def F2_sfimm_n : T_fimm <"sfmake", IntRegs, 0b0110, 1>; + def F2_sfimm_p : T_fimm <"sfmake", IntRegs, 0b0110, 0>; + def F2_sfimm_n : T_fimm <"sfmake", IntRegs, 0b0110, 1>; } def F2_dfimm_p : T_fimm <"dfmake", DoubleRegs, 0b1001, 0>; def F2_dfimm_n : T_fimm <"dfmake", DoubleRegs, 0b1001, 1>; - -def : Pat <(fabs (f32 IntRegs:$src1)), - (S2_clrbit_i (f32 IntRegs:$src1), 31)>, - Requires<[HasV5T]>; - -def : Pat <(fneg (f32 IntRegs:$src1)), - (S2_togglebit_i (f32 IntRegs:$src1), 31)>, - Requires<[HasV5T]>; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV60.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV60.td index c3f09b6..c50141b 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV60.td +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV60.td @@ -10,33 +10,6 @@ // This file describes the Hexagon V60 instructions in TableGen format. 
// //===----------------------------------------------------------------------===// -def alignedload : PatFrag<(ops node:$addr), (load $addr), [{ - return isAlignedMemNode(dyn_cast<MemSDNode>(N)); -}]>; - -def unalignedload : PatFrag<(ops node:$addr), (load $addr), [{ - return !isAlignedMemNode(dyn_cast<MemSDNode>(N)); -}]>; - -def alignedstore : PatFrag<(ops node:$val, node:$addr), (store $val, $addr), [{ - return isAlignedMemNode(dyn_cast<MemSDNode>(N)); -}]>; - -def unalignedstore : PatFrag<(ops node:$val, node:$addr), (store $val, $addr), [{ - return !isAlignedMemNode(dyn_cast<MemSDNode>(N)); -}]>; - - -// Vector store -let mayStore = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in -{ - class VSTInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], - string cstr = "", InstrItinClass itin = CVI_VM_ST, - IType type = TypeCVI_VM_ST> - : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, type>, OpcodeHexagon; - -} - // Vector load let Predicates = [HasV60T, UseHVX] in let mayLoad = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in @@ -45,6 +18,7 @@ let mayLoad = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in IType type = TypeCVI_VM_LD> : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, type>; +// Vector store let Predicates = [HasV60T, UseHVX] in let mayStore = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in class V6_STInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], @@ -219,6 +193,8 @@ let isNVStorable = 1 in { def V6_vS32b_npred_ai_128B : T_vstore_pred_ai_128B <"vmem", "vS32b_ai", 1>, V6_vS32b_npred_ai_128B_enc; } + + let isNVStorable = 1, isNonTemporal = 1 in { def V6_vS32b_nt_pred_ai : T_vstore_pred_ai_64B <"vmem", "vS32b_ai", 0, 1>, V6_vS32b_nt_pred_ai_enc; @@ -774,256 +750,60 @@ def V6_vS32b_nt_new_npred_ppu : T_vstore_new_pred_ppu<1, 1>, V6_vS32b_nt_new_npred_ppu_enc; } -let isPseudo = 1, validSubTargets = HasV60SubT in -class STrivv_template<string mnemonic, Operand ImmOp, RegisterClass RC>: - VSTInst<(outs), (ins IntRegs:$addr, ImmOp:$off, RC:$src), - #mnemonic#"($addr+#$off) = $src", []>; - -def STrivv_indexed: STrivv_template<"vvmem", s4_6Imm, VecDblRegs>, - Requires<[HasV60T, UseHVXSgl]>; -def STrivv_indexed_128B: STrivv_template<"vvmem", s4_7Imm, VecDblRegs128B>, - Requires<[HasV60T, UseHVXDbl]>; - -multiclass STrivv_pats <ValueType VTSgl, ValueType VTDbl> { - def : Pat<(store (VTSgl VecDblRegs:$src1), IntRegs:$addr), - (STrivv_indexed IntRegs:$addr, #0, (VTSgl VecDblRegs:$src1))>, - Requires<[UseHVXSgl]>; - - def : Pat<(store (VTDbl VecDblRegs128B:$src1), IntRegs:$addr), - (STrivv_indexed_128B IntRegs:$addr, #0, - (VTDbl VecDblRegs128B:$src1))>, - Requires<[UseHVXDbl]>; -} - -defm : STrivv_pats <v128i8, v256i8>; -defm : STrivv_pats <v64i16, v128i16>; -defm : STrivv_pats <v32i32, v64i32>; -defm : STrivv_pats <v16i64, v32i64>; - - -multiclass vS32b_ai_pats <ValueType VTSgl, ValueType VTDbl> { - // Aligned stores - def : Pat<(alignedstore (VTSgl VectorRegs:$src1), IntRegs:$addr), - (V6_vS32b_ai IntRegs:$addr, #0, (VTSgl VectorRegs:$src1))>, - Requires<[UseHVXSgl]>; - def : Pat<(unalignedstore (VTSgl VectorRegs:$src1), IntRegs:$addr), - (V6_vS32Ub_ai IntRegs:$addr, #0, (VTSgl VectorRegs:$src1))>, - Requires<[UseHVXSgl]>; - - // 128B Aligned stores - def : Pat<(alignedstore (VTDbl VectorRegs128B:$src1), IntRegs:$addr), - (V6_vS32b_ai_128B IntRegs:$addr, #0, (VTDbl VectorRegs128B:$src1))>, - Requires<[UseHVXDbl]>; - def : Pat<(unalignedstore (VTDbl VectorRegs128B:$src1), IntRegs:$addr), - (V6_vS32Ub_ai_128B IntRegs:$addr, #0, 
(VTDbl VectorRegs128B:$src1))>, - Requires<[UseHVXDbl]>; - - // Fold Add R+IFF into vector store. - let AddedComplexity = 10 in { - def : Pat<(alignedstore (VTSgl VectorRegs:$src1), - (add IntRegs:$src2, s4_6ImmPred:$offset)), - (V6_vS32b_ai IntRegs:$src2, s4_6ImmPred:$offset, - (VTSgl VectorRegs:$src1))>, - Requires<[UseHVXSgl]>; - def : Pat<(unalignedstore (VTSgl VectorRegs:$src1), - (add IntRegs:$src2, s4_6ImmPred:$offset)), - (V6_vS32Ub_ai IntRegs:$src2, s4_6ImmPred:$offset, - (VTSgl VectorRegs:$src1))>, - Requires<[UseHVXSgl]>; - - // Fold Add R+IFF into vector store 128B. - def : Pat<(alignedstore (VTDbl VectorRegs128B:$src1), - (add IntRegs:$src2, s4_7ImmPred:$offset)), - (V6_vS32b_ai_128B IntRegs:$src2, s4_7ImmPred:$offset, - (VTDbl VectorRegs128B:$src1))>, - Requires<[UseHVXDbl]>; - def : Pat<(unalignedstore (VTDbl VectorRegs128B:$src1), - (add IntRegs:$src2, s4_7ImmPred:$offset)), - (V6_vS32Ub_ai_128B IntRegs:$src2, s4_7ImmPred:$offset, - (VTDbl VectorRegs128B:$src1))>, - Requires<[UseHVXDbl]>; - } -} - -defm : vS32b_ai_pats <v64i8, v128i8>; -defm : vS32b_ai_pats <v32i16, v64i16>; -defm : vS32b_ai_pats <v16i32, v32i32>; -defm : vS32b_ai_pats <v8i64, v16i64>; - -let isPseudo = 1, validSubTargets = HasV60SubT in -class LDrivv_template<string mnemonic, Operand ImmOp, RegisterClass RC> - : V6_LDInst <(outs RC:$dst), (ins IntRegs:$addr, ImmOp:$off), - "$dst="#mnemonic#"($addr+#$off)", - []>, - Requires<[HasV60T,UseHVXSgl]>; - -def LDrivv_indexed: LDrivv_template<"vvmem", s4_6Imm, VecDblRegs>; -def LDrivv_indexed_128B: LDrivv_template<"vvmem", s4_7Imm, VecDblRegs128B>; - -multiclass LDrivv_pats <ValueType VTSgl, ValueType VTDbl> { - def : Pat < (VTSgl (load IntRegs:$addr)), - (LDrivv_indexed IntRegs:$addr, #0) >, - Requires<[UseHVXSgl]>; - - def : Pat < (VTDbl (load IntRegs:$addr)), - (LDrivv_indexed_128B IntRegs:$addr, #0) >, - Requires<[UseHVXDbl]>; -} - -defm : LDrivv_pats <v128i8, v256i8>; -defm : LDrivv_pats <v64i16, v128i16>; -defm : LDrivv_pats <v32i32, v64i32>; -defm : LDrivv_pats <v16i64, v32i64>; - -multiclass vL32b_ai_pats <ValueType VTSgl, ValueType VTDbl> { - // Aligned loads - def : Pat < (VTSgl (alignedload IntRegs:$addr)), - (V6_vL32b_ai IntRegs:$addr, #0) >, - Requires<[UseHVXSgl]>; - def : Pat < (VTSgl (unalignedload IntRegs:$addr)), - (V6_vL32Ub_ai IntRegs:$addr, #0) >, - Requires<[UseHVXSgl]>; - - // 128B Load - def : Pat < (VTDbl (alignedload IntRegs:$addr)), - (V6_vL32b_ai_128B IntRegs:$addr, #0) >, - Requires<[UseHVXDbl]>; - def : Pat < (VTDbl (unalignedload IntRegs:$addr)), - (V6_vL32Ub_ai_128B IntRegs:$addr, #0) >, - Requires<[UseHVXDbl]>; - - // Fold Add R+IFF into vector load. - let AddedComplexity = 10 in { - def : Pat<(VTDbl (alignedload (add IntRegs:$src2, s4_7ImmPred:$offset))), - (V6_vL32b_ai_128B IntRegs:$src2, s4_7ImmPred:$offset)>, - Requires<[UseHVXDbl]>; - def : Pat<(VTDbl (unalignedload (add IntRegs:$src2, s4_7ImmPred:$offset))), - (V6_vL32Ub_ai_128B IntRegs:$src2, s4_7ImmPred:$offset)>, - Requires<[UseHVXDbl]>; - - def : Pat<(VTSgl (alignedload (add IntRegs:$src2, s4_6ImmPred:$offset))), - (V6_vL32b_ai IntRegs:$src2, s4_6ImmPred:$offset)>, - Requires<[UseHVXSgl]>; - def : Pat<(VTSgl (unalignedload (add IntRegs:$src2, s4_6ImmPred:$offset))), - (V6_vL32Ub_ai IntRegs:$src2, s4_6ImmPred:$offset)>, - Requires<[UseHVXSgl]>; - } -} - -defm : vL32b_ai_pats <v64i8, v128i8>; -defm : vL32b_ai_pats <v32i16, v64i16>; -defm : vL32b_ai_pats <v16i32, v32i32>; -defm : vL32b_ai_pats <v8i64, v16i64>; -// Store vector predicate pseudo. 
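A note on the AddedComplexity = 10 folds above: they absorb an explicit add into the vector load/store addressing mode only when the offset passes the immediate predicate. The following standalone C++ sketch is illustration only, not part of the patch, and assumes the usual reading of s4_6 as a signed 4-bit field scaled by 64 (one 64-byte vector):

#include <cassert>
#include <cstdint>

// Hypothetical helper mirroring s4_6ImmPred: a byte offset is foldable
// into V6_vL32b_ai / V6_vS32b_ai only if it is a multiple of the 64-byte
// vector size and the scaled value fits in a signed 4-bit field.
bool isS4_6(int32_t off) {
  return (off % 64 == 0) && (off / 64 >= -8) && (off / 64 <= 7);
}

int main() {
  assert(isS4_6(0) && isS4_6(448) && isS4_6(-512));      // representable
  assert(!isS4_6(512) && !isS4_6(100) && !isS4_6(-576)); // fold rejected
}

When the check fails, the add stays a separate instruction and the load/store uses offset #0, exactly as in the unfolded patterns above.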
-let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13, - isCodeGenOnly = 1, isPseudo = 1, mayStore = 1, hasSideEffects = 0 in { -def STriq_pred_V6 : STInst<(outs), - (ins IntRegs:$base, s32Imm:$offset, VecPredRegs:$src1), - ".error \"should not emit\" ", - []>, - Requires<[HasV60T,UseHVXSgl]>; - -def STriq_pred_vec_V6 : STInst<(outs), - (ins IntRegs:$base, s32Imm:$offset, VectorRegs:$src1), - ".error \"should not emit\" ", - []>, - Requires<[HasV60T,UseHVXSgl]>; - -def STriq_pred_V6_128B : STInst<(outs), - (ins IntRegs:$base, s32Imm:$offset, VecPredRegs128B:$src1), - ".error \"should not emit\" ", - []>, - Requires<[HasV60T,UseHVXDbl]>; - -def STriq_pred_vec_V6_128B : STInst<(outs), - (ins IntRegs:$base, s32Imm:$offset, VectorRegs128B:$src1), - ".error \"should not emit\" ", - []>, - Requires<[HasV60T,UseHVXDbl]>; -} +// Vector load/store pseudos -// Load vector predicate pseudo. -let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13, - opExtentAlign = 2, isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in { -def LDriq_pred_V6 : LDInst<(outs VecPredRegs:$dst), - (ins IntRegs:$base, s32Imm:$offset), - ".error \"should not emit\" ", - []>, - Requires<[HasV60T,UseHVXSgl]>; -def LDriq_pred_vec_V6 : LDInst<(outs VectorRegs:$dst), - (ins IntRegs:$base, s32Imm:$offset), - ".error \"should not emit\" ", - []>, - Requires<[HasV60T,UseHVXSgl]>; -def LDriq_pred_V6_128B : LDInst<(outs VecPredRegs128B:$dst), - (ins IntRegs:$base, s32Imm:$offset), - ".error \"should not emit\" ", - []>, - Requires<[HasV60T,UseHVXDbl]>; -def LDriq_pred_vec_V6_128B : LDInst<(outs VectorRegs128B:$dst), - (ins IntRegs:$base, s32Imm:$offset), - ".error \"should not emit\" ", - []>, - Requires<[HasV60T,UseHVXDbl]>; -} - -// Store vector pseudo. -let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13, - isCodeGenOnly = 1, isPseudo = 1, mayStore = 1, hasSideEffects = 0 in { -def STriv_pseudo_V6 : STInst<(outs), - (ins IntRegs:$base, s32Imm:$offset, VectorRegs:$src1), - ".error \"should not emit\" ", - []>, - Requires<[HasV60T,UseHVXSgl]>; -def STriv_pseudo_V6_128B : STInst<(outs), - (ins IntRegs:$base, s32Imm:$offset, VectorRegs128B:$src1), - ".error \"should not emit\" ", - []>, - Requires<[HasV60T,UseHVXDbl]>; -} +let isPseudo = 1, isCodeGenOnly = 1, validSubTargets = HasV60SubT in +class STrivv_template<RegisterClass RC> + : V6_STInst<(outs), (ins IntRegs:$addr, s32_0Imm:$off, RC:$src), "", []>; + +def PS_vstorerw_ai: STrivv_template<VecDblRegs>, + Requires<[HasV60T,UseHVXSgl]>; +def PS_vstorerwu_ai: STrivv_template<VecDblRegs>, + Requires<[HasV60T,UseHVXSgl]>; +def PS_vstorerw_ai_128B: STrivv_template<VecDblRegs128B>, + Requires<[HasV60T,UseHVXDbl]>; +def PS_vstorerwu_ai_128B: STrivv_template<VecDblRegs128B>, + Requires<[HasV60T,UseHVXDbl]>; + + +let isPseudo = 1, isCodeGenOnly = 1, validSubTargets = HasV60SubT in +class LDrivv_template<RegisterClass RC> + : V6_LDInst<(outs RC:$dst), (ins IntRegs:$addr, s32_0Imm:$off), "", []>; +def PS_vloadrw_ai: LDrivv_template<VecDblRegs>, + Requires<[HasV60T,UseHVXSgl]>; +def PS_vloadrwu_ai: LDrivv_template<VecDblRegs>, + Requires<[HasV60T,UseHVXSgl]>; +def PS_vloadrw_ai_128B: LDrivv_template<VecDblRegs128B>, + Requires<[HasV60T,UseHVXDbl]>; +def PS_vloadrwu_ai_128B: LDrivv_template<VecDblRegs128B>, + Requires<[HasV60T,UseHVXDbl]>; + +// Store vector predicate pseudo. 
let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13, isCodeGenOnly = 1, isPseudo = 1, mayStore = 1, hasSideEffects = 0 in { -def STrivv_pseudo_V6 : STInst<(outs), - (ins IntRegs:$base, s32Imm:$offset, VecDblRegs:$src1), - ".error \"should not emit\" ", - []>, - Requires<[HasV60T,UseHVXSgl]>; -def STrivv_pseudo_V6_128B : STInst<(outs), - (ins IntRegs:$base, s32Imm:$offset, VecDblRegs128B:$src1), - ".error \"should not emit\" ", - []>, - Requires<[HasV60T,UseHVXDbl]>; -} - -// Load vector pseudo. -let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13, - opExtentAlign = 2, isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in { -def LDriv_pseudo_V6 : LDInst<(outs VectorRegs:$dst), - (ins IntRegs:$base, s32Imm:$offset), - ".error \"should not emit\" ", - []>, - Requires<[HasV60T,UseHVXSgl]>; -def LDriv_pseudo_V6_128B : LDInst<(outs VectorRegs128B:$dst), - (ins IntRegs:$base, s32Imm:$offset), - ".error \"should not emit\" ", - []>, - Requires<[HasV60T,UseHVXDbl]>; + def PS_vstorerq_ai : STInst<(outs), + (ins IntRegs:$base, s32_0Imm:$offset, VecPredRegs:$src1), + ".error \"should not emit\"", []>, + Requires<[HasV60T,UseHVXSgl]>; + def PS_vstorerq_ai_128B : STInst<(outs), + (ins IntRegs:$base, s32_0Imm:$offset, VecPredRegs128B:$src1), + ".error \"should not emit\"", []>, + Requires<[HasV60T,UseHVXDbl]>; } +// Load vector predicate pseudo. let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13, opExtentAlign = 2, isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in { -def LDrivv_pseudo_V6 : LDInst<(outs VecDblRegs:$dst), - (ins IntRegs:$base, s32Imm:$offset), - ".error \"should not emit\" ", - []>, - Requires<[HasV60T,UseHVXSgl]>; -def LDrivv_pseudo_V6_128B : LDInst<(outs VecDblRegs128B:$dst), - (ins IntRegs:$base, s32Imm:$offset), - ".error \"should not emit\" ", - []>, - Requires<[HasV60T,UseHVXDbl]>; + def PS_vloadrq_ai : LDInst<(outs VecPredRegs:$dst), + (ins IntRegs:$base, s32_0Imm:$offset), + ".error \"should not emit\"", []>, + Requires<[HasV60T,UseHVXSgl]>; + def PS_vloadrq_ai_128B : LDInst<(outs VecPredRegs128B:$dst), + (ins IntRegs:$base, s32_0Imm:$offset), + ".error \"should not emit\"", []>, + Requires<[HasV60T,UseHVXDbl]>; } class VSELInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], @@ -1032,26 +812,19 @@ class VSELInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, type>; let isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in { -def VSelectPseudo_V6 : VSELInst<(outs VectorRegs:$dst), - (ins PredRegs:$src1, VectorRegs:$src2, VectorRegs:$src3), - ".error \"should not emit\" ", - []>, - Requires<[HasV60T,UseHVXSgl]>; -def VSelectDblPseudo_V6 : VSELInst<(outs VecDblRegs:$dst), - (ins PredRegs:$src1, VecDblRegs:$src2, VecDblRegs:$src3), - ".error \"should not emit\" ", - []>, - Requires<[HasV60T,UseHVXSgl]>; -} - -def : Pat <(v16i32 (selectcc (i32 IntRegs:$lhs), (i32 IntRegs:$rhs), - (v16i32 VectorRegs:$tval), - (v16i32 VectorRegs:$fval), SETEQ)), - (v16i32 (VSelectPseudo_V6 (i32 (C2_cmpeq (i32 IntRegs:$lhs), - (i32 IntRegs:$rhs))), - (v16i32 VectorRegs:$tval), - (v16i32 VectorRegs:$fval)))>; - + def PS_vselect: VSELInst<(outs VectorRegs:$dst), + (ins PredRegs:$src1, VectorRegs:$src2, VectorRegs:$src3), "", []>, + Requires<[HasV60T,UseHVXSgl]>; + def PS_vselect_128B: VSELInst<(outs VectorRegs128B:$dst), + (ins PredRegs:$src1, VectorRegs128B:$src2, VectorRegs128B:$src3), + "", []>, Requires<[HasV60T,UseHVXDbl]>; + def PS_wselect: 
VSELInst<(outs VecDblRegs:$dst), + (ins PredRegs:$src1, VecDblRegs:$src2, VecDblRegs:$src3), "", []>, + Requires<[HasV60T,UseHVXSgl]>; + def PS_wselect_128B: VSELInst<(outs VecDblRegs128B:$dst), + (ins PredRegs:$src1, VecDblRegs128B:$src2, VecDblRegs128B:$src3), + "", []>, Requires<[HasV60T,UseHVXDbl]>; +} let hasNewValue = 1 in class T_vmpy <string asmString, RegisterClass RCout, RegisterClass RCin> @@ -1581,20 +1354,6 @@ let isRegSequence = 1, Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV in defm V6_vcombine : T_HVX_alu_WV <"$dst = vcombine($src1,$src2)">, V6_vcombine_enc; -def SDTHexagonVCOMBINE: SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>, - SDTCisSubVecOfVec<1, 0>]>; - -def HexagonVCOMBINE: SDNode<"HexagonISD::VCOMBINE", SDTHexagonVCOMBINE>; - -def: Pat<(v32i32 (HexagonVCOMBINE (v16i32 VectorRegs:$Vs), - (v16i32 VectorRegs:$Vt))), - (V6_vcombine VectorRegs:$Vs, VectorRegs:$Vt)>, - Requires<[UseHVXSgl]>; -def: Pat<(v64i32 (HexagonVCOMBINE (v32i32 VecDblRegs:$Vs), - (v32i32 VecDblRegs:$Vt))), - (V6_vcombine_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>, - Requires<[UseHVXDbl]>; - let Itinerary = CVI_VINLANESAT, Type = TypeCVI_VINLANESAT in { defm V6_vsathub : T_HVX_alu_VV <"$dst.ub = vsat($src1.h,$src2.h)">, V6_vsathub_enc; @@ -1782,7 +1541,7 @@ let isAccumulator = 1, hasNewValue = 1, Itinerary = CVI_VX_DV_LONG, Type = TypeCVI_VX_DV in class T_HVX_vmpyacc2 <string asmString, RegisterClass RC> : CVI_VA_Resource1 <(outs RC:$dst), - (ins RC:$_src_, RC:$src1, IntRegs:$src2, u1Imm:$src3), + (ins RC:$_src_, RC:$src1, IntRegs:$src2, u1_0Imm:$src3), asmString, [], "$dst = $_src_" > ; @@ -1806,7 +1565,7 @@ defm V6_vrmpyubi_acc : let Itinerary = CVI_VX_DV_LONG, Type = TypeCVI_VX_DV, hasNewValue = 1 in class T_HVX_vmpy2 <string asmString, RegisterClass RC> - : CVI_VA_Resource1<(outs RC:$dst), (ins RC:$src1, IntRegs:$src2, u1Imm:$src3), + : CVI_VA_Resource1<(outs RC:$dst), (ins RC:$src1, IntRegs:$src2, u1_0Imm:$src3), asmString>; @@ -1958,7 +1717,7 @@ defm V6_vunpackoh : T_HVX_unpack <"$dst.w |= vunpacko($src1.h)">, V6_vunpackoh_e let Itinerary = CVI_VP_LONG, Type = TypeCVI_VP, hasNewValue = 1, hasSideEffects = 0 in class T_HVX_valign <string asmString, RegisterClass RC> - : CVI_VA_Resource1<(outs RC:$dst), (ins RC:$src1, RC:$src2, u3Imm:$src3), + : CVI_VA_Resource1<(outs RC:$dst), (ins RC:$src1, RC:$src2, u3_0Imm:$src3), asmString>; multiclass T_HVX_valign <string asmString> { @@ -2095,9 +1854,9 @@ class T_HVX_rol <string asmString, RegisterClass RC, Operand ImmOp > : SInst2 <(outs RC:$dst), (ins RC:$src1, ImmOp:$src2), asmString>; class T_HVX_rol_R <string asmString> - : T_HVX_rol <asmString, IntRegs, u5Imm>; + : T_HVX_rol <asmString, IntRegs, u5_0Imm>; class T_HVX_rol_P <string asmString> - : T_HVX_rol <asmString, DoubleRegs, u6Imm>; + : T_HVX_rol <asmString, DoubleRegs, u6_0Imm>; def S6_rol_i_p : T_HVX_rol_P <"$dst = rol($src1,#$src2)">, S6_rol_i_p_enc; let hasNewValue = 1, opNewValue = 0 in @@ -2109,10 +1868,10 @@ class T_HVX_rol_acc <string asmString, RegisterClass RC, Operand ImmOp> asmString, [], "$dst = $_src_" >; class T_HVX_rol_acc_P <string asmString> - : T_HVX_rol_acc <asmString, DoubleRegs, u6Imm>; + : T_HVX_rol_acc <asmString, DoubleRegs, u6_0Imm>; class T_HVX_rol_acc_R <string asmString> - : T_HVX_rol_acc <asmString, IntRegs, u5Imm>; + : T_HVX_rol_acc <asmString, IntRegs, u5_0Imm>; def S6_rol_i_p_nac : T_HVX_rol_acc_P <"$dst -= rol($src1,#$src2)">, S6_rol_i_p_nac_enc; @@ -2285,3 +2044,25 @@ def V6_vhistq def V6_vhist : CVI_HIST_Resource1 <(outs), (ins), "vhist" >, V6_vhist_enc; + + +let isPseudo 
= 1, isCodeGenOnly = 1, hasSideEffects = 0 in { + def V6_vd0: CVI_VA_Resource<(outs VectorRegs:$dst), (ins), "$dst = #0", []>; + def V6_vd0_128B: CVI_VA_Resource<(outs VectorRegs128B:$dst), (ins), + "$dst = #0", []>; + + def V6_vassignp: CVI_VA_Resource<(outs VecDblRegs:$dst), + (ins VecDblRegs:$src), "", []>; + def V6_vassignp_128B : CVI_VA_Resource<(outs VecDblRegs128B:$dst), + (ins VecDblRegs128B:$src), "", []>; + + def V6_lo: CVI_VA_Resource<(outs VectorRegs:$dst), (ins VecDblRegs:$src1), + "", []>; + def V6_lo_128B: CVI_VA_Resource<(outs VectorRegs128B:$dst), + (ins VecDblRegs128B:$src1), "", []>; + + def V6_hi: CVI_VA_Resource<(outs VectorRegs:$dst), (ins VecDblRegs:$src1), + "", []>; + def V6_hi_128B: CVI_VA_Resource<(outs VectorRegs128B:$dst), + (ins VecDblRegs128B:$src1), "", []>; +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoVector.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoVector.td index 0277d5e..e3520bd 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoVector.td +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoVector.td @@ -11,37 +11,6 @@ // //===----------------------------------------------------------------------===// -def V2I1: PatLeaf<(v2i1 PredRegs:$R)>; -def V4I1: PatLeaf<(v4i1 PredRegs:$R)>; -def V8I1: PatLeaf<(v8i1 PredRegs:$R)>; -def V4I8: PatLeaf<(v4i8 IntRegs:$R)>; -def V2I16: PatLeaf<(v2i16 IntRegs:$R)>; -def V8I8: PatLeaf<(v8i8 DoubleRegs:$R)>; -def V4I16: PatLeaf<(v4i16 DoubleRegs:$R)>; -def V2I32: PatLeaf<(v2i32 DoubleRegs:$R)>; - - -multiclass bitconvert_32<ValueType a, ValueType b> { - def : Pat <(b (bitconvert (a IntRegs:$src))), - (b IntRegs:$src)>; - def : Pat <(a (bitconvert (b IntRegs:$src))), - (a IntRegs:$src)>; -} - -multiclass bitconvert_64<ValueType a, ValueType b> { - def : Pat <(b (bitconvert (a DoubleRegs:$src))), - (b DoubleRegs:$src)>; - def : Pat <(a (bitconvert (b DoubleRegs:$src))), - (a DoubleRegs:$src)>; -} - -// Bit convert vector types to integers. -defm : bitconvert_32<v4i8, i32>; -defm : bitconvert_32<v2i16, i32>; -defm : bitconvert_64<v8i8, i64>; -defm : bitconvert_64<v4i16, i64>; -defm : bitconvert_64<v2i32, i64>; - // Vector shift support. Vector shifting in Hexagon is rather different // from internal representation of LLVM. // LLVM assumes all shifts (in vector case) will have the form @@ -51,27 +20,17 @@ defm : bitconvert_64<v2i32, i64>; // As a result, special care is needed to guarantee correctness and // performance. 
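To make the comment above concrete before the vshift classes that follow: LLVM's generic vector shift gives every lane its own shift amount, while the Hexagon vasrh/vaslw-style instructions take one amount for all lanes, so only splatted amounts map onto a single instruction. A standalone C++ sketch (illustration only, not LLVM code):

#include <array>
#include <cassert>
#include <cstdint>

using V4I16 = std::array<int16_t, 4>;

// Generic LLVM form: <4 x i16> = ashr <4 x i16> %v, <4 x i16> %amt.
V4I16 ashr_generic(V4I16 v, V4I16 amt) {
  V4I16 r{};
  for (int i = 0; i < 4; ++i)
    r[i] = static_cast<int16_t>(v[i] >> amt[i]);
  return r;
}

// Hexagon form: vasrh(Rss, #u4) shifts every halfword by the same amount.
V4I16 vasrh(V4I16 v, unsigned u4) {
  assert(u4 < 16 && "immediate is a 4-bit field");
  V4I16 r{};
  for (int i = 0; i < 4; ++i)
    r[i] = static_cast<int16_t>(v[i] >> u4);
  return r;
}

int main() {
  V4I16 v{-64, 64, -2, 2};
  // A splatted amount makes the two forms agree...
  assert(ashr_generic(v, {3, 3, 3, 3}) == vasrh(v, 3));
  // ...but a non-splat amount has no single-instruction mapping,
  // which is the correctness hazard the comment above warns about.
  return 0;
}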
class vshift_v4i16<SDNode Op, string Str, bits<3>MajOp, bits<3>MinOp> - : S_2OpInstImm<Str, MajOp, MinOp, u4Imm, - [(set (v4i16 DoubleRegs:$dst), - (Op (v4i16 DoubleRegs:$src1), u4ImmPred:$src2))]> { + : S_2OpInstImm<Str, MajOp, MinOp, u4_0Imm, []> { bits<4> src2; let Inst{11-8} = src2; } class vshift_v2i32<SDNode Op, string Str, bits<3>MajOp, bits<3>MinOp> - : S_2OpInstImm<Str, MajOp, MinOp, u5Imm, - [(set (v2i32 DoubleRegs:$dst), - (Op (v2i32 DoubleRegs:$src1), u5ImmPred:$src2))]> { + : S_2OpInstImm<Str, MajOp, MinOp, u5_0Imm, []> { bits<5> src2; let Inst{12-8} = src2; } -def : Pat<(v2i16 (add (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))), - (A2_svaddh IntRegs:$src1, IntRegs:$src2)>; - -def : Pat<(v2i16 (sub (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))), - (A2_svsubh IntRegs:$src1, IntRegs:$src2)>; - def S2_asr_i_vw : vshift_v2i32<sra, "vasrw", 0b010, 0b000>; def S2_lsr_i_vw : vshift_v2i32<srl, "vlsrw", 0b010, 0b001>; def S2_asl_i_vw : vshift_v2i32<shl, "vaslw", 0b010, 0b010>; @@ -80,87 +39,6 @@ def S2_asr_i_vh : vshift_v4i16<sra, "vasrh", 0b100, 0b000>; def S2_lsr_i_vh : vshift_v4i16<srl, "vlsrh", 0b100, 0b001>; def S2_asl_i_vh : vshift_v4i16<shl, "vaslh", 0b100, 0b010>; - -def HexagonVSPLATB: SDNode<"HexagonISD::VSPLATB", SDTUnaryOp>; -def HexagonVSPLATH: SDNode<"HexagonISD::VSPLATH", SDTUnaryOp>; - -// Replicate the low 8-bits from 32-bits input register into each of the -// four bytes of 32-bits destination register. -def: Pat<(v4i8 (HexagonVSPLATB I32:$Rs)), (S2_vsplatrb I32:$Rs)>; - -// Replicate the low 16-bits from 32-bits input register into each of the -// four halfwords of 64-bits destination register. -def: Pat<(v4i16 (HexagonVSPLATH I32:$Rs)), (S2_vsplatrh I32:$Rs)>; - - -class VArith_pat <InstHexagon MI, SDNode Op, PatFrag Type> - : Pat <(Op Type:$Rss, Type:$Rtt), - (MI Type:$Rss, Type:$Rtt)>; - -def: VArith_pat <A2_vaddub, add, V8I8>; -def: VArith_pat <A2_vaddh, add, V4I16>; -def: VArith_pat <A2_vaddw, add, V2I32>; -def: VArith_pat <A2_vsubub, sub, V8I8>; -def: VArith_pat <A2_vsubh, sub, V4I16>; -def: VArith_pat <A2_vsubw, sub, V2I32>; - -def: VArith_pat <A2_and, and, V2I16>; -def: VArith_pat <A2_xor, xor, V2I16>; -def: VArith_pat <A2_or, or, V2I16>; - -def: VArith_pat <A2_andp, and, V8I8>; -def: VArith_pat <A2_andp, and, V4I16>; -def: VArith_pat <A2_andp, and, V2I32>; -def: VArith_pat <A2_orp, or, V8I8>; -def: VArith_pat <A2_orp, or, V4I16>; -def: VArith_pat <A2_orp, or, V2I32>; -def: VArith_pat <A2_xorp, xor, V8I8>; -def: VArith_pat <A2_xorp, xor, V4I16>; -def: VArith_pat <A2_xorp, xor, V2I32>; - -def: Pat<(v2i32 (sra V2I32:$b, (i64 (HexagonCOMBINE (i32 u5ImmPred:$c), - (i32 u5ImmPred:$c))))), - (S2_asr_i_vw V2I32:$b, imm:$c)>; -def: Pat<(v2i32 (srl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5ImmPred:$c), - (i32 u5ImmPred:$c))))), - (S2_lsr_i_vw V2I32:$b, imm:$c)>; -def: Pat<(v2i32 (shl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5ImmPred:$c), - (i32 u5ImmPred:$c))))), - (S2_asl_i_vw V2I32:$b, imm:$c)>; - -def: Pat<(v4i16 (sra V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4ImmPred:$c)))))), - (S2_asr_i_vh V4I16:$b, imm:$c)>; -def: Pat<(v4i16 (srl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4ImmPred:$c)))))), - (S2_lsr_i_vh V4I16:$b, imm:$c)>; -def: Pat<(v4i16 (shl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4ImmPred:$c)))))), - (S2_asl_i_vh V4I16:$b, imm:$c)>; - - -def SDTHexagon_v2i32_v2i32_i32 : SDTypeProfile<1, 2, - [SDTCisSameAs<0, 1>, SDTCisVT<0, v2i32>, SDTCisInt<2>]>; -def SDTHexagon_v4i16_v4i16_i32 : SDTypeProfile<1, 2, - [SDTCisSameAs<0, 1>, SDTCisVT<0, v4i16>, SDTCisInt<2>]>; - -def 
HexagonVSRAW: SDNode<"HexagonISD::VSRAW", SDTHexagon_v2i32_v2i32_i32>; -def HexagonVSRAH: SDNode<"HexagonISD::VSRAH", SDTHexagon_v4i16_v4i16_i32>; -def HexagonVSRLW: SDNode<"HexagonISD::VSRLW", SDTHexagon_v2i32_v2i32_i32>; -def HexagonVSRLH: SDNode<"HexagonISD::VSRLH", SDTHexagon_v4i16_v4i16_i32>; -def HexagonVSHLW: SDNode<"HexagonISD::VSHLW", SDTHexagon_v2i32_v2i32_i32>; -def HexagonVSHLH: SDNode<"HexagonISD::VSHLH", SDTHexagon_v4i16_v4i16_i32>; - -def: Pat<(v2i32 (HexagonVSRAW V2I32:$Rs, u5ImmPred:$u5)), - (S2_asr_i_vw V2I32:$Rs, imm:$u5)>; -def: Pat<(v4i16 (HexagonVSRAH V4I16:$Rs, u4ImmPred:$u4)), - (S2_asr_i_vh V4I16:$Rs, imm:$u4)>; -def: Pat<(v2i32 (HexagonVSRLW V2I32:$Rs, u5ImmPred:$u5)), - (S2_lsr_i_vw V2I32:$Rs, imm:$u5)>; -def: Pat<(v4i16 (HexagonVSRLH V4I16:$Rs, u4ImmPred:$u4)), - (S2_lsr_i_vh V4I16:$Rs, imm:$u4)>; -def: Pat<(v2i32 (HexagonVSHLW V2I32:$Rs, u5ImmPred:$u5)), - (S2_asl_i_vw V2I32:$Rs, imm:$u5)>; -def: Pat<(v4i16 (HexagonVSHLH V4I16:$Rs, u4ImmPred:$u4)), - (S2_asl_i_vh V4I16:$Rs, imm:$u4)>; - // Vector shift words by register def S2_asr_r_vw : T_S3op_shiftVect < "vasrw", 0b00, 0b00>; def S2_lsr_r_vw : T_S3op_shiftVect < "vlsrw", 0b00, 0b01>; @@ -173,305 +51,19 @@ def S2_lsr_r_vh : T_S3op_shiftVect < "vlsrh", 0b01, 0b01>; def S2_asl_r_vh : T_S3op_shiftVect < "vaslh", 0b01, 0b10>; def S2_lsl_r_vh : T_S3op_shiftVect < "vlslh", 0b01, 0b11>; -class vshift_rr_pat<InstHexagon MI, SDNode Op, PatFrag Value> - : Pat <(Op Value:$Rs, I32:$Rt), - (MI Value:$Rs, I32:$Rt)>; - -def: vshift_rr_pat <S2_asr_r_vw, HexagonVSRAW, V2I32>; -def: vshift_rr_pat <S2_asr_r_vh, HexagonVSRAH, V4I16>; -def: vshift_rr_pat <S2_lsr_r_vw, HexagonVSRLW, V2I32>; -def: vshift_rr_pat <S2_lsr_r_vh, HexagonVSRLH, V4I16>; -def: vshift_rr_pat <S2_asl_r_vw, HexagonVSHLW, V2I32>; -def: vshift_rr_pat <S2_asl_r_vh, HexagonVSHLH, V4I16>; - - -def SDTHexagonVecCompare_v8i8 : SDTypeProfile<1, 2, - [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v8i8>]>; -def SDTHexagonVecCompare_v4i16 : SDTypeProfile<1, 2, - [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v4i16>]>; -def SDTHexagonVecCompare_v2i32 : SDTypeProfile<1, 2, - [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v2i32>]>; - -def HexagonVCMPBEQ: SDNode<"HexagonISD::VCMPBEQ", SDTHexagonVecCompare_v8i8>; -def HexagonVCMPBGT: SDNode<"HexagonISD::VCMPBGT", SDTHexagonVecCompare_v8i8>; -def HexagonVCMPBGTU: SDNode<"HexagonISD::VCMPBGTU", SDTHexagonVecCompare_v8i8>; -def HexagonVCMPHEQ: SDNode<"HexagonISD::VCMPHEQ", SDTHexagonVecCompare_v4i16>; -def HexagonVCMPHGT: SDNode<"HexagonISD::VCMPHGT", SDTHexagonVecCompare_v4i16>; -def HexagonVCMPHGTU: SDNode<"HexagonISD::VCMPHGTU", SDTHexagonVecCompare_v4i16>; -def HexagonVCMPWEQ: SDNode<"HexagonISD::VCMPWEQ", SDTHexagonVecCompare_v2i32>; -def HexagonVCMPWGT: SDNode<"HexagonISD::VCMPWGT", SDTHexagonVecCompare_v2i32>; -def HexagonVCMPWGTU: SDNode<"HexagonISD::VCMPWGTU", SDTHexagonVecCompare_v2i32>; - - -class vcmp_i1_pat<InstHexagon MI, SDNode Op, PatFrag Value> - : Pat <(i1 (Op Value:$Rs, Value:$Rt)), - (MI Value:$Rs, Value:$Rt)>; - -def: vcmp_i1_pat<A2_vcmpbeq, HexagonVCMPBEQ, V8I8>; -def: vcmp_i1_pat<A4_vcmpbgt, HexagonVCMPBGT, V8I8>; -def: vcmp_i1_pat<A2_vcmpbgtu, HexagonVCMPBGTU, V8I8>; - -def: vcmp_i1_pat<A2_vcmpheq, HexagonVCMPHEQ, V4I16>; -def: vcmp_i1_pat<A2_vcmphgt, HexagonVCMPHGT, V4I16>; -def: vcmp_i1_pat<A2_vcmphgtu, HexagonVCMPHGTU, V4I16>; - -def: vcmp_i1_pat<A2_vcmpweq, HexagonVCMPWEQ, V2I32>; -def: vcmp_i1_pat<A2_vcmpwgt, HexagonVCMPWGT, V2I32>; -def: vcmp_i1_pat<A2_vcmpwgtu, HexagonVCMPWGTU, V2I32>; - - 
-class vcmp_vi1_pat<InstHexagon MI, PatFrag Op, PatFrag InVal, ValueType OutTy>
-  : Pat <(OutTy (Op InVal:$Rs, InVal:$Rt)),
-         (MI InVal:$Rs, InVal:$Rt)>;
-
-def: vcmp_vi1_pat<A2_vcmpweq, seteq, V2I32, v2i1>;
-def: vcmp_vi1_pat<A2_vcmpwgt, setgt, V2I32, v2i1>;
-def: vcmp_vi1_pat<A2_vcmpwgtu, setugt, V2I32, v2i1>;
-
-def: vcmp_vi1_pat<A2_vcmpheq, seteq, V4I16, v4i1>;
-def: vcmp_vi1_pat<A2_vcmphgt, setgt, V4I16, v4i1>;
-def: vcmp_vi1_pat<A2_vcmphgtu, setugt, V4I16, v4i1>;
-
 // Hexagon doesn't have a vector multiply with C semantics.
 // Instead, generate a pseudo instruction that gets expanded into two
 // scalar MPYI instructions.
 // This is expanded by ExpandPostRAPseudos.
 let isPseudo = 1 in
-def VMULW : PseudoM<(outs DoubleRegs:$Rd),
-      (ins DoubleRegs:$Rs, DoubleRegs:$Rt),
-      ".error \"Should never try to emit VMULW\"",
-      [(set V2I32:$Rd, (mul V2I32:$Rs, V2I32:$Rt))]>;
+def PS_vmulw : PseudoM<(outs DoubleRegs:$Rd),
+      (ins DoubleRegs:$Rs, DoubleRegs:$Rt), "", []>;
 let isPseudo = 1 in
-def VMULW_ACC : PseudoM<(outs DoubleRegs:$Rd),
-      (ins DoubleRegs:$Rx, DoubleRegs:$Rs, DoubleRegs:$Rt),
-      ".error \"Should never try to emit VMULW_ACC\"",
-      [(set V2I32:$Rd, (add V2I32:$Rx, (mul V2I32:$Rs, V2I32:$Rt)))],
+def PS_vmulw_acc : PseudoM<(outs DoubleRegs:$Rd),
+      (ins DoubleRegs:$Rx, DoubleRegs:$Rs, DoubleRegs:$Rt), "", [],
       "$Rd = $Rx">;

-// Adds two v4i8: Hexagon does not have an insn for this one, so we
-// use the double add v8i8, and use only the low part of the result.
-def: Pat<(v4i8 (add (v4i8 IntRegs:$Rs), (v4i8 IntRegs:$Rt))),
-         (LoReg (A2_vaddub (Zext64 $Rs), (Zext64 $Rt)))>;
-
-// Subtract two v4i8: Hexagon does not have an insn for this one, so we
-// use the double sub v8i8, and use only the low part of the result.
-def: Pat<(v4i8 (sub (v4i8 IntRegs:$Rs), (v4i8 IntRegs:$Rt))),
-         (LoReg (A2_vsubub (Zext64 $Rs), (Zext64 $Rt)))>;
-
-//
-// No 32 bit vector mux.
-//
-def: Pat<(v4i8 (select I1:$Pu, V4I8:$Rs, V4I8:$Rt)),
-         (LoReg (C2_vmux I1:$Pu, (Zext64 $Rs), (Zext64 $Rt)))>;
-def: Pat<(v2i16 (select I1:$Pu, V2I16:$Rs, V2I16:$Rt)),
-         (LoReg (C2_vmux I1:$Pu, (Zext64 $Rs), (Zext64 $Rt)))>;
-
-//
-// 64-bit vector mux.
-//
-def: Pat<(v8i8 (vselect V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)),
-         (C2_vmux V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)>;
-def: Pat<(v4i16 (vselect V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)),
-         (C2_vmux V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)>;
-def: Pat<(v2i32 (vselect V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)),
-         (C2_vmux V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)>;
-
-//
-// No 32 bit vector compare.
-//
-def: Pat<(i1 (seteq V4I8:$Rs, V4I8:$Rt)),
-         (A2_vcmpbeq (Zext64 $Rs), (Zext64 $Rt))>;
-def: Pat<(i1 (setgt V4I8:$Rs, V4I8:$Rt)),
-         (A4_vcmpbgt (Zext64 $Rs), (Zext64 $Rt))>;
-def: Pat<(i1 (setugt V4I8:$Rs, V4I8:$Rt)),
-         (A2_vcmpbgtu (Zext64 $Rs), (Zext64 $Rt))>;
-
-def: Pat<(i1 (seteq V2I16:$Rs, V2I16:$Rt)),
-         (A2_vcmpheq (Zext64 $Rs), (Zext64 $Rt))>;
-def: Pat<(i1 (setgt V2I16:$Rs, V2I16:$Rt)),
-         (A2_vcmphgt (Zext64 $Rs), (Zext64 $Rt))>;
-def: Pat<(i1 (setugt V2I16:$Rs, V2I16:$Rt)),
-         (A2_vcmphgtu (Zext64 $Rs), (Zext64 $Rt))>;
-
-
-class InvertCmp_pat<InstHexagon InvMI, PatFrag CmpOp, PatFrag Value,
-                    ValueType CmpTy>
-  : Pat<(CmpTy (CmpOp Value:$Rs, Value:$Rt)),
-        (InvMI Value:$Rt, Value:$Rs)>;
-
-// Map from a compare operation to the corresponding instruction with the
-// order of operands reversed, e.g. x > y --> cmp.lt(y,x).
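The (removed) InvertCmp_pat mapping used just below rests on a plain ordering identity: Hexagon provides only greater-than vector compares, so a setlt/setult node is matched by swapping the operands of vcmpgt/vcmpgtu. A minimal standalone check in C++ (not part of the patch):

#include <cassert>
#include <cstdint>

bool cmpgt(int32_t a, int32_t b) { return a > b; }     // A2_vcmpwgt, per lane
bool cmpgtu(uint32_t a, uint32_t b) { return a > b; }  // A2_vcmpwgtu, per lane

// setlt(x, y) == cmpgt(y, x); setult(x, y) == cmpgtu(y, x)
int main() {
  for (int x : {-2, 0, 7})
    for (int y : {-2, 0, 7}) {
      assert((x < y) == cmpgt(y, x));
      assert((static_cast<uint32_t>(x) < static_cast<uint32_t>(y)) ==
             cmpgtu(y, x));
    }
  return 0;
}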
-def: InvertCmp_pat<A4_vcmpbgt, setlt, V8I8, i1>; -def: InvertCmp_pat<A4_vcmpbgt, setlt, V8I8, v8i1>; -def: InvertCmp_pat<A2_vcmphgt, setlt, V4I16, i1>; -def: InvertCmp_pat<A2_vcmphgt, setlt, V4I16, v4i1>; -def: InvertCmp_pat<A2_vcmpwgt, setlt, V2I32, i1>; -def: InvertCmp_pat<A2_vcmpwgt, setlt, V2I32, v2i1>; - -def: InvertCmp_pat<A2_vcmpbgtu, setult, V8I8, i1>; -def: InvertCmp_pat<A2_vcmpbgtu, setult, V8I8, v8i1>; -def: InvertCmp_pat<A2_vcmphgtu, setult, V4I16, i1>; -def: InvertCmp_pat<A2_vcmphgtu, setult, V4I16, v4i1>; -def: InvertCmp_pat<A2_vcmpwgtu, setult, V2I32, i1>; -def: InvertCmp_pat<A2_vcmpwgtu, setult, V2I32, v2i1>; - -// Map from vcmpne(Rss) -> !vcmpew(Rss). -// rs != rt -> !(rs == rt). -def: Pat<(v2i1 (setne V2I32:$Rs, V2I32:$Rt)), - (C2_not (v2i1 (A2_vcmpbeq V2I32:$Rs, V2I32:$Rt)))>; - - -// Truncate: from vector B copy all 'E'ven 'B'yte elements: -// A[0] = B[0]; A[1] = B[2]; A[2] = B[4]; A[3] = B[6]; -def: Pat<(v4i8 (trunc V4I16:$Rs)), - (S2_vtrunehb V4I16:$Rs)>; - -// Truncate: from vector B copy all 'O'dd 'B'yte elements: -// A[0] = B[1]; A[1] = B[3]; A[2] = B[5]; A[3] = B[7]; -// S2_vtrunohb - -// Truncate: from vectors B and C copy all 'E'ven 'H'alf-word elements: -// A[0] = B[0]; A[1] = B[2]; A[2] = C[0]; A[3] = C[2]; -// S2_vtruneh - -def: Pat<(v2i16 (trunc V2I32:$Rs)), - (LoReg (S2_packhl (HiReg $Rs), (LoReg $Rs)))>; - - -def HexagonVSXTBH : SDNode<"HexagonISD::VSXTBH", SDTUnaryOp>; -def HexagonVSXTBW : SDNode<"HexagonISD::VSXTBW", SDTUnaryOp>; - -def: Pat<(i64 (HexagonVSXTBH I32:$Rs)), (S2_vsxtbh I32:$Rs)>; -def: Pat<(i64 (HexagonVSXTBW I32:$Rs)), (S2_vsxthw I32:$Rs)>; - -def: Pat<(v4i16 (zext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>; -def: Pat<(v2i32 (zext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>; -def: Pat<(v4i16 (anyext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>; -def: Pat<(v2i32 (anyext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>; -def: Pat<(v4i16 (sext V4I8:$Rs)), (S2_vsxtbh V4I8:$Rs)>; -def: Pat<(v2i32 (sext V2I16:$Rs)), (S2_vsxthw V2I16:$Rs)>; - -// Sign extends a v2i8 into a v2i32. -def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i8)), - (A2_combinew (A2_sxtb (HiReg $Rs)), (A2_sxtb (LoReg $Rs)))>; - -// Sign extends a v2i16 into a v2i32. -def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i16)), - (A2_combinew (A2_sxth (HiReg $Rs)), (A2_sxth (LoReg $Rs)))>; - - -// Multiplies two v2i16 and returns a v2i32. We are using here the -// saturating multiply, as hexagon does not provide a non saturating -// vector multiply, and saturation does not impact the result that is -// in double precision of the operands. - -// Multiplies two v2i16 vectors: as Hexagon does not have a multiply -// with the C semantics for this one, this pattern uses the half word -// multiply vmpyh that takes two v2i16 and returns a v2i32. This is -// then truncated to fit this back into a v2i16 and to simulate the -// wrap around semantics for unsigned in C. -def vmpyh: OutPatFrag<(ops node:$Rs, node:$Rt), - (M2_vmpy2s_s0 (i32 $Rs), (i32 $Rt))>; - -def: Pat<(v2i16 (mul V2I16:$Rs, V2I16:$Rt)), - (LoReg (S2_vtrunewh (v2i32 (A2_combineii 0, 0)), - (v2i32 (vmpyh V2I16:$Rs, V2I16:$Rt))))>; - -// Multiplies two v4i16 vectors. -def: Pat<(v4i16 (mul V4I16:$Rs, V4I16:$Rt)), - (S2_vtrunewh (vmpyh (HiReg $Rs), (HiReg $Rt)), - (vmpyh (LoReg $Rs), (LoReg $Rt)))>; - -def VMPYB_no_V5: OutPatFrag<(ops node:$Rs, node:$Rt), - (S2_vtrunewh (vmpyh (HiReg (S2_vsxtbh $Rs)), (HiReg (S2_vsxtbh $Rt))), - (vmpyh (LoReg (S2_vsxtbh $Rs)), (LoReg (S2_vsxtbh $Rt))))>; - -// Multiplies two v4i8 vectors. 
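The multiply patterns above lean on the fact that a widening multiply followed by truncation reproduces C's modulo-2^16 wrap-around exactly; that is also why the saturating vmpyh (M2_vmpy2s_s0) is safe to use, since a 16x16-bit product always fits in 32 bits and the saturation never fires, as the original comment notes. A standalone C++ check of the identity (illustration only):

#include <cassert>
#include <cstdint>

// What a C multiply of two 16-bit lanes does: promote, multiply, truncate.
int16_t mul_c_semantics(int16_t a, int16_t b) {
  return static_cast<int16_t>(a * b);
}

// The pattern's route: widen each product to 32 bits (the vmpyh lane
// result), then truncate back to 16 bits (the vtrunewh/vtrunehb step).
int16_t mul_via_widening(int16_t a, int16_t b) {
  int32_t wide = static_cast<int32_t>(a) * static_cast<int32_t>(b);
  return static_cast<int16_t>(wide);
}

int main() {
  for (int32_t a = -32768; a < 32768; a += 257)
    for (int32_t b = -32768; b < 32768; b += 997)
      assert(mul_c_semantics(int16_t(a), int16_t(b)) ==
             mul_via_widening(int16_t(a), int16_t(b)));
  return 0;
}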
-def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)), - (S2_vtrunehb (M5_vmpybsu V4I8:$Rs, V4I8:$Rt))>, - Requires<[HasV5T]>; - -def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)), - (S2_vtrunehb (VMPYB_no_V5 V4I8:$Rs, V4I8:$Rt))>; - -// Multiplies two v8i8 vectors. -def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)), - (A2_combinew (S2_vtrunehb (M5_vmpybsu (HiReg $Rs), (HiReg $Rt))), - (S2_vtrunehb (M5_vmpybsu (LoReg $Rs), (LoReg $Rt))))>, - Requires<[HasV5T]>; - -def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)), - (A2_combinew (S2_vtrunehb (VMPYB_no_V5 (HiReg $Rs), (HiReg $Rt))), - (S2_vtrunehb (VMPYB_no_V5 (LoReg $Rs), (LoReg $Rt))))>; - - -class shuffler<SDNode Op, string Str> - : SInst<(outs DoubleRegs:$a), (ins DoubleRegs:$b, DoubleRegs:$c), - "$a = " # Str # "($b, $c)", - [(set (i64 DoubleRegs:$a), - (i64 (Op (i64 DoubleRegs:$b), (i64 DoubleRegs:$c))))], - "", S_3op_tc_1_SLOT23>; - -def SDTHexagonBinOp64 : SDTypeProfile<1, 2, - [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<0, i64>]>; - -def HexagonSHUFFEB: SDNode<"HexagonISD::SHUFFEB", SDTHexagonBinOp64>; -def HexagonSHUFFEH: SDNode<"HexagonISD::SHUFFEH", SDTHexagonBinOp64>; -def HexagonSHUFFOB: SDNode<"HexagonISD::SHUFFOB", SDTHexagonBinOp64>; -def HexagonSHUFFOH: SDNode<"HexagonISD::SHUFFOH", SDTHexagonBinOp64>; - -class ShufflePat<InstHexagon MI, SDNode Op> - : Pat<(i64 (Op DoubleRegs:$src1, DoubleRegs:$src2)), - (i64 (MI DoubleRegs:$src1, DoubleRegs:$src2))>; - -// Shuffles even bytes for i=0..3: A[2*i].b = C[2*i].b; A[2*i+1].b = B[2*i].b -def: ShufflePat<S2_shuffeb, HexagonSHUFFEB>; - -// Shuffles odd bytes for i=0..3: A[2*i].b = C[2*i+1].b; A[2*i+1].b = B[2*i+1].b -def: ShufflePat<S2_shuffob, HexagonSHUFFOB>; - -// Shuffles even half for i=0,1: A[2*i].h = C[2*i].h; A[2*i+1].h = B[2*i].h -def: ShufflePat<S2_shuffeh, HexagonSHUFFEH>; - -// Shuffles odd half for i=0,1: A[2*i].h = C[2*i+1].h; A[2*i+1].h = B[2*i+1].h -def: ShufflePat<S2_shuffoh, HexagonSHUFFOH>; - - -// Truncated store from v4i16 to v4i8. -def truncstorev4i8: PatFrag<(ops node:$val, node:$ptr), - (truncstore node:$val, node:$ptr), - [{ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4i8; }]>; - -// Truncated store from v2i32 to v2i16. -def truncstorev2i16: PatFrag<(ops node:$val, node:$ptr), - (truncstore node:$val, node:$ptr), - [{ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v2i16; }]>; - -def: Pat<(truncstorev2i16 V2I32:$Rs, I32:$Rt), - (S2_storeri_io I32:$Rt, 0, (LoReg (S2_packhl (HiReg $Rs), - (LoReg $Rs))))>; - -def: Pat<(truncstorev4i8 V4I16:$Rs, I32:$Rt), - (S2_storeri_io I32:$Rt, 0, (S2_vtrunehb V4I16:$Rs))>; - - -// Zero and sign extended load from v2i8 into v2i16. 
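For the (removed) widening-load patterns that follow, the operation being modeled is simple: load two packed bytes as one halfword, then zero- or sign-extend each byte into a 16-bit lane (the vzxtbh/vsxtbh step). A standalone C++ sketch, not part of the patch:

#include <cassert>
#include <cstdint>

// Two packed bytes -> two 16-bit lanes, zero- or sign-extended.
void loadv2i8_zext(const uint8_t *p, uint16_t out[2]) {
  out[0] = p[0];
  out[1] = p[1];
}

void loadv2i8_sext(const uint8_t *p, int16_t out[2]) {
  out[0] = static_cast<int8_t>(p[0]);
  out[1] = static_cast<int8_t>(p[1]);
}

int main() {
  const uint8_t mem[2] = {0x80, 0x7F};
  uint16_t z[2];
  int16_t s[2];
  loadv2i8_zext(mem, z);
  loadv2i8_sext(mem, s);
  assert(z[0] == 0x0080 && z[1] == 0x007F); // zextloadv2i8
  assert(s[0] == -128 && s[1] == 127);      // sextloadv2i8
  return 0;
}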
-def zextloadv2i8: PatFrag<(ops node:$ptr), (zextload node:$ptr), - [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8; }]>; - -def sextloadv2i8: PatFrag<(ops node:$ptr), (sextload node:$ptr), - [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8; }]>; - -def: Pat<(v2i16 (zextloadv2i8 I32:$Rs)), - (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Rs, 0))))>; - -def: Pat<(v2i16 (sextloadv2i8 I32:$Rs)), - (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0))))>; -def: Pat<(v2i32 (zextloadv2i8 I32:$Rs)), - (S2_vzxthw (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Rs, 0)))))>; -def: Pat<(v2i32 (sextloadv2i8 I32:$Rs)), - (S2_vsxthw (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0)))))>; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td index a319dd4..d4f303b 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td +++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td @@ -774,13 +774,13 @@ def: T_RR_pat<A2_combine_hl, int_hexagon_A2_combine_hl>; def: T_RR_pat<A2_combine_lh, int_hexagon_A2_combine_lh>; def: T_RR_pat<A2_combine_ll, int_hexagon_A2_combine_ll>; -def: T_II_pat<A2_combineii, int_hexagon_A2_combineii, s32ImmPred, s8ImmPred>; +def: T_II_pat<A2_combineii, int_hexagon_A2_combineii, s32_0ImmPred, s8_0ImmPred>; // Mux def : T_QRR_pat<C2_mux, int_hexagon_C2_mux>; -def : T_QRI_pat<C2_muxir, int_hexagon_C2_muxir, s32ImmPred>; -def : T_QIR_pat<C2_muxri, int_hexagon_C2_muxri, s32ImmPred>; -def : T_QII_pat<C2_muxii, int_hexagon_C2_muxii, s32ImmPred, s8ImmPred>; +def : T_QRI_pat<C2_muxir, int_hexagon_C2_muxir, s32_0ImmPred>; +def : T_QIR_pat<C2_muxri, int_hexagon_C2_muxri, s32_0ImmPred>; +def : T_QII_pat<C2_muxii, int_hexagon_C2_muxii, s32_0ImmPred, s8_0ImmPred>; // Shift halfword def : T_R_pat<A2_aslh, int_hexagon_A2_aslh>; @@ -801,17 +801,15 @@ def : T_Q_RR_pat<C2_cmpeq, int_hexagon_C2_cmpeq>; def : T_Q_RR_pat<C2_cmpgt, int_hexagon_C2_cmpgt>; def : T_Q_RR_pat<C2_cmpgtu, int_hexagon_C2_cmpgtu>; -def : T_Q_RI_pat<C2_cmpeqi, int_hexagon_C2_cmpeqi, s32ImmPred>; -def : T_Q_RI_pat<C2_cmpgti, int_hexagon_C2_cmpgti, s32ImmPred>; -def : T_Q_RI_pat<C2_cmpgtui, int_hexagon_C2_cmpgtui, u32ImmPred>; +def : T_Q_RI_pat<C2_cmpeqi, int_hexagon_C2_cmpeqi, s32_0ImmPred>; +def : T_Q_RI_pat<C2_cmpgti, int_hexagon_C2_cmpgti, s32_0ImmPred>; +def : T_Q_RI_pat<C2_cmpgtui, int_hexagon_C2_cmpgtui, u32_0ImmPred>; -def : Pat <(int_hexagon_C2_cmpgei I32:$src1, s32ImmPred:$src2), - (C2_tfrpr (C2_cmpgti I32:$src1, - (DEC_CONST_SIGNED s32ImmPred:$src2)))>; +def : Pat <(int_hexagon_C2_cmpgei I32:$src1, s32_0ImmPred:$src2), + (C2_tfrpr (C2_cmpgti I32:$src1, (SDEC1 s32_0ImmPred:$src2)))>; -def : Pat <(int_hexagon_C2_cmpgeui I32:$src1, u32ImmPred:$src2), - (C2_tfrpr (C2_cmpgtui I32:$src1, - (DEC_CONST_UNSIGNED u32ImmPred:$src2)))>; +def : Pat <(int_hexagon_C2_cmpgeui I32:$src1, u32_0ImmPred:$src2), + (C2_tfrpr (C2_cmpgtui I32:$src1, (UDEC1 u32_0ImmPred:$src2)))>; def : Pat <(int_hexagon_C2_cmpgeui I32:$src, 0), (C2_tfrpr (C2_cmpeq I32:$src, I32:$src))>; @@ -1104,14 +1102,14 @@ def : Pat<(i64 (int_hexagon_S2_insertp_rp I64:$src1, I64:$src2, I64:$src3)), (i64 (S2_insertp_rp I64:$src1, I64:$src2, I64:$src3))>; def : Pat<(int_hexagon_S2_insert I32:$src1, I32:$src2, - u5ImmPred:$src3, u5ImmPred:$src4), + u5_0ImmPred:$src3, u5_0ImmPred:$src4), (S2_insert I32:$src1, I32:$src2, - u5ImmPred:$src3, u5ImmPred:$src4)>; + u5_0ImmPred:$src3, u5_0ImmPred:$src4)>; def : Pat<(i64 (int_hexagon_S2_insertp I64:$src1, I64:$src2, - u6ImmPred:$src3, 
u6ImmPred:$src4)), + u6_0ImmPred:$src3, u6_0ImmPred:$src4)), (i64 (S2_insertp I64:$src1, I64:$src2, - u6ImmPred:$src3, u6ImmPred:$src4))>; + u6_0ImmPred:$src3, u6_0ImmPred:$src4))>; // Innterleave/deinterleave def : T_P_pat <S2_interleave, int_hexagon_S2_interleave>; @@ -1239,10 +1237,19 @@ def : T_RI_pat <S2_asl_i_r_sat, int_hexagon_S2_asl_i_r_sat>; //===----------------------------------------------------------------------===// class S2op_tableidx_pat <Intrinsic IntID, InstHexagon OutputInst, SDNodeXForm XformImm> - : Pat <(IntID I32:$src1, I32:$src2, u4ImmPred:$src3, u5ImmPred:$src4), - (OutputInst I32:$src1, I32:$src2, u4ImmPred:$src3, - (XformImm u5ImmPred:$src4))>; + : Pat <(IntID I32:$src1, I32:$src2, u4_0ImmPred:$src3, u5_0ImmPred:$src4), + (OutputInst I32:$src1, I32:$src2, u4_0ImmPred:$src3, + (XformImm u5_0ImmPred:$src4))>; +def SDEC2 : SDNodeXForm<imm, [{ + int32_t V = N->getSExtValue(); + return CurDAG->getTargetConstant(V-2, SDLoc(N), MVT::i32); +}]>; + +def SDEC3 : SDNodeXForm<imm, [{ + int32_t V = N->getSExtValue(); + return CurDAG->getTargetConstant(V-3, SDLoc(N), MVT::i32); +}]>; // Table Index : Extract and insert bits. // Map to the real hardware instructions after subtracting appropriate @@ -1250,16 +1257,16 @@ class S2op_tableidx_pat <Intrinsic IntID, InstHexagon OutputInst, // needed for int_hexagon_S2_tableidxb_goodsyntax. def : Pat <(int_hexagon_S2_tableidxb_goodsyntax I32:$src1, I32:$src2, - u4ImmPred:$src3, u5ImmPred:$src4), + u4_0ImmPred:$src3, u5_0ImmPred:$src4), (S2_tableidxb I32:$src1, I32:$src2, - u4ImmPred:$src3, u5ImmPred:$src4)>; + u4_0ImmPred:$src3, u5_0ImmPred:$src4)>; def : S2op_tableidx_pat <int_hexagon_S2_tableidxh_goodsyntax, S2_tableidxh, - DEC_CONST_SIGNED>; + SDEC1>; def : S2op_tableidx_pat <int_hexagon_S2_tableidxw_goodsyntax, S2_tableidxw, - DEC2_CONST_SIGNED>; + SDEC2>; def : S2op_tableidx_pat <int_hexagon_S2_tableidxd_goodsyntax, S2_tableidxd, - DEC3_CONST_SIGNED>; + SDEC3>; //******************************************************************* // STYPE/VH diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsDerived.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsDerived.td index 4c28b28..400c173 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsDerived.td +++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsDerived.td @@ -20,21 +20,21 @@ def : Pat <(mul DoubleRegs:$src1, DoubleRegs:$src2), (EXTRACT_SUBREG (i64 (M2_dpmpyuu_s0 (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), - subreg_loreg)), + isub_lo)), (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), - subreg_loreg)))), - subreg_hireg)), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_loreg)), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_hireg))), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_loreg)), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_hireg))), + isub_lo)))), + isub_hi)), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), isub_lo)), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), isub_hi))), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), isub_lo)), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), isub_hi))), (i32 (EXTRACT_SUBREG (i64 (M2_dpmpyuu_s0 - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_loreg)), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), isub_lo)), (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), - subreg_loreg)))), subreg_loreg))))>; + isub_lo)))), isub_lo))))>; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV4.td index 578973d..2affe53 100644 --- 
a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV4.td +++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV4.td @@ -167,15 +167,15 @@ def : T_PPR_pat <A4_vrminuw, int_hexagon_A4_vrminuw>; // Rotate and reduce bytes def : Pat <(int_hexagon_S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2, - u2ImmPred:$src3), - (S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2, u2ImmPred:$src3)>; + u2_0ImmPred:$src3), + (S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2, u2_0ImmPred:$src3)>; // Rotate and reduce bytes with accumulation // Rxx+=vrcrotate(Rss,Rt,#u2) def : Pat <(int_hexagon_S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2, - IntRegs:$src3, u2ImmPred:$src4), + IntRegs:$src3, u2_0ImmPred:$src4), (S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2, - IntRegs:$src3, u2ImmPred:$src4)>; + IntRegs:$src3, u2_0ImmPred:$src4)>; // Vector conditional negate def : T_PPR_pat<S2_vrcnegh, int_hexagon_S2_vrcnegh>; @@ -223,17 +223,17 @@ def: T_RR_pat<A4_orn, int_hexagon_A4_orn>; //******************************************************************* // Combine Words Into Doublewords. -def: T_RI_pat<A4_combineri, int_hexagon_A4_combineri, s32ImmPred>; -def: T_IR_pat<A4_combineir, int_hexagon_A4_combineir, s32ImmPred>; +def: T_RI_pat<A4_combineri, int_hexagon_A4_combineri, s32_0ImmPred>; +def: T_IR_pat<A4_combineir, int_hexagon_A4_combineir, s32_0ImmPred>; //******************************************************************* // ALU32/PRED //******************************************************************* // Compare -def : T_Q_RI_pat<C4_cmpneqi, int_hexagon_C4_cmpneqi, s32ImmPred>; -def : T_Q_RI_pat<C4_cmpltei, int_hexagon_C4_cmpltei, s32ImmPred>; -def : T_Q_RI_pat<C4_cmplteui, int_hexagon_C4_cmplteui, u32ImmPred>; +def : T_Q_RI_pat<C4_cmpneqi, int_hexagon_C4_cmpneqi, s32_0ImmPred>; +def : T_Q_RI_pat<C4_cmpltei, int_hexagon_C4_cmpltei, s32_0ImmPred>; +def : T_Q_RI_pat<C4_cmplteui, int_hexagon_C4_cmplteui, u32_0ImmPred>; // Compare To General Register. 
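The SDEC1/UDEC1 rewrites earlier in this diff and the C4_cmpltei/C4_cmplteui forms here all rest on one identity: Hexagon has no immediate >= compare, so cmp.ge(x, #imm) is emitted as cmp.gt(x, #imm-1), with the unsigned #0 case special-cased to an always-true compare (C2_cmpeq of a register with itself), as the patterns above show. A standalone C++ check (plain C semantics, not LLVM code):

#include <cassert>
#include <cstdint>

int main() {
  // Signed: within the immediate range there is no wrap, so the
  // decrement-by-one rewrite is exact.
  for (int64_t imm = -100; imm <= 100; ++imm)
    for (int64_t x = -130; x <= 130; ++x)
      assert((x >= imm) == (x > imm - 1));

  // Unsigned: imm == 0 would wrap, which is why it gets its own
  // always-true pattern; for imm >= 1 the rewrite is exact.
  for (uint32_t imm = 1; imm <= 200; ++imm)
    for (uint32_t x = 0; x <= 300; ++x)
      assert((x >= imm) == (x > imm - 1));
  return 0;
}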
def: T_Q_RR_pat<C4_cmpneq, int_hexagon_C4_cmpneq>; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV60.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV60.td index 82bc91b..a45e1c9 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV60.td +++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV60.td @@ -12,72 +12,21 @@ //===----------------------------------------------------------------------===// -let isCodeGenOnly = 1 in { -def HEXAGON_V6_vd0_pseudo : CVI_VA_Resource<(outs VectorRegs:$dst), - (ins ), - "$dst=#0", - [(set VectorRegs:$dst, (int_hexagon_V6_vd0 ))]>; - -def HEXAGON_V6_vd0_pseudo_128B : CVI_VA_Resource<(outs VectorRegs128B:$dst), - (ins ), - "$dst=#0", - [(set VectorRegs128B:$dst, (int_hexagon_V6_vd0_128B ))]>; -} - -let isPseudo = 1 in -def HEXAGON_V6_vassignp : CVI_VA_Resource<(outs VecDblRegs:$dst), - (ins VecDblRegs:$src1), - "$dst=vassignp_W($src1)", - [(set VecDblRegs:$dst, (int_hexagon_V6_vassignp VecDblRegs:$src1))]>; - -let isPseudo = 1 in -def HEXAGON_V6_vassignp_128B : CVI_VA_Resource<(outs VecDblRegs128B:$dst), - (ins VecDblRegs128B:$src1), - "$dst=vassignp_W_128B($src1)", - [(set VecDblRegs128B:$dst, (int_hexagon_V6_vassignp_128B - VecDblRegs128B:$src1))]>; - -let isPseudo = 1 in -def HEXAGON_V6_lo : CVI_VA_Resource<(outs VectorRegs:$dst), - (ins VecDblRegs:$src1), - "$dst=lo_W($src1)", - [(set VectorRegs:$dst, (int_hexagon_V6_lo VecDblRegs:$src1))]>; - -let isPseudo = 1 in -def HEXAGON_V6_hi : CVI_VA_Resource<(outs VectorRegs:$dst), - (ins VecDblRegs:$src1), - "$dst=hi_W($src1)", - [(set VectorRegs:$dst, (int_hexagon_V6_hi VecDblRegs:$src1))]>; - -let isPseudo = 1 in -def HEXAGON_V6_lo_128B : CVI_VA_Resource<(outs VectorRegs128B:$dst), - (ins VecDblRegs128B:$src1), - "$dst=lo_W($src1)", - [(set VectorRegs128B:$dst, (int_hexagon_V6_lo_128B VecDblRegs128B:$src1))]>; - -let isPseudo = 1 in -def HEXAGON_V6_hi_128B : CVI_VA_Resource<(outs VectorRegs128B:$dst), - (ins VecDblRegs128B:$src1), - "$dst=hi_W($src1)", - [(set VectorRegs128B:$dst, (int_hexagon_V6_hi_128B VecDblRegs128B:$src1))]>; - let AddedComplexity = 100 in { def : Pat < (v16i32 (int_hexagon_V6_lo (v32i32 VecDblRegs:$src1))), - (v16i32 (EXTRACT_SUBREG (v32i32 VecDblRegs:$src1), subreg_loreg)) >, + (v16i32 (EXTRACT_SUBREG (v32i32 VecDblRegs:$src1), vsub_lo)) >, Requires<[UseHVXSgl]>; def : Pat < (v16i32 (int_hexagon_V6_hi (v32i32 VecDblRegs:$src1))), - (v16i32 (EXTRACT_SUBREG (v32i32 VecDblRegs:$src1), subreg_hireg)) >, + (v16i32 (EXTRACT_SUBREG (v32i32 VecDblRegs:$src1), vsub_hi)) >, Requires<[UseHVXSgl]>; def : Pat < (v32i32 (int_hexagon_V6_lo_128B (v64i32 VecDblRegs128B:$src1))), - (v32i32 (EXTRACT_SUBREG (v64i32 VecDblRegs128B:$src1), - subreg_loreg)) >, + (v32i32 (EXTRACT_SUBREG (v64i32 VecDblRegs128B:$src1), vsub_lo)) >, Requires<[UseHVXDbl]>; def : Pat < (v32i32 (int_hexagon_V6_hi_128B (v64i32 VecDblRegs128B:$src1))), - (v32i32 (EXTRACT_SUBREG (v64i32 VecDblRegs128B:$src1), - subreg_hireg)) >, + (v32i32 (EXTRACT_SUBREG (v64i32 VecDblRegs128B:$src1), vsub_hi)) >, Requires<[UseHVXDbl]>; } @@ -204,6 +153,16 @@ multiclass T_V_pat <InstHexagon MI, Intrinsic IntID> { Requires<[UseHVXDbl]>; } +multiclass T_W_pat <InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID VecDblRegs:$src1), + (MI VecDblRegs:$src1)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1), + (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1)>, + Requires<[UseHVXDbl]>; +} + multiclass T_Q_pat <InstHexagon MI, Intrinsic IntID> { def: Pat<(IntID 
VecPredRegs:$src1), (MI VecPredRegs:$src1)>, @@ -495,7 +454,7 @@ multiclass T_WVVR_pat <InstHexagon MI, Intrinsic IntID> { Requires<[UseHVXDbl]>; } -defm : T_WR_pat<V6_vtmpyb, int_hexagon_V6_vtmpyb>; +defm : T_WR_pat <V6_vtmpyb, int_hexagon_V6_vtmpyb>; defm : T_WR_pat <V6_vtmpybus, int_hexagon_V6_vtmpybus>; defm : T_VR_pat <V6_vdmpyhb, int_hexagon_V6_vdmpyhb>; defm : T_VR_pat <V6_vrmpyub, int_hexagon_V6_vrmpyub>; @@ -751,6 +710,10 @@ defm : T_V_pat <V6_vcl0h, int_hexagon_V6_vcl0h>; defm : T_V_pat <V6_vnormamtw, int_hexagon_V6_vnormamtw>; defm : T_V_pat <V6_vnormamth, int_hexagon_V6_vnormamth>; +defm : T_W_pat <V6_lo, int_hexagon_V6_lo>; +defm : T_W_pat <V6_hi, int_hexagon_V6_hi>; +defm : T_W_pat <V6_vassignp, int_hexagon_V6_vassignp>; + defm : T_WRI_pat <V6_vrmpybusi, int_hexagon_V6_vrmpybusi>; defm : T_WRI_pat <V6_vrsadubi, int_hexagon_V6_vrsadubi>; defm : T_WRI_pat <V6_vrmpyubi, int_hexagon_V6_vrmpyubi>; @@ -831,8 +794,10 @@ def : T_PPQ_pat <S2_cabacencbin, int_hexagon_S2_cabacencbin>; def: Pat<(v64i16 (trunc v64i32:$Vdd)), (v64i16 (V6_vpackwh_sat_128B - (v32i32 (HEXAGON_V6_hi_128B VecDblRegs128B:$Vdd)), - (v32i32 (HEXAGON_V6_lo_128B VecDblRegs128B:$Vdd))))>, + (v32i32 (V6_hi_128B VecDblRegs128B:$Vdd)), + (v32i32 (V6_lo_128B VecDblRegs128B:$Vdd))))>, Requires<[UseHVXDbl]>; +def: Pat<(int_hexagon_V6_vd0), (V6_vd0)>; +def: Pat<(int_hexagon_V6_vd0_128B), (V6_vd0_128B)>; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIsetDx.td b/contrib/llvm/lib/Target/Hexagon/HexagonIsetDx.td index 0ca95e9..ebedf2c 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonIsetDx.td +++ b/contrib/llvm/lib/Target/Hexagon/HexagonIsetDx.td @@ -13,9 +13,9 @@ // SA1_combine1i: Combines. let isCodeGenOnly = 1, hasSideEffects = 0 in -def V4_SA1_combine1i: SUBInst < +def SA1_combine1i: SUBInst < (outs DoubleRegs:$Rdd), - (ins u2Imm:$u2), + (ins u2_0Imm:$u2), "$Rdd = combine(#1, #$u2)"> { bits<3> Rdd; bits<2> u2; @@ -30,7 +30,7 @@ def V4_SA1_combine1i: SUBInst < // SL2_jumpr31_f: Indirect conditional jump if false. // SL2_jumpr31_f -> SL2_jumpr31_fnew let Defs = [PC], Uses = [P0, R31], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in -def V4_SL2_jumpr31_f: SUBInst < +def SL2_jumpr31_f: SUBInst < (outs ), (ins ), "if (!p0) jumpr r31"> { @@ -40,7 +40,7 @@ def V4_SL2_jumpr31_f: SUBInst < // SL2_deallocframe: Deallocate stack frame. let Defs = [R31, R29, R30], Uses = [R30], isCodeGenOnly = 1, mayLoad = 1, accessSize = DoubleWordAccess in -def V4_SL2_deallocframe: SUBInst < +def SL2_deallocframe: SUBInst < (outs ), (ins ), "deallocframe"> { @@ -51,7 +51,7 @@ def V4_SL2_deallocframe: SUBInst < // SL2_return_f: Deallocate stack frame and return. // SL2_return_f -> SL2_return_fnew let Defs = [PC, R31, R29, R30], Uses = [R30, P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, mayLoad = 1, accessSize = DoubleWordAccess, isBranch = 1, isIndirectBranch = 1 in -def V4_SL2_return_f: SUBInst < +def SL2_return_f: SUBInst < (outs ), (ins ), "if (!p0) dealloc_return"> { @@ -61,9 +61,9 @@ def V4_SL2_return_f: SUBInst < // SA1_combine3i: Combines. let isCodeGenOnly = 1, hasSideEffects = 0 in -def V4_SA1_combine3i: SUBInst < +def SA1_combine3i: SUBInst < (outs DoubleRegs:$Rdd), - (ins u2Imm:$u2), + (ins u2_0Imm:$u2), "$Rdd = combine(#3, #$u2)"> { bits<3> Rdd; bits<2> u2; @@ -77,7 +77,7 @@ def V4_SA1_combine3i: SUBInst < // SS2_storebi0: Store byte. 
let isCodeGenOnly = 1, mayStore = 1, accessSize = ByteAccess in -def V4_SS2_storebi0: SUBInst < +def SS2_storebi0: SUBInst < (outs ), (ins IntRegs:$Rs, u4_0Imm:$u4_0), "memb($Rs + #$u4_0)=#0"> { @@ -91,10 +91,10 @@ def V4_SS2_storebi0: SUBInst < // SA1_clrtnew: Clear if true. let Uses = [P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedNew = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in -def V4_SA1_clrtnew: SUBInst < +def SA1_clrtnew: SUBInst < (outs IntRegs:$Rd), - (ins ), - "if (p0.new) $Rd = #0"> { + (ins PredRegs:$Pu), + "if ($Pu.new) $Rd = #0"> { bits<4> Rd; let Inst{12-9} = 0b1101; @@ -104,7 +104,7 @@ def V4_SA1_clrtnew: SUBInst < // SL2_loadruh_io: Load half. let isCodeGenOnly = 1, mayLoad = 1, accessSize = HalfWordAccess, hasNewValue = 1, opNewValue = 0 in -def V4_SL2_loadruh_io: SUBInst < +def SL2_loadruh_io: SUBInst < (outs IntRegs:$Rd), (ins IntRegs:$Rs, u3_1Imm:$u3_1), "$Rd = memuh($Rs + #$u3_1)"> { @@ -120,7 +120,7 @@ def V4_SL2_loadruh_io: SUBInst < // SL2_jumpr31_tnew: Indirect conditional jump if true. let Defs = [PC], Uses = [P0, R31], isCodeGenOnly = 1, isPredicated = 1, isPredicatedNew = 1, isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in -def V4_SL2_jumpr31_tnew: SUBInst < +def SL2_jumpr31_tnew: SUBInst < (outs ), (ins ), "if (p0.new) jumpr:nt r31"> { @@ -130,9 +130,9 @@ def V4_SL2_jumpr31_tnew: SUBInst < // SA1_addi: Add. let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0, isExtendable = 1, isExtentSigned = 1, opExtentBits = 7, opExtendable = 2 in -def V4_SA1_addi: SUBInst < +def SA1_addi: SUBInst < (outs IntRegs:$Rx), - (ins IntRegs:$_src_, s7Ext:$s7), + (ins IntRegs:$_src_, s7_0Ext:$s7), "$Rx = add($_src_, #$s7)" , [] , "$_src_ = $Rx"> { @@ -146,7 +146,7 @@ def V4_SA1_addi: SUBInst < // SL1_loadrub_io: Load byte. let isCodeGenOnly = 1, mayLoad = 1, accessSize = ByteAccess, hasNewValue = 1, opNewValue = 0 in -def V4_SL1_loadrub_io: SUBInst < +def SL1_loadrub_io: SUBInst < (outs IntRegs:$Rd), (ins IntRegs:$Rs, u4_0Imm:$u4_0), "$Rd = memub($Rs + #$u4_0)"> { @@ -162,7 +162,7 @@ def V4_SL1_loadrub_io: SUBInst < // SL1_loadri_io: Load word. let isCodeGenOnly = 1, mayLoad = 1, accessSize = WordAccess, hasNewValue = 1, opNewValue = 0 in -def V4_SL1_loadri_io: SUBInst < +def SL1_loadri_io: SUBInst < (outs IntRegs:$Rd), (ins IntRegs:$Rs, u4_2Imm:$u4_2), "$Rd = memw($Rs + #$u4_2)"> { @@ -178,9 +178,9 @@ def V4_SL1_loadri_io: SUBInst < // SA1_cmpeqi: Compareimmed. let Defs = [P0], isCodeGenOnly = 1, hasSideEffects = 0 in -def V4_SA1_cmpeqi: SUBInst < +def SA1_cmpeqi: SUBInst < (outs ), - (ins IntRegs:$Rs, u2Imm:$u2), + (ins IntRegs:$Rs, u2_0Imm:$u2), "p0 = cmp.eq($Rs, #$u2)"> { bits<4> Rs; bits<2> u2; @@ -192,7 +192,7 @@ def V4_SA1_cmpeqi: SUBInst < // SA1_combinerz: Combines. let isCodeGenOnly = 1, hasSideEffects = 0 in -def V4_SA1_combinerz: SUBInst < +def SA1_combinerz: SUBInst < (outs DoubleRegs:$Rdd), (ins IntRegs:$Rs), "$Rdd = combine($Rs, #0)"> { @@ -209,7 +209,7 @@ def V4_SA1_combinerz: SUBInst < // SL2_return_t: Deallocate stack frame and return. // SL2_return_t -> SL2_return_tnew let Defs = [PC, R31, R29, R30], Uses = [R30, P0], isCodeGenOnly = 1, isPredicated = 1, mayLoad = 1, accessSize = DoubleWordAccess, isBranch = 1, isIndirectBranch = 1 in -def V4_SL2_return_t: SUBInst < +def SL2_return_t: SUBInst < (outs ), (ins ), "if (p0) dealloc_return"> { @@ -219,7 +219,7 @@ def V4_SL2_return_t: SUBInst < // SS2_allocframe: Allocate stack frame. 
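A note on the operand names used throughout these sub-instructions (u3_1Imm, u4_2Imm, u5_3Imm, s6_3Imm and the like): the assumption here, consistent with the asm strings above and below, is that uN_SImm denotes an N-bit unsigned field carrying S implied low zero bits, so the reachable byte offsets scale with the access size. A hedged standalone C++ sketch of that encoding rule:

#include <cassert>
#include <cstdint>

// Hypothetical encoder: pack a byte offset into an N-bit field with S
// implied low zero bits. Returns false when the offset is misaligned or
// out of range, i.e. when the sub-instruction form cannot be used.
bool encodeImm(unsigned N, unsigned S, uint32_t byteOff, uint32_t &field) {
  if (byteOff & ((1u << S) - 1))   // must be 2^S-aligned
    return false;
  field = byteOff >> S;
  return field < (1u << N);        // must fit in N bits
}

int main() {
  uint32_t f;
  assert(encodeImm(4, 2, 60, f) && f == 15);  // u4_2: memw, offsets 0..60
  assert(!encodeImm(4, 2, 64, f));            // out of range
  assert(!encodeImm(3, 1, 5, f));             // u3_1: memuh, misaligned
  assert(encodeImm(5, 3, 248, f) && f == 31); // u5_3: memd, offsets 0..248
  return 0;
}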
let Defs = [R29, R30], Uses = [R30, R31, R29], isCodeGenOnly = 1, mayStore = 1, accessSize = DoubleWordAccess in -def V4_SS2_allocframe: SUBInst < +def SS2_allocframe: SUBInst < (outs ), (ins u5_3Imm:$u5_3), "allocframe(#$u5_3)"> { @@ -231,7 +231,7 @@ def V4_SS2_allocframe: SUBInst < // SS2_storeh_io: Store half. let isCodeGenOnly = 1, mayStore = 1, accessSize = HalfWordAccess in -def V4_SS2_storeh_io: SUBInst < +def SS2_storeh_io: SUBInst < (outs ), (ins IntRegs:$Rs, u3_1Imm:$u3_1, IntRegs:$Rt), "memh($Rs + #$u3_1) = $Rt"> { @@ -247,7 +247,7 @@ def V4_SS2_storeh_io: SUBInst < // SS2_storewi0: Store word. let isCodeGenOnly = 1, mayStore = 1, accessSize = WordAccess in -def V4_SS2_storewi0: SUBInst < +def SS2_storewi0: SUBInst < (outs ), (ins IntRegs:$Rs, u4_2Imm:$u4_2), "memw($Rs + #$u4_2)=#0"> { @@ -261,7 +261,7 @@ def V4_SS2_storewi0: SUBInst < // SS2_storewi1: Store word. let isCodeGenOnly = 1, mayStore = 1, accessSize = WordAccess in -def V4_SS2_storewi1: SUBInst < +def SS2_storewi1: SUBInst < (outs ), (ins IntRegs:$Rs, u4_2Imm:$u4_2), "memw($Rs + #$u4_2)=#1"> { @@ -275,7 +275,7 @@ def V4_SS2_storewi1: SUBInst < // SL2_jumpr31: Indirect conditional jump if true. let Defs = [PC], Uses = [R31], isCodeGenOnly = 1, isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in -def V4_SL2_jumpr31: SUBInst < +def SL2_jumpr31: SUBInst < (outs ), (ins ), "jumpr r31"> { @@ -285,7 +285,7 @@ def V4_SL2_jumpr31: SUBInst < // SA1_combinezr: Combines. let isCodeGenOnly = 1, hasSideEffects = 0 in -def V4_SA1_combinezr: SUBInst < +def SA1_combinezr: SUBInst < (outs DoubleRegs:$Rdd), (ins IntRegs:$Rs), "$Rdd = combine(#0, $Rs)"> { @@ -301,7 +301,7 @@ def V4_SA1_combinezr: SUBInst < // SL2_loadrh_io: Load half. let isCodeGenOnly = 1, mayLoad = 1, accessSize = HalfWordAccess, hasNewValue = 1, opNewValue = 0 in -def V4_SL2_loadrh_io: SUBInst < +def SL2_loadrh_io: SUBInst < (outs IntRegs:$Rd), (ins IntRegs:$Rs, u3_1Imm:$u3_1), "$Rd = memh($Rs + #$u3_1)"> { @@ -317,7 +317,7 @@ def V4_SL2_loadrh_io: SUBInst < // SA1_addrx: Add. let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in -def V4_SA1_addrx: SUBInst < +def SA1_addrx: SUBInst < (outs IntRegs:$Rx), (ins IntRegs:$_src_, IntRegs:$Rs), "$Rx = add($_src_, $Rs)" , @@ -333,10 +333,10 @@ def V4_SA1_addrx: SUBInst < // SA1_setin1: Set to -1. let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in -def V4_SA1_setin1: SUBInst < +def SA1_setin1: SUBInst < (outs IntRegs:$Rd), (ins ), - "$Rd = #-1"> { + "$Rd = #{-1}"> { bits<4> Rd; let Inst{12-9} = 0b1101; @@ -346,7 +346,7 @@ def V4_SA1_setin1: SUBInst < // SA1_sxth: Sxth. let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in -def V4_SA1_sxth: SUBInst < +def SA1_sxth: SUBInst < (outs IntRegs:$Rd), (ins IntRegs:$Rs), "$Rd = sxth($Rs)"> { @@ -360,9 +360,9 @@ def V4_SA1_sxth: SUBInst < // SA1_combine0i: Combines. let isCodeGenOnly = 1, hasSideEffects = 0 in -def V4_SA1_combine0i: SUBInst < +def SA1_combine0i: SUBInst < (outs DoubleRegs:$Rdd), - (ins u2Imm:$u2), + (ins u2_0Imm:$u2), "$Rdd = combine(#0, #$u2)"> { bits<3> Rdd; bits<2> u2; @@ -376,9 +376,9 @@ def V4_SA1_combine0i: SUBInst < // SA1_combine2i: Combines. let isCodeGenOnly = 1, hasSideEffects = 0 in -def V4_SA1_combine2i: SUBInst < +def SA1_combine2i: SUBInst < (outs DoubleRegs:$Rdd), - (ins u2Imm:$u2), + (ins u2_0Imm:$u2), "$Rdd = combine(#2, #$u2)"> { bits<3> Rdd; bits<2> u2; @@ -392,7 +392,7 @@ def V4_SA1_combine2i: SUBInst < // SA1_sxtb: Sxtb. 
let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in -def V4_SA1_sxtb: SUBInst < +def SA1_sxtb: SUBInst < (outs IntRegs:$Rd), (ins IntRegs:$Rs), "$Rd = sxtb($Rs)"> { @@ -407,10 +407,10 @@ def V4_SA1_sxtb: SUBInst < // SA1_clrf: Clear if false. // SA1_clrf -> SA1_clrfnew let Uses = [P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in -def V4_SA1_clrf: SUBInst < +def SA1_clrf: SUBInst < (outs IntRegs:$Rd), - (ins ), - "if (!p0) $Rd = #0"> { + (ins PredRegs:$Pu), + "if (!$Pu) $Rd = #0"> { bits<4> Rd; let Inst{12-9} = 0b1101; @@ -420,7 +420,7 @@ def V4_SA1_clrf: SUBInst < // SL2_loadrb_io: Load byte. let isCodeGenOnly = 1, mayLoad = 1, accessSize = ByteAccess, hasNewValue = 1, opNewValue = 0 in -def V4_SL2_loadrb_io: SUBInst < +def SL2_loadrb_io: SUBInst < (outs IntRegs:$Rd), (ins IntRegs:$Rs, u3_0Imm:$u3_0), "$Rd = memb($Rs + #$u3_0)"> { @@ -436,7 +436,7 @@ def V4_SL2_loadrb_io: SUBInst < // SA1_tfr: Tfr. let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in -def V4_SA1_tfr: SUBInst < +def SA1_tfr: SUBInst < (outs IntRegs:$Rd), (ins IntRegs:$Rs), "$Rd = $Rs"> { @@ -450,7 +450,7 @@ def V4_SA1_tfr: SUBInst < // SL2_loadrd_sp: Load dword. let Uses = [R29], isCodeGenOnly = 1, mayLoad = 1, accessSize = DoubleWordAccess in -def V4_SL2_loadrd_sp: SUBInst < +def SL2_loadrd_sp: SUBInst < (outs DoubleRegs:$Rdd), (ins u5_3Imm:$u5_3), "$Rdd = memd(r29 + #$u5_3)"> { @@ -464,7 +464,7 @@ def V4_SL2_loadrd_sp: SUBInst < // SA1_and1: And #1. let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in -def V4_SA1_and1: SUBInst < +def SA1_and1: SUBInst < (outs IntRegs:$Rd), (ins IntRegs:$Rs), "$Rd = and($Rs, #1)"> { @@ -478,7 +478,7 @@ def V4_SA1_and1: SUBInst < // SS2_storebi1: Store byte. let isCodeGenOnly = 1, mayStore = 1, accessSize = ByteAccess in -def V4_SS2_storebi1: SUBInst < +def SS2_storebi1: SUBInst < (outs ), (ins IntRegs:$Rs, u4_0Imm:$u4_0), "memb($Rs + #$u4_0)=#1"> { @@ -492,7 +492,7 @@ def V4_SS2_storebi1: SUBInst < // SA1_inc: Inc. let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in -def V4_SA1_inc: SUBInst < +def SA1_inc: SUBInst < (outs IntRegs:$Rd), (ins IntRegs:$Rs), "$Rd = add($Rs, #1)"> { @@ -506,7 +506,7 @@ def V4_SA1_inc: SUBInst < // SS2_stored_sp: Store dword. let Uses = [R29], isCodeGenOnly = 1, mayStore = 1, accessSize = DoubleWordAccess in -def V4_SS2_stored_sp: SUBInst < +def SS2_stored_sp: SUBInst < (outs ), (ins s6_3Imm:$s6_3, DoubleRegs:$Rtt), "memd(r29 + #$s6_3) = $Rtt"> { @@ -520,7 +520,7 @@ def V4_SS2_stored_sp: SUBInst < // SS2_storew_sp: Store word. let Uses = [R29], isCodeGenOnly = 1, mayStore = 1, accessSize = WordAccess in -def V4_SS2_storew_sp: SUBInst < +def SS2_storew_sp: SUBInst < (outs ), (ins u5_2Imm:$u5_2, IntRegs:$Rt), "memw(r29 + #$u5_2) = $Rt"> { @@ -534,7 +534,7 @@ def V4_SS2_storew_sp: SUBInst < // SL2_jumpr31_fnew: Indirect conditional jump if false. let Defs = [PC], Uses = [P0, R31], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, isPredicatedNew = 1, isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in -def V4_SL2_jumpr31_fnew: SUBInst < +def SL2_jumpr31_fnew: SUBInst < (outs ), (ins ), "if (!p0.new) jumpr:nt r31"> { @@ -545,10 +545,10 @@ def V4_SL2_jumpr31_fnew: SUBInst < // SA1_clrt: Clear if true. 
// SA1_clrt -> SA1_clrtnew let Uses = [P0], isCodeGenOnly = 1, isPredicated = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in -def V4_SA1_clrt: SUBInst < +def SA1_clrt: SUBInst < (outs IntRegs:$Rd), - (ins ), - "if (p0) $Rd = #0"> { + (ins PredRegs:$Pu), + "if ($Pu) $Rd = #0"> { bits<4> Rd; let Inst{12-9} = 0b1101; @@ -558,7 +558,7 @@ def V4_SA1_clrt: SUBInst < // SL2_return: Deallocate stack frame and return. let Defs = [PC, R31, R29, R30], Uses = [R30], isCodeGenOnly = 1, mayLoad = 1, accessSize = DoubleWordAccess, isBranch = 1, isIndirectBranch = 1 in -def V4_SL2_return: SUBInst < +def SL2_return: SUBInst < (outs ), (ins ), "dealloc_return"> { @@ -568,10 +568,10 @@ def V4_SL2_return: SUBInst < // SA1_dec: Dec. let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in -def V4_SA1_dec: SUBInst < +def SA1_dec: SUBInst < (outs IntRegs:$Rd), (ins IntRegs:$Rs), - "$Rd = add($Rs,#-1)"> { + "$Rd = add($Rs,#{-1})"> { bits<4> Rd; bits<4> Rs; @@ -582,9 +582,9 @@ def V4_SA1_dec: SUBInst < // SA1_seti: Set immed. let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0, isExtendable = 1, isExtentSigned = 0, opExtentBits = 6, opExtendable = 1 in -def V4_SA1_seti: SUBInst < +def SA1_seti: SUBInst < (outs IntRegs:$Rd), - (ins u6Ext:$u6), + (ins u6_0Ext:$u6), "$Rd = #$u6"> { bits<4> Rd; bits<6> u6; @@ -597,7 +597,7 @@ def V4_SA1_seti: SUBInst < // SL2_jumpr31_t: Indirect conditional jump if true. // SL2_jumpr31_t -> SL2_jumpr31_tnew let Defs = [PC], Uses = [P0, R31], isCodeGenOnly = 1, isPredicated = 1, isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in -def V4_SL2_jumpr31_t: SUBInst < +def SL2_jumpr31_t: SUBInst < (outs ), (ins ), "if (p0) jumpr r31"> { @@ -607,10 +607,10 @@ def V4_SL2_jumpr31_t: SUBInst < // SA1_clrfnew: Clear if false. let Uses = [P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, isPredicatedNew = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in -def V4_SA1_clrfnew: SUBInst < +def SA1_clrfnew: SUBInst < (outs IntRegs:$Rd), - (ins ), - "if (!p0.new) $Rd = #0"> { + (ins PredRegs:$Pu), + "if (!$Pu.new) $Rd = #0"> { bits<4> Rd; let Inst{12-9} = 0b1101; @@ -620,7 +620,7 @@ def V4_SA1_clrfnew: SUBInst < // SS1_storew_io: Store word. let isCodeGenOnly = 1, mayStore = 1, accessSize = WordAccess in -def V4_SS1_storew_io: SUBInst < +def SS1_storew_io: SUBInst < (outs ), (ins IntRegs:$Rs, u4_2Imm:$u4_2, IntRegs:$Rt), "memw($Rs + #$u4_2) = $Rt"> { @@ -636,7 +636,7 @@ def V4_SS1_storew_io: SUBInst < // SA1_zxtb: Zxtb. let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in -def V4_SA1_zxtb: SUBInst < +def SA1_zxtb: SUBInst < (outs IntRegs:$Rd), (ins IntRegs:$Rs), "$Rd = and($Rs, #255)"> { @@ -650,7 +650,7 @@ def V4_SA1_zxtb: SUBInst < // SA1_addsp: Add. let Uses = [R29], isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in -def V4_SA1_addsp: SUBInst < +def SA1_addsp: SUBInst < (outs IntRegs:$Rd), (ins u6_2Imm:$u6_2), "$Rd = add(r29, #$u6_2)"> { @@ -664,7 +664,7 @@ def V4_SA1_addsp: SUBInst < // SL2_loadri_sp: Load word. let Uses = [R29], isCodeGenOnly = 1, mayLoad = 1, accessSize = WordAccess, hasNewValue = 1, opNewValue = 0 in -def V4_SL2_loadri_sp: SUBInst < +def SL2_loadri_sp: SUBInst < (outs IntRegs:$Rd), (ins u5_2Imm:$u5_2), "$Rd = memw(r29 + #$u5_2)"> { @@ -678,7 +678,7 @@ def V4_SL2_loadri_sp: SUBInst < // SS1_storeb_io: Store byte. 
let isCodeGenOnly = 1, mayStore = 1, accessSize = ByteAccess in -def V4_SS1_storeb_io: SUBInst < +def SS1_storeb_io: SUBInst < (outs ), (ins IntRegs:$Rs, u4_0Imm:$u4_0, IntRegs:$Rt), "memb($Rs + #$u4_0) = $Rt"> { @@ -694,7 +694,7 @@ def V4_SS1_storeb_io: SUBInst < // SL2_return_tnew: Deallocate stack frame and return. let Defs = [PC, R31, R29, R30], Uses = [R30, P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedNew = 1, mayLoad = 1, accessSize = DoubleWordAccess, isBranch = 1, isIndirectBranch = 1 in -def V4_SL2_return_tnew: SUBInst < +def SL2_return_tnew: SUBInst < (outs ), (ins ), "if (p0.new) dealloc_return:nt"> { @@ -704,7 +704,7 @@ def V4_SL2_return_tnew: SUBInst < // SL2_return_fnew: Deallocate stack frame and return. let Defs = [PC, R31, R29, R30], Uses = [R30, P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, isPredicatedNew = 1, mayLoad = 1, accessSize = DoubleWordAccess, isBranch = 1, isIndirectBranch = 1 in -def V4_SL2_return_fnew: SUBInst < +def SL2_return_fnew: SUBInst < (outs ), (ins ), "if (!p0.new) dealloc_return:nt"> { @@ -714,7 +714,7 @@ def V4_SL2_return_fnew: SUBInst < // SA1_zxth: Zxth. let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in -def V4_SA1_zxth: SUBInst < +def SA1_zxth: SUBInst < (outs IntRegs:$Rd), (ins IntRegs:$Rs), "$Rd = zxth($Rs)"> { diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h index 26c5b63..d83bcbc 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h +++ b/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h @@ -15,45 +15,35 @@ namespace llvm { - namespace Hexagon { +namespace Hexagon { + const unsigned int StartPacket = 0x1; const unsigned int EndPacket = 0x2; - } +} // end namespace Hexagon /// Hexagon target-specific information for each MachineFunction. class HexagonMachineFunctionInfo : public MachineFunctionInfo { // SRetReturnReg - Some subtargets require that sret lowering includes // returning the value of the returned struct in a register. This field // holds the virtual register into which the sret argument is passed. 
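(An aside on the HexagonMachineFunctionInfo hunk that follows: the rewrite replaces repetitive constructor initializer lists with C++11 in-class member initializers, which lets the default constructor be declared "= default". A minimal sketch, with hypothetical names, of why the two spellings are equivalent:

struct FunctionInfoSketch {
  unsigned SRetReturnReg = 0;                 // default applies to every constructor
  bool HasEHReturn = false;
  FunctionInfoSketch() = default;             // no init list needed anymore
  explicit FunctionInfoSketch(int /*MF*/) {}  // still picks up the in-class defaults
};

Any constructor that does not mention a member in its init list gets the in-class default, so dropping the hand-written lists is behavior-preserving.)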
- unsigned SRetReturnReg; - unsigned StackAlignBaseVReg; // Aligned-stack base register (virtual) - unsigned StackAlignBasePhysReg; // (physical) - std::vector<MachineInstr*> AllocaAdjustInsts; + unsigned SRetReturnReg = 0; + unsigned StackAlignBaseVReg = 0; // Aligned-stack base register (virtual) + unsigned StackAlignBasePhysReg = 0; // (physical) int VarArgsFrameIndex; - bool HasClobberLR; - bool HasEHReturn; + bool HasClobberLR = false; + bool HasEHReturn = false; std::map<const MachineInstr*, unsigned> PacketInfo; virtual void anchor(); public: - HexagonMachineFunctionInfo() : SRetReturnReg(0), StackAlignBaseVReg(0), - StackAlignBasePhysReg(0), HasClobberLR(0), HasEHReturn(false) {} + HexagonMachineFunctionInfo() = default; - HexagonMachineFunctionInfo(MachineFunction &MF) : SRetReturnReg(0), - StackAlignBaseVReg(0), StackAlignBasePhysReg(0), HasClobberLR(0), - HasEHReturn(false) {} + HexagonMachineFunctionInfo(MachineFunction &MF) {} unsigned getSRetReturnReg() const { return SRetReturnReg; } void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; } - void addAllocaAdjustInst(MachineInstr* MI) { - AllocaAdjustInsts.push_back(MI); - } - const std::vector<MachineInstr*>& getAllocaAdjustInsts() { - return AllocaAdjustInsts; - } - void setVarArgsFrameIndex(int v) { VarArgsFrameIndex = v; } int getVarArgsFrameIndex() { return VarArgsFrameIndex; } @@ -83,6 +73,7 @@ public: void setStackAlignBasePhysReg(unsigned R) { StackAlignBasePhysReg = R; } unsigned getStackAlignBasePhysReg() const { return StackAlignBasePhysReg; } }; -} // End llvm namespace -#endif +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONMACHINEFUNCTIONINFO_H diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp index 6dcac0d..9ff9d93 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp @@ -51,6 +51,7 @@ using namespace llvm; #define DEBUG_TYPE "misched" +namespace { class HexagonCallMutation : public ScheduleDAGMutation { public: void apply(ScheduleDAGInstrs *DAG) override; @@ -58,6 +59,7 @@ private: bool shouldTFRICallBind(const HexagonInstrInfo &HII, const SUnit &Inst1, const SUnit &Inst2) const; }; +} // end anonymous namespace // Check if a call and subsequent A2_tfrpi instructions should maintain // scheduling affinity. We are looking for the TFRI to be consumed in @@ -72,7 +74,7 @@ bool HexagonCallMutation::shouldTFRICallBind(const HexagonInstrInfo &HII, return false; // TypeXTYPE are 64 bit operations. - if (HII.getType(Inst2.getInstr()) == HexagonII::TypeXTYPE) + if (HII.getType(*Inst2.getInstr()) == HexagonII::TypeXTYPE) return true; return false; } @@ -168,7 +170,7 @@ bool VLIWResourceModel::isResourceAvailable(SUnit *SU) { continue; // Enable .cur formation. - if (QII.mayBeCurLoad(Packet[i]->getInstr())) + if (QII.mayBeCurLoad(*Packet[i]->getInstr())) continue; for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(), @@ -616,7 +618,7 @@ int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU, if (!SU || SU->isScheduled) return ResCount; - MachineInstr *Instr = SU->getInstr(); + MachineInstr &Instr = *SU->getInstr(); DEBUG(if (verbose) dbgs() << ((Q.getID() == TopQID) ? "(top|" : "(bot|")); // Forced priority is high. @@ -705,7 +707,7 @@ int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU, // available for it. 
auto &QST = DAG->MF.getSubtarget<HexagonSubtarget>(); auto &QII = *QST.getInstrInfo(); - if (SU->isInstr() && QII.mayBeCurLoad(SU->getInstr())) { + if (SU->isInstr() && QII.mayBeCurLoad(*SU->getInstr())) { if (Q.getID() == TopQID && Top.ResourceModel->isResourceAvailable(SU)) { ResCount += PriorityTwo; DEBUG(if (verbose) dbgs() << "C|"); @@ -744,11 +746,11 @@ int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU, // Check for stalls in the previous packet. if (Q.getID() == TopQID) { for (auto J : Top.ResourceModel->OldPacket) - if (QII.producesStall(J->getInstr(), Instr)) + if (QII.producesStall(*J->getInstr(), Instr)) ResCount -= PriorityOne; } else { for (auto J : Bot.ResourceModel->OldPacket) - if (QII.producesStall(Instr, J->getInstr())) + if (QII.producesStall(Instr, *J->getInstr())) ResCount -= PriorityOne; } } @@ -841,8 +843,8 @@ pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker, const MachineInstr *CandI = Candidate.SU->getInstr(); const InstrItineraryData *InstrItins = QST.getInstrItineraryData(); - unsigned InstrLatency = QII.getInstrTimingClassLatency(InstrItins, MI); - unsigned CandLatency = QII.getInstrTimingClassLatency(InstrItins, CandI); + unsigned InstrLatency = QII.getInstrTimingClassLatency(InstrItins, *MI); + unsigned CandLatency = QII.getInstrTimingClassLatency(InstrItins, *CandI); DEBUG(dbgs() << "TC Tie Breaker Cand: " << CandLatency << " Instr:" << InstrLatency << "\n" << *MI << *CandI << "\n"); diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h b/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h index 51c84a4..dc10028 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h +++ b/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h @@ -94,9 +94,7 @@ public: void savePacket(); unsigned getTotalPackets() const { return TotalPackets; } - bool isInPacket(SUnit *SU) const { - return std::find(Packet.begin(), Packet.end(), SU) != Packet.end(); - } + bool isInPacket(SUnit *SU) const { return is_contained(Packet, SU); } }; /// Extend the standard ScheduleDAGMI to provide more context and override the diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp index 3ffb9cf..72d8011 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp @@ -29,7 +29,6 @@ #include "HexagonTargetMachine.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveVariables.h" -#include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -79,14 +78,12 @@ namespace { MachineFunctionPass::getAnalysisUsage(AU); } - const char *getPassName() const override { - return "Hexagon NewValueJump"; - } + StringRef getPassName() const override { return "Hexagon NewValueJump"; } bool runOnMachineFunction(MachineFunction &Fn) override; MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( - MachineFunctionProperties::Property::AllVRegsAllocated); + MachineFunctionProperties::Property::NoVRegs); } private: @@ -180,7 +177,7 @@ static bool commonChecksToProhibitNewValueJump(bool afterRA, return false; // if call in path, bail out. - if (MII->getOpcode() == Hexagon::J2_call) + if (MII->isCall()) return false; // if NVJ is running prior to RA, do the following checks. 
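(The next hunk tightens canCompareBeNewValueJump: instead of one blanket isUInt<5> test with a special case, the allowed immediate range now depends on the compare opcode. A self-contained sketch of the rule the hunk installs, with hypothetical enum names standing in for the Hexagon opcodes:

#include <cstdint>

enum class Cmp { EqI, GtI, GtUI, TstBitI }; // hypothetical stand-ins for C2_cmpeqi,
                                            // C2_cmpgti, C2_cmpgtui, S2/S4_tstbit_i

// True when immediate V is encodable by a new-value compare-and-jump:
// cmp.eq/cmp.gt take u5 or -1, cmp.gtu takes u5 only, tstbit only #0.
bool isValidNVJImmediate(Cmp Op, int64_t V) {
  auto IsU5 = [](int64_t X) { return X >= 0 && X < 32; }; // same as isUInt<5>
  switch (Op) {
  case Cmp::EqI:
  case Cmp::GtI:     return IsU5(V) || V == -1;
  case Cmp::GtUI:    return IsU5(V);
  case Cmp::TstBitI: return V == 0;
  }
  return false;
}

Opcodes outside the switch fall through to "not valid", which is also why the hunk initializes Valid to false.)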
@@ -189,9 +186,9 @@ static bool commonChecksToProhibitNewValueJump(bool afterRA, // to new value jump. If they are in the path, bail out. // KILL sets kill flag on the opcode. It also sets up a // single register, out of pair. - // %D0<def> = Hexagon_S2_lsr_r_p %D0<kill>, %R2<kill> + // %D0<def> = S2_lsr_r_p %D0<kill>, %R2<kill> // %R0<def> = KILL %R0, %D0<imp-use,kill> - // %P0<def> = CMPEQri %R0<kill>, 0 + // %P0<def> = C2_cmpeqi %R0<kill>, 0 // PHI can be anything after RA. // COPY can remateriaze things in between feeder, compare and nvj. if (MII->getOpcode() == TargetOpcode::KILL || @@ -203,7 +200,7 @@ static bool commonChecksToProhibitNewValueJump(bool afterRA, // of registers by individual passes in the backend. At this time, // we don't know the scope of usage and definitions of these // instructions. - if (MII->getOpcode() == Hexagon::LDriw_pred || + if (MII->getOpcode() == Hexagon::LDriw_pred || MII->getOpcode() == Hexagon::STriw_pred) return false; } @@ -226,10 +223,23 @@ static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII, // range specified by the arch. if (!secondReg) { int64_t v = MI.getOperand(2).getImm(); + bool Valid = false; - if (!(isUInt<5>(v) || ((MI.getOpcode() == Hexagon::C2_cmpeqi || - MI.getOpcode() == Hexagon::C2_cmpgti) && - (v == -1)))) + switch (MI.getOpcode()) { + case Hexagon::C2_cmpeqi: + case Hexagon::C2_cmpgti: + Valid = (isUInt<5>(v) || v == -1); + break; + case Hexagon::C2_cmpgtui: + Valid = isUInt<5>(v); + break; + case Hexagon::S2_tstbit_i: + case Hexagon::S4_ntstbit_i: + Valid = (v == 0); + break; + } + + if (!Valid) return false; } @@ -239,6 +249,11 @@ static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII, if (secondReg) { cmpOp2 = MI.getOperand(2).getReg(); + // If the same register appears as both operands, we cannot generate a new + // value compare. Only one operand may use the .new suffix. + if (cmpReg1 == cmpOp2) + return false; + // Make sure that that second register is not from COPY // At machine code level, we don't need this, but if we decide // to move new value jump prior to RA, we would be needing this. @@ -255,6 +270,8 @@ static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII, ++II ; for (MachineBasicBlock::iterator localII = II; localII != end; ++localII) { + if (localII->isDebugValue()) + continue; // Check 1. // If "common" checks fail, bail out. @@ -449,7 +466,9 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "Instr: "; MI.dump(); dbgs() << "\n"); if (!foundJump && (MI.getOpcode() == Hexagon::J2_jumpt || + MI.getOpcode() == Hexagon::J2_jumptpt || MI.getOpcode() == Hexagon::J2_jumpf || + MI.getOpcode() == Hexagon::J2_jumpfpt || MI.getOpcode() == Hexagon::J2_jumptnewpt || MI.getOpcode() == Hexagon::J2_jumptnew || MI.getOpcode() == Hexagon::J2_jumpfnewpt || @@ -472,7 +491,7 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { //if(LVs.isLiveOut(predReg, *MBB)) break; // Get all the successors of this block - which will always - // be 2. Check if the predicate register is live in in those + // be 2. Check if the predicate register is live-in in those // successor. If yes, we can not delete the predicate - // I am doing this only because LLVM does not provide LiveOut // at the BB level. @@ -580,8 +599,9 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { if (isSecondOpReg) { // In case of CMPLT, or CMPLTU, or EQ with the second register // to newify, swap the operands. 
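(Why the swap below: only the second source operand of a new-value compare may carry the .new register, so when the feeder instruction defines the second operand, the registers must trade places, and their kill flags must travel with them. The temporary-variable dance in the hunk amounts to this sketch:

#include <utility>

// inside the swap branch; cmpReg1/cmpOp2 are unsigned, the flags are bool
std::swap(cmpReg1, cmpOp2);        // the feeder now feeds operand 1
std::swap(MO1IsKill, MO2IsKill);   // kill flags stay with their registers

The hunk also widens the condition so C4_cmpneq is treated like C2_cmpeq, since both are symmetric compares.)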
- if (cmpInstr->getOpcode() == Hexagon::C2_cmpeq && - feederReg == (unsigned) cmpOp2) { + unsigned COp = cmpInstr->getOpcode(); + if ((COp == Hexagon::C2_cmpeq || COp == Hexagon::C4_cmpneq) && + (feederReg == (unsigned) cmpOp2)) { unsigned tmp = cmpReg1; bool tmpIsKill = MO1IsKill; cmpReg1 = cmpOp2; @@ -647,16 +667,6 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { .addReg(cmpOp2, getKillRegState(MO2IsKill)) .addMBB(jmpTarget); - else if ((cmpInstr->getOpcode() == Hexagon::C2_cmpeqi || - cmpInstr->getOpcode() == Hexagon::C2_cmpgti) && - cmpOp2 == -1 ) - // Corresponding new-value compare jump instructions don't have the - // operand for -1 immediate value. - NewMI = BuildMI(*MBB, jmpPos, dl, - QII->get(opc)) - .addReg(cmpReg1, getKillRegState(MO1IsKill)) - .addMBB(jmpTarget); - else NewMI = BuildMI(*MBB, jmpPos, dl, QII->get(opc)) diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonOperands.td b/contrib/llvm/lib/Target/Hexagon/HexagonOperands.td index 11092d2..9833105 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonOperands.td +++ b/contrib/llvm/lib/Target/Hexagon/HexagonOperands.td @@ -7,58 +7,53 @@ // //===----------------------------------------------------------------------===// -def s32ImmOperand : AsmOperandClass { let Name = "s32Imm"; } +def s32_0ImmOperand : AsmOperandClass { let Name = "s32_0Imm"; } def s23_2ImmOperand : AsmOperandClass { let Name = "s23_2Imm"; } -def s8ImmOperand : AsmOperandClass { let Name = "s8Imm"; } -def s8Imm64Operand : AsmOperandClass { let Name = "s8Imm64"; } -def s6ImmOperand : AsmOperandClass { let Name = "s6Imm"; } -def s4ImmOperand : AsmOperandClass { let Name = "s4Imm"; } +def s8_0ImmOperand : AsmOperandClass { let Name = "s8_0Imm"; } +def s8_0Imm64Operand : AsmOperandClass { let Name = "s8_0Imm64"; } +def s6_0ImmOperand : AsmOperandClass { let Name = "s6_0Imm"; } def s4_0ImmOperand : AsmOperandClass { let Name = "s4_0Imm"; } def s4_1ImmOperand : AsmOperandClass { let Name = "s4_1Imm"; } def s4_2ImmOperand : AsmOperandClass { let Name = "s4_2Imm"; } def s4_3ImmOperand : AsmOperandClass { let Name = "s4_3Imm"; } def s4_6ImmOperand : AsmOperandClass { let Name = "s4_6Imm"; } def s3_6ImmOperand : AsmOperandClass { let Name = "s3_6Imm"; } -def u64ImmOperand : AsmOperandClass { let Name = "u64Imm"; } -def u32ImmOperand : AsmOperandClass { let Name = "u32Imm"; } +def u64_0ImmOperand : AsmOperandClass { let Name = "u64_0Imm"; } +def u32_0ImmOperand : AsmOperandClass { let Name = "u32_0Imm"; } def u26_6ImmOperand : AsmOperandClass { let Name = "u26_6Imm"; } -def u16ImmOperand : AsmOperandClass { let Name = "u16Imm"; } def u16_0ImmOperand : AsmOperandClass { let Name = "u16_0Imm"; } def u16_1ImmOperand : AsmOperandClass { let Name = "u16_1Imm"; } def u16_2ImmOperand : AsmOperandClass { let Name = "u16_2Imm"; } def u16_3ImmOperand : AsmOperandClass { let Name = "u16_3Imm"; } def u11_3ImmOperand : AsmOperandClass { let Name = "u11_3Imm"; } -def u10ImmOperand : AsmOperandClass { let Name = "u10Imm"; } -def u9ImmOperand : AsmOperandClass { let Name = "u9Imm"; } -def u8ImmOperand : AsmOperandClass { let Name = "u8Imm"; } -def u7ImmOperand : AsmOperandClass { let Name = "u7Imm"; } -def u6ImmOperand : AsmOperandClass { let Name = "u6Imm"; } +def u10_0ImmOperand : AsmOperandClass { let Name = "u10_0Imm"; } +def u9_0ImmOperand : AsmOperandClass { let Name = "u9_0Imm"; } +def u8_0ImmOperand : AsmOperandClass { let Name = "u8_0Imm"; } +def u7_0ImmOperand : AsmOperandClass { let Name = "u7_0Imm"; } def u6_0ImmOperand : AsmOperandClass { let 
Name = "u6_0Imm"; } def u6_1ImmOperand : AsmOperandClass { let Name = "u6_1Imm"; } def u6_2ImmOperand : AsmOperandClass { let Name = "u6_2Imm"; } def u6_3ImmOperand : AsmOperandClass { let Name = "u6_3Imm"; } -def u5ImmOperand : AsmOperandClass { let Name = "u5Imm"; } -def u4ImmOperand : AsmOperandClass { let Name = "u4Imm"; } -def u3ImmOperand : AsmOperandClass { let Name = "u3Imm"; } -def u2ImmOperand : AsmOperandClass { let Name = "u2Imm"; } -def u1ImmOperand : AsmOperandClass { let Name = "u1Imm"; } -def n8ImmOperand : AsmOperandClass { let Name = "n8Imm"; } +def u5_0ImmOperand : AsmOperandClass { let Name = "u5_0Imm"; } +def u4_0ImmOperand : AsmOperandClass { let Name = "u4_0Imm"; } +def u3_0ImmOperand : AsmOperandClass { let Name = "u3_0Imm"; } +def u2_0ImmOperand : AsmOperandClass { let Name = "u2_0Imm"; } +def u1_0ImmOperand : AsmOperandClass { let Name = "u1_0Imm"; } +def n8_0ImmOperand : AsmOperandClass { let Name = "n8_0Imm"; } // Immediate operands. let OperandType = "OPERAND_IMMEDIATE", DecoderMethod = "unsignedImmDecoder" in { - def s32Imm : Operand<i32> { let ParserMatchClass = s32ImmOperand; - let DecoderMethod = "s32ImmDecoder"; } + def s32_0Imm : Operand<i32> { let ParserMatchClass = s32_0ImmOperand; + let DecoderMethod = "s32_0ImmDecoder"; } def s23_2Imm : Operand<i32> { let ParserMatchClass = s23_2ImmOperand; } - def s8Imm : Operand<i32> { let ParserMatchClass = s8ImmOperand; - let DecoderMethod = "s8ImmDecoder"; } - def s8Imm64 : Operand<i64> { let ParserMatchClass = s8Imm64Operand; - let DecoderMethod = "s8ImmDecoder"; } - def s6Imm : Operand<i32> { let ParserMatchClass = s6ImmOperand; + def s8_0Imm : Operand<i32> { let ParserMatchClass = s8_0ImmOperand; + let DecoderMethod = "s8_0ImmDecoder"; } + def s8_0Imm64 : Operand<i64> { let ParserMatchClass = s8_0Imm64Operand; + let DecoderMethod = "s8_0ImmDecoder"; } + def s6_0Imm : Operand<i32> { let ParserMatchClass = s6_0ImmOperand; let DecoderMethod = "s6_0ImmDecoder"; } def s6_3Imm : Operand<i32>; - def s4Imm : Operand<i32> { let ParserMatchClass = s4ImmOperand; - let DecoderMethod = "s4_0ImmDecoder"; } def s4_0Imm : Operand<i32> { let ParserMatchClass = s4_0ImmOperand; let DecoderMethod = "s4_0ImmDecoder"; } def s4_1Imm : Operand<i32> { let ParserMatchClass = s4_1ImmOperand; @@ -67,42 +62,37 @@ let OperandType = "OPERAND_IMMEDIATE", let DecoderMethod = "s4_2ImmDecoder"; } def s4_3Imm : Operand<i32> { let ParserMatchClass = s4_3ImmOperand; let DecoderMethod = "s4_3ImmDecoder"; } - def u64Imm : Operand<i64> { let ParserMatchClass = u64ImmOperand; } - def u32Imm : Operand<i32> { let ParserMatchClass = u32ImmOperand; } + def u64_0Imm : Operand<i64> { let ParserMatchClass = u64_0ImmOperand; } + def u32_0Imm : Operand<i32> { let ParserMatchClass = u32_0ImmOperand; } def u26_6Imm : Operand<i32> { let ParserMatchClass = u26_6ImmOperand; } - def u16Imm : Operand<i32> { let ParserMatchClass = u16ImmOperand; } def u16_0Imm : Operand<i32> { let ParserMatchClass = u16_0ImmOperand; } def u16_1Imm : Operand<i32> { let ParserMatchClass = u16_1ImmOperand; } def u16_2Imm : Operand<i32> { let ParserMatchClass = u16_2ImmOperand; } def u16_3Imm : Operand<i32> { let ParserMatchClass = u16_3ImmOperand; } def u11_3Imm : Operand<i32> { let ParserMatchClass = u11_3ImmOperand; } - def u10Imm : Operand<i32> { let ParserMatchClass = u10ImmOperand; } - def u9Imm : Operand<i32> { let ParserMatchClass = u9ImmOperand; } - def u8Imm : Operand<i32> { let ParserMatchClass = u8ImmOperand; } - def u7Imm : Operand<i32> { let ParserMatchClass = u7ImmOperand; 
} - def u6Imm : Operand<i32> { let ParserMatchClass = u6ImmOperand; } + def u10_0Imm : Operand<i32> { let ParserMatchClass = u10_0ImmOperand; } + def u9_0Imm : Operand<i32> { let ParserMatchClass = u9_0ImmOperand; } + def u8_0Imm : Operand<i32> { let ParserMatchClass = u8_0ImmOperand; } + def u7_0Imm : Operand<i32> { let ParserMatchClass = u7_0ImmOperand; } def u6_0Imm : Operand<i32> { let ParserMatchClass = u6_0ImmOperand; } def u6_1Imm : Operand<i32> { let ParserMatchClass = u6_1ImmOperand; } def u6_2Imm : Operand<i32> { let ParserMatchClass = u6_2ImmOperand; } def u6_3Imm : Operand<i32> { let ParserMatchClass = u6_3ImmOperand; } - def u5Imm : Operand<i32> { let ParserMatchClass = u5ImmOperand; } - def u5_0Imm : Operand<i32>; + def u5_0Imm : Operand<i32> { let ParserMatchClass = u5_0ImmOperand; } def u5_1Imm : Operand<i32>; def u5_2Imm : Operand<i32>; def u5_3Imm : Operand<i32>; - def u4Imm : Operand<i32> { let ParserMatchClass = u4ImmOperand; } - def u4_0Imm : Operand<i32>; + def u4_0Imm : Operand<i32> { let ParserMatchClass = u4_0ImmOperand; } def u4_1Imm : Operand<i32>; def u4_2Imm : Operand<i32>; def u4_3Imm : Operand<i32>; - def u3Imm : Operand<i32> { let ParserMatchClass = u3ImmOperand; } - def u3_0Imm : Operand<i32>; + def u3_0Imm : Operand<i32> { let ParserMatchClass = u3_0ImmOperand; } def u3_1Imm : Operand<i32>; def u3_2Imm : Operand<i32>; def u3_3Imm : Operand<i32>; - def u2Imm : Operand<i32> { let ParserMatchClass = u2ImmOperand; } - def u1Imm : Operand<i32> { let ParserMatchClass = u1ImmOperand; } - def n8Imm : Operand<i32> { let ParserMatchClass = n8ImmOperand; } + def u2_0Imm : Operand<i32> { let ParserMatchClass = u2_0ImmOperand; } + def u1_0Imm : Operand<i32> { let ParserMatchClass = u1_0ImmOperand; } + def n8_0Imm : Operand<i32> { let ParserMatchClass = n8_0ImmOperand; } } let OperandType = "OPERAND_IMMEDIATE" in { @@ -117,15 +107,12 @@ let OperandType = "OPERAND_IMMEDIATE" in { def s3_7Imm : Operand<i32> { let PrintMethod = "prints3_7ImmOperand"; let DecoderMethod = "s3_6ImmDecoder";} } +def n1ConstOperand : AsmOperandClass { let Name = "n1Const"; } +def n1Const : Operand<i32> { let ParserMatchClass = n1ConstOperand; } // // Immediate predicates // -def s32ImmPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - return isInt<32>(v); -}]>; - def s32_0ImmPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); return isInt<32>(v); @@ -146,47 +133,22 @@ def s29_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<29,3>(v); }]>; -def s16ImmPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - return isInt<16>(v); -}]>; - -def s11_0ImmPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - return isInt<11>(v); -}]>; - -def s11_1ImmPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - return isShiftedInt<11,1>(v); -}]>; - -def s11_2ImmPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - return isShiftedInt<11,2>(v); -}]>; - -def s11_3ImmPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - return isShiftedInt<11,3>(v); -}]>; - -def s10ImmPred : PatLeaf<(i32 imm), [{ +def s10_0ImmPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); return isInt<10>(v); }]>; -def s8ImmPred : PatLeaf<(i32 imm), [{ +def s8_0ImmPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); return isInt<8>(v); }]>; -def s8Imm64Pred : PatLeaf<(i64 imm), [{ +def s8_0Imm64Pred : PatLeaf<(i64 imm), [{ int64_t v = (int64_t)N->getSExtValue(); return 
isInt<8>(v); }]>; -def s6ImmPred : PatLeaf<(i32 imm), [{ +def s6_0ImmPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); return isInt<6>(v); }]>; @@ -211,92 +173,31 @@ def s4_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<4,3>(v); }]>; -def u64ImmPred : PatLeaf<(i64 imm), [{ - // Adding "N ||" to suppress gcc unused warning. - return (N || true); -}]>; - -def u32ImmPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - return isUInt<32>(v); -}]>; - def u32_0ImmPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); return isUInt<32>(v); }]>; -def u31_1ImmPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - return isShiftedUInt<31,1>(v); -}]>; - -def u30_2ImmPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - return isShiftedUInt<30,2>(v); -}]>; - -def u29_3ImmPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - return isShiftedUInt<29,3>(v); -}]>; - -def u26_6ImmPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - return isShiftedUInt<26,6>(v); -}]>; - def u16_0ImmPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); return isUInt<16>(v); }]>; -def u16_1ImmPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - return isShiftedUInt<16,1>(v); -}]>; - -def u16_2ImmPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - return isShiftedUInt<16,2>(v); -}]>; - def u11_3ImmPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); return isShiftedUInt<11,3>(v); }]>; -def u10ImmPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - return isUInt<10>(v); -}]>; - -def u9ImmPred : PatLeaf<(i32 imm), [{ +def u9_0ImmPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); return isUInt<9>(v); }]>; -def u8ImmPred : PatLeaf<(i32 imm), [{ +def u8_0ImmPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); return isUInt<8>(v); }]>; -def u7StrictPosImmPred : ImmLeaf<i32, [{ - // u7StrictPosImmPred predicate - True if the immediate fits in an 7-bit - // unsigned field and is strictly greater than 0. 
- return isUInt<7>(Imm) && Imm > 0; -}]>; - -def u7ImmPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - return isUInt<7>(v); -}]>; - -def u6ImmPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - return isUInt<6>(v); -}]>; - def u6_0ImmPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); return isUInt<6>(v); @@ -312,182 +213,87 @@ def u6_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<6,2>(v); }]>; -def u6_3ImmPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - return isShiftedUInt<6,3>(v); -}]>; - -def u5ImmPred : PatLeaf<(i32 imm), [{ +def u5_0ImmPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); return isUInt<5>(v); }]>; -def u4ImmPred : PatLeaf<(i32 imm), [{ +def u4_0ImmPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); return isUInt<4>(v); }]>; -def u3ImmPred : PatLeaf<(i32 imm), [{ +def u3_0ImmPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); return isUInt<3>(v); }]>; -def u2ImmPred : PatLeaf<(i32 imm), [{ +def u2_0ImmPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); return isUInt<2>(v); }]>; -def u1ImmPred : PatLeaf<(i1 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - return isUInt<1>(v); -}]>; - -def u1ImmPred32 : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - return isUInt<1>(v); -}]>; - -def m5ImmPred : PatLeaf<(i32 imm), [{ - // m5ImmPred predicate - True if the number is in range -1 .. -31 - // and will fit in a 5 bit field when made positive, for use in memops. - int64_t v = (int64_t)N->getSExtValue(); - return (-31 <= v && v <= -1); -}]>; - -//InN means negative integers in [-(2^N - 1), 0] -def n8ImmPred : PatLeaf<(i32 imm), [{ - // n8ImmPred predicate - True if the immediate fits in a 8-bit signed - // field. - int64_t v = (int64_t)N->getSExtValue(); - return (-255 <= v && v <= 0); -}]>; - -def nOneImmPred : PatLeaf<(i32 imm), [{ - // nOneImmPred predicate - True if the immediate is -1. - int64_t v = (int64_t)N->getSExtValue(); - return (-1 == v); -}]>; - -def Set5ImmPred : PatLeaf<(i32 imm), [{ - // Set5ImmPred predicate - True if the number is in the series of values. - // [ 2^0, 2^1, ... 2^31 ] - // For use in setbit immediate. - uint32_t v = (int32_t)N->getSExtValue(); - // Constrain to 32 bits, and then check for single bit. - return ImmIsSingleBit(v); -}]>; - -def Clr5ImmPred : PatLeaf<(i32 imm), [{ - // Clr5ImmPred predicate - True if the number is in the series of - // bit negated values. - // [ 2^0, 2^1, ... 2^31 ] - // For use in clrbit immediate. - // Note: we are bit NOTing the value. - uint32_t v = ~ (int32_t)N->getSExtValue(); - // Constrain to 32 bits, and then check for single bit. - return ImmIsSingleBit(v); -}]>; - // Extendable immediate operands. 
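(Before the extendable operand classes below, one note on the fixed-width predicates above: the second number in sN_MImmPred / uN_MImmPred is a scale, checked with isShiftedInt / isShiftedUInt from llvm/Support/MathExtras.h. The value must fit in N bits after dropping M guaranteed-zero low bits. A small demonstration, assuming only those MathExtras helpers:

#include "llvm/Support/MathExtras.h"
#include <cassert>

void immediateScaleDemo() {
  using namespace llvm;
  assert(isShiftedUInt<6, 2>(252));   // 252 == 63 << 2: 6 bits, scaled by 4
  assert(!isShiftedUInt<6, 2>(253));  // low bits set, not 4-byte aligned
  assert(isShiftedInt<4, 3>(-64));    // -64 == -8 << 3, fits an s4_3 field
  assert(isInt<8>(-128) && !isInt<8>(128)); // plain s8 range check
}

)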
def f32ExtOperand : AsmOperandClass { let Name = "f32Ext"; } -def s16ExtOperand : AsmOperandClass { let Name = "s16Ext"; } -def s12ExtOperand : AsmOperandClass { let Name = "s12Ext"; } -def s10ExtOperand : AsmOperandClass { let Name = "s10Ext"; } -def s9ExtOperand : AsmOperandClass { let Name = "s9Ext"; } -def s8ExtOperand : AsmOperandClass { let Name = "s8Ext"; } -def s7ExtOperand : AsmOperandClass { let Name = "s7Ext"; } -def s6ExtOperand : AsmOperandClass { let Name = "s6Ext"; } +def s16_0ExtOperand : AsmOperandClass { let Name = "s16_0Ext"; } +def s12_0ExtOperand : AsmOperandClass { let Name = "s12_0Ext"; } +def s10_0ExtOperand : AsmOperandClass { let Name = "s10_0Ext"; } +def s9_0ExtOperand : AsmOperandClass { let Name = "s9_0Ext"; } +def s8_0ExtOperand : AsmOperandClass { let Name = "s8_0Ext"; } +def s7_0ExtOperand : AsmOperandClass { let Name = "s7_0Ext"; } +def s6_0ExtOperand : AsmOperandClass { let Name = "s6_0Ext"; } def s11_0ExtOperand : AsmOperandClass { let Name = "s11_0Ext"; } def s11_1ExtOperand : AsmOperandClass { let Name = "s11_1Ext"; } def s11_2ExtOperand : AsmOperandClass { let Name = "s11_2Ext"; } def s11_3ExtOperand : AsmOperandClass { let Name = "s11_3Ext"; } -def u6ExtOperand : AsmOperandClass { let Name = "u6Ext"; } -def u7ExtOperand : AsmOperandClass { let Name = "u7Ext"; } -def u8ExtOperand : AsmOperandClass { let Name = "u8Ext"; } -def u9ExtOperand : AsmOperandClass { let Name = "u9Ext"; } -def u10ExtOperand : AsmOperandClass { let Name = "u10Ext"; } def u6_0ExtOperand : AsmOperandClass { let Name = "u6_0Ext"; } +def u7_0ExtOperand : AsmOperandClass { let Name = "u7_0Ext"; } +def u8_0ExtOperand : AsmOperandClass { let Name = "u8_0Ext"; } +def u9_0ExtOperand : AsmOperandClass { let Name = "u9_0Ext"; } +def u10_0ExtOperand : AsmOperandClass { let Name = "u10_0Ext"; } def u6_1ExtOperand : AsmOperandClass { let Name = "u6_1Ext"; } def u6_2ExtOperand : AsmOperandClass { let Name = "u6_2Ext"; } def u6_3ExtOperand : AsmOperandClass { let Name = "u6_3Ext"; } -def u32MustExtOperand : AsmOperandClass { let Name = "u32MustExt"; } +def u32_0MustExtOperand : AsmOperandClass { let Name = "u32_0MustExt"; } let OperandType = "OPERAND_IMMEDIATE", PrintMethod = "printExtOperand", DecoderMethod = "unsignedImmDecoder" in { def f32Ext : Operand<f32> { let ParserMatchClass = f32ExtOperand; } - def s16Ext : Operand<i32> { let ParserMatchClass = s16ExtOperand; - let DecoderMethod = "s16ImmDecoder"; } - def s12Ext : Operand<i32> { let ParserMatchClass = s12ExtOperand; - let DecoderMethod = "s12ImmDecoder"; } + def s16_0Ext : Operand<i32> { let ParserMatchClass = s16_0ExtOperand; + let DecoderMethod = "s16_0ImmDecoder"; } + def s12_0Ext : Operand<i32> { let ParserMatchClass = s12_0ExtOperand; + let DecoderMethod = "s12_0ImmDecoder"; } def s11_0Ext : Operand<i32> { let ParserMatchClass = s11_0ExtOperand; - let DecoderMethod = "s11_0ImmDecoder"; } + let DecoderMethod = "s11_0ImmDecoder"; } def s11_1Ext : Operand<i32> { let ParserMatchClass = s11_1ExtOperand; - let DecoderMethod = "s11_1ImmDecoder"; } + let DecoderMethod = "s11_1ImmDecoder"; } def s11_2Ext : Operand<i32> { let ParserMatchClass = s11_2ExtOperand; - let DecoderMethod = "s11_2ImmDecoder"; } + let DecoderMethod = "s11_2ImmDecoder"; } def s11_3Ext : Operand<i32> { let ParserMatchClass = s11_3ExtOperand; - let DecoderMethod = "s11_3ImmDecoder"; } - def s10Ext : Operand<i32> { let ParserMatchClass = s10ExtOperand; - let DecoderMethod = "s10ImmDecoder"; } - def s9Ext : Operand<i32> { let ParserMatchClass = s9ExtOperand; - let 
DecoderMethod = "s90ImmDecoder"; } - def s8Ext : Operand<i32> { let ParserMatchClass = s8ExtOperand; - let DecoderMethod = "s8ImmDecoder"; } - def s7Ext : Operand<i32> { let ParserMatchClass = s7ExtOperand; } - def s6Ext : Operand<i32> { let ParserMatchClass = s6ExtOperand; - let DecoderMethod = "s6_0ImmDecoder"; } - def u6Ext : Operand<i32> { let ParserMatchClass = u6ExtOperand; } - def u7Ext : Operand<i32> { let ParserMatchClass = u7ExtOperand; } - def u8Ext : Operand<i32> { let ParserMatchClass = u8ExtOperand; } - def u9Ext : Operand<i32> { let ParserMatchClass = u9ExtOperand; } - def u10Ext : Operand<i32> { let ParserMatchClass = u10ExtOperand; } + let DecoderMethod = "s11_3ImmDecoder"; } + def s10_0Ext : Operand<i32> { let ParserMatchClass = s10_0ExtOperand; + let DecoderMethod = "s10_0ImmDecoder"; } + def s9_0Ext : Operand<i32> { let ParserMatchClass = s9_0ExtOperand; + let DecoderMethod = "s9_0ImmDecoder"; } + def s8_0Ext : Operand<i32> { let ParserMatchClass = s8_0ExtOperand; + let DecoderMethod = "s8_0ImmDecoder"; } + def s7_0Ext : Operand<i32> { let ParserMatchClass = s7_0ExtOperand; } + def s6_0Ext : Operand<i32> { let ParserMatchClass = s6_0ExtOperand; + let DecoderMethod = "s6_0ImmDecoder"; } + def u7_0Ext : Operand<i32> { let ParserMatchClass = u7_0ExtOperand; } + def u8_0Ext : Operand<i32> { let ParserMatchClass = u8_0ExtOperand; } + def u9_0Ext : Operand<i32> { let ParserMatchClass = u9_0ExtOperand; } + def u10_0Ext : Operand<i32> { let ParserMatchClass = u10_0ExtOperand; } def u6_0Ext : Operand<i32> { let ParserMatchClass = u6_0ExtOperand; } def u6_1Ext : Operand<i32> { let ParserMatchClass = u6_1ExtOperand; } def u6_2Ext : Operand<i32> { let ParserMatchClass = u6_2ExtOperand; } def u6_3Ext : Operand<i32> { let ParserMatchClass = u6_3ExtOperand; } - def u32MustExt : Operand<i32> { let ParserMatchClass = u32MustExtOperand; } + def u32_0MustExt : Operand<i32> { let ParserMatchClass = u32_0MustExtOperand; } } -def s4_7ImmPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - if (HST->hasV60TOps()) - // Return true if the immediate can fit in a 10-bit sign extended field and - // is 128-byte aligned. - return isShiftedInt<4,7>(v); - return false; -}]>; - -def s3_7ImmPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - if (HST->hasV60TOps()) - // Return true if the immediate can fit in a 9-bit sign extended field and - // is 128-byte aligned. - return isShiftedInt<3,7>(v); - return false; -}]>; - -def s4_6ImmPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - if (HST->hasV60TOps()) - // Return true if the immediate can fit in a 10-bit sign extended field and - // is 64-byte aligned. - return isShiftedInt<4,6>(v); - return false; -}]>; - -def s3_6ImmPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - if (HST->hasV60TOps()) - // Return true if the immediate can fit in a 9-bit sign extended field and - // is 64-byte aligned. - return isShiftedInt<3,6>(v); - return false; -}]>; - - // This complex pattern exists only to create a machine instruction operand // of type "frame index". There doesn't seem to be a way to do that directly // in the patterns. @@ -524,12 +330,3 @@ def calltarget : Operand<i32> { def bblabel : Operand<i32>; def bbl : SDNode<"ISD::BasicBlock", SDTPtrLeaf, [], "BasicBlockSDNode">; - -// Return true if for a 32 to 64-bit sign-extended load. 
-def is_sext_i32 : PatLeaf<(i64 DoubleRegs:$src1), [{ - LoadSDNode *LD = dyn_cast<LoadSDNode>(N); - if (!LD) - return false; - return LD->getExtensionType() == ISD::SEXTLOAD && - LD->getMemoryVT().getScalarType() == MVT::i32; -}]>; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp index 4dff0db..89db467 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp @@ -12,24 +12,30 @@ #define DEBUG_TYPE "opt-addr-mode" -#include "HexagonTargetMachine.h" +#include "HexagonInstrInfo.h" +#include "HexagonSubtarget.h" +#include "MCTargetDesc/HexagonBaseInfo.h" #include "RDFGraph.h" #include "RDFLiveness.h" - #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominanceFrontier.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include <cassert> +#include <cstdint> +#include <map> static cl::opt<int> CodeGrowthLimit("hexagon-amode-growth-limit", cl::Hidden, cl::init(0), cl::desc("Code growth limit for address mode " @@ -39,28 +45,36 @@ using namespace llvm; using namespace rdf; namespace llvm { + FunctionPass *createHexagonOptAddrMode(); void initializeHexagonOptAddrModePass(PassRegistry &); -} + +} // end namespace llvm namespace { + class HexagonOptAddrMode : public MachineFunctionPass { public: static char ID; + HexagonOptAddrMode() - : MachineFunctionPass(ID), HII(0), MDT(0), DFG(0), LV(0) { + : MachineFunctionPass(ID), HII(nullptr), MDT(nullptr), DFG(nullptr), + LV(nullptr) { PassRegistry &R = *PassRegistry::getPassRegistry(); initializeHexagonOptAddrModePass(R); } - const char *getPassName() const override { + + StringRef getPassName() const override { return "Optimize addressing mode of load/store"; } + void getAnalysisUsage(AnalysisUsage &AU) const override { MachineFunctionPass::getAnalysisUsage(AU); AU.addRequired<MachineDominatorTree>(); AU.addRequired<MachineDominanceFrontier>(); AU.setPreservesAll(); } + bool runOnMachineFunction(MachineFunction &MF) override; private: @@ -79,12 +93,12 @@ private: NodeAddr<UseNode *> UseN, unsigned UseMOnum); bool analyzeUses(unsigned DefR, const NodeList &UNodeList, InstrEvalMap &InstrEvalResult, short &SizeInc); - bool hasRepForm(MachineInstr *MI, unsigned TfrDefR); - bool canRemoveAddasl(NodeAddr<StmtNode *> AddAslSN, MachineInstr *MI, + bool hasRepForm(MachineInstr &MI, unsigned TfrDefR); + bool canRemoveAddasl(NodeAddr<StmtNode *> AddAslSN, MachineInstr &MI, const NodeList &UNodeList); void getAllRealUses(NodeAddr<StmtNode *> SN, NodeList &UNodeList); bool allValidCandidates(NodeAddr<StmtNode *> SA, NodeList &UNodeList); - short getBaseWithLongOffset(const MachineInstr *MI) const; + short getBaseWithLongOffset(const MachineInstr &MI) const; void updateMap(NodeAddr<InstrNode *> IA); bool 
constructDefMap(MachineBasicBlock *B); bool changeStore(MachineInstr *OldMI, MachineOperand ImmOp, @@ -93,7 +107,8 @@ private: bool changeAddAsl(NodeAddr<UseNode *> AddAslUN, MachineInstr *AddAslMI, const MachineOperand &ImmOp, unsigned ImmOpNum); }; -} + +} // end anonymous namespace char HexagonOptAddrMode::ID = 0; @@ -104,14 +119,14 @@ INITIALIZE_PASS_DEPENDENCY(MachineDominanceFrontier) INITIALIZE_PASS_END(HexagonOptAddrMode, "opt-amode", "Optimize addressing mode", false, false) -bool HexagonOptAddrMode::hasRepForm(MachineInstr *MI, unsigned TfrDefR) { - const MCInstrDesc &MID = MI->getDesc(); +bool HexagonOptAddrMode::hasRepForm(MachineInstr &MI, unsigned TfrDefR) { + const MCInstrDesc &MID = MI.getDesc(); - if ((!MID.mayStore() && !MID.mayLoad()) || HII->isPredicated(*MI)) + if ((!MID.mayStore() && !MID.mayLoad()) || HII->isPredicated(MI)) return false; if (MID.mayStore()) { - MachineOperand StOp = MI->getOperand(MI->getNumOperands() - 1); + MachineOperand StOp = MI.getOperand(MI.getNumOperands() - 1); if (StOp.isReg() && StOp.getReg() == TfrDefR) return false; } @@ -137,18 +152,18 @@ bool HexagonOptAddrMode::hasRepForm(MachineInstr *MI, unsigned TfrDefR) { // Above three instructions can be replaced with Rd = memw(Rt<<#2 + ##foo+28) bool HexagonOptAddrMode::canRemoveAddasl(NodeAddr<StmtNode *> AddAslSN, - MachineInstr *MI, + MachineInstr &MI, const NodeList &UNodeList) { // check offset size in addasl. if 'offset > 3' return false - const MachineOperand &OffsetOp = MI->getOperand(3); + const MachineOperand &OffsetOp = MI.getOperand(3); if (!OffsetOp.isImm() || OffsetOp.getImm() > 3) return false; - unsigned OffsetReg = MI->getOperand(2).getReg(); + unsigned OffsetReg = MI.getOperand(2).getReg(); RegisterRef OffsetRR; NodeId OffsetRegRD = 0; for (NodeAddr<UseNode *> UA : AddAslSN.Addr->members_if(DFG->IsUse, *DFG)) { - RegisterRef RR = UA.Addr->getRegRef(); + RegisterRef RR = UA.Addr->getRegRef(*DFG); if (OffsetReg == RR.Reg) { OffsetRR = RR; OffsetRegRD = UA.Addr->getReachingDef(); @@ -162,25 +177,25 @@ bool HexagonOptAddrMode::canRemoveAddasl(NodeAddr<StmtNode *> AddAslSN, RDefMap[OffsetRR][IA.Id] != OffsetRegRD) return false; - MachineInstr *UseMI = NodeAddr<StmtNode *>(IA).Addr->getCode(); + MachineInstr &UseMI = *NodeAddr<StmtNode *>(IA).Addr->getCode(); NodeAddr<DefNode *> OffsetRegDN = DFG->addr<DefNode *>(OffsetRegRD); // Reaching Def to an offset register can't be a phi. if ((OffsetRegDN.Addr->getFlags() & NodeAttrs::PhiRef) && - MI->getParent() != UseMI->getParent()) + MI.getParent() != UseMI.getParent()) return false; - const MCInstrDesc &UseMID = UseMI->getDesc(); + const MCInstrDesc &UseMID = UseMI.getDesc(); if ((!UseMID.mayLoad() && !UseMID.mayStore()) || HII->getAddrMode(UseMI) != HexagonII::BaseImmOffset || getBaseWithLongOffset(UseMI) < 0) return false; // Addasl output can't be a store value. 
- if (UseMID.mayStore() && UseMI->getOperand(2).isReg() && - UseMI->getOperand(2).getReg() == MI->getOperand(0).getReg()) + if (UseMID.mayStore() && UseMI.getOperand(2).isReg() && + UseMI.getOperand(2).getReg() == MI.getOperand(0).getReg()) return false; - for (auto &Mo : UseMI->operands()) + for (auto &Mo : UseMI.operands()) if (Mo.isFI()) return false; } @@ -191,7 +206,7 @@ bool HexagonOptAddrMode::allValidCandidates(NodeAddr<StmtNode *> SA, NodeList &UNodeList) { for (auto I = UNodeList.rbegin(), E = UNodeList.rend(); I != E; ++I) { NodeAddr<UseNode *> UN = *I; - RegisterRef UR = UN.Addr->getRegRef(); + RegisterRef UR = UN.Addr->getRegRef(*DFG); NodeSet Visited, Defs; const auto &ReachingDefs = LV->getAllReachingDefsRec(UR, UN, Visited, Defs); if (ReachingDefs.size() > 1) { @@ -215,7 +230,8 @@ void HexagonOptAddrMode::getAllRealUses(NodeAddr<StmtNode *> SA, for (NodeAddr<DefNode *> DA : SA.Addr->members_if(DFG->IsDef, *DFG)) { DEBUG(dbgs() << "\t\t[DefNode]: " << Print<NodeAddr<DefNode *>>(DA, *DFG) << "\n"); - RegisterRef DR = DA.Addr->getRegRef(); + RegisterRef DR = DFG->normalizeRef(DA.Addr->getRegRef(*DFG)); + auto UseSet = LV->getAllReachedUses(DR, DA); for (auto UI : UseSet) { @@ -232,13 +248,13 @@ void HexagonOptAddrMode::getAllRealUses(NodeAddr<StmtNode *> SA, const Liveness::RefMap &phiUse = LV->getRealUses(id); DEBUG(dbgs() << "\t\t\t\tphi real Uses" << Print<Liveness::RefMap>(phiUse, *DFG) << "\n"); - if (phiUse.size() > 0) { + if (!phiUse.empty()) { for (auto I : phiUse) { - if (DR != I.first) + if (DR.Reg != I.first) continue; auto phiUseSet = I.second; for (auto phiUI : phiUseSet) { - NodeAddr<UseNode *> phiUA = DFG->addr<UseNode *>(phiUI); + NodeAddr<UseNode *> phiUA = DFG->addr<UseNode *>(phiUI.first); UNodeList.push_back(phiUA); } } @@ -261,8 +277,8 @@ bool HexagonOptAddrMode::analyzeUses(unsigned tfrDefR, bool CanBeReplaced = false; NodeAddr<UseNode *> UN = *I; NodeAddr<StmtNode *> SN = UN.Addr->getOwner(*DFG); - MachineInstr *MI = SN.Addr->getCode(); - const MCInstrDesc &MID = MI->getDesc(); + MachineInstr &MI = *SN.Addr->getCode(); + const MCInstrDesc &MID = MI.getDesc(); if ((MID.mayLoad() || MID.mayStore())) { if (!hasRepForm(MI, tfrDefR)) { KeepTfr = true; @@ -270,10 +286,10 @@ bool HexagonOptAddrMode::analyzeUses(unsigned tfrDefR, } SizeInc++; CanBeReplaced = true; - } else if (MI->getOpcode() == Hexagon::S2_addasl_rrri) { + } else if (MI.getOpcode() == Hexagon::S2_addasl_rrri) { NodeList AddaslUseList; - DEBUG(dbgs() << "\nGetting ReachedUses for === " << *MI << "\n"); + DEBUG(dbgs() << "\nGetting ReachedUses for === " << MI << "\n"); getAllRealUses(SN, AddaslUseList); // Process phi nodes. 
if (allValidCandidates(SN, AddaslUseList) && @@ -290,7 +306,7 @@ bool HexagonOptAddrMode::analyzeUses(unsigned tfrDefR, // M4_mpyrr_addr -> M4_mpyrr_addi KeepTfr = true; - InstrEvalResult[MI] = CanBeReplaced; + InstrEvalResult[&MI] = CanBeReplaced; HasRepInstr |= CanBeReplaced; } @@ -313,8 +329,8 @@ bool HexagonOptAddrMode::changeLoad(MachineInstr *OldMI, MachineOperand ImmOp, MachineInstrBuilder MIB; if (ImmOpNum == 1) { - if (HII->getAddrMode(OldMI) == HexagonII::BaseRegOffset) { - short NewOpCode = HII->getBaseWithLongOffset(OldMI); + if (HII->getAddrMode(*OldMI) == HexagonII::BaseRegOffset) { + short NewOpCode = HII->getBaseWithLongOffset(*OldMI); assert(NewOpCode >= 0 && "Invalid New opcode\n"); MIB = BuildMI(*BB, InsertPt, OldMI->getDebugLoc(), HII->get(NewOpCode)); MIB.addOperand(OldMI->getOperand(0)); @@ -323,8 +339,8 @@ bool HexagonOptAddrMode::changeLoad(MachineInstr *OldMI, MachineOperand ImmOp, MIB.addOperand(ImmOp); OpStart = 4; Changed = true; - } else if (HII->getAddrMode(OldMI) == HexagonII::BaseImmOffset) { - short NewOpCode = HII->getAbsoluteForm(OldMI); + } else if (HII->getAddrMode(*OldMI) == HexagonII::BaseImmOffset) { + short NewOpCode = HII->getAbsoluteForm(*OldMI); assert(NewOpCode >= 0 && "Invalid New opcode\n"); MIB = BuildMI(*BB, InsertPt, OldMI->getDebugLoc(), HII->get(NewOpCode)) .addOperand(OldMI->getOperand(0)); @@ -340,7 +356,7 @@ bool HexagonOptAddrMode::changeLoad(MachineInstr *OldMI, MachineOperand ImmOp, DEBUG(dbgs() << "[Changing]: " << *OldMI << "\n"); DEBUG(dbgs() << "[TO]: " << MIB << "\n"); } else if (ImmOpNum == 2 && OldMI->getOperand(3).getImm() == 0) { - short NewOpCode = HII->xformRegToImmOffset(OldMI); + short NewOpCode = HII->xformRegToImmOffset(*OldMI); assert(NewOpCode >= 0 && "Invalid New opcode\n"); MIB = BuildMI(*BB, InsertPt, OldMI->getDebugLoc(), HII->get(NewOpCode)); MIB.addOperand(OldMI->getOperand(0)); @@ -370,8 +386,8 @@ bool HexagonOptAddrMode::changeStore(MachineInstr *OldMI, MachineOperand ImmOp, ++InsertPt; MachineInstrBuilder MIB; if (ImmOpNum == 0) { - if (HII->getAddrMode(OldMI) == HexagonII::BaseRegOffset) { - short NewOpCode = HII->getBaseWithLongOffset(OldMI); + if (HII->getAddrMode(*OldMI) == HexagonII::BaseRegOffset) { + short NewOpCode = HII->getBaseWithLongOffset(*OldMI); assert(NewOpCode >= 0 && "Invalid New opcode\n"); MIB = BuildMI(*BB, InsertPt, OldMI->getDebugLoc(), HII->get(NewOpCode)); MIB.addOperand(OldMI->getOperand(1)); @@ -379,8 +395,8 @@ bool HexagonOptAddrMode::changeStore(MachineInstr *OldMI, MachineOperand ImmOp, MIB.addOperand(ImmOp); MIB.addOperand(OldMI->getOperand(3)); OpStart = 4; - } else if (HII->getAddrMode(OldMI) == HexagonII::BaseImmOffset) { - short NewOpCode = HII->getAbsoluteForm(OldMI); + } else if (HII->getAddrMode(*OldMI) == HexagonII::BaseImmOffset) { + short NewOpCode = HII->getAbsoluteForm(*OldMI); assert(NewOpCode >= 0 && "Invalid New opcode\n"); MIB = BuildMI(*BB, InsertPt, OldMI->getDebugLoc(), HII->get(NewOpCode)); const GlobalValue *GV = ImmOp.getGlobal(); @@ -393,7 +409,7 @@ bool HexagonOptAddrMode::changeStore(MachineInstr *OldMI, MachineOperand ImmOp, DEBUG(dbgs() << "[Changing]: " << *OldMI << "\n"); DEBUG(dbgs() << "[TO]: " << MIB << "\n"); } else if (ImmOpNum == 1 && OldMI->getOperand(2).getImm() == 0) { - short NewOpCode = HII->xformRegToImmOffset(OldMI); + short NewOpCode = HII->xformRegToImmOffset(*OldMI); assert(NewOpCode >= 0 && "Invalid New opcode\n"); MIB = BuildMI(*BB, InsertPt, OldMI->getDebugLoc(), HII->get(NewOpCode)); MIB.addOperand(OldMI->getOperand(0)); @@ 
-411,7 +427,7 @@ bool HexagonOptAddrMode::changeStore(MachineInstr *OldMI, MachineOperand ImmOp, return Changed; } -short HexagonOptAddrMode::getBaseWithLongOffset(const MachineInstr *MI) const { +short HexagonOptAddrMode::getBaseWithLongOffset(const MachineInstr &MI) const { if (HII->getAddrMode(MI) == HexagonII::BaseImmOffset) { short TempOpCode = HII->getBaseWithRegOffset(MI); return HII->getBaseWithLongOffset(TempOpCode); @@ -442,11 +458,11 @@ bool HexagonOptAddrMode::changeAddAsl(NodeAddr<UseNode *> AddAslUN, DEBUG(dbgs() << "[MI <BB#" << UseMI->getParent()->getNumber() << ">]: " << *UseMI << "\n"); const MCInstrDesc &UseMID = UseMI->getDesc(); - assert(HII->getAddrMode(UseMI) == HexagonII::BaseImmOffset); + assert(HII->getAddrMode(*UseMI) == HexagonII::BaseImmOffset); auto UsePos = MachineBasicBlock::iterator(UseMI); MachineBasicBlock::instr_iterator InsertPt = UsePos.getInstrIterator(); - short NewOpCode = getBaseWithLongOffset(UseMI); + short NewOpCode = getBaseWithLongOffset(*UseMI); assert(NewOpCode >= 0 && "Invalid New opcode\n"); unsigned OpStart; @@ -575,7 +591,7 @@ bool HexagonOptAddrMode::processBlock(NodeAddr<BlockNode *> BA) { void HexagonOptAddrMode::updateMap(NodeAddr<InstrNode *> IA) { RegisterSet RRs; for (NodeAddr<RefNode *> RA : IA.Addr->members(*DFG)) - RRs.insert(RA.Addr->getRegRef()); + RRs.insert(RA.Addr->getRegRef(*DFG)); bool Common = false; for (auto &R : RDefMap) { if (!RRs.count(R.first)) @@ -587,7 +603,7 @@ void HexagonOptAddrMode::updateMap(NodeAddr<InstrNode *> IA) { return; for (auto &R : RDefMap) { - auto F = DefM.find(R.first); + auto F = DefM.find(R.first.Reg); if (F == DefM.end() || F->second.empty()) continue; R.second[IA.Id] = F->second.top()->Id; @@ -622,8 +638,7 @@ bool HexagonOptAddrMode::runOnMachineFunction(MachineFunction &MF) { const auto &TRI = *MF.getSubtarget().getRegisterInfo(); const TargetOperandInfo TOI(*HII); - RegisterAliasInfo RAI(TRI); - DataFlowGraph G(MF, *HII, TRI, *MDT, MDF, RAI, TOI); + DataFlowGraph G(MF, *HII, TRI, *MDT, MDF, TOI); G.build(); DFG = &G; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp index 7937a79..101de3d 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/StackProtector.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" @@ -38,13 +37,9 @@ namespace { } bool runOnFunction(Function &F) override; - const char *getPassName() const override { - return "Remove sign extends"; - } + StringRef getPassName() const override { return "Remove sign extends"; } void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<MachineFunctionAnalysis>(); - AU.addPreserved<MachineFunctionAnalysis>(); AU.addPreserved<StackProtector>(); FunctionPass::getAnalysisUsage(AU); } diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonPatterns.td b/contrib/llvm/lib/Target/Hexagon/HexagonPatterns.td new file mode 100644 index 0000000..ad81287 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonPatterns.td @@ -0,0 +1,3347 @@ +// Pattern fragment that combines the value type and the register class +// into a single parameter. 
+// The pat frags in the definitions below need to have a named register, +// otherwise i32 will be assumed regardless of the register class. The +// name of the register does not matter. +def I1 : PatLeaf<(i1 PredRegs:$R)>; +def I32 : PatLeaf<(i32 IntRegs:$R)>; +def I64 : PatLeaf<(i64 DoubleRegs:$R)>; +def F32 : PatLeaf<(f32 IntRegs:$R)>; +def F64 : PatLeaf<(f64 DoubleRegs:$R)>; + +// Pattern fragments to extract the low and high subregisters from a +// 64-bit value. +def LoReg: OutPatFrag<(ops node:$Rs), (EXTRACT_SUBREG (i64 $Rs), isub_lo)>; +def HiReg: OutPatFrag<(ops node:$Rs), (EXTRACT_SUBREG (i64 $Rs), isub_hi)>; + +def IsOrAdd: PatFrag<(ops node:$Addr, node:$off), + (or node:$Addr, node:$off), [{ return isOrEquivalentToAdd(N); }]>; + +def IsPow2_32 : PatLeaf<(i32 imm), [{ + uint32_t V = N->getZExtValue(); + return isPowerOf2_32(V); +}]>; + +def IsPow2_64 : PatLeaf<(i64 imm), [{ + uint64_t V = N->getZExtValue(); + return isPowerOf2_64(V); +}]>; + +def IsNPow2_32 : PatLeaf<(i32 imm), [{ + uint32_t NV = ~N->getZExtValue(); + return isPowerOf2_32(NV); +}]>; + +def IsPow2_64L : PatLeaf<(i64 imm), [{ + uint64_t V = N->getZExtValue(); + return isPowerOf2_64(V) && Log2_64(V) < 32; +}]>; + +def IsPow2_64H : PatLeaf<(i64 imm), [{ + uint64_t V = N->getZExtValue(); + return isPowerOf2_64(V) && Log2_64(V) >= 32; +}]>; + +def IsNPow2_64L : PatLeaf<(i64 imm), [{ + uint64_t NV = ~N->getZExtValue(); + return isPowerOf2_64(NV) && Log2_64(NV) < 32; +}]>; + +def IsNPow2_64H : PatLeaf<(i64 imm), [{ + uint64_t NV = ~N->getZExtValue(); + return isPowerOf2_64(NV) && Log2_64(NV) >= 32; +}]>; + +def SDEC1 : SDNodeXForm<imm, [{ + int32_t V = N->getSExtValue(); + return CurDAG->getTargetConstant(V-1, SDLoc(N), MVT::i32); +}]>; + +def UDEC1 : SDNodeXForm<imm, [{ + uint32_t V = N->getZExtValue(); + assert(V >= 1); + return CurDAG->getTargetConstant(V-1, SDLoc(N), MVT::i32); +}]>; + +def UDEC32 : SDNodeXForm<imm, [{ + uint32_t V = N->getZExtValue(); + assert(V >= 32); + return CurDAG->getTargetConstant(V-32, SDLoc(N), MVT::i32); +}]>; + +def Log2_32 : SDNodeXForm<imm, [{ + uint32_t V = N->getZExtValue(); + return CurDAG->getTargetConstant(Log2_32(V), SDLoc(N), MVT::i32); +}]>; + +def Log2_64 : SDNodeXForm<imm, [{ + uint64_t V = N->getZExtValue(); + return CurDAG->getTargetConstant(Log2_64(V), SDLoc(N), MVT::i32); +}]>; + +def LogN2_32 : SDNodeXForm<imm, [{ + uint32_t NV = ~N->getZExtValue(); + return CurDAG->getTargetConstant(Log2_32(NV), SDLoc(N), MVT::i32); +}]>; + +def LogN2_64 : SDNodeXForm<imm, [{ + uint64_t NV = ~N->getZExtValue(); + return CurDAG->getTargetConstant(Log2_64(NV), SDLoc(N), MVT::i32); +}]>; + + +class T_CMP_pat <InstHexagon MI, PatFrag OpNode, PatLeaf ImmPred> + : Pat<(i1 (OpNode I32:$src1, ImmPred:$src2)), + (MI IntRegs:$src1, ImmPred:$src2)>; + +def : T_CMP_pat <C2_cmpeqi, seteq, s10_0ImmPred>; +def : T_CMP_pat <C2_cmpgti, setgt, s10_0ImmPred>; +def : T_CMP_pat <C2_cmpgtui, setugt, u9_0ImmPred>; + +def SDTHexagonI64I32I32 : SDTypeProfile<1, 2, + [SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>; + +def HexagonCOMBINE : SDNode<"HexagonISD::COMBINE", SDTHexagonI64I32I32>; +def HexagonPACKHL : SDNode<"HexagonISD::PACKHL", SDTHexagonI64I32I32>; + +// Pats for instruction selection. 
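+// As a flavor of how the xforms above are used during selection: with
+// SDEC1, (setge Rs, #10) can be selected as cmp.gt(Rs, #9), and with
+// LogN2_32, (and Rs, #0xFFFFFFDF) -- whose complement is 1<<5, matching
+// IsNPow2_32 -- can be selected as clrbit(Rs, #5); see the bit patterns
+// further below.
+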
+class BinOp32_pat<SDNode Op, InstHexagon MI, ValueType ResT>
+  : Pat<(ResT (Op I32:$Rs, I32:$Rt)),
+        (ResT (MI IntRegs:$Rs, IntRegs:$Rt))>;
+
+def: BinOp32_pat<add, A2_add, i32>;
+def: BinOp32_pat<and, A2_and, i32>;
+def: BinOp32_pat<or, A2_or, i32>;
+def: BinOp32_pat<sub, A2_sub, i32>;
+def: BinOp32_pat<xor, A2_xor, i32>;
+
+def: BinOp32_pat<HexagonCOMBINE, A2_combinew, i64>;
+def: BinOp32_pat<HexagonPACKHL, S2_packhl, i64>;
+
+// PatFrag to convert the usual comparison patfrags (e.g. setlt) to ones
+// that reverse the order of the operands.
+class RevCmp<PatFrag F> : PatFrag<(ops node:$rhs, node:$lhs), F.Fragment>;
+
+// Pats for compares. They use PatFrags as operands, not SDNodes,
+// since seteq/setgt/etc. are defined as PatFrags.
+class T_cmp32_rr_pat<InstHexagon MI, PatFrag Op, ValueType VT>
+  : Pat<(VT (Op I32:$Rs, I32:$Rt)),
+        (MI IntRegs:$Rs, IntRegs:$Rt)>;
+
+def: T_cmp32_rr_pat<C2_cmpeq, seteq, i1>;
+def: T_cmp32_rr_pat<C2_cmpgt, setgt, i1>;
+def: T_cmp32_rr_pat<C2_cmpgtu, setugt, i1>;
+
+def: T_cmp32_rr_pat<C2_cmpgt, RevCmp<setlt>, i1>;
+def: T_cmp32_rr_pat<C2_cmpgtu, RevCmp<setult>, i1>;
+
+def: Pat<(select I1:$Pu, I32:$Rs, I32:$Rt),
+         (C2_mux PredRegs:$Pu, IntRegs:$Rs, IntRegs:$Rt)>;
+
+def: Pat<(add I32:$Rs, s32_0ImmPred:$s16),
+         (A2_addi I32:$Rs, imm:$s16)>;
+
+def: Pat<(or I32:$Rs, s32_0ImmPred:$s10),
+         (A2_orir IntRegs:$Rs, imm:$s10)>;
+def: Pat<(and I32:$Rs, s32_0ImmPred:$s10),
+         (A2_andir IntRegs:$Rs, imm:$s10)>;
+
+def: Pat<(sub s32_0ImmPred:$s10, IntRegs:$Rs),
+         (A2_subri imm:$s10, IntRegs:$Rs)>;
+
+// Rd = not(Rs) gets mapped to Rd=sub(#-1, Rs).
+def: Pat<(not I32:$src1),
+         (A2_subri -1, IntRegs:$src1)>;
+
+def: Pat<(s32_0ImmPred:$s16), (A2_tfrsi imm:$s16)>;
+def: Pat<(s8_0Imm64Pred:$s8), (A2_tfrpi imm:$s8)>;
+
+def : Pat<(select I1:$Pu, s32_0ImmPred:$s8, I32:$Rs),
+          (C2_muxri I1:$Pu, imm:$s8, I32:$Rs)>;
+
+def : Pat<(select I1:$Pu, I32:$Rs, s32_0ImmPred:$s8),
+          (C2_muxir I1:$Pu, I32:$Rs, imm:$s8)>;
+
+def : Pat<(select I1:$Pu, s32_0ImmPred:$s8, s8_0ImmPred:$S8),
+          (C2_muxii I1:$Pu, imm:$s8, imm:$S8)>;
+
+def: Pat<(shl I32:$src1, (i32 16)), (A2_aslh I32:$src1)>;
+def: Pat<(sra I32:$src1, (i32 16)), (A2_asrh I32:$src1)>;
+def: Pat<(sext_inreg I32:$src1, i8), (A2_sxtb I32:$src1)>;
+def: Pat<(sext_inreg I32:$src1, i16), (A2_sxth I32:$src1)>;
+
+class T_vcmp_pat<InstHexagon MI, PatFrag Op, ValueType T>
+  : Pat<(i1 (Op (T DoubleRegs:$Rss), (T DoubleRegs:$Rtt))),
+        (i1 (MI DoubleRegs:$Rss, DoubleRegs:$Rtt))>;
+
+def: T_vcmp_pat<A2_vcmpbeq, seteq, v8i8>;
+def: T_vcmp_pat<A2_vcmpbgtu, setugt, v8i8>;
+def: T_vcmp_pat<A2_vcmpheq, seteq, v4i16>;
+def: T_vcmp_pat<A2_vcmphgt, setgt, v4i16>;
+def: T_vcmp_pat<A2_vcmphgtu, setugt, v4i16>;
+def: T_vcmp_pat<A2_vcmpweq, seteq, v2i32>;
+def: T_vcmp_pat<A2_vcmpwgt, setgt, v2i32>;
+def: T_vcmp_pat<A2_vcmpwgtu, setugt, v2i32>;
+
+// Add halfword.
+def: Pat<(sext_inreg (add I32:$src1, I32:$src2), i16),
+         (A2_addh_l16_ll I32:$src1, I32:$src2)>;
+
+def: Pat<(sra (add (shl I32:$src1, (i32 16)), I32:$src2), (i32 16)),
+         (A2_addh_l16_hl I32:$src1, I32:$src2)>;
+
+def: Pat<(shl (add I32:$src1, I32:$src2), (i32 16)),
+         (A2_addh_h16_ll I32:$src1, I32:$src2)>;
+
+// Subtract halfword.
+def: Pat<(sext_inreg (sub I32:$src1, I32:$src2), i16),
+         (A2_subh_l16_ll I32:$src1, I32:$src2)>;
+
+def: Pat<(shl (sub I32:$src1, I32:$src2), (i32 16)),
+         (A2_subh_h16_ll I32:$src1, I32:$src2)>;
+
+// Here, depending on the operand being selected, we'll either generate a
+// min or max instruction.
+// For example:
+// (a>b)?a:b --> max(a,b): the check performed is '>' and the larger of the
+// two values is selected, so the corresponding HexagonInst is passed in
+// 'Inst'.
+// (a>b)?b:a --> min(a,b): the check performed is '>' but the smaller value
+// is selected, so the corresponding HexagonInst is passed in 'SwapInst'.
+
+multiclass T_MinMax_pats <PatFrag Op, PatLeaf Val,
+                          InstHexagon Inst, InstHexagon SwapInst> {
+  def: Pat<(select (i1 (Op Val:$src1, Val:$src2)), Val:$src1, Val:$src2),
+           (Inst Val:$src1, Val:$src2)>;
+  def: Pat<(select (i1 (Op Val:$src1, Val:$src2)), Val:$src2, Val:$src1),
+           (SwapInst Val:$src1, Val:$src2)>;
+}
+
+def IsPosHalf : PatLeaf<(i32 IntRegs:$a), [{
+  return isPositiveHalfWord(N);
+}]>;
+
+multiclass MinMax_pats <PatFrag Op, InstHexagon Inst, InstHexagon SwapInst> {
+  defm: T_MinMax_pats<Op, I32, Inst, SwapInst>;
+
+  def: Pat<(sext_inreg (select (i1 (Op IsPosHalf:$src1, IsPosHalf:$src2)),
+                               IsPosHalf:$src1, IsPosHalf:$src2),
+                       i16),
+           (Inst IntRegs:$src1, IntRegs:$src2)>;
+
+  def: Pat<(sext_inreg (select (i1 (Op IsPosHalf:$src1, IsPosHalf:$src2)),
+                               IsPosHalf:$src2, IsPosHalf:$src1),
+                       i16),
+           (SwapInst IntRegs:$src1, IntRegs:$src2)>;
+}
+
+let AddedComplexity = 200 in {
+  defm: MinMax_pats<setge, A2_max, A2_min>;
+  defm: MinMax_pats<setgt, A2_max, A2_min>;
+  defm: MinMax_pats<setle, A2_min, A2_max>;
+  defm: MinMax_pats<setlt, A2_min, A2_max>;
+  defm: MinMax_pats<setuge, A2_maxu, A2_minu>;
+  defm: MinMax_pats<setugt, A2_maxu, A2_minu>;
+  defm: MinMax_pats<setule, A2_minu, A2_maxu>;
+  defm: MinMax_pats<setult, A2_minu, A2_maxu>;
+}
+
+class T_cmp64_rr_pat<InstHexagon MI, PatFrag CmpOp>
+  : Pat<(i1 (CmpOp I64:$Rs, I64:$Rt)),
+        (i1 (MI DoubleRegs:$Rs, DoubleRegs:$Rt))>;
+
+def: T_cmp64_rr_pat<C2_cmpeqp, seteq>;
+def: T_cmp64_rr_pat<C2_cmpgtp, setgt>;
+def: T_cmp64_rr_pat<C2_cmpgtup, setugt>;
+def: T_cmp64_rr_pat<C2_cmpgtp, RevCmp<setlt>>;
+def: T_cmp64_rr_pat<C2_cmpgtup, RevCmp<setult>>;
+
+def: Pat<(i64 (add I64:$Rs, I64:$Rt)), (A2_addp I64:$Rs, I64:$Rt)>;
+def: Pat<(i64 (sub I64:$Rs, I64:$Rt)), (A2_subp I64:$Rs, I64:$Rt)>;
+
+def: Pat<(i64 (and I64:$Rs, I64:$Rt)), (A2_andp I64:$Rs, I64:$Rt)>;
+def: Pat<(i64 (or I64:$Rs, I64:$Rt)), (A2_orp I64:$Rs, I64:$Rt)>;
+def: Pat<(i64 (xor I64:$Rs, I64:$Rt)), (A2_xorp I64:$Rs, I64:$Rt)>;
+
+def: Pat<(i1 (not I1:$Ps)), (C2_not PredRegs:$Ps)>;
+
+def: Pat<(i1 (and I1:$Ps, I1:$Pt)), (C2_and I1:$Ps, I1:$Pt)>;
+def: Pat<(i1 (or I1:$Ps, I1:$Pt)), (C2_or I1:$Ps, I1:$Pt)>;
+def: Pat<(i1 (xor I1:$Ps, I1:$Pt)), (C2_xor I1:$Ps, I1:$Pt)>;
+def: Pat<(i1 (and I1:$Ps, (not I1:$Pt))), (C2_andn I1:$Ps, I1:$Pt)>;
+def: Pat<(i1 (or I1:$Ps, (not I1:$Pt))), (C2_orn I1:$Ps, I1:$Pt)>;
+
+def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone,
+                     [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+def eh_return: SDNode<"HexagonISD::EH_RETURN", SDTNone, [SDNPHasChain]>;
+
+def: Pat<(br bb:$dst), (J2_jump brtarget:$dst)>;
+def: Pat<(brcond I1:$src1, bb:$block), (J2_jumpt PredRegs:$src1, bb:$block)>;
+def: Pat<(brind I32:$dst), (J2_jumpr IntRegs:$dst)>;
+
+def: Pat<(retflag), (PS_jmpret (i32 R31))>;
+def: Pat<(eh_return), (EH_RETURN_JMPR (i32 R31))>;
+
+// Patterns to select load-indexed (i.e. load from base+offset).
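+// Each Loadx_pat expansion below covers the usual address shapes, roughly:
+//   (load FI)              -> L2_loadri_io FI, #0
+//   (load (add FI, #off))  -> L2_loadri_io FI, #off
+//   (load (add Rs, #off))  -> L2_loadri_io Rs, #off
+//   (load Rs)              -> L2_loadri_io Rs, #0
+// with an IsOrAdd variant catching or-instructions that are provably adds.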
+multiclass Loadx_pat<PatFrag Load, ValueType VT, PatLeaf ImmPred, + InstHexagon MI> { + def: Pat<(VT (Load AddrFI:$fi)), (VT (MI AddrFI:$fi, 0))>; + def: Pat<(VT (Load (add (i32 AddrFI:$fi), ImmPred:$Off))), + (VT (MI AddrFI:$fi, imm:$Off))>; + def: Pat<(VT (Load (IsOrAdd (i32 AddrFI:$fi), ImmPred:$Off))), + (VT (MI AddrFI:$fi, imm:$Off))>; + def: Pat<(VT (Load (add I32:$Rs, ImmPred:$Off))), + (VT (MI IntRegs:$Rs, imm:$Off))>; + def: Pat<(VT (Load I32:$Rs)), (VT (MI IntRegs:$Rs, 0))>; +} + +let AddedComplexity = 20 in { + defm: Loadx_pat<load, i32, s30_2ImmPred, L2_loadri_io>; + defm: Loadx_pat<load, i64, s29_3ImmPred, L2_loadrd_io>; + defm: Loadx_pat<atomic_load_8 , i32, s32_0ImmPred, L2_loadrub_io>; + defm: Loadx_pat<atomic_load_16, i32, s31_1ImmPred, L2_loadruh_io>; + defm: Loadx_pat<atomic_load_32, i32, s30_2ImmPred, L2_loadri_io>; + defm: Loadx_pat<atomic_load_64, i64, s29_3ImmPred, L2_loadrd_io>; + + defm: Loadx_pat<extloadi1, i32, s32_0ImmPred, L2_loadrub_io>; + defm: Loadx_pat<extloadi8, i32, s32_0ImmPred, L2_loadrub_io>; + defm: Loadx_pat<extloadi16, i32, s31_1ImmPred, L2_loadruh_io>; + defm: Loadx_pat<sextloadi8, i32, s32_0ImmPred, L2_loadrb_io>; + defm: Loadx_pat<sextloadi16, i32, s31_1ImmPred, L2_loadrh_io>; + defm: Loadx_pat<zextloadi1, i32, s32_0ImmPred, L2_loadrub_io>; + defm: Loadx_pat<zextloadi8, i32, s32_0ImmPred, L2_loadrub_io>; + defm: Loadx_pat<zextloadi16, i32, s31_1ImmPred, L2_loadruh_io>; + // No sextloadi1. +} + +// Sign-extending loads of i1 need to replicate the lowest bit throughout +// the 32-bit value. Since the loaded value can only be 0 or 1, 0-v should +// do the trick. +let AddedComplexity = 20 in +def: Pat<(i32 (sextloadi1 I32:$Rs)), + (A2_subri 0, (L2_loadrub_io IntRegs:$Rs, 0))>; + +def: Pat<(i32 (mul I32:$src1, I32:$src2)), (M2_mpyi I32:$src1, I32:$src2)>; +def: Pat<(i32 (mulhs I32:$src1, I32:$src2)), (M2_mpy_up I32:$src1, I32:$src2)>; +def: Pat<(i32 (mulhu I32:$src1, I32:$src2)), (M2_mpyu_up I32:$src1, I32:$src2)>; + +def: Pat<(mul IntRegs:$Rs, u32_0ImmPred:$u8), + (M2_mpysip IntRegs:$Rs, imm:$u8)>; +def: Pat<(ineg (mul IntRegs:$Rs, u8_0ImmPred:$u8)), + (M2_mpysin IntRegs:$Rs, imm:$u8)>; +def: Pat<(mul IntRegs:$src1, s32_0ImmPred:$src2), + (M2_mpysmi IntRegs:$src1, imm:$src2)>; +def: Pat<(add (mul IntRegs:$src2, u32_0ImmPred:$src3), IntRegs:$src1), + (M2_macsip IntRegs:$src1, IntRegs:$src2, imm:$src3)>; +def: Pat<(add (mul I32:$src2, I32:$src3), I32:$src1), + (M2_maci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; +def: Pat<(add (add IntRegs:$src2, u32_0ImmPred:$src3), IntRegs:$src1), + (M2_accii IntRegs:$src1, IntRegs:$src2, imm:$src3)>; +def: Pat<(add (add I32:$src2, I32:$src3), I32:$src1), + (M2_acci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; + +class T_MType_acc_pat1 <InstHexagon MI, SDNode firstOp, SDNode secOp, + PatLeaf ImmPred> + : Pat <(secOp IntRegs:$src1, (firstOp IntRegs:$src2, ImmPred:$src3)), + (MI IntRegs:$src1, IntRegs:$src2, ImmPred:$src3)>; + +class T_MType_acc_pat2 <InstHexagon MI, SDNode firstOp, SDNode secOp> + : Pat <(i32 (secOp IntRegs:$src1, (firstOp IntRegs:$src2, IntRegs:$src3))), + (MI IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; + +def : T_MType_acc_pat2 <M2_xor_xacc, xor, xor>; +def : T_MType_acc_pat1 <M2_macsin, mul, sub, u32_0ImmPred>; + +def : T_MType_acc_pat1 <M2_naccii, add, sub, s32_0ImmPred>; +def : T_MType_acc_pat2 <M2_nacci, add, sub>; + +def: T_MType_acc_pat2 <M4_or_xor, xor, or>; +def: T_MType_acc_pat2 <M4_and_xor, xor, and>; +def: T_MType_acc_pat2 <M4_or_and, and, or>; +def: T_MType_acc_pat2 <M4_and_and, 
and, and>;
+def: T_MType_acc_pat2 <M4_xor_and, and, xor>;
+def: T_MType_acc_pat2 <M4_or_or, or, or>;
+def: T_MType_acc_pat2 <M4_and_or, or, and>;
+def: T_MType_acc_pat2 <M4_xor_or, or, xor>;
+
+class T_MType_acc_pat3 <InstHexagon MI, SDNode firstOp, SDNode secOp>
+  : Pat <(secOp I32:$src1, (firstOp I32:$src2, (not I32:$src3))),
+         (MI IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
+
+def: T_MType_acc_pat3 <M4_or_andn, and, or>;
+def: T_MType_acc_pat3 <M4_and_andn, and, and>;
+def: T_MType_acc_pat3 <M4_xor_andn, and, xor>;
+
+def Aext64: PatFrag<(ops node:$Rs), (i64 (anyext node:$Rs))>;
+def Sext64: PatFrag<(ops node:$Rs), (i64 (sext node:$Rs))>;
+def Zext64: PatFrag<(ops node:$Rs), (i64 (zext node:$Rs))>;
+
+// Return true for a 32-to-64-bit sign-extended load.
+def Sext64Ld : PatLeaf<(i64 DoubleRegs:$src1), [{
+  LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
+  if (!LD)
+    return false;
+  return LD->getExtensionType() == ISD::SEXTLOAD &&
+         LD->getMemoryVT().getScalarType() == MVT::i32;
+}]>;
+
+def: Pat<(mul (Aext64 I32:$src1), (Aext64 I32:$src2)),
+         (M2_dpmpyuu_s0 IntRegs:$src1, IntRegs:$src2)>;
+
+def: Pat<(mul (Sext64 I32:$src1), (Sext64 I32:$src2)),
+         (M2_dpmpyss_s0 IntRegs:$src1, IntRegs:$src2)>;
+
+def: Pat<(mul Sext64Ld:$src1, Sext64Ld:$src2),
+         (M2_dpmpyss_s0 (LoReg DoubleRegs:$src1), (LoReg DoubleRegs:$src2))>;
+
+// Multiply and accumulate, use full result.
+// Rxx[+-]=mpy(Rs,Rt)
+
+def: Pat<(add I64:$src1, (mul (Sext64 I32:$src2), (Sext64 I32:$src3))),
+         (M2_dpmpyss_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
+
+def: Pat<(sub I64:$src1, (mul (Sext64 I32:$src2), (Sext64 I32:$src3))),
+         (M2_dpmpyss_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
+
+def: Pat<(add I64:$src1, (mul (Aext64 I32:$src2), (Aext64 I32:$src3))),
+         (M2_dpmpyuu_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
+
+def: Pat<(add I64:$src1, (mul (Zext64 I32:$src2), (Zext64 I32:$src3))),
+         (M2_dpmpyuu_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
+
+def: Pat<(sub I64:$src1, (mul (Aext64 I32:$src2), (Aext64 I32:$src3))),
+         (M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
+
+def: Pat<(sub I64:$src1, (mul (Zext64 I32:$src2), (Zext64 I32:$src3))),
+         (M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
+
+class Storepi_pat<PatFrag Store, PatFrag Value, PatFrag Offset,
+                  InstHexagon MI>
+  : Pat<(Store Value:$src1, I32:$src2, Offset:$offset),
+        (MI I32:$src2, imm:$offset, Value:$src1)>;
+
+def: Storepi_pat<post_truncsti8, I32, s4_0ImmPred, S2_storerb_pi>;
+def: Storepi_pat<post_truncsti16, I32, s4_1ImmPred, S2_storerh_pi>;
+def: Storepi_pat<post_store, I32, s4_2ImmPred, S2_storeri_pi>;
+def: Storepi_pat<post_store, I64, s4_3ImmPred, S2_storerd_pi>;
+
+// Patterns for generating stores, where the address takes different forms:
+// - frameindex,
+// - frameindex + offset,
+// - base + offset,
+// - simple (base address without offset).
+// These would usually be used together (via Storex_pat defined below), but
+// in some cases one may want to apply different properties (such as
+// AddedComplexity) to the individual patterns.
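+// For a plain i32 store, instantiated through Storex_pat further below,
+// this expands along the same lines as the loads, e.g. roughly:
+//   (store Rt, (add Rs, #off)) -> S2_storeri_io Rs, #off, Rt
+//   (store Rt, Rs)             -> S2_storeri_io Rs, #0, Rt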
+class Storex_fi_pat<PatFrag Store, PatFrag Value, InstHexagon MI>
+  : Pat<(Store Value:$Rs, AddrFI:$fi), (MI AddrFI:$fi, 0, Value:$Rs)>;
+multiclass Storex_fi_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
+                             InstHexagon MI> {
+  def: Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)),
+           (MI AddrFI:$fi, imm:$Off, Value:$Rs)>;
+  def: Pat<(Store Value:$Rs, (IsOrAdd (i32 AddrFI:$fi), ImmPred:$Off)),
+           (MI AddrFI:$fi, imm:$Off, Value:$Rs)>;
+}
+multiclass Storex_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
+                          InstHexagon MI> {
+  def: Pat<(Store Value:$Rt, (add I32:$Rs, ImmPred:$Off)),
+           (MI IntRegs:$Rs, imm:$Off, Value:$Rt)>;
+  def: Pat<(Store Value:$Rt, (IsOrAdd I32:$Rs, ImmPred:$Off)),
+           (MI IntRegs:$Rs, imm:$Off, Value:$Rt)>;
+}
+class Storex_simple_pat<PatFrag Store, PatFrag Value, InstHexagon MI>
+  : Pat<(Store Value:$Rt, I32:$Rs),
+        (MI IntRegs:$Rs, 0, Value:$Rt)>;
+
+// Patterns for generating stores, where the address takes different forms,
+// and where the value being stored is transformed through the value modifier
+// ValueMod. The address forms are the same as above.
+class Storexm_fi_pat<PatFrag Store, PatFrag Value, PatFrag ValueMod,
+                     InstHexagon MI>
+  : Pat<(Store Value:$Rs, AddrFI:$fi),
+        (MI AddrFI:$fi, 0, (ValueMod Value:$Rs))>;
+multiclass Storexm_fi_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
+                              PatFrag ValueMod, InstHexagon MI> {
+  def: Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)),
+           (MI AddrFI:$fi, imm:$Off, (ValueMod Value:$Rs))>;
+  def: Pat<(Store Value:$Rs, (IsOrAdd (i32 AddrFI:$fi), ImmPred:$Off)),
+           (MI AddrFI:$fi, imm:$Off, (ValueMod Value:$Rs))>;
+}
+multiclass Storexm_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
+                           PatFrag ValueMod, InstHexagon MI> {
+  def: Pat<(Store Value:$Rt, (add I32:$Rs, ImmPred:$Off)),
+           (MI IntRegs:$Rs, imm:$Off, (ValueMod Value:$Rt))>;
+  def: Pat<(Store Value:$Rt, (IsOrAdd I32:$Rs, ImmPred:$Off)),
+           (MI IntRegs:$Rs, imm:$Off, (ValueMod Value:$Rt))>;
+}
+class Storexm_simple_pat<PatFrag Store, PatFrag Value, PatFrag ValueMod,
+                         InstHexagon MI>
+  : Pat<(Store Value:$Rt, I32:$Rs),
+        (MI IntRegs:$Rs, 0, (ValueMod Value:$Rt))>;
+
+multiclass Storex_pat<PatFrag Store, PatFrag Value, PatLeaf ImmPred,
+                      InstHexagon MI> {
+  def: Storex_fi_pat <Store, Value, MI>;
+  defm: Storex_fi_add_pat <Store, Value, ImmPred, MI>;
+  defm: Storex_add_pat <Store, Value, ImmPred, MI>;
+}
+
+multiclass Storexm_pat<PatFrag Store, PatFrag Value, PatLeaf ImmPred,
+                       PatFrag ValueMod, InstHexagon MI> {
+  def: Storexm_fi_pat <Store, Value, ValueMod, MI>;
+  defm: Storexm_fi_add_pat <Store, Value, ImmPred, ValueMod, MI>;
+  defm: Storexm_add_pat <Store, Value, ImmPred, ValueMod, MI>;
+}
+
+// Regular stores in the DAG have two operands: value and address.
+// Atomic stores also have two, but they are reversed: address, value.
+// To use atomic stores with the patterns, they need to have their operands
+// swapped. This relies on the knowledge that the F.Fragment uses names
+// "ptr" and "val".
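+// For example, atomic_store_32 carries (ptr, val); wrapped as
+// SwapSt<atomic_store_32> it presents (val, ptr), so the Storex_pat
+// classes above apply to it unchanged.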
+class SwapSt<PatFrag F> + : PatFrag<(ops node:$val, node:$ptr), F.Fragment, F.PredicateCode, + F.OperandTransform>; + +let AddedComplexity = 20 in { + defm: Storex_pat<truncstorei8, I32, s32_0ImmPred, S2_storerb_io>; + defm: Storex_pat<truncstorei16, I32, s31_1ImmPred, S2_storerh_io>; + defm: Storex_pat<store, I32, s30_2ImmPred, S2_storeri_io>; + defm: Storex_pat<store, I64, s29_3ImmPred, S2_storerd_io>; + + defm: Storex_pat<SwapSt<atomic_store_8>, I32, s32_0ImmPred, S2_storerb_io>; + defm: Storex_pat<SwapSt<atomic_store_16>, I32, s31_1ImmPred, S2_storerh_io>; + defm: Storex_pat<SwapSt<atomic_store_32>, I32, s30_2ImmPred, S2_storeri_io>; + defm: Storex_pat<SwapSt<atomic_store_64>, I64, s29_3ImmPred, S2_storerd_io>; +} + +// Simple patterns should be tried with the least priority. +def: Storex_simple_pat<truncstorei8, I32, S2_storerb_io>; +def: Storex_simple_pat<truncstorei16, I32, S2_storerh_io>; +def: Storex_simple_pat<store, I32, S2_storeri_io>; +def: Storex_simple_pat<store, I64, S2_storerd_io>; + +def: Storex_simple_pat<SwapSt<atomic_store_8>, I32, S2_storerb_io>; +def: Storex_simple_pat<SwapSt<atomic_store_16>, I32, S2_storerh_io>; +def: Storex_simple_pat<SwapSt<atomic_store_32>, I32, S2_storeri_io>; +def: Storex_simple_pat<SwapSt<atomic_store_64>, I64, S2_storerd_io>; + +let AddedComplexity = 20 in { + defm: Storexm_pat<truncstorei8, I64, s32_0ImmPred, LoReg, S2_storerb_io>; + defm: Storexm_pat<truncstorei16, I64, s31_1ImmPred, LoReg, S2_storerh_io>; + defm: Storexm_pat<truncstorei32, I64, s30_2ImmPred, LoReg, S2_storeri_io>; +} + +def: Storexm_simple_pat<truncstorei8, I64, LoReg, S2_storerb_io>; +def: Storexm_simple_pat<truncstorei16, I64, LoReg, S2_storerh_io>; +def: Storexm_simple_pat<truncstorei32, I64, LoReg, S2_storeri_io>; + +def: Pat <(Sext64 I32:$src), (A2_sxtw I32:$src)>; + +def: Pat<(select (i1 (setlt I32:$src, 0)), (sub 0, I32:$src), I32:$src), + (A2_abs IntRegs:$src)>; + +let AddedComplexity = 50 in +def: Pat<(xor (add (sra I32:$src, (i32 31)), + I32:$src), + (sra I32:$src, (i32 31))), + (A2_abs IntRegs:$src)>; + +def: Pat<(sra I32:$src, u5_0ImmPred:$u5), + (S2_asr_i_r IntRegs:$src, imm:$u5)>; +def: Pat<(srl I32:$src, u5_0ImmPred:$u5), + (S2_lsr_i_r IntRegs:$src, imm:$u5)>; +def: Pat<(shl I32:$src, u5_0ImmPred:$u5), + (S2_asl_i_r IntRegs:$src, imm:$u5)>; + +def: Pat<(sra (add (sra I32:$src1, u5_0ImmPred:$src2), 1), (i32 1)), + (S2_asr_i_r_rnd IntRegs:$src1, u5_0ImmPred:$src2)>; + +def : Pat<(not I64:$src1), + (A2_notp DoubleRegs:$src1)>; + +// Count leading zeros. +def: Pat<(ctlz I32:$Rs), (S2_cl0 I32:$Rs)>; +def: Pat<(i32 (trunc (ctlz I64:$Rss))), (S2_cl0p I64:$Rss)>; + +// Count trailing zeros: 32-bit. +def: Pat<(cttz I32:$Rs), (S2_ct0 I32:$Rs)>; + +// Count leading ones. +def: Pat<(ctlz (not I32:$Rs)), (S2_cl1 I32:$Rs)>; +def: Pat<(i32 (trunc (ctlz (not I64:$Rss)))), (S2_cl1p I64:$Rss)>; + +// Count trailing ones: 32-bit. 
+def: Pat<(cttz (not I32:$Rs)), (S2_ct1 I32:$Rs)>; + +let AddedComplexity = 20 in { // Complexity greater than and/or/xor + def: Pat<(and I32:$Rs, IsNPow2_32:$V), + (S2_clrbit_i IntRegs:$Rs, (LogN2_32 $V))>; + def: Pat<(or I32:$Rs, IsPow2_32:$V), + (S2_setbit_i IntRegs:$Rs, (Log2_32 $V))>; + def: Pat<(xor I32:$Rs, IsPow2_32:$V), + (S2_togglebit_i IntRegs:$Rs, (Log2_32 $V))>; + + def: Pat<(and I32:$Rs, (not (shl 1, I32:$Rt))), + (S2_clrbit_r IntRegs:$Rs, IntRegs:$Rt)>; + def: Pat<(or I32:$Rs, (shl 1, I32:$Rt)), + (S2_setbit_r IntRegs:$Rs, IntRegs:$Rt)>; + def: Pat<(xor I32:$Rs, (shl 1, I32:$Rt)), + (S2_togglebit_r IntRegs:$Rs, IntRegs:$Rt)>; +} + +// Clr/set/toggle bit for 64-bit values with immediate bit index. +let AddedComplexity = 20 in { // Complexity greater than and/or/xor + def: Pat<(and I64:$Rss, IsNPow2_64L:$V), + (REG_SEQUENCE DoubleRegs, + (i32 (HiReg $Rss)), isub_hi, + (S2_clrbit_i (LoReg $Rss), (LogN2_64 $V)), isub_lo)>; + def: Pat<(and I64:$Rss, IsNPow2_64H:$V), + (REG_SEQUENCE DoubleRegs, + (S2_clrbit_i (HiReg $Rss), (UDEC32 (i32 (LogN2_64 $V)))), + isub_hi, + (i32 (LoReg $Rss)), isub_lo)>; + + def: Pat<(or I64:$Rss, IsPow2_64L:$V), + (REG_SEQUENCE DoubleRegs, + (i32 (HiReg $Rss)), isub_hi, + (S2_setbit_i (LoReg $Rss), (Log2_64 $V)), isub_lo)>; + def: Pat<(or I64:$Rss, IsPow2_64H:$V), + (REG_SEQUENCE DoubleRegs, + (S2_setbit_i (HiReg $Rss), (UDEC32 (i32 (Log2_64 $V)))), + isub_hi, + (i32 (LoReg $Rss)), isub_lo)>; + + def: Pat<(xor I64:$Rss, IsPow2_64L:$V), + (REG_SEQUENCE DoubleRegs, + (i32 (HiReg $Rss)), isub_hi, + (S2_togglebit_i (LoReg $Rss), (Log2_64 $V)), isub_lo)>; + def: Pat<(xor I64:$Rss, IsPow2_64H:$V), + (REG_SEQUENCE DoubleRegs, + (S2_togglebit_i (HiReg $Rss), (UDEC32 (i32 (Log2_64 $V)))), + isub_hi, + (i32 (LoReg $Rss)), isub_lo)>; +} + +let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm. + def: Pat<(i1 (setne (and (shl 1, u5_0ImmPred:$u5), I32:$Rs), 0)), + (S2_tstbit_i IntRegs:$Rs, u5_0ImmPred:$u5)>; + def: Pat<(i1 (setne (and (shl 1, I32:$Rt), I32:$Rs), 0)), + (S2_tstbit_r IntRegs:$Rs, IntRegs:$Rt)>; + def: Pat<(i1 (trunc I32:$Rs)), + (S2_tstbit_i IntRegs:$Rs, 0)>; + def: Pat<(i1 (trunc I64:$Rs)), + (S2_tstbit_i (LoReg DoubleRegs:$Rs), 0)>; +} + +let AddedComplexity = 20 in { // Complexity greater than compare reg-imm. + def: Pat<(i1 (seteq (and I32:$Rs, u6_0ImmPred:$u6), 0)), + (C2_bitsclri IntRegs:$Rs, u6_0ImmPred:$u6)>; + def: Pat<(i1 (seteq (and I32:$Rs, I32:$Rt), 0)), + (C2_bitsclr IntRegs:$Rs, IntRegs:$Rt)>; +} + +let AddedComplexity = 10 in // Complexity greater than compare reg-reg. 
+def: Pat<(i1 (seteq (and I32:$Rs, I32:$Rt), IntRegs:$Rt)),
+         (C2_bitsset IntRegs:$Rs, IntRegs:$Rt)>;
+
+def: Pat<(or (or (shl (or (shl (i32 (extloadi8 (add I32:$b, 3))),
+                               (i32 8)),
+                          (i32 (zextloadi8 (add I32:$b, 2)))),
+                      (i32 16)),
+                 (shl (i32 (zextloadi8 (add I32:$b, 1))), (i32 8))),
+             (zextloadi8 I32:$b)),
+         (A2_swiz (L2_loadri_io IntRegs:$b, 0))>;
+
+// Patterns for loads of i1:
+def: Pat<(i1 (load AddrFI:$fi)),
+         (C2_tfrrp (L2_loadrub_io AddrFI:$fi, 0))>;
+def: Pat<(i1 (load (add I32:$Rs, s32_0ImmPred:$Off))),
+         (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, imm:$Off))>;
+def: Pat<(i1 (load I32:$Rs)),
+         (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, 0))>;
+
+def I1toI32: OutPatFrag<(ops node:$Rs),
+                        (C2_muxii (i1 $Rs), 1, 0)>;
+
+def I32toI1: OutPatFrag<(ops node:$Rs),
+                        (i1 (C2_tfrrp (i32 $Rs)))>;
+
+defm: Storexm_pat<store, I1, s32_0ImmPred, I1toI32, S2_storerb_io>;
+def: Storexm_simple_pat<store, I1, I1toI32, S2_storerb_io>;
+
+def: Pat<(sra I64:$src, u6_0ImmPred:$u6),
+         (S2_asr_i_p DoubleRegs:$src, imm:$u6)>;
+def: Pat<(srl I64:$src, u6_0ImmPred:$u6),
+         (S2_lsr_i_p DoubleRegs:$src, imm:$u6)>;
+def: Pat<(shl I64:$src, u6_0ImmPred:$u6),
+         (S2_asl_i_p DoubleRegs:$src, imm:$u6)>;
+
+let AddedComplexity = 100 in
+def: Pat<(add I32:$Rt, (shl I32:$Rs, u3_0ImmPred:$u3)),
+         (S2_addasl_rrri IntRegs:$Rt, IntRegs:$Rs, imm:$u3)>;
+
+def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDTNone, [SDNPHasChain]>;
+def: Pat<(HexagonBARRIER), (Y2_barrier)>;
+
+def: Pat<(IsOrAdd (i32 AddrFI:$Rs), s32_0ImmPred:$off),
+         (PS_fi (i32 AddrFI:$Rs), s32_0ImmPred:$off)>;
+
+
+// Support for generating global address.
+// Taken from X86InstrInfo.td.
+def SDTHexagonCONST32 : SDTypeProfile<1, 1, [SDTCisVT<0, i32>,
+                                             SDTCisVT<1, i32>,
+                                             SDTCisPtrTy<0>]>;
+def HexagonCONST32 : SDNode<"HexagonISD::CONST32", SDTHexagonCONST32>;
+def HexagonCONST32_GP : SDNode<"HexagonISD::CONST32_GP", SDTHexagonCONST32>;
+
+// Map TLS addresses to A2_tfrsi.
+def: Pat<(HexagonCONST32 tglobaltlsaddr:$addr), (A2_tfrsi s16_0Ext:$addr)>;
+def: Pat<(HexagonCONST32 bbl:$label), (A2_tfrsi s16_0Ext:$label)>;
+
+def: Pat<(i64 imm:$v), (CONST64 imm:$v)>;
+def: Pat<(i1 0), (PS_false)>;
+def: Pat<(i1 1), (PS_true)>;
+
+// Pseudo instructions.
+def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+def SDT_SPCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
+                                      SDTCisVT<1, i32> ]>;
+
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPCallSeqStart,
+                           [SDNPHasChain, SDNPOutGlue]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPCallSeqEnd,
+                         [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+def SDT_SPCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+
+// For tail calls, a HexagonTCRet SDNode has 3 SDNode properties: a chain,
+// an optional glue, and variadic arguments. Its single operand has
+// pointer type.
+def HexagonTCRet : SDNode<"HexagonISD::TC_RETURN", SDT_SPCall,
+                          [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+
+def: Pat<(callseq_start timm:$amt),
+         (ADJCALLSTACKDOWN imm:$amt)>;
+def: Pat<(callseq_end timm:$amt1, timm:$amt2),
+         (ADJCALLSTACKUP imm:$amt1, imm:$amt2)>;
+
+// Tail calls.
+def: Pat<(HexagonTCRet tglobaladdr:$dst),
+         (PS_tailcall_i tglobaladdr:$dst)>;
+def: Pat<(HexagonTCRet texternalsym:$dst),
+         (PS_tailcall_i texternalsym:$dst)>;
+def: Pat<(HexagonTCRet I32:$dst),
+         (PS_tailcall_r I32:$dst)>;
+
+// Map from r0 = and(r1, 65535) to r0 = zxth(r1)
+def: Pat<(and I32:$src1, 65535),
+         (A2_zxth IntRegs:$src1)>;
+
+// Map from r0 = and(r1, 255) to r0 = zxtb(r1).
+def: Pat<(and I32:$src1, 255),
+         (A2_zxtb IntRegs:$src1)>;
+
+// Map Add(p1, true) to p1 = not(p1).
+// Add(p1, false) should never be produced; if it is, it must be mapped
+// to a NOOP.
+def: Pat<(add I1:$src1, -1),
+         (C2_not PredRegs:$src1)>;
+
+// Map from p0 = pnot(p0); r0 = mux(p0, #i, #j) => r0 = mux(p0, #j, #i).
+def: Pat<(select (not I1:$src1), s8_0ImmPred:$src2, s32_0ImmPred:$src3),
+         (C2_muxii PredRegs:$src1, s32_0ImmPred:$src3, s8_0ImmPred:$src2)>;
+
+// Map from p0 = pnot(p0); r0 = select(p0, #i, r1)
+// => r0 = C2_muxir(p0, r1, #i)
+def: Pat<(select (not I1:$src1), s32_0ImmPred:$src2,
+                 I32:$src3),
+         (C2_muxir PredRegs:$src1, IntRegs:$src3, s32_0ImmPred:$src2)>;
+
+// Map from p0 = pnot(p0); r0 = mux(p0, r1, #i)
+// => r0 = C2_muxri (p0, #i, r1)
+def: Pat<(select (not I1:$src1), IntRegs:$src2, s32_0ImmPred:$src3),
+         (C2_muxri PredRegs:$src1, s32_0ImmPred:$src3, IntRegs:$src2)>;
+
+// Map from p0 = pnot(p0); if (p0) jump => if (!p0) jump.
+def: Pat<(brcond (not I1:$src1), bb:$offset),
+         (J2_jumpf PredRegs:$src1, bb:$offset)>;
+
+// Map from Rdd = sign_extend_inreg(Rss, i32) -> Rdd = A2_sxtw(Rss.lo).
+def: Pat<(i64 (sext_inreg I64:$src1, i32)),
+         (A2_sxtw (LoReg DoubleRegs:$src1))>;
+
+// Map from Rdd = sign_extend_inreg(Rss, i16) -> Rdd = A2_sxtw(A2_sxth(Rss.lo)).
+def: Pat<(i64 (sext_inreg I64:$src1, i16)),
+         (A2_sxtw (A2_sxth (LoReg DoubleRegs:$src1)))>;
+
+// Map from Rdd = sign_extend_inreg(Rss, i8) -> Rdd = A2_sxtw(A2_sxtb(Rss.lo)).
+def: Pat<(i64 (sext_inreg I64:$src1, i8)),
+         (A2_sxtw (A2_sxtb (LoReg DoubleRegs:$src1)))>;
+
+// We want to prevent emitting pnot's as much as possible.
+// Map brcond with an unsupported setcc to a J2_jumpf.
+def : Pat <(brcond (i1 (setne I32:$src1, I32:$src2)),
+                   bb:$offset),
+           (J2_jumpf (C2_cmpeq I32:$src1, I32:$src2),
+                     bb:$offset)>;
+
+def : Pat <(brcond (i1 (setne I32:$src1, s10_0ImmPred:$src2)),
+                   bb:$offset),
+           (J2_jumpf (C2_cmpeqi I32:$src1, s10_0ImmPred:$src2), bb:$offset)>;
+
+def: Pat<(brcond (i1 (setne I1:$src1, (i1 -1))), bb:$offset),
+         (J2_jumpf PredRegs:$src1, bb:$offset)>;
+
+def: Pat<(brcond (i1 (setne I1:$src1, (i1 0))), bb:$offset),
+         (J2_jumpt PredRegs:$src1, bb:$offset)>;
+
+// cmp.lt(Rs, Imm) -> !cmp.ge(Rs, Imm) -> !cmp.gt(Rs, Imm-1)
+def: Pat<(brcond (i1 (setlt I32:$src1, s8_0ImmPred:$src2)), bb:$offset),
+         (J2_jumpf (C2_cmpgti IntRegs:$src1, (SDEC1 s8_0ImmPred:$src2)),
+                   bb:$offset)>;
+
+// Map from a 64-bit select to an emulated 64-bit mux.
+// Hexagon does not support 64-bit MUXes, so emulate with combines.
+def: Pat<(select I1:$src1, I64:$src2,
+                 I64:$src3),
+         (A2_combinew (C2_mux PredRegs:$src1, (HiReg DoubleRegs:$src2),
+                              (HiReg DoubleRegs:$src3)),
+                      (C2_mux PredRegs:$src1, (LoReg DoubleRegs:$src2),
+                              (LoReg DoubleRegs:$src3)))>;
+
+// Map from a 1-bit select to logical ops.
+// From LegalizeDAG.cpp: (B1 ? B2 : B3) <=> (B1 & B2)|(!B1&B3).
+def: Pat<(select I1:$src1, I1:$src2, I1:$src3),
+         (C2_or (C2_and PredRegs:$src1, PredRegs:$src2),
+                (C2_and (C2_not PredRegs:$src1), PredRegs:$src3))>;
+
+// Map truncation of i64 values to i32.
+def: Pat<(i32 (trunc I64:$src)),
+         (LoReg DoubleRegs:$src)>;
+
+// Map truncation of i64 values to i1.
+def: Pat<(i1 (trunc I64:$src)),
+         (C2_tfrrp (LoReg DoubleRegs:$src))>;
+
+// rs <= rt -> !(rs > rt).
+let AddedComplexity = 30 in
+def: Pat<(i1 (setle I32:$src1, s32_0ImmPred:$src2)),
+         (C2_not (C2_cmpgti IntRegs:$src1, s32_0ImmPred:$src2))>;
+
+// rs <= rt -> !(rs > rt).
+def : Pat<(i1 (setle I32:$src1, I32:$src2)),
+          (i1 (C2_not (C2_cmpgt I32:$src1, I32:$src2)))>;
+
+// Rss <= Rtt -> !(Rss > Rtt).
+def: Pat<(i1 (setle I64:$src1, I64:$src2)),
+         (C2_not (C2_cmpgtp DoubleRegs:$src1, DoubleRegs:$src2))>;
+
+// Map cmpne -> cmpeq.
+// Hexagon_TODO: We should improve on this.
+// rs != rt -> !(rs == rt).
+let AddedComplexity = 30 in
+def: Pat<(i1 (setne I32:$src1, s32_0ImmPred:$src2)),
+         (C2_not (C2_cmpeqi IntRegs:$src1, s32_0ImmPred:$src2))>;
+
+// Convert setne back to xor for hexagon since we compute w/ pred registers.
+def: Pat<(i1 (setne I1:$src1, I1:$src2)),
+         (C2_xor PredRegs:$src1, PredRegs:$src2)>;
+
+// Map cmpne(Rss) -> !cmpeq(Rss).
+// rs != rt -> !(rs == rt).
+def: Pat<(i1 (setne I64:$src1, I64:$src2)),
+         (C2_not (C2_cmpeqp DoubleRegs:$src1, DoubleRegs:$src2))>;
+
+// Map cmpge(Rs, Rt) -> !cmpgt(Rt, Rs).
+// rs >= rt -> !(rt > rs).
+def : Pat <(i1 (setge I32:$src1, I32:$src2)),
+           (i1 (C2_not (i1 (C2_cmpgt I32:$src2, I32:$src1))))>;
+
+// cmpge(Rs, Imm) -> cmpgt(Rs, Imm-1)
+let AddedComplexity = 30 in
+def: Pat<(i1 (setge I32:$src1, s32_0ImmPred:$src2)),
+         (C2_cmpgti IntRegs:$src1, (SDEC1 s32_0ImmPred:$src2))>;
+
+// Map cmpge(Rss, Rtt) -> !cmpgt(Rtt, Rss).
+// rss >= rtt -> !(rtt > rss).
+def: Pat<(i1 (setge I64:$src1, I64:$src2)),
+         (C2_not (C2_cmpgtp DoubleRegs:$src2, DoubleRegs:$src1))>;
+
+// Map cmplt(Rs, Imm) -> !cmpge(Rs, Imm).
+// !cmpge(Rs, Imm) -> !cmpgt(Rs, Imm-1).
+// rs < rt -> !(rs >= rt).
+let AddedComplexity = 30 in
+def: Pat<(i1 (setlt I32:$src1, s32_0ImmPred:$src2)),
+         (C2_not (C2_cmpgti IntRegs:$src1, (SDEC1 s32_0ImmPred:$src2)))>;
+
+// Generate cmpgeu(Rs, #0) -> cmpeq(Rs, Rs)
+def: Pat<(i1 (setuge I32:$src1, 0)),
+         (C2_cmpeq IntRegs:$src1, IntRegs:$src1)>;
+
+// Generate cmpgeu(Rs, #u8) -> cmpgtu(Rs, #u8 -1)
+def: Pat<(i1 (setuge I32:$src1, u32_0ImmPred:$src2)),
+         (C2_cmpgtui IntRegs:$src1, (UDEC1 u32_0ImmPred:$src2))>;
+
+// Generate cmpgtu(Rs, #u9)
+def: Pat<(i1 (setugt I32:$src1, u32_0ImmPred:$src2)),
+         (C2_cmpgtui IntRegs:$src1, u32_0ImmPred:$src2)>;
+
+// Map from Rs >= Rt -> !(Rt > Rs).
+// rs >= rt -> !(rt > rs).
+def: Pat<(i1 (setuge I64:$src1, I64:$src2)),
+         (C2_not (C2_cmpgtup DoubleRegs:$src2, DoubleRegs:$src1))>;
+
+// Map from cmpleu(Rss, Rtt) -> !cmpgtu(Rss, Rtt).
+// Map from (Rs <= Rt) -> !(Rs > Rt).
+def: Pat<(i1 (setule I64:$src1, I64:$src2)),
+         (C2_not (C2_cmpgtup DoubleRegs:$src1, DoubleRegs:$src2))>;
+
+// Sign extends.
+// i1 -> i32
+def: Pat<(i32 (sext I1:$src1)),
+         (C2_muxii PredRegs:$src1, -1, 0)>;
+
+// i1 -> i64
+def: Pat<(i64 (sext I1:$src1)),
+         (A2_combinew (A2_tfrsi -1), (C2_muxii PredRegs:$src1, -1, 0))>;
+
+// Zero extends.
+// i1 -> i32
+def: Pat<(i32 (zext I1:$src1)),
+         (C2_muxii PredRegs:$src1, 1, 0)>;
+
+// Map from Rs = Pd to Pd = mux(Pd, #1, #0)
+def: Pat<(i32 (anyext I1:$src1)),
+         (C2_muxii PredRegs:$src1, 1, 0)>;
+
+// Map from Rss = Pd to Rdd = sxtw (mux(Pd, #1, #0))
+def: Pat<(i64 (anyext I1:$src1)),
+         (A2_sxtw (C2_muxii PredRegs:$src1, 1, 0))>;
+
+// Clear the sign bit in a 64-bit register.
+def ClearSign : OutPatFrag<(ops node:$Rss), + (A2_combinew (S2_clrbit_i (HiReg $Rss), 31), (LoReg $Rss))>; + +def MulHU : OutPatFrag<(ops node:$Rss, node:$Rtt), + (A2_addp + (M2_dpmpyuu_acc_s0 + (S2_lsr_i_p + (A2_addp + (M2_dpmpyuu_acc_s0 + (S2_lsr_i_p (M2_dpmpyuu_s0 (LoReg $Rss), (LoReg $Rtt)), 32), + (HiReg $Rss), + (LoReg $Rtt)), + (A2_combinew (A2_tfrsi 0), + (LoReg (M2_dpmpyuu_s0 (LoReg $Rss), (HiReg $Rtt))))), + 32), + (HiReg $Rss), + (HiReg $Rtt)), + (S2_lsr_i_p (M2_dpmpyuu_s0 (LoReg $Rss), (HiReg $Rtt)), 32))>; + +// Multiply 64-bit unsigned and use upper result. +def : Pat <(mulhu I64:$Rss, I64:$Rtt), (MulHU $Rss, $Rtt)>; + +// Multiply 64-bit signed and use upper result. +// +// For two signed 64-bit integers A and B, let A' and B' denote A and B +// with the sign bit cleared. Then A = -2^63*s(A) + A', where s(A) is the +// sign bit of A (and identically for B). With this notation, the signed +// product A*B can be written as: +// AB = (-2^63 s(A) + A') * (-2^63 s(B) + B') +// = 2^126 s(A)s(B) - 2^63 [s(A)B'+s(B)A'] + A'B' +// = 2^126 s(A)s(B) + 2^63 [s(A)B'+s(B)A'] + A'B' - 2*2^63 [s(A)B'+s(B)A'] +// = (unsigned product AB) - 2^64 [s(A)B'+s(B)A'] + +def : Pat <(mulhs I64:$Rss, I64:$Rtt), + (A2_subp + (MulHU $Rss, $Rtt), + (A2_addp + (A2_andp (S2_asr_i_p $Rss, 63), (ClearSign $Rtt)), + (A2_andp (S2_asr_i_p $Rtt, 63), (ClearSign $Rss))))>; + +// Hexagon specific ISD nodes. +def SDTHexagonALLOCA : SDTypeProfile<1, 2, + [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; +def HexagonALLOCA : SDNode<"HexagonISD::ALLOCA", SDTHexagonALLOCA, + [SDNPHasChain]>; + + +def: Pat<(HexagonALLOCA I32:$Rs, (i32 imm:$A)), + (PS_alloca IntRegs:$Rs, imm:$A)>; + +def HexagonJT: SDNode<"HexagonISD::JT", SDTIntUnaryOp>; +def HexagonCP: SDNode<"HexagonISD::CP", SDTIntUnaryOp>; + +def: Pat<(HexagonJT tjumptable:$dst), (A2_tfrsi imm:$dst)>; +def: Pat<(HexagonCP tconstpool:$dst), (A2_tfrsi imm:$dst)>; + +let AddedComplexity = 100 in +def: Pat<(add I32:$src1, (sra I32:$Rs, u5_0ImmPred:$u5)), (S2_asr_i_r_acc IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; +def: Pat<(sub I32:$src1, (sra I32:$Rs, u5_0ImmPred:$u5)), (S2_asr_i_r_nac IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; +def: Pat<(and I32:$src1, (sra I32:$Rs, u5_0ImmPred:$u5)), (S2_asr_i_r_and IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; +def: Pat<(or I32:$src1, (sra I32:$Rs, u5_0ImmPred:$u5)), (S2_asr_i_r_or IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; + +let AddedComplexity = 100 in +def: Pat<(add I64:$src1, (sra I64:$Rs, u6_0ImmPred:$u5)), (S2_asr_i_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; +def: Pat<(sub I64:$src1, (sra I64:$Rs, u6_0ImmPred:$u5)), (S2_asr_i_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; +def: Pat<(and I64:$src1, (sra I64:$Rs, u6_0ImmPred:$u5)), (S2_asr_i_p_and DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; +def: Pat<(or I64:$src1, (sra I64:$Rs, u6_0ImmPred:$u5)), (S2_asr_i_p_or DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; + +let AddedComplexity = 100 in +def: Pat<(add I32:$src1, (srl I32:$Rs, u5_0ImmPred:$u5)), (S2_lsr_i_r_acc IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; +def: Pat<(sub I32:$src1, (srl I32:$Rs, u5_0ImmPred:$u5)), (S2_lsr_i_r_nac IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; +def: Pat<(and I32:$src1, (srl I32:$Rs, u5_0ImmPred:$u5)), (S2_lsr_i_r_and IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; +def: Pat<(or I32:$src1, (srl I32:$Rs, u5_0ImmPred:$u5)), (S2_lsr_i_r_or IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; +let AddedComplexity = 100 in +def: Pat<(xor I32:$src1, (srl I32:$Rs, 
u5_0ImmPred:$u5)), (S2_lsr_i_r_xacc IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; + +let AddedComplexity = 100 in +def: Pat<(add I64:$src1, (srl I64:$Rs, u6_0ImmPred:$u5)), (S2_lsr_i_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; +def: Pat<(sub I64:$src1, (srl I64:$Rs, u6_0ImmPred:$u5)), (S2_lsr_i_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; +def: Pat<(and I64:$src1, (srl I64:$Rs, u6_0ImmPred:$u5)), (S2_lsr_i_p_and DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; +def: Pat<(or I64:$src1, (srl I64:$Rs, u6_0ImmPred:$u5)), (S2_lsr_i_p_or DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; +let AddedComplexity = 100 in +def: Pat<(xor I64:$src1, (srl I64:$Rs, u6_0ImmPred:$u5)), (S2_lsr_i_p_xacc DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; + +let AddedComplexity = 100 in +def: Pat<(add I32:$src1, (shl I32:$Rs, u5_0ImmPred:$u5)), (S2_asl_i_r_acc IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; +def: Pat<(sub I32:$src1, (shl I32:$Rs, u5_0ImmPred:$u5)), (S2_asl_i_r_nac IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; +def: Pat<(and I32:$src1, (shl I32:$Rs, u5_0ImmPred:$u5)), (S2_asl_i_r_and IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; +def: Pat<(or I32:$src1, (shl I32:$Rs, u5_0ImmPred:$u5)), (S2_asl_i_r_or IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; +let AddedComplexity = 100 in +def: Pat<(xor I32:$src1, (shl I32:$Rs, u5_0ImmPred:$u5)), (S2_asl_i_r_xacc IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; + +let AddedComplexity = 100 in +def: Pat<(add I64:$src1, (shl I64:$Rs, u6_0ImmPred:$u5)), (S2_asl_i_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; +def: Pat<(sub I64:$src1, (shl I64:$Rs, u6_0ImmPred:$u5)), (S2_asl_i_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; +def: Pat<(and I64:$src1, (shl I64:$Rs, u6_0ImmPred:$u5)), (S2_asl_i_p_and DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; +def: Pat<(or I64:$src1, (shl I64:$Rs, u6_0ImmPred:$u5)), (S2_asl_i_p_or DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; +let AddedComplexity = 100 in +def: Pat<(xor I64:$src1, (shl I64:$Rs, u6_0ImmPred:$u5)), (S2_asl_i_p_xacc DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; + +let AddedComplexity = 100 in +def: Pat<(add I32:$src1, (shl I32:$Rs, I32:$Rt)), (S2_asl_r_r_acc IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(sub I32:$src1, (shl I32:$Rs, I32:$Rt)), (S2_asl_r_r_nac IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(and I32:$src1, (shl I32:$Rs, I32:$Rt)), (S2_asl_r_r_and IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(or I32:$src1, (shl I32:$Rs, I32:$Rt)), (S2_asl_r_r_or IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; +let AddedComplexity = 100 in +def: Pat<(add I64:$src1, (shl I64:$Rs, I32:$Rt)), (S2_asl_r_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(sub I64:$src1, (shl I64:$Rs, I32:$Rt)), (S2_asl_r_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(and I64:$src1, (shl I64:$Rs, I32:$Rt)), (S2_asl_r_p_and DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(or I64:$src1, (shl I64:$Rs, I32:$Rt)), (S2_asl_r_p_or DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(xor I64:$src1, (shl I64:$Rs, I32:$Rt)), (S2_asl_r_p_xor DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; + +let AddedComplexity = 100 in +def: Pat<(add I32:$src1, (sra I32:$Rs, I32:$Rt)), (S2_asr_r_r_acc IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(sub I32:$src1, (sra I32:$Rs, I32:$Rt)), (S2_asr_r_r_nac IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(and I32:$src1, (sra I32:$Rs, I32:$Rt)), (S2_asr_r_r_and 
IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(or I32:$src1, (sra I32:$Rs, I32:$Rt)), (S2_asr_r_r_or IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; +let AddedComplexity = 100 in +def: Pat<(add I64:$src1, (sra I64:$Rs, I32:$Rt)), (S2_asr_r_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(sub I64:$src1, (sra I64:$Rs, I32:$Rt)), (S2_asr_r_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(and I64:$src1, (sra I64:$Rs, I32:$Rt)), (S2_asr_r_p_and DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(or I64:$src1, (sra I64:$Rs, I32:$Rt)), (S2_asr_r_p_or DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(xor I64:$src1, (sra I64:$Rs, I32:$Rt)), (S2_asr_r_p_xor DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; + +let AddedComplexity = 100 in +def: Pat<(add I32:$src1, (srl I32:$Rs, I32:$Rt)), (S2_lsr_r_r_acc IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(sub I32:$src1, (srl I32:$Rs, I32:$Rt)), (S2_lsr_r_r_nac IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(and I32:$src1, (srl I32:$Rs, I32:$Rt)), (S2_lsr_r_r_and IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(or I32:$src1, (srl I32:$Rs, I32:$Rt)), (S2_lsr_r_r_or IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; +let AddedComplexity = 100 in +def: Pat<(add I64:$src1, (srl I64:$Rs, I32:$Rt)), (S2_lsr_r_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(sub I64:$src1, (srl I64:$Rs, I32:$Rt)), (S2_lsr_r_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(and I64:$src1, (srl I64:$Rs, I32:$Rt)), (S2_lsr_r_p_and DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(or I64:$src1, (srl I64:$Rs, I32:$Rt)), (S2_lsr_r_p_or DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(xor I64:$src1, (srl I64:$Rs, I32:$Rt)), (S2_lsr_r_p_xor DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; + +let AddedComplexity = 100 in +def: Pat<(add I32:$src1, (shl I32:$Rs, I32:$Rt)), (S2_lsl_r_r_acc IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(sub I32:$src1, (shl I32:$Rs, I32:$Rt)), (S2_lsl_r_r_nac IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(and I32:$src1, (shl I32:$Rs, I32:$Rt)), (S2_lsl_r_r_and IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(or I32:$src1, (shl I32:$Rs, I32:$Rt)), (S2_lsl_r_r_or IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; +let AddedComplexity = 100 in +def: Pat<(add I64:$src1, (shl I64:$Rs, I32:$Rt)), (S2_lsl_r_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(sub I64:$src1, (shl I64:$Rs, I32:$Rt)), (S2_lsl_r_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(and I64:$src1, (shl I64:$Rs, I32:$Rt)), (S2_lsl_r_p_and DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(or I64:$src1, (shl I64:$Rs, I32:$Rt)), (S2_lsl_r_p_or DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(xor I64:$src1, (shl I64:$Rs, I32:$Rt)), (S2_lsl_r_p_xor DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; + +def: Pat<(sra I64:$src1, I32:$src2), (S2_asr_r_p DoubleRegs:$src1, IntRegs:$src2)>; +def: Pat<(srl I64:$src1, I32:$src2), (S2_lsr_r_p DoubleRegs:$src1, IntRegs:$src2)>; +def: Pat<(shl I64:$src1, I32:$src2), (S2_asl_r_p DoubleRegs:$src1, IntRegs:$src2)>; +def: Pat<(shl I64:$src1, I32:$src2), (S2_lsl_r_p DoubleRegs:$src1, IntRegs:$src2)>; + +def: Pat<(sra I32:$src1, I32:$src2), (S2_asr_r_r IntRegs:$src1, IntRegs:$src2)>; +def: Pat<(srl I32:$src1, I32:$src2), (S2_lsr_r_r IntRegs:$src1, IntRegs:$src2)>; +def: Pat<(shl I32:$src1, I32:$src2), (S2_asl_r_r IntRegs:$src1, IntRegs:$src2)>; +def: Pat<(shl I32:$src1, I32:$src2), (S2_lsl_r_r 
IntRegs:$src1, IntRegs:$src2)>; + +def SDTHexagonINSERT: + SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, + SDTCisInt<0>, SDTCisVT<3, i32>, SDTCisVT<4, i32>]>; +def SDTHexagonINSERTRP: + SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, + SDTCisInt<0>, SDTCisVT<3, i64>]>; + +def HexagonINSERT : SDNode<"HexagonISD::INSERT", SDTHexagonINSERT>; +def HexagonINSERTRP : SDNode<"HexagonISD::INSERTRP", SDTHexagonINSERTRP>; + +def: Pat<(HexagonINSERT I32:$Rs, I32:$Rt, u5_0ImmPred:$u1, u5_0ImmPred:$u2), + (S2_insert I32:$Rs, I32:$Rt, u5_0ImmPred:$u1, u5_0ImmPred:$u2)>; +def: Pat<(HexagonINSERT I64:$Rs, I64:$Rt, u6_0ImmPred:$u1, u6_0ImmPred:$u2), + (S2_insertp I64:$Rs, I64:$Rt, u6_0ImmPred:$u1, u6_0ImmPred:$u2)>; +def: Pat<(HexagonINSERTRP I32:$Rs, I32:$Rt, I64:$Ru), + (S2_insert_rp I32:$Rs, I32:$Rt, I64:$Ru)>; +def: Pat<(HexagonINSERTRP I64:$Rs, I64:$Rt, I64:$Ru), + (S2_insertp_rp I64:$Rs, I64:$Rt, I64:$Ru)>; + +let AddedComplexity = 100 in +def: Pat<(or (or (shl (HexagonINSERT (i32 (zextloadi8 (add I32:$b, 2))), + (i32 (extloadi8 (add I32:$b, 3))), + 24, 8), + (i32 16)), + (shl (i32 (zextloadi8 (add I32:$b, 1))), (i32 8))), + (zextloadi8 I32:$b)), + (A2_swiz (L2_loadri_io I32:$b, 0))>; + +def SDTHexagonEXTRACTU: + SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<1>, + SDTCisVT<2, i32>, SDTCisVT<3, i32>]>; +def SDTHexagonEXTRACTURP: + SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<1>, + SDTCisVT<2, i64>]>; + +def HexagonEXTRACTU : SDNode<"HexagonISD::EXTRACTU", SDTHexagonEXTRACTU>; +def HexagonEXTRACTURP : SDNode<"HexagonISD::EXTRACTURP", SDTHexagonEXTRACTURP>; + +def: Pat<(HexagonEXTRACTU I32:$src1, u5_0ImmPred:$src2, u5_0ImmPred:$src3), + (S2_extractu I32:$src1, u5_0ImmPred:$src2, u5_0ImmPred:$src3)>; +def: Pat<(HexagonEXTRACTU I64:$src1, u6_0ImmPred:$src2, u6_0ImmPred:$src3), + (S2_extractup I64:$src1, u6_0ImmPred:$src2, u6_0ImmPred:$src3)>; +def: Pat<(HexagonEXTRACTURP I32:$src1, I64:$src2), + (S2_extractu_rp I32:$src1, I64:$src2)>; +def: Pat<(HexagonEXTRACTURP I64:$src1, I64:$src2), + (S2_extractup_rp I64:$src1, I64:$src2)>; + +def n8_0ImmPred: PatLeaf<(i32 imm), [{ + int64_t V = N->getSExtValue(); + return -255 <= V && V <= 0; +}]>; + +// Change the sign of the immediate for Rd=-mpyi(Rs,#u8) +def: Pat<(mul I32:$src1, (ineg n8_0ImmPred:$src2)), + (M2_mpysin IntRegs:$src1, u8_0ImmPred:$src2)>; + +multiclass MinMax_pats_p<PatFrag Op, InstHexagon Inst, InstHexagon SwapInst> { + defm: T_MinMax_pats<Op, I64, Inst, SwapInst>; +} + +def: Pat<(add (Sext64 I32:$Rs), I64:$Rt), + (A2_addsp IntRegs:$Rs, DoubleRegs:$Rt)>; + +let AddedComplexity = 200 in { + defm: MinMax_pats_p<setge, A2_maxp, A2_minp>; + defm: MinMax_pats_p<setgt, A2_maxp, A2_minp>; + defm: MinMax_pats_p<setle, A2_minp, A2_maxp>; + defm: MinMax_pats_p<setlt, A2_minp, A2_maxp>; + defm: MinMax_pats_p<setuge, A2_maxup, A2_minup>; + defm: MinMax_pats_p<setugt, A2_maxup, A2_minup>; + defm: MinMax_pats_p<setule, A2_minup, A2_maxup>; + defm: MinMax_pats_p<setult, A2_minup, A2_maxup>; +} + +def callv3 : SDNode<"HexagonISD::CALL", SDT_SPCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; + +def callv3nr : SDNode<"HexagonISD::CALLnr", SDT_SPCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; + + +// Map call instruction +def : Pat<(callv3 I32:$dst), + (J2_callr I32:$dst)>; +def : Pat<(callv3 tglobaladdr:$dst), + (J2_call tglobaladdr:$dst)>; +def : Pat<(callv3 texternalsym:$dst), + (J2_call texternalsym:$dst)>; +def : Pat<(callv3 tglobaltlsaddr:$dst), + (J2_call 
tglobaltlsaddr:$dst)>; + +def : Pat<(callv3nr I32:$dst), + (PS_callr_nr I32:$dst)>; +def : Pat<(callv3nr tglobaladdr:$dst), + (PS_call_nr tglobaladdr:$dst)>; +def : Pat<(callv3nr texternalsym:$dst), + (PS_call_nr texternalsym:$dst)>; + + +def addrga: PatLeaf<(i32 AddrGA:$Addr)>; +def addrgp: PatLeaf<(i32 AddrGP:$Addr)>; + + +// Pats for instruction selection. + +// A class to embed the usual comparison patfrags within a zext to i32. +// The seteq/setne frags use "lhs" and "rhs" as operands, so use the same +// names, or else the frag's "body" won't match the operands. +class CmpInReg<PatFrag Op> + : PatFrag<(ops node:$lhs, node:$rhs),(i32 (zext (i1 Op.Fragment)))>; + +def: T_cmp32_rr_pat<A4_rcmpeq, CmpInReg<seteq>, i32>; +def: T_cmp32_rr_pat<A4_rcmpneq, CmpInReg<setne>, i32>; + +def: T_cmp32_rr_pat<C4_cmpneq, setne, i1>; +def: T_cmp32_rr_pat<C4_cmplte, setle, i1>; +def: T_cmp32_rr_pat<C4_cmplteu, setule, i1>; + +def: T_cmp32_rr_pat<C4_cmplte, RevCmp<setge>, i1>; +def: T_cmp32_rr_pat<C4_cmplteu, RevCmp<setuge>, i1>; + +let AddedComplexity = 100 in { + def: Pat<(i1 (seteq (and (xor I32:$Rs, I32:$Rt), + 255), 0)), + (A4_cmpbeq IntRegs:$Rs, IntRegs:$Rt)>; + def: Pat<(i1 (setne (and (xor I32:$Rs, I32:$Rt), + 255), 0)), + (C2_not (A4_cmpbeq IntRegs:$Rs, IntRegs:$Rt))>; + def: Pat<(i1 (seteq (and (xor I32:$Rs, I32:$Rt), + 65535), 0)), + (A4_cmpheq IntRegs:$Rs, IntRegs:$Rt)>; + def: Pat<(i1 (setne (and (xor I32:$Rs, I32:$Rt), + 65535), 0)), + (C2_not (A4_cmpheq IntRegs:$Rs, IntRegs:$Rt))>; +} + +def: Pat<(i32 (zext (i1 (seteq I32:$Rs, s32_0ImmPred:$s8)))), + (A4_rcmpeqi IntRegs:$Rs, s32_0ImmPred:$s8)>; +def: Pat<(i32 (zext (i1 (setne I32:$Rs, s32_0ImmPred:$s8)))), + (A4_rcmpneqi IntRegs:$Rs, s32_0ImmPred:$s8)>; + +// Preserve the S2_tstbit_r generation +def: Pat<(i32 (zext (i1 (setne (i32 (and (i32 (shl 1, I32:$src2)), + I32:$src1)), 0)))), + (C2_muxii (S2_tstbit_r IntRegs:$src1, IntRegs:$src2), 1, 0)>; + +// The complexity of the combines involving immediates should be greater +// than the complexity of the combine with two registers. +let AddedComplexity = 50 in { +def: Pat<(HexagonCOMBINE IntRegs:$r, s32_0ImmPred:$i), + (A4_combineri IntRegs:$r, s32_0ImmPred:$i)>; + +def: Pat<(HexagonCOMBINE s32_0ImmPred:$i, IntRegs:$r), + (A4_combineir s32_0ImmPred:$i, IntRegs:$r)>; +} + +// The complexity of the combine with two immediates should be greater than +// the complexity of a combine involving a register. +let AddedComplexity = 75 in { +def: Pat<(HexagonCOMBINE s8_0ImmPred:$s8, u32_0ImmPred:$u6), + (A4_combineii imm:$s8, imm:$u6)>; +def: Pat<(HexagonCOMBINE s32_0ImmPred:$s8, s8_0ImmPred:$S8), + (A2_combineii imm:$s8, imm:$S8)>; +} + + +def ToZext64: OutPatFrag<(ops node:$Rs), + (i64 (A4_combineir 0, (i32 $Rs)))>; +def ToSext64: OutPatFrag<(ops node:$Rs), + (i64 (A2_sxtw (i32 $Rs)))>; + +// Patterns to generate indexed loads with different forms of the address: +// - frameindex, +// - base + offset, +// - base (without offset). 
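+// The Loadxm_pat multiclass below threads a value modifier through each of
+// these shapes; e.g. an i8-to-i64 zero-extending load becomes a 32-bit
+// zero-extending load wrapped in ToZext64, roughly:
+//   (i64 (zextloadi8 Rs)) -> A4_combineir #0, (L2_loadrub_io Rs, #0)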
+multiclass Loadxm_pat<PatFrag Load, ValueType VT, PatFrag ValueMod, + PatLeaf ImmPred, InstHexagon MI> { + def: Pat<(VT (Load AddrFI:$fi)), + (VT (ValueMod (MI AddrFI:$fi, 0)))>; + def: Pat<(VT (Load (add AddrFI:$fi, ImmPred:$Off))), + (VT (ValueMod (MI AddrFI:$fi, imm:$Off)))>; + def: Pat<(VT (Load (add IntRegs:$Rs, ImmPred:$Off))), + (VT (ValueMod (MI IntRegs:$Rs, imm:$Off)))>; + def: Pat<(VT (Load I32:$Rs)), + (VT (ValueMod (MI IntRegs:$Rs, 0)))>; +} + +defm: Loadxm_pat<extloadi1, i64, ToZext64, s32_0ImmPred, L2_loadrub_io>; +defm: Loadxm_pat<extloadi8, i64, ToZext64, s32_0ImmPred, L2_loadrub_io>; +defm: Loadxm_pat<extloadi16, i64, ToZext64, s31_1ImmPred, L2_loadruh_io>; +defm: Loadxm_pat<zextloadi1, i64, ToZext64, s32_0ImmPred, L2_loadrub_io>; +defm: Loadxm_pat<zextloadi8, i64, ToZext64, s32_0ImmPred, L2_loadrub_io>; +defm: Loadxm_pat<zextloadi16, i64, ToZext64, s31_1ImmPred, L2_loadruh_io>; +defm: Loadxm_pat<sextloadi8, i64, ToSext64, s32_0ImmPred, L2_loadrb_io>; +defm: Loadxm_pat<sextloadi16, i64, ToSext64, s31_1ImmPred, L2_loadrh_io>; + +// Map Rdd = anyext(Rs) -> Rdd = combine(#0, Rs). +def: Pat<(Aext64 I32:$src1), (ToZext64 IntRegs:$src1)>; + +multiclass T_LoadAbsReg_Pat <PatFrag ldOp, InstHexagon MI, ValueType VT = i32> { + def : Pat <(VT (ldOp (add (shl IntRegs:$src1, u2_0ImmPred:$src2), + (HexagonCONST32 tglobaladdr:$src3)))), + (MI IntRegs:$src1, u2_0ImmPred:$src2, tglobaladdr:$src3)>; + def : Pat <(VT (ldOp (add IntRegs:$src1, + (HexagonCONST32 tglobaladdr:$src2)))), + (MI IntRegs:$src1, 0, tglobaladdr:$src2)>; + + def : Pat <(VT (ldOp (add (shl IntRegs:$src1, u2_0ImmPred:$src2), + (HexagonCONST32 tconstpool:$src3)))), + (MI IntRegs:$src1, u2_0ImmPred:$src2, tconstpool:$src3)>; + def : Pat <(VT (ldOp (add IntRegs:$src1, + (HexagonCONST32 tconstpool:$src2)))), + (MI IntRegs:$src1, 0, tconstpool:$src2)>; + + def : Pat <(VT (ldOp (add (shl IntRegs:$src1, u2_0ImmPred:$src2), + (HexagonCONST32 tjumptable:$src3)))), + (MI IntRegs:$src1, u2_0ImmPred:$src2, tjumptable:$src3)>; + def : Pat <(VT (ldOp (add IntRegs:$src1, + (HexagonCONST32 tjumptable:$src2)))), + (MI IntRegs:$src1, 0, tjumptable:$src2)>; +} + +let AddedComplexity = 60 in { +defm : T_LoadAbsReg_Pat <sextloadi8, L4_loadrb_ur>; +defm : T_LoadAbsReg_Pat <zextloadi8, L4_loadrub_ur>; +defm : T_LoadAbsReg_Pat <extloadi8, L4_loadrub_ur>; + +defm : T_LoadAbsReg_Pat <sextloadi16, L4_loadrh_ur>; +defm : T_LoadAbsReg_Pat <zextloadi16, L4_loadruh_ur>; +defm : T_LoadAbsReg_Pat <extloadi16, L4_loadruh_ur>; + +defm : T_LoadAbsReg_Pat <load, L4_loadri_ur>; +defm : T_LoadAbsReg_Pat <load, L4_loadrd_ur, i64>; +} + +// 'def pats' for load instructions with base + register offset and non-zero +// immediate value. Immediate value is used to left-shift the second +// register operand. +class Loadxs_pat<PatFrag Load, ValueType VT, InstHexagon MI> + : Pat<(VT (Load (add I32:$Rs, + (i32 (shl I32:$Rt, u2_0ImmPred:$u2))))), + (VT (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2))>; + +let AddedComplexity = 40 in { + def: Loadxs_pat<extloadi8, i32, L4_loadrub_rr>; + def: Loadxs_pat<zextloadi8, i32, L4_loadrub_rr>; + def: Loadxs_pat<sextloadi8, i32, L4_loadrb_rr>; + def: Loadxs_pat<extloadi16, i32, L4_loadruh_rr>; + def: Loadxs_pat<zextloadi16, i32, L4_loadruh_rr>; + def: Loadxs_pat<sextloadi16, i32, L4_loadrh_rr>; + def: Loadxs_pat<load, i32, L4_loadri_rr>; + def: Loadxs_pat<load, i64, L4_loadrd_rr>; +} + +// 'def pats' for load instruction base + register offset and +// zero immediate value. 
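+// With the scaled form above, an access like p[i] on i32 data, i.e.
+// (load (add Rs, (shl Rt, #2))), selects L4_loadri_rr Rs, Rt, #2; the
+// simple class below handles the unscaled (add Rs, Rt) case with a zero
+// shift amount.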
+class Loadxs_simple_pat<PatFrag Load, ValueType VT, InstHexagon MI> + : Pat<(VT (Load (add I32:$Rs, I32:$Rt))), + (VT (MI IntRegs:$Rs, IntRegs:$Rt, 0))>; + +let AddedComplexity = 20 in { + def: Loadxs_simple_pat<extloadi8, i32, L4_loadrub_rr>; + def: Loadxs_simple_pat<zextloadi8, i32, L4_loadrub_rr>; + def: Loadxs_simple_pat<sextloadi8, i32, L4_loadrb_rr>; + def: Loadxs_simple_pat<extloadi16, i32, L4_loadruh_rr>; + def: Loadxs_simple_pat<zextloadi16, i32, L4_loadruh_rr>; + def: Loadxs_simple_pat<sextloadi16, i32, L4_loadrh_rr>; + def: Loadxs_simple_pat<load, i32, L4_loadri_rr>; + def: Loadxs_simple_pat<load, i64, L4_loadrd_rr>; +} + +// zext i1->i64 +def: Pat<(i64 (zext I1:$src1)), + (ToZext64 (C2_muxii PredRegs:$src1, 1, 0))>; + +// zext i32->i64 +def: Pat<(Zext64 I32:$src1), + (ToZext64 IntRegs:$src1)>; + +let AddedComplexity = 40 in +multiclass T_StoreAbsReg_Pats <InstHexagon MI, RegisterClass RC, ValueType VT, + PatFrag stOp> { + def : Pat<(stOp (VT RC:$src4), + (add (shl I32:$src1, u2_0ImmPred:$src2), + u32_0ImmPred:$src3)), + (MI IntRegs:$src1, u2_0ImmPred:$src2, u32_0ImmPred:$src3, RC:$src4)>; + + def : Pat<(stOp (VT RC:$src4), + (add (shl IntRegs:$src1, u2_0ImmPred:$src2), + (HexagonCONST32 tglobaladdr:$src3))), + (MI IntRegs:$src1, u2_0ImmPred:$src2, tglobaladdr:$src3, RC:$src4)>; + + def : Pat<(stOp (VT RC:$src4), + (add IntRegs:$src1, (HexagonCONST32 tglobaladdr:$src3))), + (MI IntRegs:$src1, 0, tglobaladdr:$src3, RC:$src4)>; +} + +defm : T_StoreAbsReg_Pats <S4_storerd_ur, DoubleRegs, i64, store>; +defm : T_StoreAbsReg_Pats <S4_storeri_ur, IntRegs, i32, store>; +defm : T_StoreAbsReg_Pats <S4_storerb_ur, IntRegs, i32, truncstorei8>; +defm : T_StoreAbsReg_Pats <S4_storerh_ur, IntRegs, i32, truncstorei16>; + +class Storexs_pat<PatFrag Store, PatFrag Value, InstHexagon MI> + : Pat<(Store Value:$Ru, (add I32:$Rs, + (i32 (shl I32:$Rt, u2_0ImmPred:$u2)))), + (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2, Value:$Ru)>; + +let AddedComplexity = 40 in { + def: Storexs_pat<truncstorei8, I32, S4_storerb_rr>; + def: Storexs_pat<truncstorei16, I32, S4_storerh_rr>; + def: Storexs_pat<store, I32, S4_storeri_rr>; + def: Storexs_pat<store, I64, S4_storerd_rr>; +} + +def s30_2ProperPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedInt<30,2>(v) && !isShiftedInt<29,3>(v); +}]>; +def RoundTo8 : SDNodeXForm<imm, [{ + int32_t Imm = N->getSExtValue(); + return CurDAG->getTargetConstant(Imm & -8, SDLoc(N), MVT::i32); +}]>; + +let AddedComplexity = 40 in +def: Pat<(store I64:$Ru, (add I32:$Rs, s30_2ProperPred:$Off)), + (S2_storerd_io (A2_addi I32:$Rs, 4), (RoundTo8 $Off), I64:$Ru)>; + +class Store_rr_pat<PatFrag Store, PatFrag Value, InstHexagon MI> + : Pat<(Store Value:$Ru, (add I32:$Rs, I32:$Rt)), + (MI IntRegs:$Rs, IntRegs:$Rt, 0, Value:$Ru)>; + +let AddedComplexity = 20 in { + def: Store_rr_pat<truncstorei8, I32, S4_storerb_rr>; + def: Store_rr_pat<truncstorei16, I32, S4_storerh_rr>; + def: Store_rr_pat<store, I32, S4_storeri_rr>; + def: Store_rr_pat<store, I64, S4_storerd_rr>; +} + + +def IMM_BYTE : SDNodeXForm<imm, [{ + // -1 etc is represented as 255 etc + // assigning to a byte restores our desired signed value. + int8_t imm = N->getSExtValue(); + return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32); +}]>; + +def IMM_HALF : SDNodeXForm<imm, [{ + // -1 etc is represented as 65535 etc + // assigning to a short restores our desired signed value. 
+  int16_t imm = N->getSExtValue();
+  return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32);
+}]>;
+
+def IMM_WORD : SDNodeXForm<imm, [{
+  // -1 etc. can be represented as 4294967295 etc. Currently the DAG does
+  // not do this, but some optimization might convert -1 into a large
+  // positive number.
+  // assigning to a word restores our desired signed value.
+  int32_t imm = N->getSExtValue();
+  return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32);
+}]>;
+
+def ToImmByte : OutPatFrag<(ops node:$R), (IMM_BYTE $R)>;
+def ToImmHalf : OutPatFrag<(ops node:$R), (IMM_HALF $R)>;
+def ToImmWord : OutPatFrag<(ops node:$R), (IMM_WORD $R)>;
+
+// Emit store-immediate, but only when the stored value will not be constant-
+// extended. The reason for that is that there is no pass that can optimize
+// constant extenders in store-immediate instructions. In some cases we can
+// end up with a number of such stores, all of which store the same extended
+// value (e.g. after unrolling a loop that initializes a floating-point array).
+
+// Predicate to determine if the 16-bit immediate is expressible as a sign-
+// extended 8-bit immediate. Store-immediate-halfword will ignore any bits
+// beyond 0..15, so we don't care what is in there.
+
+def i16in8ImmPred: PatLeaf<(i32 imm), [{
+  int64_t v = (int16_t)N->getSExtValue();
+  return v == (int64_t)(int8_t)v;
+}]>;
+
+// Predicate to determine if the 32-bit immediate is expressible as a sign-
+// extended 8-bit immediate.
+def i32in8ImmPred: PatLeaf<(i32 imm), [{
+  int64_t v = (int32_t)N->getSExtValue();
+  return v == (int64_t)(int8_t)v;
+}]>;
+
+
+let AddedComplexity = 40 in {
+  // Even though the offset is not extendable in the store-immediate, we
+  // can still generate the fi# in the base address. If the final offset
+  // is not valid for the instruction, we will replace it with a scratch
+  // register.
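+  // A C-level sketch of the unrolling case mentioned above (illustrative
+  // values only):
+  //   float a[4];
+  //   a[0] = a[1] = a[2] = a[3] = 1.0f;
+  // would otherwise become four store-immediates of the same extended
+  // constant #0x3f800000; restricting the stored immediate to a
+  // sign-extended 8-bit value keeps each store free of a constant extender.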
+// def: Storexm_fi_pat <truncstorei8, s32_0ImmPred, ToImmByte, S4_storeirb_io>; +// def: Storexm_fi_pat <truncstorei16, i16in8ImmPred, ToImmHalf, +// S4_storeirh_io>; +// def: Storexm_fi_pat <store, i32in8ImmPred, ToImmWord, S4_storeiri_io>; + +// defm: Storexm_fi_add_pat <truncstorei8, s32_0ImmPred, u6_0ImmPred, ToImmByte, +// S4_storeirb_io>; +// defm: Storexm_fi_add_pat <truncstorei16, i16in8ImmPred, u6_1ImmPred, +// ToImmHalf, S4_storeirh_io>; +// defm: Storexm_fi_add_pat <store, i32in8ImmPred, u6_2ImmPred, ToImmWord, +// S4_storeiri_io>; + + defm: Storexm_add_pat<truncstorei8, s32_0ImmPred, u6_0ImmPred, ToImmByte, + S4_storeirb_io>; + defm: Storexm_add_pat<truncstorei16, i16in8ImmPred, u6_1ImmPred, ToImmHalf, + S4_storeirh_io>; + defm: Storexm_add_pat<store, i32in8ImmPred, u6_2ImmPred, ToImmWord, + S4_storeiri_io>; +} + +def: Storexm_simple_pat<truncstorei8, s32_0ImmPred, ToImmByte, S4_storeirb_io>; +def: Storexm_simple_pat<truncstorei16, s32_0ImmPred, ToImmHalf, S4_storeirh_io>; +def: Storexm_simple_pat<store, s32_0ImmPred, ToImmWord, S4_storeiri_io>; + +// op(Ps, op(Pt, Pu)) +class LogLog_pat<SDNode Op1, SDNode Op2, InstHexagon MI> + : Pat<(i1 (Op1 I1:$Ps, (Op2 I1:$Pt, I1:$Pu))), + (MI I1:$Ps, I1:$Pt, I1:$Pu)>; + +// op(Ps, op(Pt, ~Pu)) +class LogLogNot_pat<SDNode Op1, SDNode Op2, InstHexagon MI> + : Pat<(i1 (Op1 I1:$Ps, (Op2 I1:$Pt, (not I1:$Pu)))), + (MI I1:$Ps, I1:$Pt, I1:$Pu)>; + +def: LogLog_pat<and, and, C4_and_and>; +def: LogLog_pat<and, or, C4_and_or>; +def: LogLog_pat<or, and, C4_or_and>; +def: LogLog_pat<or, or, C4_or_or>; + +def: LogLogNot_pat<and, and, C4_and_andn>; +def: LogLogNot_pat<and, or, C4_and_orn>; +def: LogLogNot_pat<or, and, C4_or_andn>; +def: LogLogNot_pat<or, or, C4_or_orn>; + +//===----------------------------------------------------------------------===// +// PIC: Support for PIC compilations. 
The patterns and SD nodes defined +// below are needed to support code generation for PIC +//===----------------------------------------------------------------------===// + +def SDT_HexagonAtGot + : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>; +def SDT_HexagonAtPcrel + : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; + +// AT_GOT address-of-GOT, address-of-global, offset-in-global +def HexagonAtGot : SDNode<"HexagonISD::AT_GOT", SDT_HexagonAtGot>; +// AT_PCREL address-of-global +def HexagonAtPcrel : SDNode<"HexagonISD::AT_PCREL", SDT_HexagonAtPcrel>; + +def: Pat<(HexagonAtGot I32:$got, I32:$addr, (i32 0)), + (L2_loadri_io I32:$got, imm:$addr)>; +def: Pat<(HexagonAtGot I32:$got, I32:$addr, s30_2ImmPred:$off), + (A2_addi (L2_loadri_io I32:$got, imm:$addr), imm:$off)>; +def: Pat<(HexagonAtPcrel I32:$addr), + (C4_addipc imm:$addr)>; + +def: Pat<(i64 (and I64:$Rs, (i64 (not I64:$Rt)))), + (A4_andnp DoubleRegs:$Rs, DoubleRegs:$Rt)>; +def: Pat<(i64 (or I64:$Rs, (i64 (not I64:$Rt)))), + (A4_ornp DoubleRegs:$Rs, DoubleRegs:$Rt)>; + +def: Pat<(add I32:$Rs, (add I32:$Ru, s32_0ImmPred:$s6)), + (S4_addaddi IntRegs:$Rs, IntRegs:$Ru, imm:$s6)>; + +// Rd=add(Rs,sub(#s6,Ru)) +def: Pat<(add I32:$src1, (sub s32_0ImmPred:$src2, + I32:$src3)), + (S4_subaddi IntRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3)>; + +// Rd=sub(add(Rs,#s6),Ru) +def: Pat<(sub (add I32:$src1, s32_0ImmPred:$src2), + I32:$src3), + (S4_subaddi IntRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3)>; + +// Rd=add(sub(Rs,Ru),#s6) +def: Pat<(add (sub I32:$src1, I32:$src3), + (s32_0ImmPred:$src2)), + (S4_subaddi IntRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3)>; + +def: Pat<(xor I64:$dst2, + (xor I64:$Rss, I64:$Rtt)), + (M4_xor_xacc DoubleRegs:$dst2, DoubleRegs:$Rss, DoubleRegs:$Rtt)>; +def: Pat<(or I32:$Ru, (and (i32 IntRegs:$_src_), s32_0ImmPred:$s10)), + (S4_or_andix IntRegs:$Ru, IntRegs:$_src_, imm:$s10)>; + +def: Pat<(or I32:$src1, (and I32:$Rs, s32_0ImmPred:$s10)), + (S4_or_andi IntRegs:$src1, IntRegs:$Rs, imm:$s10)>; + +def: Pat<(or I32:$src1, (or I32:$Rs, s32_0ImmPred:$s10)), + (S4_or_ori IntRegs:$src1, IntRegs:$Rs, imm:$s10)>; + + + +// Count trailing zeros: 64-bit. +def: Pat<(i32 (trunc (cttz I64:$Rss))), (S2_ct0p I64:$Rss)>; + +// Count trailing ones: 64-bit. +def: Pat<(i32 (trunc (cttz (not I64:$Rss)))), (S2_ct1p I64:$Rss)>; + +// Define leading/trailing patterns that require zero-extensions to 64 bits. +def: Pat<(i64 (ctlz I64:$Rss)), (ToZext64 (S2_cl0p I64:$Rss))>; +def: Pat<(i64 (cttz I64:$Rss)), (ToZext64 (S2_ct0p I64:$Rss))>; +def: Pat<(i64 (ctlz (not I64:$Rss))), (ToZext64 (S2_cl1p I64:$Rss))>; +def: Pat<(i64 (cttz (not I64:$Rss))), (ToZext64 (S2_ct1p I64:$Rss))>; + + +let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm. + def: Pat<(i1 (seteq (and (shl 1, u5_0ImmPred:$u5), I32:$Rs), 0)), + (S4_ntstbit_i I32:$Rs, u5_0ImmPred:$u5)>; + def: Pat<(i1 (seteq (and (shl 1, I32:$Rt), I32:$Rs), 0)), + (S4_ntstbit_r I32:$Rs, I32:$Rt)>; +} + +// Add extra complexity to prefer these instructions over bitsset/bitsclr. +// The reason is that tstbit/ntstbit can be folded into a compound instruction: +// if ([!]tstbit(...)) jump ... +let AddedComplexity = 100 in +def: Pat<(i1 (setne (and I32:$Rs, (i32 IsPow2_32:$u5)), (i32 0))), + (S2_tstbit_i I32:$Rs, (Log2_32 imm:$u5))>; + +let AddedComplexity = 100 in +def: Pat<(i1 (seteq (and I32:$Rs, (i32 IsPow2_32:$u5)), (i32 0))), + (S4_ntstbit_i I32:$Rs, (Log2_32 imm:$u5))>; + +// Do not increase complexity of these patterns. 
In the DAG, "cmp i8" may be +// represented as a compare against "value & 0xFF", which is an exact match +// for cmpb (same for cmph). The patterns below do not contain any additional +// complexity that would make them preferable, and if they were actually used +// instead of cmpb/cmph, they would result in a compare against register that +// is loaded with the byte/half mask (i.e. 0xFF or 0xFFFF). +def: Pat<(i1 (setne (and I32:$Rs, u6_0ImmPred:$u6), 0)), + (C4_nbitsclri I32:$Rs, u6_0ImmPred:$u6)>; +def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), 0)), + (C4_nbitsclr I32:$Rs, I32:$Rt)>; +def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), I32:$Rt)), + (C4_nbitsset I32:$Rs, I32:$Rt)>; + + +def: Pat<(add (mul I32:$Rs, u6_0ImmPred:$U6), u32_0ImmPred:$u6), + (M4_mpyri_addi imm:$u6, IntRegs:$Rs, imm:$U6)>; +def: Pat<(add (mul I32:$Rs, I32:$Rt), u32_0ImmPred:$u6), + (M4_mpyrr_addi imm:$u6, IntRegs:$Rs, IntRegs:$Rt)>; + +def: Pat<(add I32:$src1, (mul I32:$src3, u6_2ImmPred:$src2)), + (M4_mpyri_addr_u2 IntRegs:$src1, imm:$src2, IntRegs:$src3)>; +def: Pat<(add I32:$src1, (mul I32:$src3, u32_0ImmPred:$src2)), + (M4_mpyri_addr IntRegs:$src1, IntRegs:$src3, imm:$src2)>; + +def: Pat<(add I32:$Ru, (mul (i32 IntRegs:$_src_), I32:$Rs)), + (M4_mpyrr_addr IntRegs:$Ru, IntRegs:$_src_, IntRegs:$Rs)>; + +def: T_vcmp_pat<A4_vcmpbgt, setgt, v8i8>; + +class T_Shift_CommOp_pat<InstHexagon MI, SDNode Op, SDNode ShOp> + : Pat<(Op (ShOp IntRegs:$Rx, u5_0ImmPred:$U5), u32_0ImmPred:$u8), + (MI u32_0ImmPred:$u8, IntRegs:$Rx, u5_0ImmPred:$U5)>; + +let AddedComplexity = 200 in { + def : T_Shift_CommOp_pat <S4_addi_asl_ri, add, shl>; + def : T_Shift_CommOp_pat <S4_addi_lsr_ri, add, srl>; + def : T_Shift_CommOp_pat <S4_andi_asl_ri, and, shl>; + def : T_Shift_CommOp_pat <S4_andi_lsr_ri, and, srl>; +} + +let AddedComplexity = 30 in { + def : T_Shift_CommOp_pat <S4_ori_asl_ri, or, shl>; + def : T_Shift_CommOp_pat <S4_ori_lsr_ri, or, srl>; +} + +class T_Shift_Op_pat<InstHexagon MI, SDNode Op, SDNode ShOp> + : Pat<(Op u32_0ImmPred:$u8, (ShOp IntRegs:$Rx, u5_0ImmPred:$U5)), + (MI u32_0ImmPred:$u8, IntRegs:$Rx, u5_0ImmPred:$U5)>; + +def : T_Shift_Op_pat <S4_subi_asl_ri, sub, shl>; +def : T_Shift_Op_pat <S4_subi_lsr_ri, sub, srl>; + +let AddedComplexity = 200 in { + def: Pat<(add addrga:$addr, (shl I32:$src2, u5_0ImmPred:$src3)), + (S4_addi_asl_ri addrga:$addr, IntRegs:$src2, u5_0ImmPred:$src3)>; + def: Pat<(add addrga:$addr, (srl I32:$src2, u5_0ImmPred:$src3)), + (S4_addi_lsr_ri addrga:$addr, IntRegs:$src2, u5_0ImmPred:$src3)>; + def: Pat<(sub addrga:$addr, (shl I32:$src2, u5_0ImmPred:$src3)), + (S4_subi_asl_ri addrga:$addr, IntRegs:$src2, u5_0ImmPred:$src3)>; + def: Pat<(sub addrga:$addr, (srl I32:$src2, u5_0ImmPred:$src3)), + (S4_subi_lsr_ri addrga:$addr, IntRegs:$src2, u5_0ImmPred:$src3)>; +} + +def: Pat<(shl s6_0ImmPred:$s6, I32:$Rt), + (S4_lsli imm:$s6, IntRegs:$Rt)>; + + +//===----------------------------------------------------------------------===// +// MEMOP +//===----------------------------------------------------------------------===// + +def m5_0Imm8Pred : PatLeaf<(i32 imm), [{ + int8_t V = N->getSExtValue(); + return -32 < V && V <= -1; +}]>; + +def m5_0Imm16Pred : PatLeaf<(i32 imm), [{ + int16_t V = N->getSExtValue(); + return -32 < V && V <= -1; +}]>; + +def m5_0ImmPred : PatLeaf<(i32 imm), [{ + int64_t V = N->getSExtValue(); + return -31 <= V && V <= -1; +}]>; + +def IsNPow2_8 : PatLeaf<(i32 imm), [{ + uint8_t NV = ~N->getZExtValue(); + return isPowerOf2_32(NV); +}]>; + +def IsNPow2_16 : PatLeaf<(i32 imm), [{ + uint16_t NV = 
~N->getZExtValue(); + return isPowerOf2_32(NV); +}]>; + +def Log2_8 : SDNodeXForm<imm, [{ + uint8_t V = N->getZExtValue(); + return CurDAG->getTargetConstant(Log2_32(V), SDLoc(N), MVT::i32); +}]>; + +def Log2_16 : SDNodeXForm<imm, [{ + uint16_t V = N->getZExtValue(); + return CurDAG->getTargetConstant(Log2_32(V), SDLoc(N), MVT::i32); +}]>; + +def LogN2_8 : SDNodeXForm<imm, [{ + uint8_t NV = ~N->getZExtValue(); + return CurDAG->getTargetConstant(Log2_32(NV), SDLoc(N), MVT::i32); +}]>; + +def LogN2_16 : SDNodeXForm<imm, [{ + uint16_t NV = ~N->getZExtValue(); + return CurDAG->getTargetConstant(Log2_32(NV), SDLoc(N), MVT::i32); +}]>; + +def NegImm8 : SDNodeXForm<imm, [{ + int8_t NV = -N->getSExtValue(); + return CurDAG->getTargetConstant(NV, SDLoc(N), MVT::i32); +}]>; + +def NegImm16 : SDNodeXForm<imm, [{ + int16_t NV = -N->getSExtValue(); + return CurDAG->getTargetConstant(NV, SDLoc(N), MVT::i32); +}]>; + +def NegImm32 : SDNodeXForm<imm, [{ + int32_t NV = -N->getSExtValue(); + return CurDAG->getTargetConstant(NV, SDLoc(N), MVT::i32); +}]>; + +def IdImm : SDNodeXForm<imm, [{ return SDValue(N, 0); }]>; + +multiclass Memopxr_simple_pat<PatFrag Load, PatFrag Store, SDNode Oper, + InstHexagon MI> { + // Addr: i32 + def: Pat<(Store (Oper (Load I32:$Rs), I32:$A), I32:$Rs), + (MI I32:$Rs, 0, I32:$A)>; + // Addr: fi + def: Pat<(Store (Oper (Load AddrFI:$Rs), I32:$A), AddrFI:$Rs), + (MI AddrFI:$Rs, 0, I32:$A)>; +} + +multiclass Memopxr_add_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred, + SDNode Oper, InstHexagon MI> { + // Addr: i32 + def: Pat<(Store (Oper (Load (add I32:$Rs, ImmPred:$Off)), I32:$A), + (add I32:$Rs, ImmPred:$Off)), + (MI I32:$Rs, imm:$Off, I32:$A)>; + def: Pat<(Store (Oper (Load (IsOrAdd I32:$Rs, ImmPred:$Off)), I32:$A), + (IsOrAdd I32:$Rs, ImmPred:$Off)), + (MI I32:$Rs, imm:$Off, I32:$A)>; + // Addr: fi + def: Pat<(Store (Oper (Load (add AddrFI:$Rs, ImmPred:$Off)), I32:$A), + (add AddrFI:$Rs, ImmPred:$Off)), + (MI AddrFI:$Rs, imm:$Off, I32:$A)>; + def: Pat<(Store (Oper (Load (IsOrAdd AddrFI:$Rs, ImmPred:$Off)), I32:$A), + (IsOrAdd AddrFI:$Rs, ImmPred:$Off)), + (MI AddrFI:$Rs, imm:$Off, I32:$A)>; +} + +multiclass Memopxr_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred, + SDNode Oper, InstHexagon MI> { + defm: Memopxr_simple_pat <Load, Store, Oper, MI>; + defm: Memopxr_add_pat <Load, Store, ImmPred, Oper, MI>; +} + +let AddedComplexity = 180 in { + // add reg + defm: Memopxr_pat<extloadi8, truncstorei8, u6_0ImmPred, add, + /*anyext*/ L4_add_memopb_io>; + defm: Memopxr_pat<sextloadi8, truncstorei8, u6_0ImmPred, add, + /*sext*/ L4_add_memopb_io>; + defm: Memopxr_pat<zextloadi8, truncstorei8, u6_0ImmPred, add, + /*zext*/ L4_add_memopb_io>; + defm: Memopxr_pat<extloadi16, truncstorei16, u6_1ImmPred, add, + /*anyext*/ L4_add_memoph_io>; + defm: Memopxr_pat<sextloadi16, truncstorei16, u6_1ImmPred, add, + /*sext*/ L4_add_memoph_io>; + defm: Memopxr_pat<zextloadi16, truncstorei16, u6_1ImmPred, add, + /*zext*/ L4_add_memoph_io>; + defm: Memopxr_pat<load, store, u6_2ImmPred, add, L4_add_memopw_io>; + + // sub reg + defm: Memopxr_pat<extloadi8, truncstorei8, u6_0ImmPred, sub, + /*anyext*/ L4_sub_memopb_io>; + defm: Memopxr_pat<sextloadi8, truncstorei8, u6_0ImmPred, sub, + /*sext*/ L4_sub_memopb_io>; + defm: Memopxr_pat<zextloadi8, truncstorei8, u6_0ImmPred, sub, + /*zext*/ L4_sub_memopb_io>; + defm: Memopxr_pat<extloadi16, truncstorei16, u6_1ImmPred, sub, + /*anyext*/ L4_sub_memoph_io>; + defm: Memopxr_pat<sextloadi16, truncstorei16, u6_1ImmPred, sub, + /*sext*/ L4_sub_memoph_io>; + defm: 
Memopxr_pat<zextloadi16, truncstorei16, u6_1ImmPred, sub,
                    /*zext*/   L4_sub_memoph_io>;
+  defm: Memopxr_pat<load, store, u6_2ImmPred, sub, L4_sub_memopw_io>;
+
+  // and reg
+  defm: Memopxr_pat<extloadi8, truncstorei8, u6_0ImmPred, and,
+                    /*anyext*/ L4_and_memopb_io>;
+  defm: Memopxr_pat<sextloadi8, truncstorei8, u6_0ImmPred, and,
+                    /*sext*/   L4_and_memopb_io>;
+  defm: Memopxr_pat<zextloadi8, truncstorei8, u6_0ImmPred, and,
+                    /*zext*/   L4_and_memopb_io>;
+  defm: Memopxr_pat<extloadi16, truncstorei16, u6_1ImmPred, and,
+                    /*anyext*/ L4_and_memoph_io>;
+  defm: Memopxr_pat<sextloadi16, truncstorei16, u6_1ImmPred, and,
+                    /*sext*/   L4_and_memoph_io>;
+  defm: Memopxr_pat<zextloadi16, truncstorei16, u6_1ImmPred, and,
+                    /*zext*/   L4_and_memoph_io>;
+  defm: Memopxr_pat<load, store, u6_2ImmPred, and, L4_and_memopw_io>;
+
+  // or reg
+  defm: Memopxr_pat<extloadi8, truncstorei8, u6_0ImmPred, or,
+                    /*anyext*/ L4_or_memopb_io>;
+  defm: Memopxr_pat<sextloadi8, truncstorei8, u6_0ImmPred, or,
+                    /*sext*/   L4_or_memopb_io>;
+  defm: Memopxr_pat<zextloadi8, truncstorei8, u6_0ImmPred, or,
+                    /*zext*/   L4_or_memopb_io>;
+  defm: Memopxr_pat<extloadi16, truncstorei16, u6_1ImmPred, or,
+                    /*anyext*/ L4_or_memoph_io>;
+  defm: Memopxr_pat<sextloadi16, truncstorei16, u6_1ImmPred, or,
+                    /*sext*/   L4_or_memoph_io>;
+  defm: Memopxr_pat<zextloadi16, truncstorei16, u6_1ImmPred, or,
+                    /*zext*/   L4_or_memoph_io>;
+  defm: Memopxr_pat<load, store, u6_2ImmPred, or, L4_or_memopw_io>;
+}
+
+
+multiclass Memopxi_simple_pat<PatFrag Load, PatFrag Store, SDNode Oper,
+                              PatFrag Arg, SDNodeXForm ArgMod,
+                              InstHexagon MI> {
+  // Addr: i32
+  def: Pat<(Store (Oper (Load I32:$Rs), Arg:$A), I32:$Rs),
+           (MI I32:$Rs, 0, (ArgMod Arg:$A))>;
+  // Addr: fi
+  def: Pat<(Store (Oper (Load AddrFI:$Rs), Arg:$A), AddrFI:$Rs),
+           (MI AddrFI:$Rs, 0, (ArgMod Arg:$A))>;
+}
+
+multiclass Memopxi_add_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred,
+                           SDNode Oper, PatFrag Arg, SDNodeXForm ArgMod,
+                           InstHexagon MI> {
+  // Addr: i32
+  def: Pat<(Store (Oper (Load (add I32:$Rs, ImmPred:$Off)), Arg:$A),
+                  (add I32:$Rs, ImmPred:$Off)),
+           (MI I32:$Rs, imm:$Off, (ArgMod Arg:$A))>;
+  def: Pat<(Store (Oper (Load (IsOrAdd I32:$Rs, ImmPred:$Off)), Arg:$A),
+                  (IsOrAdd I32:$Rs, ImmPred:$Off)),
+           (MI I32:$Rs, imm:$Off, (ArgMod Arg:$A))>;
+  // Addr: fi
+  def: Pat<(Store (Oper (Load (add AddrFI:$Rs, ImmPred:$Off)), Arg:$A),
+                  (add AddrFI:$Rs, ImmPred:$Off)),
+           (MI AddrFI:$Rs, imm:$Off, (ArgMod Arg:$A))>;
+  def: Pat<(Store (Oper (Load (IsOrAdd AddrFI:$Rs, ImmPred:$Off)), Arg:$A),
+                  (IsOrAdd AddrFI:$Rs, ImmPred:$Off)),
+           (MI AddrFI:$Rs, imm:$Off, (ArgMod Arg:$A))>;
+}
+
+multiclass Memopxi_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred,
+                       SDNode Oper, PatFrag Arg, SDNodeXForm ArgMod,
+                       InstHexagon MI> {
+  defm: Memopxi_simple_pat <Load, Store, Oper, Arg, ArgMod, MI>;
+  defm: Memopxi_add_pat <Load, Store, ImmPred, Oper, Arg, ArgMod, MI>;
+}
+
+
+let AddedComplexity = 200 in {
+  // add imm
+  defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, add, u5_0ImmPred,
+                    /*anyext*/ IdImm, L4_iadd_memopb_io>;
+  defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, add, u5_0ImmPred,
+                    /*sext*/   IdImm, L4_iadd_memopb_io>;
+  defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, add, u5_0ImmPred,
+                    /*zext*/   IdImm, L4_iadd_memopb_io>;
+  defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, add, u5_0ImmPred,
+                    /*anyext*/ IdImm, L4_iadd_memoph_io>;
+  defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, add, u5_0ImmPred,
+                    /*sext*/   IdImm, L4_iadd_memoph_io>;
+  defm: Memopxi_pat<zextloadi16,
truncstorei16, u6_1ImmPred, add, u5_0ImmPred, + /*zext*/ IdImm, L4_iadd_memoph_io>; + defm: Memopxi_pat<load, store, u6_2ImmPred, add, u5_0ImmPred, IdImm, + L4_iadd_memopw_io>; + defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, sub, m5_0Imm8Pred, + /*anyext*/ NegImm8, L4_iadd_memopb_io>; + defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, sub, m5_0Imm8Pred, + /*sext*/ NegImm8, L4_iadd_memopb_io>; + defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, sub, m5_0Imm8Pred, + /*zext*/ NegImm8, L4_iadd_memopb_io>; + defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, sub, m5_0Imm16Pred, + /*anyext*/ NegImm16, L4_iadd_memoph_io>; + defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, sub, m5_0Imm16Pred, + /*sext*/ NegImm16, L4_iadd_memoph_io>; + defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, sub, m5_0Imm16Pred, + /*zext*/ NegImm16, L4_iadd_memoph_io>; + defm: Memopxi_pat<load, store, u6_2ImmPred, sub, m5_0ImmPred, NegImm32, + L4_iadd_memopw_io>; + + // sub imm + defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, sub, u5_0ImmPred, + /*anyext*/ IdImm, L4_isub_memopb_io>; + defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, sub, u5_0ImmPred, + /*sext*/ IdImm, L4_isub_memopb_io>; + defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, sub, u5_0ImmPred, + /*zext*/ IdImm, L4_isub_memopb_io>; + defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, sub, u5_0ImmPred, + /*anyext*/ IdImm, L4_isub_memoph_io>; + defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, sub, u5_0ImmPred, + /*sext*/ IdImm, L4_isub_memoph_io>; + defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, sub, u5_0ImmPred, + /*zext*/ IdImm, L4_isub_memoph_io>; + defm: Memopxi_pat<load, store, u6_2ImmPred, sub, u5_0ImmPred, IdImm, + L4_isub_memopw_io>; + defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, add, m5_0Imm8Pred, + /*anyext*/ NegImm8, L4_isub_memopb_io>; + defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, add, m5_0Imm8Pred, + /*sext*/ NegImm8, L4_isub_memopb_io>; + defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, add, m5_0Imm8Pred, + /*zext*/ NegImm8, L4_isub_memopb_io>; + defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, add, m5_0Imm16Pred, + /*anyext*/ NegImm16, L4_isub_memoph_io>; + defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, add, m5_0Imm16Pred, + /*sext*/ NegImm16, L4_isub_memoph_io>; + defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, add, m5_0Imm16Pred, + /*zext*/ NegImm16, L4_isub_memoph_io>; + defm: Memopxi_pat<load, store, u6_2ImmPred, add, m5_0ImmPred, NegImm32, + L4_isub_memopw_io>; + + // clrbit imm + defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, and, IsNPow2_8, + /*anyext*/ LogN2_8, L4_iand_memopb_io>; + defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, and, IsNPow2_8, + /*sext*/ LogN2_8, L4_iand_memopb_io>; + defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, and, IsNPow2_8, + /*zext*/ LogN2_8, L4_iand_memopb_io>; + defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, and, IsNPow2_16, + /*anyext*/ LogN2_16, L4_iand_memoph_io>; + defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, and, IsNPow2_16, + /*sext*/ LogN2_16, L4_iand_memoph_io>; + defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, and, IsNPow2_16, + /*zext*/ LogN2_16, L4_iand_memoph_io>; + defm: Memopxi_pat<load, store, u6_2ImmPred, and, IsNPow2_32, + LogN2_32, L4_iand_memopw_io>; + + // setbit imm + defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, or, IsPow2_32, + /*anyext*/ Log2_8, 
L4_ior_memopb_io>;
+  defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, or, IsPow2_32,
+                    /*sext*/   Log2_8, L4_ior_memopb_io>;
+  defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, or, IsPow2_32,
+                    /*zext*/   Log2_8, L4_ior_memopb_io>;
+  defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, or, IsPow2_32,
+                    /*anyext*/ Log2_16, L4_ior_memoph_io>;
+  defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, or, IsPow2_32,
+                    /*sext*/   Log2_16, L4_ior_memoph_io>;
+  defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, or, IsPow2_32,
+                    /*zext*/   Log2_16, L4_ior_memoph_io>;
+  defm: Memopxi_pat<load, store, u6_2ImmPred, or, IsPow2_32,
+                    Log2_32, L4_ior_memopw_io>;
+}
+
+def : T_CMP_pat <C4_cmpneqi,  setne,  s32_0ImmPred>;
+def : T_CMP_pat <C4_cmpltei,  setle,  s32_0ImmPred>;
+def : T_CMP_pat <C4_cmplteui, setule, u9_0ImmPred>;
+
+// Map cmplt(Rs, Imm) -> !cmpgt(Rs, Imm-1).
+def: Pat<(i1 (setlt I32:$src1, s32_0ImmPred:$src2)),
+         (C4_cmpltei IntRegs:$src1, (SDEC1 s32_0ImmPred:$src2))>;
+
+// rs != rt -> !(rs == rt).
+def: Pat<(i1 (setne I32:$src1, s32_0ImmPred:$src2)),
+         (C4_cmpneqi IntRegs:$src1, s32_0ImmPred:$src2)>;
+
+// For the sequence
+//   zext(setult(and(Rs, 255), u8))
+// use the isdigit transformation below.
+
+
+def u7_0PosImmPred : ImmLeaf<i32, [{
+  // True if the immediate fits in a 7-bit unsigned field and
+  // is strictly greater than 0.
+  return Imm > 0 && isUInt<7>(Imm);
+}]>;
+
+
+// Generate code of the form 'C2_muxii(cmpbgtui(Rdd, C-1), 0, 1)'
+// for C code of the form r = ((c >= '0') & (c <= '9')) ? 1 : 0;
+// The isdigit transformation relies on two 'clever' aspects:
+// 1) The data type is unsigned, which allows us to eliminate a zero test
+//    after biasing the expression by 48. We depend on the representation
+//    and semantics of the unsigned types.
+// 2) The front end has converted <= 9 into < 10 on entry to LLVM.
+//
+// For the C code:
+//   retval = ((c >= '0') & (c <= '9')) ? 1 : 0;
+// the code is transformed upstream of LLVM into
+//   retval = (c-48) < 10 ?
1 : 0; + +let AddedComplexity = 139 in +def: Pat<(i32 (zext (i1 (setult (and I32:$src1, 255), u7_0PosImmPred:$src2)))), + (C2_muxii (A4_cmpbgtui IntRegs:$src1, (UDEC1 imm:$src2)), 0, 1)>; + +class Loada_pat<PatFrag Load, ValueType VT, PatFrag Addr, InstHexagon MI> + : Pat<(VT (Load Addr:$addr)), (MI Addr:$addr)>; + +class Loadam_pat<PatFrag Load, ValueType VT, PatFrag Addr, PatFrag ValueMod, + InstHexagon MI> + : Pat<(VT (Load Addr:$addr)), (ValueMod (MI Addr:$addr))>; + +class Storea_pat<PatFrag Store, PatFrag Value, PatFrag Addr, InstHexagon MI> + : Pat<(Store Value:$val, Addr:$addr), (MI Addr:$addr, Value:$val)>; + +class Stoream_pat<PatFrag Store, PatFrag Value, PatFrag Addr, PatFrag ValueMod, + InstHexagon MI> + : Pat<(Store Value:$val, Addr:$addr), + (MI Addr:$addr, (ValueMod Value:$val))>; + +let AddedComplexity = 30 in { + def: Storea_pat<truncstorei8, I32, addrga, PS_storerbabs>; + def: Storea_pat<truncstorei16, I32, addrga, PS_storerhabs>; + def: Storea_pat<store, I32, addrga, PS_storeriabs>; + def: Storea_pat<store, I64, addrga, PS_storerdabs>; + + def: Stoream_pat<truncstorei8, I64, addrga, LoReg, PS_storerbabs>; + def: Stoream_pat<truncstorei16, I64, addrga, LoReg, PS_storerhabs>; + def: Stoream_pat<truncstorei32, I64, addrga, LoReg, PS_storeriabs>; +} + +def: Storea_pat<SwapSt<atomic_store_8>, I32, addrgp, S2_storerbgp>; +def: Storea_pat<SwapSt<atomic_store_16>, I32, addrgp, S2_storerhgp>; +def: Storea_pat<SwapSt<atomic_store_32>, I32, addrgp, S2_storerigp>; +def: Storea_pat<SwapSt<atomic_store_64>, I64, addrgp, S2_storerdgp>; + +let AddedComplexity = 100 in { + def: Storea_pat<truncstorei8, I32, addrgp, S2_storerbgp>; + def: Storea_pat<truncstorei16, I32, addrgp, S2_storerhgp>; + def: Storea_pat<store, I32, addrgp, S2_storerigp>; + def: Storea_pat<store, I64, addrgp, S2_storerdgp>; + + // Map from "i1 = constant<-1>; memw(CONST32(#foo)) = i1" + // to "r0 = 1; memw(#foo) = r0" + let AddedComplexity = 100 in + def: Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)), + (S2_storerbgp tglobaladdr:$global, (A2_tfrsi 1))>; +} + +class LoadAbs_pats <PatFrag ldOp, InstHexagon MI, ValueType VT = i32> + : Pat <(VT (ldOp (HexagonCONST32 tglobaladdr:$absaddr))), + (VT (MI tglobaladdr:$absaddr))>; + +let AddedComplexity = 30 in { + def: LoadAbs_pats <load, PS_loadriabs>; + def: LoadAbs_pats <zextloadi1, PS_loadrubabs>; + def: LoadAbs_pats <sextloadi8, PS_loadrbabs>; + def: LoadAbs_pats <extloadi8, PS_loadrubabs>; + def: LoadAbs_pats <zextloadi8, PS_loadrubabs>; + def: LoadAbs_pats <sextloadi16, PS_loadrhabs>; + def: LoadAbs_pats <extloadi16, PS_loadruhabs>; + def: LoadAbs_pats <zextloadi16, PS_loadruhabs>; + def: LoadAbs_pats <load, PS_loadrdabs, i64>; +} + +let AddedComplexity = 30 in +def: Pat<(i64 (zextloadi1 (HexagonCONST32 tglobaladdr:$absaddr))), + (ToZext64 (PS_loadrubabs tglobaladdr:$absaddr))>; + +def: Loada_pat<atomic_load_8, i32, addrgp, L2_loadrubgp>; +def: Loada_pat<atomic_load_16, i32, addrgp, L2_loadruhgp>; +def: Loada_pat<atomic_load_32, i32, addrgp, L2_loadrigp>; +def: Loada_pat<atomic_load_64, i64, addrgp, L2_loadrdgp>; + +def: Loadam_pat<load, i1, addrga, I32toI1, PS_loadrubabs>; +def: Loadam_pat<load, i1, addrgp, I32toI1, L2_loadrubgp>; + +def: Stoream_pat<store, I1, addrga, I1toI32, PS_storerbabs>; +def: Stoream_pat<store, I1, addrgp, I1toI32, S2_storerbgp>; + +// Map from load(globaladdress) -> mem[u][bhwd](#foo) +class LoadGP_pats <PatFrag ldOp, InstHexagon MI, ValueType VT = i32> + : Pat <(VT (ldOp (HexagonCONST32_GP tglobaladdr:$global))), + (VT (MI 
tglobaladdr:$global))>; + +let AddedComplexity = 100 in { + def: LoadGP_pats <extloadi8, L2_loadrubgp>; + def: LoadGP_pats <sextloadi8, L2_loadrbgp>; + def: LoadGP_pats <zextloadi8, L2_loadrubgp>; + def: LoadGP_pats <extloadi16, L2_loadruhgp>; + def: LoadGP_pats <sextloadi16, L2_loadrhgp>; + def: LoadGP_pats <zextloadi16, L2_loadruhgp>; + def: LoadGP_pats <load, L2_loadrigp>; + def: LoadGP_pats <load, L2_loadrdgp, i64>; +} + +// When the Interprocedural Global Variable optimizer realizes that a certain +// global variable takes only two constant values, it shrinks the global to +// a boolean. Catch those loads here in the following 3 patterns. +let AddedComplexity = 100 in { + def: LoadGP_pats <extloadi1, L2_loadrubgp>; + def: LoadGP_pats <zextloadi1, L2_loadrubgp>; +} + +// Transfer global address into a register +def: Pat<(HexagonCONST32 tglobaladdr:$Rs), (A2_tfrsi imm:$Rs)>; +def: Pat<(HexagonCONST32_GP tblockaddress:$Rs), (A2_tfrsi imm:$Rs)>; +def: Pat<(HexagonCONST32_GP tglobaladdr:$Rs), (A2_tfrsi imm:$Rs)>; + +let AddedComplexity = 30 in { + def: Storea_pat<truncstorei8, I32, u32_0ImmPred, PS_storerbabs>; + def: Storea_pat<truncstorei16, I32, u32_0ImmPred, PS_storerhabs>; + def: Storea_pat<store, I32, u32_0ImmPred, PS_storeriabs>; +} + +let AddedComplexity = 30 in { + def: Loada_pat<load, i32, u32_0ImmPred, PS_loadriabs>; + def: Loada_pat<sextloadi8, i32, u32_0ImmPred, PS_loadrbabs>; + def: Loada_pat<zextloadi8, i32, u32_0ImmPred, PS_loadrubabs>; + def: Loada_pat<sextloadi16, i32, u32_0ImmPred, PS_loadrhabs>; + def: Loada_pat<zextloadi16, i32, u32_0ImmPred, PS_loadruhabs>; +} + +// Indexed store word - global address. +// memw(Rs+#u6:2)=#S8 +let AddedComplexity = 100 in +defm: Storex_add_pat<store, addrga, u6_2ImmPred, S4_storeiri_io>; + +// Load from a global address that has only one use in the current basic block. +let AddedComplexity = 100 in { + def: Loada_pat<extloadi8, i32, addrga, PS_loadrubabs>; + def: Loada_pat<sextloadi8, i32, addrga, PS_loadrbabs>; + def: Loada_pat<zextloadi8, i32, addrga, PS_loadrubabs>; + + def: Loada_pat<extloadi16, i32, addrga, PS_loadruhabs>; + def: Loada_pat<sextloadi16, i32, addrga, PS_loadrhabs>; + def: Loada_pat<zextloadi16, i32, addrga, PS_loadruhabs>; + + def: Loada_pat<load, i32, addrga, PS_loadriabs>; + def: Loada_pat<load, i64, addrga, PS_loadrdabs>; +} + +// Store to a global address that has only one use in the current basic block. +let AddedComplexity = 100 in { + def: Storea_pat<truncstorei8, I32, addrga, PS_storerbabs>; + def: Storea_pat<truncstorei16, I32, addrga, PS_storerhabs>; + def: Storea_pat<store, I32, addrga, PS_storeriabs>; + def: Storea_pat<store, I64, addrga, PS_storerdabs>; + + def: Stoream_pat<truncstorei32, I64, addrga, LoReg, PS_storeriabs>; +} + +// i8/i16/i32 -> i64 loads +// We need a complexity of 120 here to override preceding handling of +// zextload. 
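+// (TableGen tries patterns in decreasing order of complexity, so 120 puts
+// these ahead of the earlier extending-load patterns, e.g. the Loadxm_pat
+// forms that would otherwise match the same (i64 (zextloadi8 ...)) node.)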
+let AddedComplexity = 120 in { + def: Loadam_pat<extloadi8, i64, addrga, ToZext64, PS_loadrubabs>; + def: Loadam_pat<sextloadi8, i64, addrga, ToSext64, PS_loadrbabs>; + def: Loadam_pat<zextloadi8, i64, addrga, ToZext64, PS_loadrubabs>; + + def: Loadam_pat<extloadi16, i64, addrga, ToZext64, PS_loadruhabs>; + def: Loadam_pat<sextloadi16, i64, addrga, ToSext64, PS_loadrhabs>; + def: Loadam_pat<zextloadi16, i64, addrga, ToZext64, PS_loadruhabs>; + + def: Loadam_pat<extloadi32, i64, addrga, ToZext64, PS_loadriabs>; + def: Loadam_pat<sextloadi32, i64, addrga, ToSext64, PS_loadriabs>; + def: Loadam_pat<zextloadi32, i64, addrga, ToZext64, PS_loadriabs>; +} + +let AddedComplexity = 100 in { + def: Loada_pat<extloadi8, i32, addrgp, PS_loadrubabs>; + def: Loada_pat<sextloadi8, i32, addrgp, PS_loadrbabs>; + def: Loada_pat<zextloadi8, i32, addrgp, PS_loadrubabs>; + + def: Loada_pat<extloadi16, i32, addrgp, PS_loadruhabs>; + def: Loada_pat<sextloadi16, i32, addrgp, PS_loadrhabs>; + def: Loada_pat<zextloadi16, i32, addrgp, PS_loadruhabs>; + + def: Loada_pat<load, i32, addrgp, PS_loadriabs>; + def: Loada_pat<load, i64, addrgp, PS_loadrdabs>; +} + +let AddedComplexity = 100 in { + def: Storea_pat<truncstorei8, I32, addrgp, PS_storerbabs>; + def: Storea_pat<truncstorei16, I32, addrgp, PS_storerhabs>; + def: Storea_pat<store, I32, addrgp, PS_storeriabs>; + def: Storea_pat<store, I64, addrgp, PS_storerdabs>; +} + +def: Loada_pat<atomic_load_8, i32, addrgp, PS_loadrubabs>; +def: Loada_pat<atomic_load_16, i32, addrgp, PS_loadruhabs>; +def: Loada_pat<atomic_load_32, i32, addrgp, PS_loadriabs>; +def: Loada_pat<atomic_load_64, i64, addrgp, PS_loadrdabs>; + +def: Storea_pat<SwapSt<atomic_store_8>, I32, addrgp, PS_storerbabs>; +def: Storea_pat<SwapSt<atomic_store_16>, I32, addrgp, PS_storerhabs>; +def: Storea_pat<SwapSt<atomic_store_32>, I32, addrgp, PS_storeriabs>; +def: Storea_pat<SwapSt<atomic_store_64>, I64, addrgp, PS_storerdabs>; + +def: Pat<(or (or (or (shl (i64 (zext (and I32:$b, (i32 65535)))), (i32 16)), + (i64 (zext (i32 (and I32:$a, (i32 65535)))))), + (shl (i64 (anyext (and I32:$c, (i32 65535)))), (i32 32))), + (shl (Aext64 I32:$d), (i32 48))), + (A2_combinew (A2_combine_ll I32:$d, I32:$c), + (A2_combine_ll I32:$b, I32:$a))>; + +// We need custom lowering of ISD::PREFETCH into HexagonISD::DCFETCH +// because the SDNode ISD::PREFETCH has properties MayLoad and MayStore. +// We don't really want either one here. 
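+// Roughly (an illustrative lowering, assuming the usual builtin expansion):
+//   __builtin_prefetch(p);      // becomes ISD::PREFETCH, then
+//   dcfetch(r0 + #0)            // HexagonISD::DCFETCH -> Y2_dcfetchbo
+// with an (add Rs, #u11:3) address folded into the offset field, per the
+// patterns below.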
+def SDTHexagonDCFETCH : SDTypeProfile<0, 2, [SDTCisPtrTy<0>,SDTCisInt<1>]>; +def HexagonDCFETCH : SDNode<"HexagonISD::DCFETCH", SDTHexagonDCFETCH, + [SDNPHasChain]>; + +def: Pat<(HexagonDCFETCH IntRegs:$Rs, u11_3ImmPred:$u11_3), + (Y2_dcfetchbo IntRegs:$Rs, imm:$u11_3)>; +def: Pat<(HexagonDCFETCH (i32 (add IntRegs:$Rs, u11_3ImmPred:$u11_3)), (i32 0)), + (Y2_dcfetchbo IntRegs:$Rs, imm:$u11_3)>; + +def f32ImmPred : PatLeaf<(f32 fpimm:$F)>; +def f64ImmPred : PatLeaf<(f64 fpimm:$F)>; + +def ftoi : SDNodeXForm<fpimm, [{ + APInt I = N->getValueAPF().bitcastToAPInt(); + return CurDAG->getTargetConstant(I.getZExtValue(), SDLoc(N), + MVT::getIntegerVT(I.getBitWidth())); +}]>; + + +def: Pat<(sra (i64 (add (sra I64:$src1, u6_0ImmPred:$src2), 1)), (i32 1)), + (S2_asr_i_p_rnd I64:$src1, imm:$src2)>; + +def SDTHexagonI32I64: SDTypeProfile<1, 1, [SDTCisVT<0, i32>, + SDTCisVT<1, i64>]>; +def HexagonPOPCOUNT: SDNode<"HexagonISD::POPCOUNT", SDTHexagonI32I64>; + +def: Pat<(HexagonPOPCOUNT I64:$Rss), (S5_popcountp I64:$Rss)>; + +let AddedComplexity = 20 in { + defm: Loadx_pat<load, f32, s30_2ImmPred, L2_loadri_io>; + defm: Loadx_pat<load, f64, s29_3ImmPred, L2_loadrd_io>; +} + +let AddedComplexity = 60 in { + defm : T_LoadAbsReg_Pat <load, L4_loadri_ur, f32>; + defm : T_LoadAbsReg_Pat <load, L4_loadrd_ur, f64>; +} + +let AddedComplexity = 40 in { + def: Loadxs_pat<load, f32, L4_loadri_rr>; + def: Loadxs_pat<load, f64, L4_loadrd_rr>; +} + +let AddedComplexity = 20 in { + def: Loadxs_simple_pat<load, f32, L4_loadri_rr>; + def: Loadxs_simple_pat<load, f64, L4_loadrd_rr>; +} + +let AddedComplexity = 80 in { + def: Loada_pat<load, f32, u32_0ImmPred, PS_loadriabs>; + def: Loada_pat<load, f32, addrga, PS_loadriabs>; + def: Loada_pat<load, f64, addrga, PS_loadrdabs>; +} + +let AddedComplexity = 100 in { + def: LoadGP_pats <load, L2_loadrigp, f32>; + def: LoadGP_pats <load, L2_loadrdgp, f64>; +} + +let AddedComplexity = 20 in { + defm: Storex_pat<store, F32, s30_2ImmPred, S2_storeri_io>; + defm: Storex_pat<store, F64, s29_3ImmPred, S2_storerd_io>; +} + +// Simple patterns should be tried with the least priority. 
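+// (With no AddedComplexity, these match only when none of the indexed,
+// absolute, or GP-relative store patterns above apply.)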
+def: Storex_simple_pat<store, F32, S2_storeri_io>; +def: Storex_simple_pat<store, F64, S2_storerd_io>; + +let AddedComplexity = 60 in { + defm : T_StoreAbsReg_Pats <S4_storeri_ur, IntRegs, f32, store>; + defm : T_StoreAbsReg_Pats <S4_storerd_ur, DoubleRegs, f64, store>; +} + +let AddedComplexity = 40 in { + def: Storexs_pat<store, F32, S4_storeri_rr>; + def: Storexs_pat<store, F64, S4_storerd_rr>; +} + +let AddedComplexity = 20 in { + def: Store_rr_pat<store, F32, S4_storeri_rr>; + def: Store_rr_pat<store, F64, S4_storerd_rr>; +} + +let AddedComplexity = 80 in { + def: Storea_pat<store, F32, addrga, PS_storeriabs>; + def: Storea_pat<store, F64, addrga, PS_storerdabs>; +} + +let AddedComplexity = 100 in { + def: Storea_pat<store, F32, addrgp, S2_storerigp>; + def: Storea_pat<store, F64, addrgp, S2_storerdgp>; +} + +defm: Storex_pat<store, F32, s30_2ImmPred, S2_storeri_io>; +defm: Storex_pat<store, F64, s29_3ImmPred, S2_storerd_io>; +def: Storex_simple_pat<store, F32, S2_storeri_io>; +def: Storex_simple_pat<store, F64, S2_storerd_io>; + +def: Pat<(fadd F32:$src1, F32:$src2), + (F2_sfadd F32:$src1, F32:$src2)>; + +def: Pat<(fsub F32:$src1, F32:$src2), + (F2_sfsub F32:$src1, F32:$src2)>; + +def: Pat<(fmul F32:$src1, F32:$src2), + (F2_sfmpy F32:$src1, F32:$src2)>; + +let Predicates = [HasV5T] in { + def: Pat<(f32 (fminnum F32:$Rs, F32:$Rt)), (F2_sfmin F32:$Rs, F32:$Rt)>; + def: Pat<(f32 (fmaxnum F32:$Rs, F32:$Rt)), (F2_sfmax F32:$Rs, F32:$Rt)>; +} + +let AddedComplexity = 100, Predicates = [HasV5T] in { + class SfSel12<PatFrag Cmp, InstHexagon MI> + : Pat<(select (i1 (Cmp F32:$Rs, F32:$Rt)), F32:$Rs, F32:$Rt), + (MI F32:$Rs, F32:$Rt)>; + class SfSel21<PatFrag Cmp, InstHexagon MI> + : Pat<(select (i1 (Cmp F32:$Rs, F32:$Rt)), F32:$Rt, F32:$Rs), + (MI F32:$Rs, F32:$Rt)>; + + def: SfSel12<setolt, F2_sfmin>; + def: SfSel12<setole, F2_sfmin>; + def: SfSel12<setogt, F2_sfmax>; + def: SfSel12<setoge, F2_sfmax>; + def: SfSel21<setolt, F2_sfmax>; + def: SfSel21<setole, F2_sfmax>; + def: SfSel21<setogt, F2_sfmin>; + def: SfSel21<setoge, F2_sfmin>; +} + +class T_fcmp32_pat<PatFrag OpNode, InstHexagon MI> + : Pat<(i1 (OpNode F32:$src1, F32:$src2)), + (MI F32:$src1, F32:$src2)>; +class T_fcmp64_pat<PatFrag OpNode, InstHexagon MI> + : Pat<(i1 (OpNode F64:$src1, F64:$src2)), + (MI F64:$src1, F64:$src2)>; + +def: T_fcmp32_pat<setoge, F2_sfcmpge>; +def: T_fcmp32_pat<setuo, F2_sfcmpuo>; +def: T_fcmp32_pat<setoeq, F2_sfcmpeq>; +def: T_fcmp32_pat<setogt, F2_sfcmpgt>; + +def: T_fcmp64_pat<setoge, F2_dfcmpge>; +def: T_fcmp64_pat<setuo, F2_dfcmpuo>; +def: T_fcmp64_pat<setoeq, F2_dfcmpeq>; +def: T_fcmp64_pat<setogt, F2_dfcmpgt>; + +let Predicates = [HasV5T] in +multiclass T_fcmp_pats<PatFrag cmpOp, InstHexagon IntMI, InstHexagon DoubleMI> { + // IntRegs + def: Pat<(i1 (cmpOp F32:$src1, F32:$src2)), + (IntMI F32:$src1, F32:$src2)>; + // DoubleRegs + def: Pat<(i1 (cmpOp F64:$src1, F64:$src2)), + (DoubleMI F64:$src1, F64:$src2)>; +} + +defm : T_fcmp_pats <seteq, F2_sfcmpeq, F2_dfcmpeq>; +defm : T_fcmp_pats <setgt, F2_sfcmpgt, F2_dfcmpgt>; +defm : T_fcmp_pats <setge, F2_sfcmpge, F2_dfcmpge>; + +//===----------------------------------------------------------------------===// +// Multiclass to define 'Def Pats' for unordered gt, ge, eq operations. 
+//===----------------------------------------------------------------------===// +let Predicates = [HasV5T] in +multiclass unord_Pats <PatFrag cmpOp, InstHexagon IntMI, InstHexagon DoubleMI> { + // IntRegs + def: Pat<(i1 (cmpOp F32:$src1, F32:$src2)), + (C2_or (F2_sfcmpuo F32:$src1, F32:$src2), + (IntMI F32:$src1, F32:$src2))>; + + // DoubleRegs + def: Pat<(i1 (cmpOp F64:$src1, F64:$src2)), + (C2_or (F2_dfcmpuo F64:$src1, F64:$src2), + (DoubleMI F64:$src1, F64:$src2))>; +} + +defm : unord_Pats <setuge, F2_sfcmpge, F2_dfcmpge>; +defm : unord_Pats <setugt, F2_sfcmpgt, F2_dfcmpgt>; +defm : unord_Pats <setueq, F2_sfcmpeq, F2_dfcmpeq>; + +//===----------------------------------------------------------------------===// +// Multiclass to define 'Def Pats' for the following dags: +// seteq(setoeq(op1, op2), 0) -> not(setoeq(op1, op2)) +// seteq(setoeq(op1, op2), 1) -> setoeq(op1, op2) +// setne(setoeq(op1, op2), 0) -> setoeq(op1, op2) +// setne(setoeq(op1, op2), 1) -> not(setoeq(op1, op2)) +//===----------------------------------------------------------------------===// +let Predicates = [HasV5T] in +multiclass eq_ordgePats <PatFrag cmpOp, InstHexagon IntMI, + InstHexagon DoubleMI> { + // IntRegs + def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 0)), + (C2_not (IntMI F32:$src1, F32:$src2))>; + def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 1)), + (IntMI F32:$src1, F32:$src2)>; + def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 0)), + (IntMI F32:$src1, F32:$src2)>; + def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 1)), + (C2_not (IntMI F32:$src1, F32:$src2))>; + + // DoubleRegs + def : Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 0)), + (C2_not (DoubleMI F64:$src1, F64:$src2))>; + def : Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 1)), + (DoubleMI F64:$src1, F64:$src2)>; + def : Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 0)), + (DoubleMI F64:$src1, F64:$src2)>; + def : Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 1)), + (C2_not (DoubleMI F64:$src1, F64:$src2))>; +} + +defm : eq_ordgePats<setoeq, F2_sfcmpeq, F2_dfcmpeq>; +defm : eq_ordgePats<setoge, F2_sfcmpge, F2_dfcmpge>; +defm : eq_ordgePats<setogt, F2_sfcmpgt, F2_dfcmpgt>; + +//===----------------------------------------------------------------------===// +// Multiclass to define 'Def Pats' for the following dags: +// seteq(setolt(op1, op2), 0) -> not(setogt(op2, op1)) +// seteq(setolt(op1, op2), 1) -> setogt(op2, op1) +// setne(setolt(op1, op2), 0) -> setogt(op2, op1) +// setne(setolt(op1, op2), 1) -> not(setogt(op2, op1)) +//===----------------------------------------------------------------------===// +let Predicates = [HasV5T] in +multiclass eq_ordltPats <PatFrag cmpOp, InstHexagon IntMI, + InstHexagon DoubleMI> { + // IntRegs + def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 0)), + (C2_not (IntMI F32:$src2, F32:$src1))>; + def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 1)), + (IntMI F32:$src2, F32:$src1)>; + def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 0)), + (IntMI F32:$src2, F32:$src1)>; + def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 1)), + (C2_not (IntMI F32:$src2, F32:$src1))>; + + // DoubleRegs + def: Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 0)), + (C2_not (DoubleMI F64:$src2, F64:$src1))>; + def: Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 1)), + (DoubleMI F64:$src2, F64:$src1)>; + def: Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 0)), + (DoubleMI F64:$src2, F64:$src1)>; + def: Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 
1)),
+           (C2_not (DoubleMI F64:$src2, F64:$src1))>;
+}
+
+defm : eq_ordltPats<setole, F2_sfcmpge, F2_dfcmpge>;
+defm : eq_ordltPats<setolt, F2_sfcmpgt, F2_dfcmpgt>;
+
+
+// seto is the inverse of setuo. See http://llvm.org/docs/LangRef.html#i_fcmp
+let Predicates = [HasV5T] in {
+  def: Pat<(i1 (seto F32:$src1, F32:$src2)),
+           (C2_not (F2_sfcmpuo F32:$src2, F32:$src1))>;
+  def: Pat<(i1 (seto F32:$src1, f32ImmPred:$src2)),
+           (C2_not (F2_sfcmpuo (f32 (A2_tfrsi (ftoi $src2))), F32:$src1))>;
+  def: Pat<(i1 (seto F64:$src1, F64:$src2)),
+           (C2_not (F2_dfcmpuo F64:$src2, F64:$src1))>;
+  def: Pat<(i1 (seto F64:$src1, f64ImmPred:$src2)),
+           (C2_not (F2_dfcmpuo (CONST64 (ftoi $src2)), F64:$src1))>;
+}
+
+// Ordered lt.
+let Predicates = [HasV5T] in {
+  def: Pat<(i1 (setolt F32:$src1, F32:$src2)),
+           (F2_sfcmpgt F32:$src2, F32:$src1)>;
+  def: Pat<(i1 (setolt F32:$src1, f32ImmPred:$src2)),
+           (F2_sfcmpgt (f32 (A2_tfrsi (ftoi $src2))), F32:$src1)>;
+  def: Pat<(i1 (setolt F64:$src1, F64:$src2)),
+           (F2_dfcmpgt F64:$src2, F64:$src1)>;
+  def: Pat<(i1 (setolt F64:$src1, f64ImmPred:$src2)),
+           (F2_dfcmpgt (CONST64 (ftoi $src2)), F64:$src1)>;
+}
+
+// Unordered lt.
+let Predicates = [HasV5T] in {
+  def: Pat<(i1 (setult F32:$src1, F32:$src2)),
+           (C2_or (F2_sfcmpuo F32:$src1, F32:$src2),
+                  (F2_sfcmpgt F32:$src2, F32:$src1))>;
+  def: Pat<(i1 (setult F32:$src1, f32ImmPred:$src2)),
+           (C2_or (F2_sfcmpuo F32:$src1, (f32 (A2_tfrsi (ftoi $src2)))),
+                  (F2_sfcmpgt (f32 (A2_tfrsi (ftoi $src2))), F32:$src1))>;
+  def: Pat<(i1 (setult F64:$src1, F64:$src2)),
+           (C2_or (F2_dfcmpuo F64:$src1, F64:$src2),
+                  (F2_dfcmpgt F64:$src2, F64:$src1))>;
+  def: Pat<(i1 (setult F64:$src1, f64ImmPred:$src2)),
+           (C2_or (F2_dfcmpuo F64:$src1, (CONST64 (ftoi $src2))),
+                  (F2_dfcmpgt (CONST64 (ftoi $src2)), F64:$src1))>;
+}
+
+// Ordered le.
+let Predicates = [HasV5T] in {
+  // rs <= rt -> rt >= rs.
+  def: Pat<(i1 (setole F32:$src1, F32:$src2)),
+           (F2_sfcmpge F32:$src2, F32:$src1)>;
+  def: Pat<(i1 (setole F32:$src1, f32ImmPred:$src2)),
+           (F2_sfcmpge (f32 (A2_tfrsi (ftoi $src2))), F32:$src1)>;
+
+  // Rss <= Rtt -> Rtt >= Rss.
+  def: Pat<(i1 (setole F64:$src1, F64:$src2)),
+           (F2_dfcmpge F64:$src2, F64:$src1)>;
+  def: Pat<(i1 (setole F64:$src1, f64ImmPred:$src2)),
+           (F2_dfcmpge (CONST64 (ftoi $src2)), F64:$src1)>;
+}
+
+// Unordered le.
+let Predicates = [HasV5T] in {
+  // rs <= rt -> rt >= rs.
+  def: Pat<(i1 (setule F32:$src1, F32:$src2)),
+           (C2_or (F2_sfcmpuo F32:$src1, F32:$src2),
+                  (F2_sfcmpge F32:$src2, F32:$src1))>;
+  def: Pat<(i1 (setule F32:$src1, f32ImmPred:$src2)),
+           (C2_or (F2_sfcmpuo F32:$src1, (f32 (A2_tfrsi (ftoi $src2)))),
+                  (F2_sfcmpge (f32 (A2_tfrsi (ftoi $src2))), F32:$src1))>;
+  def: Pat<(i1 (setule F64:$src1, F64:$src2)),
+           (C2_or (F2_dfcmpuo F64:$src1, F64:$src2),
+                  (F2_dfcmpge F64:$src2, F64:$src1))>;
+  def: Pat<(i1 (setule F64:$src1, f64ImmPred:$src2)),
+           (C2_or (F2_dfcmpuo F64:$src1, (CONST64 (ftoi $src2))),
+                  (F2_dfcmpge (CONST64 (ftoi $src2)), F64:$src1))>;
+}
+
+// Ordered ne.
+let Predicates = [HasV5T] in {
+  def: Pat<(i1 (setone F32:$src1, F32:$src2)),
+           (C2_not (F2_sfcmpeq F32:$src1, F32:$src2))>;
+  def: Pat<(i1 (setone F64:$src1, F64:$src2)),
+           (C2_not (F2_dfcmpeq F64:$src1, F64:$src2))>;
+  def: Pat<(i1 (setone F32:$src1, f32ImmPred:$src2)),
+           (C2_not (F2_sfcmpeq F32:$src1, (f32 (A2_tfrsi (ftoi $src2)))))>;
+  def: Pat<(i1 (setone F64:$src1, f64ImmPred:$src2)),
+           (C2_not (F2_dfcmpeq F64:$src1, (CONST64 (ftoi $src2))))>;
+}
+
+// Unordered ne.
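+// setune(a, b) holds when the operands are unordered (either is a NaN) or
+// when they compare not-equal, hence the C2_or of the sfcmpuo/dfcmpuo term
+// with the negated equality below; e.g. setune(1.0, NaN) is true via the
+// uo term.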
+let Predicates = [HasV5T] in {
+  def: Pat<(i1 (setune F32:$src1, F32:$src2)),
+           (C2_or (F2_sfcmpuo F32:$src1, F32:$src2),
+                  (C2_not (F2_sfcmpeq F32:$src1, F32:$src2)))>;
+  def: Pat<(i1 (setune F64:$src1, F64:$src2)),
+           (C2_or (F2_dfcmpuo F64:$src1, F64:$src2),
+                  (C2_not (F2_dfcmpeq F64:$src1, F64:$src2)))>;
+  def: Pat<(i1 (setune F32:$src1, f32ImmPred:$src2)),
+           (C2_or (F2_sfcmpuo F32:$src1, (f32 (A2_tfrsi (ftoi $src2)))),
+                  (C2_not (F2_sfcmpeq F32:$src1,
+                                      (f32 (A2_tfrsi (ftoi $src2))))))>;
+  def: Pat<(i1 (setune F64:$src1, f64ImmPred:$src2)),
+           (C2_or (F2_dfcmpuo F64:$src1, (CONST64 (ftoi $src2))),
+                  (C2_not (F2_dfcmpeq F64:$src1,
+                                      (CONST64 (ftoi $src2)))))>;
+}
+
+// Besides the ordered/unordered set[o|u] comparisons above, we also need
+// the plain set comparisons.
+let Predicates = [HasV5T] in {
+  // lt.
+  def: Pat<(i1 (setlt F32:$src1, F32:$src2)),
+           (F2_sfcmpgt F32:$src2, F32:$src1)>;
+  def: Pat<(i1 (setlt F32:$src1, f32ImmPred:$src2)),
+           (F2_sfcmpgt (f32 (A2_tfrsi (ftoi $src2))), F32:$src1)>;
+  def: Pat<(i1 (setlt F64:$src1, F64:$src2)),
+           (F2_dfcmpgt F64:$src2, F64:$src1)>;
+  def: Pat<(i1 (setlt F64:$src1, f64ImmPred:$src2)),
+           (F2_dfcmpgt (CONST64 (ftoi $src2)), F64:$src1)>;
+
+  // le.
+  // rs <= rt -> rt >= rs.
+  def: Pat<(i1 (setle F32:$src1, F32:$src2)),
+           (F2_sfcmpge F32:$src2, F32:$src1)>;
+  def: Pat<(i1 (setle F32:$src1, f32ImmPred:$src2)),
+           (F2_sfcmpge (f32 (A2_tfrsi (ftoi $src2))), F32:$src1)>;
+
+  // Rss <= Rtt -> Rtt >= Rss.
+  def: Pat<(i1 (setle F64:$src1, F64:$src2)),
+           (F2_dfcmpge F64:$src2, F64:$src1)>;
+  def: Pat<(i1 (setle F64:$src1, f64ImmPred:$src2)),
+           (F2_dfcmpge (CONST64 (ftoi $src2)), F64:$src1)>;
+
+  // ne.
+  def: Pat<(i1 (setne F32:$src1, F32:$src2)),
+           (C2_not (F2_sfcmpeq F32:$src1, F32:$src2))>;
+  def: Pat<(i1 (setne F64:$src1, F64:$src2)),
+           (C2_not (F2_dfcmpeq F64:$src1, F64:$src2))>;
+  def: Pat<(i1 (setne F32:$src1, f32ImmPred:$src2)),
+           (C2_not (F2_sfcmpeq F32:$src1, (f32 (A2_tfrsi (ftoi $src2)))))>;
+  def: Pat<(i1 (setne F64:$src1, f64ImmPred:$src2)),
+           (C2_not (F2_dfcmpeq F64:$src1, (CONST64 (ftoi $src2))))>;
+}
+
+
+def: Pat<(f64 (fpextend F32:$Rs)), (F2_conv_sf2df F32:$Rs)>;
+def: Pat<(f32 (fpround F64:$Rs)), (F2_conv_df2sf F64:$Rs)>;
+
+def: Pat<(f32 (sint_to_fp I32:$Rs)), (F2_conv_w2sf I32:$Rs)>;
+def: Pat<(f32 (sint_to_fp I64:$Rs)), (F2_conv_d2sf I64:$Rs)>;
+def: Pat<(f64 (sint_to_fp I32:$Rs)), (F2_conv_w2df I32:$Rs)>;
+def: Pat<(f64 (sint_to_fp I64:$Rs)), (F2_conv_d2df I64:$Rs)>;
+
+def: Pat<(f32 (uint_to_fp I32:$Rs)), (F2_conv_uw2sf I32:$Rs)>;
+def: Pat<(f32 (uint_to_fp I64:$Rs)), (F2_conv_ud2sf I64:$Rs)>;
+def: Pat<(f64 (uint_to_fp I32:$Rs)), (F2_conv_uw2df I32:$Rs)>;
+def: Pat<(f64 (uint_to_fp I64:$Rs)), (F2_conv_ud2df I64:$Rs)>;
+
+def: Pat<(i32 (fp_to_sint F32:$Rs)), (F2_conv_sf2w_chop F32:$Rs)>;
+def: Pat<(i32 (fp_to_sint F64:$Rs)), (F2_conv_df2w_chop F64:$Rs)>;
+def: Pat<(i64 (fp_to_sint F32:$Rs)), (F2_conv_sf2d_chop F32:$Rs)>;
+def: Pat<(i64 (fp_to_sint F64:$Rs)), (F2_conv_df2d_chop F64:$Rs)>;
+
+def: Pat<(i32 (fp_to_uint F32:$Rs)), (F2_conv_sf2uw_chop F32:$Rs)>;
+def: Pat<(i32 (fp_to_uint F64:$Rs)), (F2_conv_df2uw_chop F64:$Rs)>;
+def: Pat<(i64 (fp_to_uint F32:$Rs)), (F2_conv_sf2ud_chop F32:$Rs)>;
+def: Pat<(i64 (fp_to_uint F64:$Rs)), (F2_conv_df2ud_chop F64:$Rs)>;
+
+// Bitcast is different than [fp|sint|uint]_to_[sint|uint|fp].
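+// A bitcast reinterprets bits; the conversions above compute a new value.
+// E.g. (illustrative): with float f = 1.0f, a bitcast to i32 yields
+// 0x3f800000, while fp_to_sint yields 1. Hence the patterns below are
+// plain register copies that emit no conversion code.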
+let Predicates = [HasV5T] in { + def: Pat <(i32 (bitconvert F32:$src)), (I32:$src)>; + def: Pat <(f32 (bitconvert I32:$src)), (F32:$src)>; + def: Pat <(i64 (bitconvert F64:$src)), (I64:$src)>; + def: Pat <(f64 (bitconvert I64:$src)), (F64:$src)>; +} + +def : Pat <(fma F32:$src2, F32:$src3, F32:$src1), + (F2_sffma F32:$src1, F32:$src2, F32:$src3)>; + +def : Pat <(fma (fneg F32:$src2), F32:$src3, F32:$src1), + (F2_sffms F32:$src1, F32:$src2, F32:$src3)>; + +def : Pat <(fma F32:$src2, (fneg F32:$src3), F32:$src1), + (F2_sffms F32:$src1, F32:$src2, F32:$src3)>; + +def: Pat<(select I1:$Pu, F32:$Rs, f32ImmPred:$imm), + (C2_muxir I1:$Pu, F32:$Rs, (ftoi $imm))>, + Requires<[HasV5T]>; + +def: Pat<(select I1:$Pu, f32ImmPred:$imm, F32:$Rt), + (C2_muxri I1:$Pu, (ftoi $imm), F32:$Rt)>, + Requires<[HasV5T]>; + +def: Pat<(select I1:$src1, F32:$src2, F32:$src3), + (C2_mux I1:$src1, F32:$src2, F32:$src3)>, + Requires<[HasV5T]>; + +def: Pat<(select (i1 (setult F32:$src1, F32:$src2)), F32:$src3, F32:$src4), + (C2_mux (F2_sfcmpgt F32:$src2, F32:$src1), F32:$src4, F32:$src3)>, + Requires<[HasV5T]>; + +def: Pat<(select I1:$src1, F64:$src2, F64:$src3), + (C2_vmux I1:$src1, F64:$src2, F64:$src3)>, + Requires<[HasV5T]>; + +def: Pat<(select (i1 (setult F64:$src1, F64:$src2)), F64:$src3, F64:$src4), + (C2_vmux (F2_dfcmpgt F64:$src2, F64:$src1), F64:$src3, F64:$src4)>, + Requires<[HasV5T]>; + +// Map from p0 = pnot(p0); r0 = select(p0, #i, r1) +// => r0 = mux(p0, #i, r1) +def: Pat<(select (not I1:$src1), f32ImmPred:$src2, F32:$src3), + (C2_muxir I1:$src1, F32:$src3, (ftoi $src2))>, + Requires<[HasV5T]>; + +// Map from p0 = pnot(p0); r0 = mux(p0, r1, #i) +// => r0 = mux(p0, r1, #i) +def: Pat<(select (not I1:$src1), F32:$src2, f32ImmPred:$src3), + (C2_muxri I1:$src1, (ftoi $src3), F32:$src2)>, + Requires<[HasV5T]>; + +def: Pat<(i32 (fp_to_sint F64:$src1)), + (LoReg (F2_conv_df2d_chop F64:$src1))>, + Requires<[HasV5T]>; + +def : Pat <(fabs F32:$src1), + (S2_clrbit_i F32:$src1, 31)>, + Requires<[HasV5T]>; + +def : Pat <(fneg F32:$src1), + (S2_togglebit_i F32:$src1, 31)>, + Requires<[HasV5T]>; + +def: Pat<(fabs F64:$Rs), + (REG_SEQUENCE DoubleRegs, + (S2_clrbit_i (HiReg $Rs), 31), isub_hi, + (i32 (LoReg $Rs)), isub_lo)>; + +def: Pat<(fneg F64:$Rs), + (REG_SEQUENCE DoubleRegs, + (S2_togglebit_i (HiReg $Rs), 31), isub_hi, + (i32 (LoReg $Rs)), isub_lo)>; + +def alignedload : PatFrag<(ops node:$addr), (load $addr), [{ + return isAlignedMemNode(dyn_cast<MemSDNode>(N)); +}]>; + +def unalignedload : PatFrag<(ops node:$addr), (load $addr), [{ + return !isAlignedMemNode(dyn_cast<MemSDNode>(N)); +}]>; + +def alignedstore : PatFrag<(ops node:$val, node:$addr), (store $val, $addr), [{ + return isAlignedMemNode(dyn_cast<MemSDNode>(N)); +}]>; + +def unalignedstore : PatFrag<(ops node:$val, node:$addr), (store $val, $addr), [{ + return !isAlignedMemNode(dyn_cast<MemSDNode>(N)); +}]>; + + +def s4_6ImmPred: PatLeaf<(i32 imm), [{ + int64_t V = N->getSExtValue(); + return isShiftedInt<4,6>(V); +}]>; + +def s4_7ImmPred: PatLeaf<(i32 imm), [{ + int64_t V = N->getSExtValue(); + return isShiftedInt<4,7>(V); +}]>; + + +multiclass vS32b_ai_pats <ValueType VTSgl, ValueType VTDbl> { + // Aligned stores + def : Pat<(alignedstore (VTSgl VectorRegs:$src1), IntRegs:$addr), + (V6_vS32b_ai IntRegs:$addr, 0, (VTSgl VectorRegs:$src1))>, + Requires<[UseHVXSgl]>; + def : Pat<(unalignedstore (VTSgl VectorRegs:$src1), IntRegs:$addr), + (V6_vS32Ub_ai IntRegs:$addr, 0, (VTSgl VectorRegs:$src1))>, + Requires<[UseHVXSgl]>; + + // 128B Aligned stores + def : 
Pat<(alignedstore (VTDbl VectorRegs128B:$src1), IntRegs:$addr), + (V6_vS32b_ai_128B IntRegs:$addr, 0, (VTDbl VectorRegs128B:$src1))>, + Requires<[UseHVXDbl]>; + def : Pat<(unalignedstore (VTDbl VectorRegs128B:$src1), IntRegs:$addr), + (V6_vS32Ub_ai_128B IntRegs:$addr, 0, (VTDbl VectorRegs128B:$src1))>, + Requires<[UseHVXDbl]>; + + // Fold Add R+OFF into vector store. + let AddedComplexity = 10 in { + def : Pat<(alignedstore (VTSgl VectorRegs:$src1), + (add IntRegs:$src2, s4_6ImmPred:$offset)), + (V6_vS32b_ai IntRegs:$src2, s4_6ImmPred:$offset, + (VTSgl VectorRegs:$src1))>, + Requires<[UseHVXSgl]>; + def : Pat<(unalignedstore (VTSgl VectorRegs:$src1), + (add IntRegs:$src2, s4_6ImmPred:$offset)), + (V6_vS32Ub_ai IntRegs:$src2, s4_6ImmPred:$offset, + (VTSgl VectorRegs:$src1))>, + Requires<[UseHVXSgl]>; + + // Fold Add R+OFF into vector store 128B. + def : Pat<(alignedstore (VTDbl VectorRegs128B:$src1), + (add IntRegs:$src2, s4_7ImmPred:$offset)), + (V6_vS32b_ai_128B IntRegs:$src2, s4_7ImmPred:$offset, + (VTDbl VectorRegs128B:$src1))>, + Requires<[UseHVXDbl]>; + def : Pat<(unalignedstore (VTDbl VectorRegs128B:$src1), + (add IntRegs:$src2, s4_7ImmPred:$offset)), + (V6_vS32Ub_ai_128B IntRegs:$src2, s4_7ImmPred:$offset, + (VTDbl VectorRegs128B:$src1))>, + Requires<[UseHVXDbl]>; + } +} + +defm : vS32b_ai_pats <v64i8, v128i8>; +defm : vS32b_ai_pats <v32i16, v64i16>; +defm : vS32b_ai_pats <v16i32, v32i32>; +defm : vS32b_ai_pats <v8i64, v16i64>; + + +multiclass vL32b_ai_pats <ValueType VTSgl, ValueType VTDbl> { + // Aligned loads + def : Pat < (VTSgl (alignedload IntRegs:$addr)), + (V6_vL32b_ai IntRegs:$addr, 0) >, + Requires<[UseHVXSgl]>; + def : Pat < (VTSgl (unalignedload IntRegs:$addr)), + (V6_vL32Ub_ai IntRegs:$addr, 0) >, + Requires<[UseHVXSgl]>; + + // 128B Load + def : Pat < (VTDbl (alignedload IntRegs:$addr)), + (V6_vL32b_ai_128B IntRegs:$addr, 0) >, + Requires<[UseHVXDbl]>; + def : Pat < (VTDbl (unalignedload IntRegs:$addr)), + (V6_vL32Ub_ai_128B IntRegs:$addr, 0) >, + Requires<[UseHVXDbl]>; + + // Fold Add R+OFF into vector load. 
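+  // E.g. (illustrative) an aligned load from (add Rs, #192) with 64-byte
+  // vectors becomes roughly v0 = vmem(r0+#3); the byte offset is encoded
+  // as a multiple of the vector size (s4_6 for 64B mode, s4_7 for 128B).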
+ let AddedComplexity = 10 in { + def : Pat<(VTDbl (alignedload (add IntRegs:$src2, s4_7ImmPred:$offset))), + (V6_vL32b_ai_128B IntRegs:$src2, s4_7ImmPred:$offset)>, + Requires<[UseHVXDbl]>; + def : Pat<(VTDbl (unalignedload (add IntRegs:$src2, s4_7ImmPred:$offset))), + (V6_vL32Ub_ai_128B IntRegs:$src2, s4_7ImmPred:$offset)>, + Requires<[UseHVXDbl]>; + + def : Pat<(VTSgl (alignedload (add IntRegs:$src2, s4_6ImmPred:$offset))), + (V6_vL32b_ai IntRegs:$src2, s4_6ImmPred:$offset)>, + Requires<[UseHVXSgl]>; + def : Pat<(VTSgl (unalignedload (add IntRegs:$src2, s4_6ImmPred:$offset))), + (V6_vL32Ub_ai IntRegs:$src2, s4_6ImmPred:$offset)>, + Requires<[UseHVXSgl]>; + } +} + +defm : vL32b_ai_pats <v64i8, v128i8>; +defm : vL32b_ai_pats <v32i16, v64i16>; +defm : vL32b_ai_pats <v16i32, v32i32>; +defm : vL32b_ai_pats <v8i64, v16i64>; + +multiclass STrivv_pats <ValueType VTSgl, ValueType VTDbl> { + def : Pat<(alignedstore (VTSgl VecDblRegs:$src1), IntRegs:$addr), + (PS_vstorerw_ai IntRegs:$addr, 0, (VTSgl VecDblRegs:$src1))>, + Requires<[UseHVXSgl]>; + def : Pat<(unalignedstore (VTSgl VecDblRegs:$src1), IntRegs:$addr), + (PS_vstorerwu_ai IntRegs:$addr, 0, (VTSgl VecDblRegs:$src1))>, + Requires<[UseHVXSgl]>; + + def : Pat<(alignedstore (VTDbl VecDblRegs128B:$src1), IntRegs:$addr), + (PS_vstorerw_ai_128B IntRegs:$addr, 0, + (VTDbl VecDblRegs128B:$src1))>, + Requires<[UseHVXDbl]>; + def : Pat<(unalignedstore (VTDbl VecDblRegs128B:$src1), IntRegs:$addr), + (PS_vstorerwu_ai_128B IntRegs:$addr, 0, + (VTDbl VecDblRegs128B:$src1))>, + Requires<[UseHVXDbl]>; +} + +defm : STrivv_pats <v128i8, v256i8>; +defm : STrivv_pats <v64i16, v128i16>; +defm : STrivv_pats <v32i32, v64i32>; +defm : STrivv_pats <v16i64, v32i64>; + +multiclass LDrivv_pats <ValueType VTSgl, ValueType VTDbl> { + def : Pat<(VTSgl (alignedload I32:$addr)), + (PS_vloadrw_ai I32:$addr, 0)>, + Requires<[UseHVXSgl]>; + def : Pat<(VTSgl (unalignedload I32:$addr)), + (PS_vloadrwu_ai I32:$addr, 0)>, + Requires<[UseHVXSgl]>; + + def : Pat<(VTDbl (alignedload I32:$addr)), + (PS_vloadrw_ai_128B I32:$addr, 0)>, + Requires<[UseHVXDbl]>; + def : Pat<(VTDbl (unalignedload I32:$addr)), + (PS_vloadrwu_ai_128B I32:$addr, 0)>, + Requires<[UseHVXDbl]>; +} + +defm : LDrivv_pats <v128i8, v256i8>; +defm : LDrivv_pats <v64i16, v128i16>; +defm : LDrivv_pats <v32i32, v64i32>; +defm : LDrivv_pats <v16i64, v32i64>; + +let Predicates = [HasV60T,UseHVXSgl] in { + def: Pat<(select I1:$Pu, (v16i32 VectorRegs:$Vs), VectorRegs:$Vt), + (PS_vselect I1:$Pu, VectorRegs:$Vs, VectorRegs:$Vt)>; + def: Pat<(select I1:$Pu, (v32i32 VecDblRegs:$Vs), VecDblRegs:$Vt), + (PS_wselect I1:$Pu, VecDblRegs:$Vs, VecDblRegs:$Vt)>; +} +let Predicates = [HasV60T,UseHVXDbl] in { + def: Pat<(select I1:$Pu, (v32i32 VectorRegs128B:$Vs), VectorRegs128B:$Vt), + (PS_vselect_128B I1:$Pu, VectorRegs128B:$Vs, VectorRegs128B:$Vt)>; + def: Pat<(select I1:$Pu, (v64i32 VecDblRegs128B:$Vs), VecDblRegs128B:$Vt), + (PS_wselect_128B I1:$Pu, VecDblRegs128B:$Vs, VecDblRegs128B:$Vt)>; +} + + +def SDTHexagonVCOMBINE: SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>, + SDTCisSubVecOfVec<1, 0>]>; + +def HexagonVCOMBINE: SDNode<"HexagonISD::VCOMBINE", SDTHexagonVCOMBINE>; + +def: Pat<(v32i32 (HexagonVCOMBINE (v16i32 VectorRegs:$Vs), + (v16i32 VectorRegs:$Vt))), + (V6_vcombine VectorRegs:$Vs, VectorRegs:$Vt)>, + Requires<[UseHVXSgl]>; +def: Pat<(v64i32 (HexagonVCOMBINE (v32i32 VecDblRegs:$Vs), + (v32i32 VecDblRegs:$Vt))), + (V6_vcombine_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>, + Requires<[UseHVXDbl]>; + +def SDTHexagonVPACK: 
SDTypeProfile<1, 3, [SDTCisSameAs<1, 2>, + SDTCisInt<3>]>; + +def HexagonVPACK: SDNode<"HexagonISD::VPACK", SDTHexagonVPACK>; + +// 0 as the last argument denotes vpacke. 1 denotes vpacko +def: Pat<(v64i8 (HexagonVPACK (v64i8 VectorRegs:$Vs), + (v64i8 VectorRegs:$Vt), (i32 0))), + (V6_vpackeb VectorRegs:$Vs, VectorRegs:$Vt)>, + Requires<[UseHVXSgl]>; +def: Pat<(v64i8 (HexagonVPACK (v64i8 VectorRegs:$Vs), + (v64i8 VectorRegs:$Vt), (i32 1))), + (V6_vpackob VectorRegs:$Vs, VectorRegs:$Vt)>, + Requires<[UseHVXSgl]>; +def: Pat<(v32i16 (HexagonVPACK (v32i16 VectorRegs:$Vs), + (v32i16 VectorRegs:$Vt), (i32 0))), + (V6_vpackeh VectorRegs:$Vs, VectorRegs:$Vt)>, + Requires<[UseHVXSgl]>; +def: Pat<(v32i16 (HexagonVPACK (v32i16 VectorRegs:$Vs), + (v32i16 VectorRegs:$Vt), (i32 1))), + (V6_vpackoh VectorRegs:$Vs, VectorRegs:$Vt)>, + Requires<[UseHVXSgl]>; + +def: Pat<(v128i8 (HexagonVPACK (v128i8 VecDblRegs:$Vs), + (v128i8 VecDblRegs:$Vt), (i32 0))), + (V6_vpackeb_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>, + Requires<[UseHVXDbl]>; +def: Pat<(v128i8 (HexagonVPACK (v128i8 VecDblRegs:$Vs), + (v128i8 VecDblRegs:$Vt), (i32 1))), + (V6_vpackob_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>, + Requires<[UseHVXDbl]>; +def: Pat<(v64i16 (HexagonVPACK (v64i16 VecDblRegs:$Vs), + (v64i16 VecDblRegs:$Vt), (i32 0))), + (V6_vpackeh_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>, + Requires<[UseHVXDbl]>; +def: Pat<(v64i16 (HexagonVPACK (v64i16 VecDblRegs:$Vs), + (v64i16 VecDblRegs:$Vt), (i32 1))), + (V6_vpackoh_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>, + Requires<[UseHVXDbl]>; + +def V2I1: PatLeaf<(v2i1 PredRegs:$R)>; +def V4I1: PatLeaf<(v4i1 PredRegs:$R)>; +def V8I1: PatLeaf<(v8i1 PredRegs:$R)>; +def V4I8: PatLeaf<(v4i8 IntRegs:$R)>; +def V2I16: PatLeaf<(v2i16 IntRegs:$R)>; +def V8I8: PatLeaf<(v8i8 DoubleRegs:$R)>; +def V4I16: PatLeaf<(v4i16 DoubleRegs:$R)>; +def V2I32: PatLeaf<(v2i32 DoubleRegs:$R)>; + + +multiclass bitconvert_32<ValueType a, ValueType b> { + def : Pat <(b (bitconvert (a IntRegs:$src))), + (b IntRegs:$src)>; + def : Pat <(a (bitconvert (b IntRegs:$src))), + (a IntRegs:$src)>; +} + +multiclass bitconvert_64<ValueType a, ValueType b> { + def : Pat <(b (bitconvert (a DoubleRegs:$src))), + (b DoubleRegs:$src)>; + def : Pat <(a (bitconvert (b DoubleRegs:$src))), + (a DoubleRegs:$src)>; +} + +// Bit convert vector types to integers. 
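The bitconvert instantiations below emit no instructions: a bitcast between a short vector and the same-width integer reinterprets bits already sitting in one register, so each pattern simply maps the cast to its source operand. A value-level reference sketch in C++ (illustration only, not part of the patch):

    #include <cstdint>
    #include <cstring>
    // v4i8 <-> i32: the same 32 bits under a different type; no data changes.
    uint32_t bitcast_v4i8_to_i32(const uint8_t V[4]) {
      uint32_t R;
      std::memcpy(&R, V, sizeof R);
      return R;
    }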
+defm : bitconvert_32<v4i8, i32>; +defm : bitconvert_32<v2i16, i32>; +defm : bitconvert_64<v8i8, i64>; +defm : bitconvert_64<v4i16, i64>; +defm : bitconvert_64<v2i32, i64>; + +def: Pat<(sra (v4i16 DoubleRegs:$src1), u4_0ImmPred:$src2), + (S2_asr_i_vh DoubleRegs:$src1, imm:$src2)>; +def: Pat<(srl (v4i16 DoubleRegs:$src1), u4_0ImmPred:$src2), + (S2_lsr_i_vh DoubleRegs:$src1, imm:$src2)>; +def: Pat<(shl (v4i16 DoubleRegs:$src1), u4_0ImmPred:$src2), + (S2_asl_i_vh DoubleRegs:$src1, imm:$src2)>; + +def: Pat<(sra (v2i32 DoubleRegs:$src1), u5_0ImmPred:$src2), + (S2_asr_i_vw DoubleRegs:$src1, imm:$src2)>; +def: Pat<(srl (v2i32 DoubleRegs:$src1), u5_0ImmPred:$src2), + (S2_lsr_i_vw DoubleRegs:$src1, imm:$src2)>; +def: Pat<(shl (v2i32 DoubleRegs:$src1), u5_0ImmPred:$src2), + (S2_asl_i_vw DoubleRegs:$src1, imm:$src2)>; + +def : Pat<(v2i16 (add (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))), + (A2_svaddh IntRegs:$src1, IntRegs:$src2)>; + +def : Pat<(v2i16 (sub (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))), + (A2_svsubh IntRegs:$src1, IntRegs:$src2)>; + +def HexagonVSPLATB: SDNode<"HexagonISD::VSPLATB", SDTUnaryOp>; +def HexagonVSPLATH: SDNode<"HexagonISD::VSPLATH", SDTUnaryOp>; + +// Replicate the low 8 bits of the 32-bit source register into each of the +// four bytes of the 32-bit destination register. +def: Pat<(v4i8 (HexagonVSPLATB I32:$Rs)), (S2_vsplatrb I32:$Rs)>; + +// Replicate the low 16 bits of the 32-bit source register into each of the +// four halfwords of the 64-bit destination register. +def: Pat<(v4i16 (HexagonVSPLATH I32:$Rs)), (S2_vsplatrh I32:$Rs)>; + + +class VArith_pat <InstHexagon MI, SDNode Op, PatFrag Type> + : Pat <(Op Type:$Rss, Type:$Rtt), + (MI Type:$Rss, Type:$Rtt)>; + +def: VArith_pat <A2_vaddub, add, V8I8>; +def: VArith_pat <A2_vaddh, add, V4I16>; +def: VArith_pat <A2_vaddw, add, V2I32>; +def: VArith_pat <A2_vsubub, sub, V8I8>; +def: VArith_pat <A2_vsubh, sub, V4I16>; +def: VArith_pat <A2_vsubw, sub, V2I32>; + +def: VArith_pat <A2_and, and, V2I16>; +def: VArith_pat <A2_xor, xor, V2I16>; +def: VArith_pat <A2_or, or, V2I16>; + +def: VArith_pat <A2_andp, and, V8I8>; +def: VArith_pat <A2_andp, and, V4I16>; +def: VArith_pat <A2_andp, and, V2I32>; +def: VArith_pat <A2_orp, or, V8I8>; +def: VArith_pat <A2_orp, or, V4I16>; +def: VArith_pat <A2_orp, or, V2I32>; +def: VArith_pat <A2_xorp, xor, V8I8>; +def: VArith_pat <A2_xorp, xor, V4I16>; +def: VArith_pat <A2_xorp, xor, V2I32>; + +def: Pat<(v2i32 (sra V2I32:$b, (i64 (HexagonCOMBINE (i32 u5_0ImmPred:$c), + (i32 u5_0ImmPred:$c))))), + (S2_asr_i_vw V2I32:$b, imm:$c)>; +def: Pat<(v2i32 (srl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5_0ImmPred:$c), + (i32 u5_0ImmPred:$c))))), + (S2_lsr_i_vw V2I32:$b, imm:$c)>; +def: Pat<(v2i32 (shl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5_0ImmPred:$c), + (i32 u5_0ImmPred:$c))))), + (S2_asl_i_vw V2I32:$b, imm:$c)>; + +def: Pat<(v4i16 (sra V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4_0ImmPred:$c)))))), + (S2_asr_i_vh V4I16:$b, imm:$c)>; +def: Pat<(v4i16 (srl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4_0ImmPred:$c)))))), + (S2_lsr_i_vh V4I16:$b, imm:$c)>; +def: Pat<(v4i16 (shl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4_0ImmPred:$c)))))), + (S2_asl_i_vh V4I16:$b, imm:$c)>; + + +def SDTHexagon_v2i32_v2i32_i32 : SDTypeProfile<1, 2, + [SDTCisSameAs<0, 1>, SDTCisVT<0, v2i32>, SDTCisInt<2>]>; +def SDTHexagon_v4i16_v4i16_i32 : SDTypeProfile<1, 2, + [SDTCisSameAs<0, 1>, SDTCisVT<0, v4i16>, SDTCisInt<2>]>; + +def HexagonVSRAW: SDNode<"HexagonISD::VSRAW", SDTHexagon_v2i32_v2i32_i32>; +def HexagonVSRAH: SDNode<"HexagonISD::VSRAH", 
SDTHexagon_v4i16_v4i16_i32>; +def HexagonVSRLW: SDNode<"HexagonISD::VSRLW", SDTHexagon_v2i32_v2i32_i32>; +def HexagonVSRLH: SDNode<"HexagonISD::VSRLH", SDTHexagon_v4i16_v4i16_i32>; +def HexagonVSHLW: SDNode<"HexagonISD::VSHLW", SDTHexagon_v2i32_v2i32_i32>; +def HexagonVSHLH: SDNode<"HexagonISD::VSHLH", SDTHexagon_v4i16_v4i16_i32>; + +def: Pat<(v2i32 (HexagonVSRAW V2I32:$Rs, u5_0ImmPred:$u5)), + (S2_asr_i_vw V2I32:$Rs, imm:$u5)>; +def: Pat<(v4i16 (HexagonVSRAH V4I16:$Rs, u4_0ImmPred:$u4)), + (S2_asr_i_vh V4I16:$Rs, imm:$u4)>; +def: Pat<(v2i32 (HexagonVSRLW V2I32:$Rs, u5_0ImmPred:$u5)), + (S2_lsr_i_vw V2I32:$Rs, imm:$u5)>; +def: Pat<(v4i16 (HexagonVSRLH V4I16:$Rs, u4_0ImmPred:$u4)), + (S2_lsr_i_vh V4I16:$Rs, imm:$u4)>; +def: Pat<(v2i32 (HexagonVSHLW V2I32:$Rs, u5_0ImmPred:$u5)), + (S2_asl_i_vw V2I32:$Rs, imm:$u5)>; +def: Pat<(v4i16 (HexagonVSHLH V4I16:$Rs, u4_0ImmPred:$u4)), + (S2_asl_i_vh V4I16:$Rs, imm:$u4)>; + +class vshift_rr_pat<InstHexagon MI, SDNode Op, PatFrag Value> + : Pat <(Op Value:$Rs, I32:$Rt), + (MI Value:$Rs, I32:$Rt)>; + +def: vshift_rr_pat <S2_asr_r_vw, HexagonVSRAW, V2I32>; +def: vshift_rr_pat <S2_asr_r_vh, HexagonVSRAH, V4I16>; +def: vshift_rr_pat <S2_lsr_r_vw, HexagonVSRLW, V2I32>; +def: vshift_rr_pat <S2_lsr_r_vh, HexagonVSRLH, V4I16>; +def: vshift_rr_pat <S2_asl_r_vw, HexagonVSHLW, V2I32>; +def: vshift_rr_pat <S2_asl_r_vh, HexagonVSHLH, V4I16>; + + +def SDTHexagonVecCompare_v8i8 : SDTypeProfile<1, 2, + [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v8i8>]>; +def SDTHexagonVecCompare_v4i16 : SDTypeProfile<1, 2, + [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v4i16>]>; +def SDTHexagonVecCompare_v2i32 : SDTypeProfile<1, 2, + [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v2i32>]>; + +def HexagonVCMPBEQ: SDNode<"HexagonISD::VCMPBEQ", SDTHexagonVecCompare_v8i8>; +def HexagonVCMPBGT: SDNode<"HexagonISD::VCMPBGT", SDTHexagonVecCompare_v8i8>; +def HexagonVCMPBGTU: SDNode<"HexagonISD::VCMPBGTU", SDTHexagonVecCompare_v8i8>; +def HexagonVCMPHEQ: SDNode<"HexagonISD::VCMPHEQ", SDTHexagonVecCompare_v4i16>; +def HexagonVCMPHGT: SDNode<"HexagonISD::VCMPHGT", SDTHexagonVecCompare_v4i16>; +def HexagonVCMPHGTU: SDNode<"HexagonISD::VCMPHGTU", SDTHexagonVecCompare_v4i16>; +def HexagonVCMPWEQ: SDNode<"HexagonISD::VCMPWEQ", SDTHexagonVecCompare_v2i32>; +def HexagonVCMPWGT: SDNode<"HexagonISD::VCMPWGT", SDTHexagonVecCompare_v2i32>; +def HexagonVCMPWGTU: SDNode<"HexagonISD::VCMPWGTU", SDTHexagonVecCompare_v2i32>; + + +class vcmp_i1_pat<InstHexagon MI, SDNode Op, PatFrag Value> + : Pat <(i1 (Op Value:$Rs, Value:$Rt)), + (MI Value:$Rs, Value:$Rt)>; + +def: vcmp_i1_pat<A2_vcmpbeq, HexagonVCMPBEQ, V8I8>; +def: vcmp_i1_pat<A4_vcmpbgt, HexagonVCMPBGT, V8I8>; +def: vcmp_i1_pat<A2_vcmpbgtu, HexagonVCMPBGTU, V8I8>; + +def: vcmp_i1_pat<A2_vcmpheq, HexagonVCMPHEQ, V4I16>; +def: vcmp_i1_pat<A2_vcmphgt, HexagonVCMPHGT, V4I16>; +def: vcmp_i1_pat<A2_vcmphgtu, HexagonVCMPHGTU, V4I16>; + +def: vcmp_i1_pat<A2_vcmpweq, HexagonVCMPWEQ, V2I32>; +def: vcmp_i1_pat<A2_vcmpwgt, HexagonVCMPWGT, V2I32>; +def: vcmp_i1_pat<A2_vcmpwgtu, HexagonVCMPWGTU, V2I32>; + + +class vcmp_vi1_pat<InstHexagon MI, PatFrag Op, PatFrag InVal, ValueType OutTy> + : Pat <(OutTy (Op InVal:$Rs, InVal:$Rt)), + (MI InVal:$Rs, InVal:$Rt)>; + +def: vcmp_vi1_pat<A2_vcmpweq, seteq, V2I32, v2i1>; +def: vcmp_vi1_pat<A2_vcmpwgt, setgt, V2I32, v2i1>; +def: vcmp_vi1_pat<A2_vcmpwgtu, setugt, V2I32, v2i1>; + +def: vcmp_vi1_pat<A2_vcmpheq, seteq, V4I16, v4i1>; +def: vcmp_vi1_pat<A2_vcmphgt, setgt, V4I16, v4i1>; +def: vcmp_vi1_pat<A2_vcmphgtu, 
setugt, V4I16, v4i1>; + +def: Pat<(mul V2I32:$Rs, V2I32:$Rt), + (PS_vmulw DoubleRegs:$Rs, DoubleRegs:$Rt)>; +def: Pat<(add V2I32:$Rx, (mul V2I32:$Rs, V2I32:$Rt)), + (PS_vmulw_acc DoubleRegs:$Rx, DoubleRegs:$Rs, DoubleRegs:$Rt)>; + + +// Adds two v4i8: Hexagon does not have an insn for this one, so we +// use the double add v8i8, and use only the low part of the result. +def: Pat<(v4i8 (add (v4i8 IntRegs:$Rs), (v4i8 IntRegs:$Rt))), + (LoReg (A2_vaddub (ToZext64 $Rs), (ToZext64 $Rt)))>; + +// Subtract two v4i8: Hexagon does not have an insn for this one, so we +// use the double sub v8i8, and use only the low part of the result. +def: Pat<(v4i8 (sub (v4i8 IntRegs:$Rs), (v4i8 IntRegs:$Rt))), + (LoReg (A2_vsubub (ToZext64 $Rs), (ToZext64 $Rt)))>; + +// +// No 32 bit vector mux. +// +def: Pat<(v4i8 (select I1:$Pu, V4I8:$Rs, V4I8:$Rt)), + (LoReg (C2_vmux I1:$Pu, (ToZext64 $Rs), (ToZext64 $Rt)))>; +def: Pat<(v2i16 (select I1:$Pu, V2I16:$Rs, V2I16:$Rt)), + (LoReg (C2_vmux I1:$Pu, (ToZext64 $Rs), (ToZext64 $Rt)))>; + +// +// 64-bit vector mux. +// +def: Pat<(v8i8 (vselect V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)), + (C2_vmux V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)>; +def: Pat<(v4i16 (vselect V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)), + (C2_vmux V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)>; +def: Pat<(v2i32 (vselect V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)), + (C2_vmux V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)>; + +// +// No 32 bit vector compare. +// +def: Pat<(i1 (seteq V4I8:$Rs, V4I8:$Rt)), + (A2_vcmpbeq (ToZext64 $Rs), (ToZext64 $Rt))>; +def: Pat<(i1 (setgt V4I8:$Rs, V4I8:$Rt)), + (A4_vcmpbgt (ToZext64 $Rs), (ToZext64 $Rt))>; +def: Pat<(i1 (setugt V4I8:$Rs, V4I8:$Rt)), + (A2_vcmpbgtu (ToZext64 $Rs), (ToZext64 $Rt))>; + +def: Pat<(i1 (seteq V2I16:$Rs, V2I16:$Rt)), + (A2_vcmpheq (ToZext64 $Rs), (ToZext64 $Rt))>; +def: Pat<(i1 (setgt V2I16:$Rs, V2I16:$Rt)), + (A2_vcmphgt (ToZext64 $Rs), (ToZext64 $Rt))>; +def: Pat<(i1 (setugt V2I16:$Rs, V2I16:$Rt)), + (A2_vcmphgtu (ToZext64 $Rs), (ToZext64 $Rt))>; + + +class InvertCmp_pat<InstHexagon InvMI, PatFrag CmpOp, PatFrag Value, + ValueType CmpTy> + : Pat<(CmpTy (CmpOp Value:$Rs, Value:$Rt)), + (InvMI Value:$Rt, Value:$Rs)>; + +// Map from a compare operation to the corresponding instruction with the +// order of operands reversed, e.g. x > y --> cmp.lt(y,x). +def: InvertCmp_pat<A4_vcmpbgt, setlt, V8I8, i1>; +def: InvertCmp_pat<A4_vcmpbgt, setlt, V8I8, v8i1>; +def: InvertCmp_pat<A2_vcmphgt, setlt, V4I16, i1>; +def: InvertCmp_pat<A2_vcmphgt, setlt, V4I16, v4i1>; +def: InvertCmp_pat<A2_vcmpwgt, setlt, V2I32, i1>; +def: InvertCmp_pat<A2_vcmpwgt, setlt, V2I32, v2i1>; + +def: InvertCmp_pat<A2_vcmpbgtu, setult, V8I8, i1>; +def: InvertCmp_pat<A2_vcmpbgtu, setult, V8I8, v8i1>; +def: InvertCmp_pat<A2_vcmphgtu, setult, V4I16, i1>; +def: InvertCmp_pat<A2_vcmphgtu, setult, V4I16, v4i1>; +def: InvertCmp_pat<A2_vcmpwgtu, setult, V2I32, i1>; +def: InvertCmp_pat<A2_vcmpwgtu, setult, V2I32, v2i1>; + +// Map from vcmpne(Rss) -> !vcmpew(Rss). +// rs != rt -> !(rs == rt). 
+def: Pat<(v2i1 (setne V2I32:$Rs, V2I32:$Rt)), + (C2_not (v2i1 (A2_vcmpbeq V2I32:$Rs, V2I32:$Rt)))>; + + +// Truncate: from vector B copy all 'E'ven 'B'yte elements: +// A[0] = B[0]; A[1] = B[2]; A[2] = B[4]; A[3] = B[6]; +def: Pat<(v4i8 (trunc V4I16:$Rs)), + (S2_vtrunehb V4I16:$Rs)>; + +// Truncate: from vector B copy all 'O'dd 'B'yte elements: +// A[0] = B[1]; A[1] = B[3]; A[2] = B[5]; A[3] = B[7]; +// S2_vtrunohb + +// Truncate: from vectors B and C copy all 'E'ven 'H'alf-word elements: +// A[0] = B[0]; A[1] = B[2]; A[2] = C[0]; A[3] = C[2]; +// S2_vtruneh + +def: Pat<(v2i16 (trunc V2I32:$Rs)), + (LoReg (S2_packhl (HiReg $Rs), (LoReg $Rs)))>; + + +def HexagonVSXTBH : SDNode<"HexagonISD::VSXTBH", SDTUnaryOp>; +def HexagonVSXTBW : SDNode<"HexagonISD::VSXTBW", SDTUnaryOp>; + +def: Pat<(i64 (HexagonVSXTBH I32:$Rs)), (S2_vsxtbh I32:$Rs)>; +def: Pat<(i64 (HexagonVSXTBW I32:$Rs)), (S2_vsxthw I32:$Rs)>; + +def: Pat<(v4i16 (zext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>; +def: Pat<(v2i32 (zext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>; +def: Pat<(v4i16 (anyext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>; +def: Pat<(v2i32 (anyext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>; +def: Pat<(v4i16 (sext V4I8:$Rs)), (S2_vsxtbh V4I8:$Rs)>; +def: Pat<(v2i32 (sext V2I16:$Rs)), (S2_vsxthw V2I16:$Rs)>; + +// Sign extends a v2i8 into a v2i32. +def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i8)), + (A2_combinew (A2_sxtb (HiReg $Rs)), (A2_sxtb (LoReg $Rs)))>; + +// Sign extends a v2i16 into a v2i32. +def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i16)), + (A2_combinew (A2_sxth (HiReg $Rs)), (A2_sxth (LoReg $Rs)))>; + + +// Multiplies two v2i16 and returns a v2i32. The saturating multiply is +// used here, as Hexagon does not provide a non-saturating vector +// multiply, and saturation does not affect the result, which has double +// the precision of the operands. + +// Multiplies two v2i16 vectors: since Hexagon has no multiply with plain +// C semantics for this type, this pattern uses the halfword multiply +// vmpyh, which takes two v2i16 and returns a v2i32. The result is then +// truncated back into a v2i16 to simulate the unsigned wrap-around +// semantics of C. +def vmpyh: OutPatFrag<(ops node:$Rs, node:$Rt), + (M2_vmpy2s_s0 (i32 $Rs), (i32 $Rt))>; + +def: Pat<(v2i16 (mul V2I16:$Rs, V2I16:$Rt)), + (LoReg (S2_vtrunewh (v2i32 (A2_combineii 0, 0)), + (v2i32 (vmpyh V2I16:$Rs, V2I16:$Rt))))>; + +// Multiplies two v4i16 vectors. +def: Pat<(v4i16 (mul V4I16:$Rs, V4I16:$Rt)), + (S2_vtrunewh (vmpyh (HiReg $Rs), (HiReg $Rt)), + (vmpyh (LoReg $Rs), (LoReg $Rt)))>; + +def VMPYB_no_V5: OutPatFrag<(ops node:$Rs, node:$Rt), + (S2_vtrunewh (vmpyh (HiReg (S2_vsxtbh $Rs)), (HiReg (S2_vsxtbh $Rt))), + (vmpyh (LoReg (S2_vsxtbh $Rs)), (LoReg (S2_vsxtbh $Rt))))>; + +// Multiplies two v4i8 vectors. +def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)), + (S2_vtrunehb (M5_vmpybsu V4I8:$Rs, V4I8:$Rt))>, + Requires<[HasV5T]>; + +def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)), + (S2_vtrunehb (VMPYB_no_V5 V4I8:$Rs, V4I8:$Rt))>; + +// Multiplies two v8i8 vectors. 
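All of these multiply patterns implement ordinary C per-lane wrap-around multiplication; the widening (and, where vmpyh is used, saturation) of the intermediate products is discarded by the final truncation, so the retained low bits are unaffected. A C++ reference for the byte case that the patterns below match (a semantics sketch, not code from the patch):

    #include <cstdint>
    // Per-lane v8i8 multiply with wrap-around: only the low 8 bits of each
    // product survive, which is exactly what vtrunehb of the widened
    // products yields, independent of the signedness of the multiply used.
    void mul_v8i8_ref(const uint8_t A[8], const uint8_t B[8], uint8_t Out[8]) {
      for (int I = 0; I != 8; ++I)
        Out[I] = uint8_t(A[I] * B[I]);
    }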
+def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)), + (A2_combinew (S2_vtrunehb (M5_vmpybsu (HiReg $Rs), (HiReg $Rt))), + (S2_vtrunehb (M5_vmpybsu (LoReg $Rs), (LoReg $Rt))))>, + Requires<[HasV5T]>; + +def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)), + (A2_combinew (S2_vtrunehb (VMPYB_no_V5 (HiReg $Rs), (HiReg $Rt))), + (S2_vtrunehb (VMPYB_no_V5 (LoReg $Rs), (LoReg $Rt))))>; + +def SDTHexagonBinOp64 : SDTypeProfile<1, 2, + [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<0, i64>]>; + +def HexagonSHUFFEB: SDNode<"HexagonISD::SHUFFEB", SDTHexagonBinOp64>; +def HexagonSHUFFEH: SDNode<"HexagonISD::SHUFFEH", SDTHexagonBinOp64>; +def HexagonSHUFFOB: SDNode<"HexagonISD::SHUFFOB", SDTHexagonBinOp64>; +def HexagonSHUFFOH: SDNode<"HexagonISD::SHUFFOH", SDTHexagonBinOp64>; + +class ShufflePat<InstHexagon MI, SDNode Op> + : Pat<(i64 (Op DoubleRegs:$src1, DoubleRegs:$src2)), + (i64 (MI DoubleRegs:$src1, DoubleRegs:$src2))>; + +// Shuffles even bytes for i=0..3: A[2*i].b = C[2*i].b; A[2*i+1].b = B[2*i].b +def: ShufflePat<S2_shuffeb, HexagonSHUFFEB>; + +// Shuffles odd bytes for i=0..3: A[2*i].b = C[2*i+1].b; A[2*i+1].b = B[2*i+1].b +def: ShufflePat<S2_shuffob, HexagonSHUFFOB>; + +// Shuffles even half for i=0,1: A[2*i].h = C[2*i].h; A[2*i+1].h = B[2*i].h +def: ShufflePat<S2_shuffeh, HexagonSHUFFEH>; + +// Shuffles odd half for i=0,1: A[2*i].h = C[2*i+1].h; A[2*i+1].h = B[2*i+1].h +def: ShufflePat<S2_shuffoh, HexagonSHUFFOH>; + + +// Truncated store from v4i16 to v4i8. +def truncstorev4i8: PatFrag<(ops node:$val, node:$ptr), + (truncstore node:$val, node:$ptr), + [{ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4i8; }]>; + +// Truncated store from v2i32 to v2i16. +def truncstorev2i16: PatFrag<(ops node:$val, node:$ptr), + (truncstore node:$val, node:$ptr), + [{ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v2i16; }]>; + +def: Pat<(truncstorev2i16 V2I32:$Rs, I32:$Rt), + (S2_storeri_io I32:$Rt, 0, (LoReg (S2_packhl (HiReg $Rs), + (LoReg $Rs))))>; + +def: Pat<(truncstorev4i8 V4I16:$Rs, I32:$Rt), + (S2_storeri_io I32:$Rt, 0, (S2_vtrunehb V4I16:$Rs))>; + + +// Zero and sign extended load from v2i8 into v2i16. +def zextloadv2i8: PatFrag<(ops node:$ptr), (zextload node:$ptr), + [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8; }]>; + +def sextloadv2i8: PatFrag<(ops node:$ptr), (sextload node:$ptr), + [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8; }]>; + +def: Pat<(v2i16 (zextloadv2i8 I32:$Rs)), + (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Rs, 0))))>; + +def: Pat<(v2i16 (sextloadv2i8 I32:$Rs)), + (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0))))>; + +def: Pat<(v2i32 (zextloadv2i8 I32:$Rs)), + (S2_vzxthw (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Rs, 0)))))>; + +def: Pat<(v2i32 (sextloadv2i8 I32:$Rs)), + (S2_vsxthw (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0)))))>; + diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp index b064dec..ee32093 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp @@ -10,7 +10,7 @@ // Transform the following pattern // %vreg170<def> = SXTW %vreg166 // ... 
-// %vreg176<def> = COPY %vreg170:subreg_loreg +// %vreg176<def> = COPY %vreg170:isub_lo // // Into // %vreg176<def> = COPY vreg166 @@ -93,7 +93,7 @@ namespace { bool runOnMachineFunction(MachineFunction &MF) override; - const char *getPassName() const override { + StringRef getPassName() const override { return "Hexagon optimize redundant zero and size extends"; } @@ -167,9 +167,9 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { // Look for this sequence below // %vregDoubleReg1 = LSRd_ri %vregDoubleReg0, 32 - // %vregIntReg = COPY %vregDoubleReg1:subreg_loreg. + // %vregIntReg = COPY %vregDoubleReg1:isub_lo. // and convert into - // %vregIntReg = COPY %vregDoubleReg0:subreg_hireg. + // %vregIntReg = COPY %vregDoubleReg0:isub_hi. if (MI.getOpcode() == Hexagon::S2_lsr_i_p) { assert(MI.getNumOperands() == 3); MachineOperand &Dst = MI.getOperand(0); @@ -180,7 +180,7 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { unsigned DstReg = Dst.getReg(); unsigned SrcReg = Src1.getReg(); PeepholeDoubleRegsMap[DstReg] = - std::make_pair(*&SrcReg, Hexagon::subreg_hireg); + std::make_pair(*&SrcReg, Hexagon::isub_hi); } // Look for P=NOT(P). @@ -201,14 +201,14 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { } // Look for copy: - // %vreg176<def> = COPY %vreg170:subreg_loreg + // %vreg176<def> = COPY %vreg170:isub_lo if (!DisableOptSZExt && MI.isCopy()) { assert(MI.getNumOperands() == 2); MachineOperand &Dst = MI.getOperand(0); MachineOperand &Src = MI.getOperand(1); // Make sure we are copying the lower 32 bits. - if (Src.getSubReg() != Hexagon::subreg_loreg) + if (Src.getSubReg() != Hexagon::isub_lo) continue; unsigned DstReg = Dst.getReg(); @@ -250,6 +250,7 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { if (unsigned PeepholeSrc = PeepholeMap.lookup(Reg0)) { // Change the 1st operand and, flip the opcode. MI.getOperand(0).setReg(PeepholeSrc); + MRI->clearKillFlags(PeepholeSrc); int NewOp = QII->getInvertedPredicatedOpcode(MI.getOpcode()); MI.setDesc(QII->get(NewOp)); Done = true; @@ -280,6 +281,7 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { unsigned PSrc = MI.getOperand(PR).getReg(); if (unsigned POrig = PeepholeMap.lookup(PSrc)) { MI.getOperand(PR).setReg(POrig); + MRI->clearKillFlags(POrig); MI.setDesc(QII->get(NewOp)); // Swap operands S1 and S2. MachineOperand Op1 = MI.getOperand(S1); @@ -304,6 +306,7 @@ void HexagonPeephole::ChangeOpInto(MachineOperand &Dst, MachineOperand &Src) { if (Src.isReg()) { Dst.setReg(Src.getReg()); Dst.setSubReg(Src.getSubReg()); + MRI->clearKillFlags(Src.getReg()); } else if (Src.isImm()) { Dst.ChangeToImmediate(Src.getImm()); } else { @@ -316,7 +319,7 @@ void HexagonPeephole::ChangeOpInto(MachineOperand &Dst, MachineOperand &Src) { Dst.setImm(Src.getImm()); } else if (Src.isReg()) { Dst.ChangeToRegister(Src.getReg(), Src.isDef(), Src.isImplicit(), - Src.isKill(), Src.isDead(), Src.isUndef(), + false, Src.isDead(), Src.isUndef(), Src.isDebug()); Dst.setSubReg(Src.getSubReg()); } else { diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRDF.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonRDF.cpp deleted file mode 100644 index 06719cd..0000000 --- a/contrib/llvm/lib/Target/Hexagon/HexagonRDF.cpp +++ /dev/null @@ -1,60 +0,0 @@ -//===--- HexagonRDF.cpp ---------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -#include "HexagonRDF.h" -#include "HexagonInstrInfo.h" -#include "HexagonRegisterInfo.h" - -#include "llvm/CodeGen/MachineInstr.h" - -using namespace llvm; -using namespace rdf; - -bool HexagonRegisterAliasInfo::covers(RegisterRef RA, RegisterRef RB) const { - if (RA == RB) - return true; - - if (TargetRegisterInfo::isVirtualRegister(RA.Reg) && - TargetRegisterInfo::isVirtualRegister(RB.Reg)) { - // Hexagon-specific cases. - if (RA.Reg == RB.Reg) { - if (RA.Sub == 0) - return true; - if (RB.Sub == 0) - return false; - } - } - - return RegisterAliasInfo::covers(RA, RB); -} - -bool HexagonRegisterAliasInfo::covers(const RegisterSet &RRs, RegisterRef RR) - const { - if (RRs.count(RR)) - return true; - - if (!TargetRegisterInfo::isPhysicalRegister(RR.Reg)) { - assert(TargetRegisterInfo::isVirtualRegister(RR.Reg)); - // Check if both covering subregisters are present. - bool HasLo = RRs.count({RR.Reg, Hexagon::subreg_loreg}); - bool HasHi = RRs.count({RR.Reg, Hexagon::subreg_hireg}); - if (HasLo && HasHi) - return true; - } - - if (RR.Sub == 0) { - // Check if both covering subregisters are present. - unsigned Lo = TRI.getSubReg(RR.Reg, Hexagon::subreg_loreg); - unsigned Hi = TRI.getSubReg(RR.Reg, Hexagon::subreg_hireg); - if (RRs.count({Lo, 0}) && RRs.count({Hi, 0})) - return true; - } - - return RegisterAliasInfo::covers(RRs, RR); -} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRDF.h b/contrib/llvm/lib/Target/Hexagon/HexagonRDF.h deleted file mode 100644 index 9a63150..0000000 --- a/contrib/llvm/lib/Target/Hexagon/HexagonRDF.h +++ /dev/null @@ -1,28 +0,0 @@ -//===--- HexagonRDF.h -----------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -#ifndef HEXAGON_RDF_H -#define HEXAGON_RDF_H -#include "RDFGraph.h" - -namespace llvm { - class TargetRegisterInfo; - -namespace rdf { - struct HexagonRegisterAliasInfo : public RegisterAliasInfo { - HexagonRegisterAliasInfo(const TargetRegisterInfo &TRI) - : RegisterAliasInfo(TRI) {} - bool covers(RegisterRef RA, RegisterRef RR) const override; - bool covers(const RegisterSet &RRs, RegisterRef RR) const override; - }; -} // namespace rdf -} // namespace llvm - -#endif - diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp index 642a878..30640e1 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp @@ -8,7 +8,6 @@ //===----------------------------------------------------------------------===// #include "HexagonInstrInfo.h" -#include "HexagonRDF.h" #include "HexagonSubtarget.h" #include "RDFCopy.h" #include "RDFDeadCode.h" @@ -50,14 +49,14 @@ namespace { AU.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(AU); } - const char *getPassName() const override { + StringRef getPassName() const override { return "Hexagon RDF optimizations"; } bool runOnMachineFunction(MachineFunction &MF) override; MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( - MachineFunctionProperties::Property::AllVRegsAllocated); + MachineFunctionProperties::Property::NoVRegs); } static char ID; @@ -99,6 +98,7 @@ bool HexagonCP::interpretAsCopy(const MachineInstr *MI, EqualityMap &EM) { EM.insert(std::make_pair(DstR, SrcR)); }; + DataFlowGraph &DFG = getDFG(); unsigned Opc = MI->getOpcode(); switch (Opc) { case Hexagon::A2_combinew: { @@ -106,23 +106,23 @@ bool HexagonCP::interpretAsCopy(const MachineInstr *MI, EqualityMap &EM) { const MachineOperand &HiOp = MI->getOperand(1); const MachineOperand &LoOp = MI->getOperand(2); assert(DstOp.getSubReg() == 0 && "Unexpected subregister"); - mapRegs({ DstOp.getReg(), Hexagon::subreg_hireg }, - { HiOp.getReg(), HiOp.getSubReg() }); - mapRegs({ DstOp.getReg(), Hexagon::subreg_loreg }, - { LoOp.getReg(), LoOp.getSubReg() }); + mapRegs(DFG.makeRegRef(DstOp.getReg(), Hexagon::isub_hi), + DFG.makeRegRef(HiOp.getReg(), HiOp.getSubReg())); + mapRegs(DFG.makeRegRef(DstOp.getReg(), Hexagon::isub_lo), + DFG.makeRegRef(LoOp.getReg(), LoOp.getSubReg())); return true; } case Hexagon::A2_addi: { const MachineOperand &A = MI->getOperand(2); if (!A.isImm() || A.getImm() != 0) return false; + LLVM_FALLTHROUGH; } - // Fall through. 
case Hexagon::A2_tfr: { const MachineOperand &DstOp = MI->getOperand(0); const MachineOperand &SrcOp = MI->getOperand(1); - mapRegs({ DstOp.getReg(), DstOp.getSubReg() }, - { SrcOp.getReg(), SrcOp.getSubReg() }); + mapRegs(DFG.makeRegRef(DstOp.getReg(), DstOp.getSubReg()), + DFG.makeRegRef(SrcOp.getReg(), SrcOp.getSubReg())); return true; } } @@ -182,7 +182,8 @@ void HexagonDCE::removeOperand(NodeAddr<InstrNode*> IA, unsigned OpNum) { llvm_unreachable("Invalid operand"); }; DenseMap<NodeId,unsigned> OpMap; - NodeList Refs = IA.Addr->members(getDFG()); + DataFlowGraph &DFG = getDFG(); + NodeList Refs = IA.Addr->members(DFG); for (NodeAddr<RefNode*> RA : Refs) OpMap.insert(std::make_pair(RA.Id, getOpNum(RA.Addr->getOp()))); @@ -191,9 +192,9 @@ void HexagonDCE::removeOperand(NodeAddr<InstrNode*> IA, unsigned OpNum) { for (NodeAddr<RefNode*> RA : Refs) { unsigned N = OpMap[RA.Id]; if (N < OpNum) - RA.Addr->setRegRef(&MI->getOperand(N)); + RA.Addr->setRegRef(&MI->getOperand(N), DFG); else if (N > OpNum) - RA.Addr->setRegRef(&MI->getOperand(N-1)); + RA.Addr->setRegRef(&MI->getOperand(N-1), DFG); } } @@ -202,11 +203,11 @@ bool HexagonDCE::rewrite(NodeAddr<InstrNode*> IA, SetVector<NodeId> &Remove) { if (!getDFG().IsCode<NodeAttrs::Stmt>(IA)) return false; DataFlowGraph &DFG = getDFG(); - MachineInstr *MI = NodeAddr<StmtNode*>(IA).Addr->getCode(); + MachineInstr &MI = *NodeAddr<StmtNode*>(IA).Addr->getCode(); auto &HII = static_cast<const HexagonInstrInfo&>(DFG.getTII()); if (HII.getAddrMode(MI) != HexagonII::PostInc) return false; - unsigned Opc = MI->getOpcode(); + unsigned Opc = MI.getOpcode(); unsigned OpNum, NewOpc; switch (Opc) { case Hexagon::L2_loadri_pi: @@ -240,12 +241,12 @@ bool HexagonDCE::rewrite(NodeAddr<InstrNode*> IA, SetVector<NodeId> &Remove) { return getDeadNodes().count(DA.Id); }; NodeList Defs; - MachineOperand &Op = MI->getOperand(OpNum); + MachineOperand &Op = MI.getOperand(OpNum); for (NodeAddr<DefNode*> DA : IA.Addr->members_if(DFG.IsDef, DFG)) { if (&DA.Addr->getOp() != &Op) continue; Defs = DFG.getRelatedRefs(IA, DA); - if (!std::all_of(Defs.begin(), Defs.end(), IsDead)) + if (!all_of(Defs, IsDead)) return false; break; } @@ -255,12 +256,12 @@ bool HexagonDCE::rewrite(NodeAddr<InstrNode*> IA, SetVector<NodeId> &Remove) { Remove.insert(D.Id); if (trace()) - dbgs() << "Rewriting: " << *MI; - MI->setDesc(HII.get(NewOpc)); - MI->getOperand(OpNum+2).setImm(0); + dbgs() << "Rewriting: " << MI; + MI.setDesc(HII.get(NewOpc)); + MI.getOperand(OpNum+2).setImm(0); removeOperand(IA, OpNum); if (trace()) - dbgs() << " to: " << *MI; + dbgs() << " to: " << MI; return true; } @@ -286,9 +287,8 @@ bool HexagonRDFOpt::runOnMachineFunction(MachineFunction &MF) { if (RDFDump) MF.print(dbgs() << "Before " << getPassName() << "\n", nullptr); - HexagonRegisterAliasInfo HAI(HRI); TargetOperandInfo TOI(HII); - DataFlowGraph G(MF, HII, HRI, *MDT, MDF, HAI, TOI); + DataFlowGraph G(MF, HII, HRI, *MDT, MDF, TOI); // Dead phi nodes are necessary for copy propagation: we can add a use // of a register in a block where it would need a phi node, but which // was dead (and removed) during the graph build time. 
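To make the HexagonDCE::rewrite change above concrete: when every def produced by the address update of a post-increment memory op is dead, the op is switched to its base+offset twin via setDesc and the offset operand is reset to zero. Roughly, for the L2_loadri_pi case (an illustration, not output from the pass):

    r1 = memw(r0++#4)    // post-increment load; the updated r0 is dead
    // becomes
    r1 = memw(r0+#0)     // plain base+offset load (L2_loadri_io)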
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp index 23ebfd4..d3f230d 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp @@ -151,6 +151,7 @@ BitVector HexagonRegisterInfo::getReservedRegs(const MachineFunction &MF) Reserved.set(Hexagon::CS0); Reserved.set(Hexagon::CS1); Reserved.set(Hexagon::CS); + Reserved.set(Hexagon::USR); return Reserved; } @@ -180,12 +181,12 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, unsigned Opc = MI.getOpcode(); switch (Opc) { - case Hexagon::TFR_FIA: + case Hexagon::PS_fia: MI.setDesc(HII.get(Hexagon::A2_addi)); MI.getOperand(FIOp).ChangeToImmediate(RealOffset); MI.RemoveOperand(FIOp+1); return; - case Hexagon::TFR_FI: + case Hexagon::PS_fi: // Set up the instruction for updating below. MI.setDesc(HII.get(Hexagon::A2_addi)); break; @@ -234,6 +235,28 @@ unsigned HexagonRegisterInfo::getStackRegister() const { } +unsigned HexagonRegisterInfo::getHexagonSubRegIndex( + const TargetRegisterClass *RC, unsigned GenIdx) const { + assert(GenIdx == Hexagon::ps_sub_lo || GenIdx == Hexagon::ps_sub_hi); + + static const unsigned ISub[] = { Hexagon::isub_lo, Hexagon::isub_hi }; + static const unsigned VSub[] = { Hexagon::vsub_lo, Hexagon::vsub_hi }; + + switch (RC->getID()) { + case Hexagon::CtrRegs64RegClassID: + case Hexagon::DoubleRegsRegClassID: + return ISub[GenIdx]; + case Hexagon::VecDblRegsRegClassID: + case Hexagon::VecDblRegs128BRegClassID: + return VSub[GenIdx]; + } + + if (const TargetRegisterClass *SuperRC = *RC->getSuperClasses()) + return getHexagonSubRegIndex(SuperRC, GenIdx); + + llvm_unreachable("Invalid register class"); +} + bool HexagonRegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const { return MF.getSubtarget<HexagonSubtarget>().getFrameLowering()->hasFP(MF); diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h index fc70679..1fb295b 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h +++ b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h @@ -22,6 +22,12 @@ #include "HexagonGenRegisterInfo.inc" namespace llvm { + +namespace Hexagon { + // Generic (pseudo) subreg indices for use with getHexagonSubRegIndex. + enum { ps_sub_lo = 0, ps_sub_hi = 1 }; +} + class HexagonRegisterInfo : public HexagonGenRegisterInfo { public: HexagonRegisterInfo(); @@ -61,6 +67,9 @@ public: unsigned getFrameRegister() const; unsigned getStackRegister() const; + unsigned getHexagonSubRegIndex(const TargetRegisterClass *RC, + unsigned GenIdx) const; + const MCPhysReg *getCallerSavedRegs(const MachineFunction *MF, const TargetRegisterClass *RC) const; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td index 4d0d411..a75f351 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td +++ b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td @@ -81,8 +81,10 @@ let Namespace = "Hexagon" in { let Num = !cast<bits<5>>(num); } - def subreg_loreg : SubRegIndex<32>; - def subreg_hireg : SubRegIndex<32, 32>; + def isub_lo : SubRegIndex<32>; + def isub_hi : SubRegIndex<32, 32>; + def vsub_lo : SubRegIndex<512>; + def vsub_hi : SubRegIndex<512, 512>; def subreg_overflow : SubRegIndex<1, 0>; // Integer registers. 
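A note on the new subregister indices: isub_lo and isub_hi name the 32-bit halves of a 64-bit register pair, while vsub_lo and vsub_hi name the 512-bit halves of an HVX vector pair; the getHexagonSubRegIndex hook added in the HexagonRegisterInfo.cpp hunk above selects the right family from the register class. A hypothetical usage sketch, assuming HRI is a HexagonRegisterInfo reference (not code from the patch):

    unsigned Lo = HRI.getHexagonSubRegIndex(&Hexagon::DoubleRegsRegClass,
                                            Hexagon::ps_sub_lo);  // isub_lo
    unsigned Hi = HRI.getHexagonSubRegIndex(&Hexagon::VecDblRegsRegClass,
                                            Hexagon::ps_sub_hi);  // vsub_hi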
@@ -95,7 +97,7 @@ let Namespace = "Hexagon" in { def R31 : Ri<31, "r31", ["lr"]>, DwarfRegNum<[31]>; // Aliases of the R* registers used to hold 64-bit int values (doubles). - let SubRegIndices = [subreg_loreg, subreg_hireg], CoveredBySubRegs = 1 in { + let SubRegIndices = [isub_lo, isub_hi], CoveredBySubRegs = 1 in { def D0 : Rd< 0, "r1:0", [R0, R1]>, DwarfRegNum<[32]>; def D1 : Rd< 2, "r3:2", [R2, R3]>, DwarfRegNum<[34]>; def D2 : Rd< 4, "r5:4", [R4, R5]>, DwarfRegNum<[36]>; @@ -150,12 +152,12 @@ let Namespace = "Hexagon" in { // Define C8 separately and make it aliased with USR. // The problem is that USR has subregisters (e.g. overflow). If USR was // specified as a subregister of C9_8, it would imply that subreg_overflow - // and subreg_loreg can be composed, which leads to all kinds of issues + // and isub_lo can be composed, which leads to all kinds of issues // with lane masks. def C8 : Rc<8, "c8", [], [USR]>, DwarfRegNum<[75]>; def PC : Rc<9, "pc">, DwarfRegNum<[76]>; def UGP : Rc<10, "ugp", ["c10"]>, DwarfRegNum<[77]>; - def GP : Rc<11, "gp">, DwarfRegNum<[78]>; + def GP : Rc<11, "gp", ["c11"]>, DwarfRegNum<[78]>; def CS0 : Rc<12, "cs0", ["c12"]>, DwarfRegNum<[79]>; def CS1 : Rc<13, "cs1", ["c13"]>, DwarfRegNum<[80]>; def UPCL : Rc<14, "upcyclelo", ["c14"]>, DwarfRegNum<[81]>; @@ -163,9 +165,10 @@ let Namespace = "Hexagon" in { } // Control registers pairs. - let SubRegIndices = [subreg_loreg, subreg_hireg], CoveredBySubRegs = 1 in { + let SubRegIndices = [isub_lo, isub_hi], CoveredBySubRegs = 1 in { def C1_0 : Rcc<0, "c1:0", [SA0, LC0], ["lc0:sa0"]>, DwarfRegNum<[67]>; def C3_2 : Rcc<2, "c3:2", [SA1, LC1], ["lc1:sa1"]>, DwarfRegNum<[69]>; + def C5_4 : Rcc<4, "c5:4", [P3_0, C5]>, DwarfRegNum<[71]>; def C7_6 : Rcc<6, "c7:6", [C6, C7], ["m1:0"]>, DwarfRegNum<[72]>; // Use C8 instead of USR as a subregister of C9_8. def C9_8 : Rcc<8, "c9:8", [C8, PC]>, DwarfRegNum<[74]>; @@ -179,7 +182,7 @@ let Namespace = "Hexagon" in { } // Aliases of the V* registers used to hold double vec values. - let SubRegIndices = [subreg_loreg, subreg_hireg], CoveredBySubRegs = 1 in { + let SubRegIndices = [vsub_lo, vsub_hi], CoveredBySubRegs = 1 in { def W0 : Rd< 0, "v1:0", [V0, V1]>, DwarfRegNum<[99]>; def W1 : Rd< 2, "v3:2", [V2, V3]>, DwarfRegNum<[101]>; def W2 : Rd< 4, "v5:4", [V4, V5]>, DwarfRegNum<[103]>; @@ -256,10 +259,13 @@ def ModRegs : RegisterClass<"Hexagon", [i32], 32, (add M0, M1)>; let Size = 32, isAllocatable = 0 in def CtrRegs : RegisterClass<"Hexagon", [i32], 32, - (add LC0, SA0, LC1, SA1, - P3_0, - M0, M1, C6, C7, CS0, CS1, UPCL, UPCH, - USR, USR_OVF, UGP, GP, PC)>; + (add LC0, SA0, LC1, SA1, + P3_0, C5, + M0, M1, C6, C7, C8, CS0, CS1, UPCL, UPCH, + USR, UGP, GP, PC)>; + +let isAllocatable = 0 in +def UsrBits : RegisterClass<"Hexagon", [i1], 0, (add USR_OVF)>; let Size = 64, isAllocatable = 0 in def CtrRegs64 : RegisterClass<"Hexagon", [i64], 64, @@ -278,8 +284,3 @@ def VolatileV3 { W12, W13, W14, W15, Q0, Q1, Q2, Q3]; } - -def PositiveHalfWord : PatLeaf<(i32 IntRegs:$a), -[{ - return isPositiveHalfWord(N); -}]>; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSelectCCInfo.td b/contrib/llvm/lib/Target/Hexagon/HexagonSelectCCInfo.td deleted file mode 100644 index d8feb89..0000000 --- a/contrib/llvm/lib/Target/Hexagon/HexagonSelectCCInfo.td +++ /dev/null @@ -1,121 +0,0 @@ -//===-- HexagoSelectCCInfo.td - Selectcc mappings ----------*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. 
See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - - -// -// selectcc mappings. -// -def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval, - IntRegs:$fval, SETEQ)), - (i32 (MUX_rr (i1 (CMPEQrr IntRegs:$lhs, IntRegs:$rhs)), - IntRegs:$tval, IntRegs:$fval))>; - -def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval, - IntRegs:$fval, SETNE)), - (i32 (MUX_rr (i1 (NOT_p (CMPEQrr IntRegs:$lhs, IntRegs:$rhs))), - IntRegs:$tval, IntRegs:$fval))>; - -def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval, - IntRegs:$fval, SETGT)), - (i32 (MUX_rr (i1 (CMPGTrr IntRegs:$lhs, IntRegs:$rhs)), - IntRegs:$tval, IntRegs:$fval))>; - -def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval, - IntRegs:$fval, SETUGT)), - (i32 (MUX_rr (i1 (CMPGTUrr IntRegs:$lhs, IntRegs:$rhs)), - IntRegs:$tval, IntRegs:$fval))>; - - - -def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval, - IntRegs:$fval, SETULT)), - (i32 (MUX_rr (i1 (NOT_p (CMPGTUrr IntRegs:$lhs, - (ADD_ri IntRegs:$rhs, -1)))), - IntRegs:$tval, IntRegs:$fval))>; - -def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval, - IntRegs:$fval, SETLT)), - (i32 (MUX_rr (i1 (NOT_p (CMPGTrr IntRegs:$lhs, - (ADD_ri IntRegs:$rhs, -1)))), - IntRegs:$tval, IntRegs:$fval))>; - -def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval, - IntRegs:$fval, SETLE)), - (i32 (MUX_rr (i1 (NOT_p (CMPGTrr IntRegs:$lhs, IntRegs:$rhs))), - IntRegs:$tval, IntRegs:$fval))>; - -def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval, - IntRegs:$fval, SETULE)), - (i32 (MUX_rr (i1 (NOT_p (CMPGTUrr IntRegs:$lhs, IntRegs:$rhs))), - IntRegs:$tval, IntRegs:$fval))>; - - -// -// selectcc mappings for greater-equal-to Rs => greater-than Rs-1. -// -def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval, - IntRegs:$fval, SETGE)), - (i32 (MUX_rr (i1 (CMPGTrr IntRegs:$lhs, (ADD_ri IntRegs:$rhs, -1))), - IntRegs:$tval, IntRegs:$fval))>; - -def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval, - IntRegs:$fval, SETUGE)), - (i32 (MUX_rr (i1 (CMPGTUrr IntRegs:$lhs, (ADD_ri IntRegs:$rhs, -1))), - IntRegs:$tval, IntRegs:$fval))>; - - - -// -// selectcc mappings for predicate comparisons. -// -// Convert Rd = selectcc(p0, p1, true_val, false_val, SETEQ) into: -// pt = not(p1 xor p2) -// Rd = mux(pt, true_val, false_val) -// and similarly for SETNE -// -def : Pat <(i32 (selectcc PredRegs:$lhs, PredRegs:$rhs, IntRegs:$tval, - IntRegs:$fval, SETNE)), - (i32 (MUX_rr (i1 (XOR_pp PredRegs:$lhs, PredRegs:$rhs)), IntRegs:$tval, - IntRegs:$fval))>; - -def : Pat <(i32 (selectcc PredRegs:$lhs, PredRegs:$rhs, IntRegs:$tval, - IntRegs:$fval, SETEQ)), - (i32 (MUX_rr (i1 (NOT_p (XOR_pp PredRegs:$lhs, PredRegs:$rhs))), - IntRegs:$tval, IntRegs:$fval))>; - - -// -// selectcc mappings for 64-bit operands are messy. Hexagon does not have a -// MUX64 o, use this: -// selectcc(Rss, Rdd, tval, fval, cond) -> -// combine(mux(cmp_cond(Rss, Rdd), tval.hi, fval.hi), -// mux(cmp_cond(Rss, Rdd), tval.lo, fval.lo)) - -// setgt-64. 
-def : Pat<(i64 (selectcc DoubleRegs:$lhs, DoubleRegs:$rhs, DoubleRegs:$tval, - DoubleRegs:$fval, SETGT)), - (COMBINE_rr (MUX_rr (CMPGT64rr DoubleRegs:$lhs, DoubleRegs:$rhs), - (EXTRACT_SUBREG DoubleRegs:$tval, subreg_hireg), - (EXTRACT_SUBREG DoubleRegs:$fval, subreg_hireg)), - (MUX_rr (CMPGT64rr DoubleRegs:$lhs, DoubleRegs:$rhs), - (EXTRACT_SUBREG DoubleRegs:$tval, subreg_loreg), - (EXTRACT_SUBREG DoubleRegs:$fval, subreg_loreg)))>; - - -// setlt-64 -> setgt-64. -def : Pat<(i64 (selectcc DoubleRegs:$lhs, DoubleRegs:$rhs, DoubleRegs:$tval, - DoubleRegs:$fval, SETLT)), - (COMBINE_rr (MUX_rr (CMPGT64rr DoubleRegs:$lhs, - (ADD64_rr DoubleRegs:$rhs, (TFRI64 -1))), - (EXTRACT_SUBREG DoubleRegs:$tval, subreg_hireg), - (EXTRACT_SUBREG DoubleRegs:$fval, subreg_hireg)), - (MUX_rr (CMPGT64rr DoubleRegs:$lhs, - (ADD64_rr DoubleRegs:$rhs, (TFRI64 -1))), - (EXTRACT_SUBREG DoubleRegs:$tval, subreg_loreg), - (EXTRACT_SUBREG DoubleRegs:$fval, subreg_loreg)))>; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp index 00dfed7..1073053 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp @@ -44,14 +44,17 @@ SDValue HexagonSelectionDAGInfo::EmitTargetCodeForMemcpy( const char *SpecialMemcpyName = "__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes"; + const MachineFunction &MF = DAG.getMachineFunction(); + bool LongCalls = MF.getSubtarget<HexagonSubtarget>().useLongCalls(); + unsigned Flags = LongCalls ? HexagonII::HMOTF_ConstExtended : 0; TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl) .setChain(Chain) .setCallee(TLI.getLibcallCallingConv(RTLIB::MEMCPY), Type::getVoidTy(*DAG.getContext()), - DAG.getTargetExternalSymbol( - SpecialMemcpyName, TLI.getPointerTy(DAG.getDataLayout())), + DAG.getTargetExternalSymbol(SpecialMemcpyName, + TLI.getPointerTy(DAG.getDataLayout()), Flags), std::move(Args)) .setDiscardResult(); diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.h index 6f2a42c..a83a8ef 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.h +++ b/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.h @@ -20,6 +20,8 @@ namespace llvm { class HexagonSelectionDAGInfo : public SelectionDAGTargetInfo { public: + explicit HexagonSelectionDAGInfo() = default; + SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp index 5a94cce..6848434 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// // // When the compiler is invoked with no small data, for instance, with the -G0 -// command line option, then all CONST32_* opcodes should be broken down into +// command line option, then all CONST* opcodes should be broken down into // appropriate LO and HI instructions. This splitting is done by this pass. 
// The only reason this is not done in the DAG lowering itself is that there // is no simple way of getting the register allocator to allot the same hard @@ -17,24 +17,13 @@ // //===----------------------------------------------------------------------===// -#include "HexagonMachineFunctionInfo.h" #include "HexagonSubtarget.h" #include "HexagonTargetMachine.h" #include "HexagonTargetObjectFile.h" -#include "llvm/CodeGen/LatencyPriorityQueue.h" -#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/ScheduleDAGInstrs.h" -#include "llvm/CodeGen/ScheduleHazardRecognizer.h" -#include "llvm/CodeGen/SchedulerRegistry.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; @@ -47,28 +36,30 @@ namespace llvm { } namespace { - -class HexagonSplitConst32AndConst64 : public MachineFunctionPass { - public: + class HexagonSplitConst32AndConst64 : public MachineFunctionPass { + public: static char ID; - HexagonSplitConst32AndConst64() : MachineFunctionPass(ID) {} - - const char *getPassName() const override { + HexagonSplitConst32AndConst64() : MachineFunctionPass(ID) { + PassRegistry &R = *PassRegistry::getPassRegistry(); + initializeHexagonSplitConst32AndConst64Pass(R); + } + StringRef getPassName() const override { return "Hexagon Split Const32s and Const64s"; } bool runOnMachineFunction(MachineFunction &Fn) override; MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( - MachineFunctionProperties::Property::AllVRegsAllocated); + MachineFunctionProperties::Property::NoVRegs); } -}; - + }; +} char HexagonSplitConst32AndConst64::ID = 0; +INITIALIZE_PASS(HexagonSplitConst32AndConst64, "split-const-for-sdata", + "Hexagon Split Const32s and Const64s", false, false) bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) { - const HexagonTargetObjectFile &TLOF = *static_cast<const HexagonTargetObjectFile *>( Fn.getTarget().getObjFileLowering()); @@ -79,93 +70,46 @@ bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) { const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo(); // Loop over all of the basic blocks - for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end(); - MBBb != MBBe; ++MBBb) { - MachineBasicBlock *MBB = &*MBBb; - // Traverse the basic block - MachineBasicBlock::iterator MII = MBB->begin(); - MachineBasicBlock::iterator MIE = MBB->end (); - while (MII != MIE) { - MachineInstr &MI = *MII; - int Opc = MI.getOpcode(); - if (Opc == Hexagon::CONST32_Int_Real && - MI.getOperand(1).isBlockAddress()) { - int DestReg = MI.getOperand(0).getReg(); - MachineOperand &Symbol = MI.getOperand(1); - - BuildMI(*MBB, MII, MI.getDebugLoc(), TII->get(Hexagon::LO), DestReg) - .addOperand(Symbol); - BuildMI(*MBB, MII, MI.getDebugLoc(), TII->get(Hexagon::HI), DestReg) - .addOperand(Symbol); - // MBB->erase returns the iterator to the next instruction, which is the - // one we want to process next - MII = MBB->erase(&MI); - continue; - } - - else if (Opc == Hexagon::CONST32_Int_Real || - Opc == Hexagon::CONST32_Float_Real) { - int DestReg = MI.getOperand(0).getReg(); - - // We have to convert an FP immediate 
into its corresponding integer - // representation - int64_t ImmValue; - if (Opc == Hexagon::CONST32_Float_Real) { - APFloat Val = MI.getOperand(1).getFPImm()->getValueAPF(); - ImmValue = *Val.bitcastToAPInt().getRawData(); - } - else - ImmValue = MI.getOperand(1).getImm(); - - BuildMI(*MBB, MII, MI.getDebugLoc(), TII->get(Hexagon::A2_tfrsi), - DestReg) + for (MachineBasicBlock &B : Fn) { + for (auto I = B.begin(), E = B.end(); I != E; ) { + MachineInstr &MI = *I; + ++I; + unsigned Opc = MI.getOpcode(); + + if (Opc == Hexagon::CONST32) { + unsigned DestReg = MI.getOperand(0).getReg(); + uint64_t ImmValue = MI.getOperand(1).getImm(); + const DebugLoc &DL = MI.getDebugLoc(); + BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), DestReg) .addImm(ImmValue); - MII = MBB->erase(&MI); - continue; - } - else if (Opc == Hexagon::CONST64_Int_Real || - Opc == Hexagon::CONST64_Float_Real) { - int DestReg = MI.getOperand(0).getReg(); - - // We have to convert an FP immediate into its corresponding integer - // representation - int64_t ImmValue; - if (Opc == Hexagon::CONST64_Float_Real) { - APFloat Val = MI.getOperand(1).getFPImm()->getValueAPF(); - ImmValue = *Val.bitcastToAPInt().getRawData(); - } - else - ImmValue = MI.getOperand(1).getImm(); - - unsigned DestLo = TRI->getSubReg(DestReg, Hexagon::subreg_loreg); - unsigned DestHi = TRI->getSubReg(DestReg, Hexagon::subreg_hireg); + B.erase(&MI); + } else if (Opc == Hexagon::CONST64) { + unsigned DestReg = MI.getOperand(0).getReg(); + int64_t ImmValue = MI.getOperand(1).getImm(); + const DebugLoc &DL = MI.getDebugLoc(); + unsigned DestLo = TRI->getSubReg(DestReg, Hexagon::isub_lo); + unsigned DestHi = TRI->getSubReg(DestReg, Hexagon::isub_hi); int32_t LowWord = (ImmValue & 0xFFFFFFFF); int32_t HighWord = (ImmValue >> 32) & 0xFFFFFFFF; - BuildMI(*MBB, MII, MI.getDebugLoc(), TII->get(Hexagon::A2_tfrsi), - DestLo) + BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), DestLo) .addImm(LowWord); - BuildMI(*MBB, MII, MI.getDebugLoc(), TII->get(Hexagon::A2_tfrsi), - DestHi) + BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), DestHi) .addImm(HighWord); - MII = MBB->erase(&MI); - continue; + B.erase(&MI); } - ++MII; } } return true; } -} //===----------------------------------------------------------------------===// // Public Constructor Functions //===----------------------------------------------------------------------===// -FunctionPass * -llvm::createHexagonSplitConst32AndConst64() { +FunctionPass *llvm::createHexagonSplitConst32AndConst64() { return new HexagonSplitConst32AndConst64(); } diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp index 25b2aff..2c93721 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp @@ -9,32 +9,50 @@ #define DEBUG_TYPE "hsdr" +#include "HexagonInstrInfo.h" #include "HexagonRegisterInfo.h" -#include "HexagonTargetMachine.h" - +#include "HexagonSubtarget.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include 
"llvm/IR/DebugLoc.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" - +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <limits> #include <map> #include <set> +#include <utility> #include <vector> using namespace llvm; namespace llvm { + FunctionPass *createHexagonSplitDoubleRegs(); void initializeHexagonSplitDoubleRegsPass(PassRegistry&); -} + +} // end namespace llvm namespace { + static cl::opt<int> MaxHSDR("max-hsdr", cl::Hidden, cl::init(-1), cl::desc("Maximum number of split partitions")); static cl::opt<bool> MemRefsFixed("hsdr-no-mem", cl::Hidden, cl::init(true), @@ -43,18 +61,22 @@ namespace { class HexagonSplitDoubleRegs : public MachineFunctionPass { public: static char ID; + HexagonSplitDoubleRegs() : MachineFunctionPass(ID), TRI(nullptr), TII(nullptr) { initializeHexagonSplitDoubleRegsPass(*PassRegistry::getPassRegistry()); } - const char *getPassName() const override { + + StringRef getPassName() const override { return "Hexagon Split Double Registers"; } + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<MachineLoopInfo>(); AU.addPreserved<MachineLoopInfo>(); MachineFunctionPass::getAnalysisUsage(AU); } + bool runOnMachineFunction(MachineFunction &MF) override; private: @@ -98,16 +120,17 @@ namespace { static void dump_partition(raw_ostream&, const USet&, const TargetRegisterInfo&); }; + char HexagonSplitDoubleRegs::ID; int HexagonSplitDoubleRegs::Counter = 0; const TargetRegisterClass *const HexagonSplitDoubleRegs::DoubleRC = &Hexagon::DoubleRegsRegClass; -} + +} // end anonymous namespace INITIALIZE_PASS(HexagonSplitDoubleRegs, "hexagon-split-double", "Hexagon Split Double Registers", false, false) - void HexagonSplitDoubleRegs::dump_partition(raw_ostream &os, const USet &Part, const TargetRegisterInfo &TRI) { dbgs() << '{'; @@ -116,7 +139,6 @@ void HexagonSplitDoubleRegs::dump_partition(raw_ostream &os, dbgs() << " }"; } - bool HexagonSplitDoubleRegs::isInduction(unsigned Reg, LoopRegMap &IRM) const { for (auto I : IRM) { const USet &Rs = I.second; @@ -126,7 +148,6 @@ bool HexagonSplitDoubleRegs::isInduction(unsigned Reg, LoopRegMap &IRM) const { return false; } - bool HexagonSplitDoubleRegs::isVolatileInstr(const MachineInstr *MI) const { for (auto &I : MI->memoperands()) if (I->isVolatile()) @@ -134,7 +155,6 @@ bool HexagonSplitDoubleRegs::isVolatileInstr(const MachineInstr *MI) const { return false; } - bool HexagonSplitDoubleRegs::isFixedInstr(const MachineInstr *MI) const { if (MI->mayLoad() || MI->mayStore()) if (MemRefsFixed || isVolatileInstr(MI)) @@ -170,7 +190,7 @@ bool HexagonSplitDoubleRegs::isFixedInstr(const MachineInstr *MI) const { case Hexagon::A4_combineii: case Hexagon::A4_combineri: case Hexagon::A2_combinew: - case Hexagon::CONST64_Int_Real: + case Hexagon::CONST64: case Hexagon::A2_sxtw: @@ -194,7 +214,6 @@ bool HexagonSplitDoubleRegs::isFixedInstr(const MachineInstr *MI) const { return false; } - void HexagonSplitDoubleRegs::partitionRegisters(UUSetMap &P2Rs) { typedef std::map<unsigned,unsigned> UUMap; typedef std::vector<unsigned> UVect; @@ -283,7 +302,6 @@ void HexagonSplitDoubleRegs::partitionRegisters(UUSetMap &P2Rs) { P2Rs[I.second].insert(I.first); } - static inline int32_t profitImm(unsigned Lo, unsigned Hi) { int32_t P = 0; bool LoZ1 = false, HiZ1 = false; @@ -296,7 +314,6 @@ static inline 
int32_t profitImm(unsigned Lo, unsigned Hi) { return P; } - int32_t HexagonSplitDoubleRegs::profit(const MachineInstr *MI) const { unsigned ImmX = 0; unsigned Opc = MI->getOpcode(); @@ -319,7 +336,7 @@ int32_t HexagonSplitDoubleRegs::profit(const MachineInstr *MI) const { return 2; case Hexagon::A2_tfrpi: - case Hexagon::CONST64_Int_Real: { + case Hexagon::CONST64: { uint64_t D = MI->getOperand(1).getImm(); unsigned Lo = D & 0xFFFFFFFFULL; unsigned Hi = D >> 32; @@ -337,6 +354,7 @@ int32_t HexagonSplitDoubleRegs::profit(const MachineInstr *MI) const { if (V == 0 || V == -1) return 10; // Fall through into A2_combinew. + LLVM_FALLTHROUGH; } case Hexagon::A2_combinew: return 2; @@ -371,7 +389,6 @@ int32_t HexagonSplitDoubleRegs::profit(const MachineInstr *MI) const { return 0; } - bool HexagonSplitDoubleRegs::isProfitable(const USet &Part, LoopRegMap &IRM) const { unsigned FixedNum = 0, SplitNum = 0, LoopPhiNum = 0; @@ -380,7 +397,7 @@ bool HexagonSplitDoubleRegs::isProfitable(const USet &Part, LoopRegMap &IRM) for (unsigned DR : Part) { MachineInstr *DefI = MRI->getVRegDef(DR); int32_t P = profit(DefI); - if (P == INT_MIN) + if (P == std::numeric_limits<int>::min()) return false; TotalP += P; // Reduce the profitability of splitting induction registers. @@ -413,7 +430,7 @@ bool HexagonSplitDoubleRegs::isProfitable(const USet &Part, LoopRegMap &IRM) // Splittable instruction. SplitNum++; int32_t P = profit(UseI); - if (P == INT_MIN) + if (P == std::numeric_limits<int>::min()) return false; TotalP += P; } @@ -426,7 +443,6 @@ bool HexagonSplitDoubleRegs::isProfitable(const USet &Part, LoopRegMap &IRM) return TotalP > 0; } - void HexagonSplitDoubleRegs::collectIndRegsForLoop(const MachineLoop *L, USet &Rs) { const MachineBasicBlock *HB = L->getHeader(); @@ -436,11 +452,11 @@ void HexagonSplitDoubleRegs::collectIndRegsForLoop(const MachineLoop *L, // Examine the latch branch. Expect it to be a conditional branch to // the header (either "br-cond header" or "br-cond exit; br header"). - MachineBasicBlock *TB = 0, *FB = 0; + MachineBasicBlock *TB = nullptr, *FB = nullptr; MachineBasicBlock *TmpLB = const_cast<MachineBasicBlock*>(LB); SmallVector<MachineOperand,2> Cond; bool BadLB = TII->analyzeBranch(*TmpLB, TB, FB, Cond, false); - // Only analyzable conditional branches. HII::AnalyzeBranch will put + // Only analyzable conditional branches. HII::analyzeBranch will put // the branch opcode as the first element of Cond, and the predicate // operand as the second. if (BadLB || Cond.size() != 2) @@ -451,7 +467,7 @@ void HexagonSplitDoubleRegs::collectIndRegsForLoop(const MachineLoop *L, // Must go to the header. if (TB != HB && FB != HB) return; - assert(Cond[1].isReg() && "Unexpected Cond vector from AnalyzeBranch"); + assert(Cond[1].isReg() && "Unexpected Cond vector from analyzeBranch"); // Expect a predicate register. 
unsigned PR = Cond[1].getReg(); assert(MRI->getRegClass(PR) == &Hexagon::PredRegsRegClass); @@ -510,7 +526,7 @@ void HexagonSplitDoubleRegs::collectIndRegsForLoop(const MachineLoop *L, } return true; }; - UVect::iterator End = std::remove_if(DP.begin(), DP.end(), NoIndOp); + UVect::iterator End = llvm::remove_if(DP, NoIndOp); Rs.insert(DP.begin(), End); Rs.insert(CmpR1); Rs.insert(CmpR2); @@ -522,7 +538,6 @@ void HexagonSplitDoubleRegs::collectIndRegsForLoop(const MachineLoop *L, }); } - void HexagonSplitDoubleRegs::collectIndRegs(LoopRegMap &IRM) { typedef std::vector<MachineLoop*> LoopVector; LoopVector WorkQ; @@ -544,7 +559,6 @@ void HexagonSplitDoubleRegs::collectIndRegs(LoopRegMap &IRM) { } } - void HexagonSplitDoubleRegs::createHalfInstr(unsigned Opc, MachineInstr *MI, const UUPairMap &PairMap, unsigned SubR) { MachineBasicBlock &B = *MI->getParent(); @@ -568,7 +582,7 @@ void HexagonSplitDoubleRegs::createHalfInstr(unsigned Opc, MachineInstr *MI, SR = SubR; } else { const UUPair &P = F->second; - R = (SubR == Hexagon::subreg_loreg) ? P.first : P.second; + R = (SubR == Hexagon::isub_lo) ? P.first : P.second; SR = 0; } } @@ -579,7 +593,6 @@ void HexagonSplitDoubleRegs::createHalfInstr(unsigned Opc, MachineInstr *MI, } } - void HexagonSplitDoubleRegs::splitMemRef(MachineInstr *MI, const UUPairMap &PairMap) { bool Load = MI->mayLoad(); @@ -652,7 +665,6 @@ void HexagonSplitDoubleRegs::splitMemRef(MachineInstr *MI, } } - void HexagonSplitDoubleRegs::splitImmediate(MachineInstr *MI, const UUPairMap &PairMap) { MachineOperand &Op0 = MI->getOperand(0); @@ -680,7 +692,6 @@ void HexagonSplitDoubleRegs::splitImmediate(MachineInstr *MI, .addImm(int32_t(V >> 32)); } - void HexagonSplitDoubleRegs::splitCombine(MachineInstr *MI, const UUPairMap &PairMap) { MachineOperand &Op0 = MI->getOperand(0); @@ -713,7 +724,6 @@ void HexagonSplitDoubleRegs::splitCombine(MachineInstr *MI, llvm_unreachable("Unexpected operand"); } - void HexagonSplitDoubleRegs::splitExt(MachineInstr *MI, const UUPairMap &PairMap) { MachineOperand &Op0 = MI->getOperand(0); @@ -734,9 +744,10 @@ void HexagonSplitDoubleRegs::splitExt(MachineInstr *MI, .addImm(31); } - void HexagonSplitDoubleRegs::splitShift(MachineInstr *MI, const UUPairMap &PairMap) { + using namespace Hexagon; + MachineOperand &Op0 = MI->getOperand(0); MachineOperand &Op1 = MI->getOperand(1); MachineOperand &Op2 = MI->getOperand(2); @@ -750,7 +761,6 @@ void HexagonSplitDoubleRegs::splitShift(MachineInstr *MI, const UUPair &P = F->second; unsigned LoR = P.first; unsigned HiR = P.second; - using namespace Hexagon; unsigned Opc = MI->getOpcode(); bool Right = (Opc == S2_lsr_i_p || Opc == S2_asr_i_p); @@ -762,8 +772,8 @@ void HexagonSplitDoubleRegs::splitShift(MachineInstr *MI, unsigned RS = getRegState(Op1); unsigned ShiftOpc = Left ? S2_asl_i_r : (Signed ? S2_asr_i_r : S2_lsr_i_r); - unsigned LoSR = subreg_loreg; - unsigned HiSR = subreg_hireg; + unsigned LoSR = isub_lo; + unsigned HiSR = isub_hi; if (S == 0) { // No shift, subregister copy. 
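The arithmetic behind splitImmediate above is easy to check in isolation. The following is a standalone sketch, not code from the pass (the helper name is invented); it mirrors the Lo/Hi computation that feeds the isub_lo and isub_hi halves of a double register:

    #include <cstdint>
    #include <utility>

    // Split a 64-bit immediate into the two 32-bit values that are
    // materialized independently into the halves of a double register.
    std::pair<int32_t, int32_t> splitImm64(uint64_t V) {
      int32_t Lo = int32_t(V & 0xFFFFFFFFULL); // isub_lo half
      int32_t Hi = int32_t(V >> 32);           // isub_hi half
      return {Lo, Hi};
    }

For example, splitImm64(0x123456789ABCDEF0) produces the bit patterns 0x9ABCDEF0 and 0x12345678, exactly the two transfer-immediate operands the pass emits.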
@@ -858,9 +868,10 @@ void HexagonSplitDoubleRegs::splitShift(MachineInstr *MI, } } - void HexagonSplitDoubleRegs::splitAslOr(MachineInstr *MI, const UUPairMap &PairMap) { + using namespace Hexagon; + MachineOperand &Op0 = MI->getOperand(0); MachineOperand &Op1 = MI->getOperand(1); MachineOperand &Op2 = MI->getOperand(2); @@ -875,7 +886,6 @@ void HexagonSplitDoubleRegs::splitAslOr(MachineInstr *MI, const UUPair &P = F->second; unsigned LoR = P.first; unsigned HiR = P.second; - using namespace Hexagon; MachineBasicBlock &B = *MI->getParent(); DebugLoc DL = MI->getDebugLoc(); @@ -883,8 +893,8 @@ void HexagonSplitDoubleRegs::splitAslOr(MachineInstr *MI, unsigned RS2 = getRegState(Op2); const TargetRegisterClass *IntRC = &IntRegsRegClass; - unsigned LoSR = subreg_loreg; - unsigned HiSR = subreg_hireg; + unsigned LoSR = isub_lo; + unsigned HiSR = isub_hi; // Op0 = S2_asl_i_p_or Op1, Op2, Op3 // means: Op0 = or (Op1, asl(Op2, Op3)) @@ -951,38 +961,38 @@ void HexagonSplitDoubleRegs::splitAslOr(MachineInstr *MI, } } - bool HexagonSplitDoubleRegs::splitInstr(MachineInstr *MI, const UUPairMap &PairMap) { + using namespace Hexagon; + DEBUG(dbgs() << "Splitting: " << *MI); bool Split = false; unsigned Opc = MI->getOpcode(); - using namespace Hexagon; switch (Opc) { case TargetOpcode::PHI: case TargetOpcode::COPY: { unsigned DstR = MI->getOperand(0).getReg(); if (MRI->getRegClass(DstR) == DoubleRC) { - createHalfInstr(Opc, MI, PairMap, subreg_loreg); - createHalfInstr(Opc, MI, PairMap, subreg_hireg); + createHalfInstr(Opc, MI, PairMap, isub_lo); + createHalfInstr(Opc, MI, PairMap, isub_hi); Split = true; } break; } case A2_andp: - createHalfInstr(A2_and, MI, PairMap, subreg_loreg); - createHalfInstr(A2_and, MI, PairMap, subreg_hireg); + createHalfInstr(A2_and, MI, PairMap, isub_lo); + createHalfInstr(A2_and, MI, PairMap, isub_hi); Split = true; break; case A2_orp: - createHalfInstr(A2_or, MI, PairMap, subreg_loreg); - createHalfInstr(A2_or, MI, PairMap, subreg_hireg); + createHalfInstr(A2_or, MI, PairMap, isub_lo); + createHalfInstr(A2_or, MI, PairMap, isub_hi); Split = true; break; case A2_xorp: - createHalfInstr(A2_xor, MI, PairMap, subreg_loreg); - createHalfInstr(A2_xor, MI, PairMap, subreg_hireg); + createHalfInstr(A2_xor, MI, PairMap, isub_lo); + createHalfInstr(A2_xor, MI, PairMap, isub_hi); Split = true; break; @@ -995,7 +1005,7 @@ bool HexagonSplitDoubleRegs::splitInstr(MachineInstr *MI, break; case A2_tfrpi: - case CONST64_Int_Real: + case CONST64: splitImmediate(MI, PairMap); Split = true; break; @@ -1034,7 +1044,6 @@ bool HexagonSplitDoubleRegs::splitInstr(MachineInstr *MI, return Split; } - void HexagonSplitDoubleRegs::replaceSubregUses(MachineInstr *MI, const UUPairMap &PairMap) { for (auto &Op : MI->operands()) { @@ -1046,10 +1055,10 @@ void HexagonSplitDoubleRegs::replaceSubregUses(MachineInstr *MI, continue; const UUPair &P = F->second; switch (Op.getSubReg()) { - case Hexagon::subreg_loreg: + case Hexagon::isub_lo: Op.setReg(P.first); break; - case Hexagon::subreg_hireg: + case Hexagon::isub_hi: Op.setReg(P.second); break; } @@ -1057,7 +1066,6 @@ void HexagonSplitDoubleRegs::replaceSubregUses(MachineInstr *MI, } } - void HexagonSplitDoubleRegs::collapseRegPairs(MachineInstr *MI, const UUPairMap &PairMap) { MachineBasicBlock &B = *MI->getParent(); @@ -1078,14 +1086,13 @@ void HexagonSplitDoubleRegs::collapseRegPairs(MachineInstr *MI, unsigned NewDR = MRI->createVirtualRegister(DoubleRC); BuildMI(B, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), NewDR) .addReg(Pr.first) - 
.addImm(Hexagon::subreg_loreg) + .addImm(Hexagon::isub_lo) .addReg(Pr.second) - .addImm(Hexagon::subreg_hireg); + .addImm(Hexagon::isub_hi); Op.setReg(NewDR); } } - bool HexagonSplitDoubleRegs::splitPartition(const USet &Part) { const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass; typedef std::set<MachineInstr*> MISet; @@ -1146,7 +1153,6 @@ bool HexagonSplitDoubleRegs::splitPartition(const USet &Part) { return Changed; } - bool HexagonSplitDoubleRegs::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "Splitting double registers in function: " << MF.getName() << '\n'); diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp index 54bc3cf..af1bf48 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp @@ -23,33 +23,45 @@ #define DEBUG_TYPE "hexagon-widen-stores" -#include "HexagonTargetMachine.h" - -#include "llvm/PassSupport.h" +#include "HexagonInstrInfo.h" +#include "HexagonRegisterInfo.h" +#include "HexagonSubtarget.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/CodeGen/Passes.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/MC/MCInstrDesc.h" +#include "llvm/Pass.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" - #include <algorithm> - +#include <cassert> +#include <cstdint> +#include <iterator> +#include <vector> using namespace llvm; namespace llvm { + FunctionPass *createHexagonStoreWidening(); void initializeHexagonStoreWideningPass(PassRegistry&); -} + +} // end namespace llvm namespace { + struct HexagonStoreWidening : public MachineFunctionPass { const HexagonInstrInfo *TII; const HexagonRegisterInfo *TRI; @@ -59,15 +71,14 @@ namespace { public: static char ID; + HexagonStoreWidening() : MachineFunctionPass(ID) { initializeHexagonStoreWideningPass(*PassRegistry::getPassRegistry()); } bool runOnMachineFunction(MachineFunction &MF) override; - const char *getPassName() const override { - return "Hexagon Store Widening"; - } + StringRef getPassName() const override { return "Hexagon Store Widening"; } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<AAResultsWrapperPass>(); @@ -98,19 +109,18 @@ namespace { bool storesAreAdjacent(const MachineInstr *S1, const MachineInstr *S2); }; -} // namespace - +char HexagonStoreWidening::ID = 0; -namespace { +} // end anonymous namespace // Some local helper functions... 
-unsigned getBaseAddressRegister(const MachineInstr *MI) { +static unsigned getBaseAddressRegister(const MachineInstr *MI) { const MachineOperand &MO = MI->getOperand(0); assert(MO.isReg() && "Expecting register operand"); return MO.getReg(); } -int64_t getStoreOffset(const MachineInstr *MI) { +static int64_t getStoreOffset(const MachineInstr *MI) { unsigned OpC = MI->getOpcode(); assert(HexagonStoreWidening::handledStoreType(MI) && "Unhandled opcode"); @@ -128,23 +138,17 @@ return 0; } -const MachineMemOperand &getStoreTarget(const MachineInstr *MI) { +static const MachineMemOperand &getStoreTarget(const MachineInstr *MI) { assert(!MI->memoperands_empty() && "Expecting memory operands"); return **MI->memoperands_begin(); } -} // namespace - - -char HexagonStoreWidening::ID = 0; - INITIALIZE_PASS_BEGIN(HexagonStoreWidening, "hexagon-widen-stores", "Hexagon Store Widening", false, false) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(HexagonStoreWidening, "hexagon-widen-stores", "Hexagon Store Widening", false, false) - // Filtering function: any stores whose opcodes are not "approved" of by // this function will not be subjected to widening. inline bool HexagonStoreWidening::handledStoreType(const MachineInstr *MI) { @@ -162,7 +166,6 @@ } } - // Check if the machine memory operand MMO is aliased with any of the // stores in the store group Stores. bool HexagonStoreWidening::instrAliased(InstrGroup &Stores, @@ -185,7 +188,6 @@ return false; } - // Check if the machine instruction MI accesses any storage aliased with // any store in the group Stores. bool HexagonStoreWidening::instrAliased(InstrGroup &Stores, @@ -196,7 +198,6 @@ return false; } - // Inspect a machine basic block, and generate store groups out of stores // encountered in the block. // @@ -233,7 +234,6 @@ } } - // Create a single store group. The stores need to be independent between // themselves, and also there cannot be other instructions between them // that could read or modify storage being stored into. @@ -263,7 +263,7 @@ unsigned BR = getBaseAddressRegister(MI); if (BR == BaseReg) { Group.push_back(MI); - *I = 0; + *I = nullptr; continue; } } @@ -280,7 +280,6 @@ } // for } - // Check if store instructions S1 and S2 are adjacent. More precisely, // S2 has to access memory immediately following that accessed by S1. bool HexagonStoreWidening::storesAreAdjacent(const MachineInstr *S1, @@ -298,7 +297,6 @@ : int(Off1+S1MO.getSize()) == Off2; } - /// Given a sequence of adjacent stores, and a maximum size of a single wide /// store, pick a group of stores that can be replaced by a single store /// of size not exceeding MaxSize. The selected sequence will be recorded @@ -390,7 +388,6 @@ return true; } - /// Given an "old group" OG of stores, create a "new group" NG of instructions /// to replace them. Ideally, NG would only have a single instruction in it, /// but that may only be possible for store-immediate.
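Stripped of the MachineInstr plumbing, selectStores and createWideStores above amount to packing the immediates of adjacent narrow stores into one wider value, lowest address in the least significant bits (Hexagon is little-endian). A minimal sketch under that assumption, with an invented helper name:

    #include <cstdint>
    #include <utility>
    #include <vector>

    // Pack (immediate, bit-width) pairs, ordered by increasing address,
    // into a single value; mirrors the Acc/Shift accumulation in
    // createWideStores. Total width must not exceed 64 bits.
    uint64_t packStoreImms(const std::vector<std::pair<uint64_t, unsigned>> &Imms) {
      uint64_t Acc = 0;
      unsigned Shift = 0;
      for (const auto &P : Imms) {
        uint64_t Mask = P.second >= 64 ? ~0ULL : ((1ULL << P.second) - 1);
        Acc |= (P.first & Mask) << Shift;
        Shift += P.second;
      }
      return Acc;
    }

Two adjacent byte stores of 0x12 (offset 0) and 0x34 (offset 1) pack to 0x3412, so a single halfword store of 0x3412 writes the same bytes.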
@@ -419,7 +416,6 @@ bool HexagonStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG, Shift += NBits; } - MachineInstr *FirstSt = OG.front(); DebugLoc DL = OG.back()->getDebugLoc(); const MachineMemOperand &OldM = getStoreTarget(FirstSt); @@ -471,7 +467,6 @@ bool HexagonStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG, return true; } - // Replace instructions from the old group OG with instructions from the // new group NG. Conceptually, remove all instructions in OG, and then // insert all instructions in NG, starting at where the first instruction @@ -536,7 +531,6 @@ bool HexagonStoreWidening::replaceStores(InstrGroup &OG, InstrGroup &NG) { return true; } - // Break up the group into smaller groups, each of which can be replaced by // a single wide store. Widen each such smaller group and replace the old // instructions with the widened ones. @@ -566,7 +560,6 @@ bool HexagonStoreWidening::processStoreGroup(InstrGroup &Group) { return Changed; } - // Process a single basic block: create the store groups, and replace them // with the widened stores, if possible. Processing of each basic block // is independent from processing of any other basic block. This transfor- @@ -592,7 +585,6 @@ bool HexagonStoreWidening::processBasicBlock(MachineBasicBlock &MBB) { return Changed; } - bool HexagonStoreWidening::runOnMachineFunction(MachineFunction &MFn) { if (skipFunction(*MFn.getFunction())) return false; @@ -612,8 +604,6 @@ bool HexagonStoreWidening::runOnMachineFunction(MachineFunction &MFn) { return Changed; } - FunctionPass *llvm::createHexagonStoreWidening() { return new HexagonStoreWidening(); } - diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp index fb315a7..8c23a24 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -66,9 +66,13 @@ static cl::opt<bool> DisableHexagonMISched("disable-hexagon-misched", cl::desc("Disable Hexagon MI Scheduling")); static cl::opt<bool> EnableSubregLiveness("hexagon-subreg-liveness", - cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::Hidden, cl::ZeroOrMore, cl::init(true), cl::desc("Enable subregister liveness tracking for Hexagon")); +static cl::opt<bool> OverrideLongCalls("hexagon-long-calls", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("If present, forces/disables the use of long calls")); + void HexagonSubtarget::initializeEnvironment() { UseMemOps = false; ModeIEEERndNear = false; @@ -77,7 +81,7 @@ void HexagonSubtarget::initializeEnvironment() { HexagonSubtarget & HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { - CPUString = HEXAGON_MC::selectHexagonCPU(getTargetTriple(), CPU); + CPUString = Hexagon_MC::selectHexagonCPU(getTargetTriple(), CPU); static std::map<StringRef, HexagonArchEnum> CpuTable { { "hexagonv4", V4 }, @@ -94,12 +98,15 @@ HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { UseHVXOps = false; UseHVXDblOps = false; + UseLongCalls = false; ParseSubtargetFeatures(CPUString, FS); if (EnableHexagonHVX.getPosition()) UseHVXOps = EnableHexagonHVX; if (EnableHexagonHVXDouble.getPosition()) UseHVXDblOps = EnableHexagonHVXDouble; + if (OverrideLongCalls.getPosition()) + UseLongCalls = OverrideLongCalls; return *this; } @@ -148,19 +155,19 @@ void HexagonSubtarget::HexagonDAGMutation::apply(ScheduleDAGInstrs *DAG) { // Update the latency of chain edges between v60 vector load or store // instructions to be 
1. These instructions cannot be scheduled in the // same packet. - MachineInstr *MI1 = SU.getInstr(); + MachineInstr &MI1 = *SU.getInstr(); auto *QII = static_cast<const HexagonInstrInfo*>(DAG->TII); - bool IsStoreMI1 = MI1->mayStore(); - bool IsLoadMI1 = MI1->mayLoad(); + bool IsStoreMI1 = MI1.mayStore(); + bool IsLoadMI1 = MI1.mayLoad(); if (!QII->isV60VectorInstruction(MI1) || !(IsStoreMI1 || IsLoadMI1)) continue; for (auto &SI : SU.Succs) { if (SI.getKind() != SDep::Order || SI.getLatency() != 0) continue; - MachineInstr *MI2 = SI.getSUnit()->getInstr(); + MachineInstr &MI2 = *SI.getSUnit()->getInstr(); if (!QII->isV60VectorInstruction(MI2)) continue; - if ((IsStoreMI1 && MI2->mayStore()) || (IsLoadMI1 && MI2->mayLoad())) { + if ((IsStoreMI1 && MI2.mayStore()) || (IsLoadMI1 && MI2.mayLoad())) { SI.setLatency(1); SU.setHeightDirty(); // Change the dependence in the opposite direction too. @@ -181,6 +188,11 @@ void HexagonSubtarget::getPostRAMutations( Mutations.push_back(make_unique<HexagonSubtarget::HexagonDAGMutation>()); } +void HexagonSubtarget::getSMSMutations( + std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const { + Mutations.push_back(make_unique<HexagonSubtarget::HexagonDAGMutation>()); +} + // Pin the vtable to this file. void HexagonSubtarget::anchor() {} @@ -196,8 +208,8 @@ bool HexagonSubtarget::enableSubRegLiveness() const { } // This helper function is responsible for increasing the latency only. -void HexagonSubtarget::updateLatency(MachineInstr *SrcInst, - MachineInstr *DstInst, SDep &Dep) const { +void HexagonSubtarget::updateLatency(MachineInstr &SrcInst, + MachineInstr &DstInst, SDep &Dep) const { if (!hasV60TOps()) return; @@ -231,19 +243,19 @@ static SUnit *getZeroLatency(SUnit *N, SmallVector<SDep, 4> &Deps) { /// Change the latency between the two SUnits. void HexagonSubtarget::changeLatency(SUnit *Src, SmallVector<SDep, 4> &Deps, SUnit *Dst, unsigned Lat) const { - MachineInstr *SrcI = Src->getInstr(); + MachineInstr &SrcI = *Src->getInstr(); for (auto &I : Deps) { if (I.getSUnit() != Dst) continue; I.setLatency(Lat); SUnit *UpdateDst = I.getSUnit(); - updateLatency(SrcI, UpdateDst->getInstr(), I); + updateLatency(SrcI, *UpdateDst->getInstr(), I); // Update the latency of opposite edge too. for (auto &PI : UpdateDst->Preds) { if (PI.getSUnit() != Src || !PI.isAssignedRegDep()) continue; PI.setLatency(Lat); - updateLatency(SrcI, UpdateDst->getInstr(), PI); + updateLatency(SrcI, *UpdateDst->getInstr(), PI); } } } @@ -254,10 +266,14 @@ void HexagonSubtarget::changeLatency(SUnit *Src, SmallVector<SDep, 4> &Deps, // the others, if needed. bool HexagonSubtarget::isBestZeroLatency(SUnit *Src, SUnit *Dst, const HexagonInstrInfo *TII) const { - MachineInstr *SrcInst = Src->getInstr(); - MachineInstr *DstInst = Dst->getInstr(); + MachineInstr &SrcInst = *Src->getInstr(); + MachineInstr &DstInst = *Dst->getInstr(); + + // Ignore Boundary SU nodes as these have null instructions. + if (Dst->isBoundaryNode()) + return false; - if (SrcInst->isPHI() || DstInst->isPHI()) + if (SrcInst.isPHI() || DstInst.isPHI()) return false; // Check if the Dst instruction is the best candidate first. @@ -294,9 +310,9 @@ bool HexagonSubtarget::isBestZeroLatency(SUnit *Src, SUnit *Dst, // Update the latency of a Phi when the Phi bridges two instructions that // require a multi-cycle latency.
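A detail that is easy to miss in changeLatency above: every dependence edge is recorded twice, once in the source's successor list and once in the destination's predecessor list, and both copies must be updated together or the scheduling DAG becomes inconsistent. A toy sketch of that invariant with made-up types (the real code walks SUnit Succs/Preds); the Phi handling the comment above introduces follows after it:

    #include <vector>

    struct Edge { int Other; unsigned Latency; };
    struct Node { std::vector<Edge> Succs, Preds; };

    // Set the latency on the forward edge and keep its mirror in sync.
    void setLatency(std::vector<Node> &G, int Src, int Dst, unsigned Lat) {
      for (Edge &E : G[Src].Succs)
        if (E.Other == Dst)
          E.Latency = Lat;
      for (Edge &E : G[Dst].Preds)
        if (E.Other == Src)
          E.Latency = Lat;
    }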
-void HexagonSubtarget::changePhiLatency(MachineInstr *SrcInst, SUnit *Dst, +void HexagonSubtarget::changePhiLatency(MachineInstr &SrcInst, SUnit *Dst, SDep &Dep) const { - if (!SrcInst->isPHI() || Dst->NumPreds == 0 || Dep.getLatency() != 0) + if (!SrcInst.isPHI() || Dst->NumPreds == 0 || Dep.getLatency() != 0) return; for (const SDep &PI : Dst->Preds) { @@ -319,7 +335,7 @@ void HexagonSubtarget::adjustSchedDependency(SUnit *Src, SUnit *Dst, const HexagonInstrInfo *QII = static_cast<const HexagonInstrInfo *>(getInstrInfo()); // Instructions with .new operands have zero latency. - if (QII->canExecuteInBundle(SrcInst, DstInst) && + if (QII->canExecuteInBundle(*SrcInst, *DstInst) && isBestZeroLatency(Src, Dst, QII)) { Dep.setLatency(0); return; @@ -329,17 +345,17 @@ void HexagonSubtarget::adjustSchedDependency(SUnit *Src, SUnit *Dst, return; // Don't adjust the latency of post-increment part of the instruction. - if (QII->isPostIncrement(SrcInst) && Dep.isAssignedRegDep()) { + if (QII->isPostIncrement(*SrcInst) && Dep.isAssignedRegDep()) { if (SrcInst->mayStore()) return; if (Dep.getReg() != SrcInst->getOperand(0).getReg()) return; - } else if (QII->isPostIncrement(DstInst) && Dep.getKind() == SDep::Anti) { + } else if (QII->isPostIncrement(*DstInst) && Dep.getKind() == SDep::Anti) { if (DstInst->mayStore()) return; if (Dep.getReg() != DstInst->getOperand(0).getReg()) return; - } else if (QII->isPostIncrement(DstInst) && DstInst->mayStore() && + } else if (QII->isPostIncrement(*DstInst) && DstInst->mayStore() && Dep.isAssignedRegDep()) { MachineOperand &Op = DstInst->getOperand(DstInst->getNumOperands() - 1); if (Op.isReg() && Dep.getReg() != Op.getReg()) @@ -348,7 +364,7 @@ void HexagonSubtarget::adjustSchedDependency(SUnit *Src, SUnit *Dst, // Check if we need to change any of the latency values when Phis are added. if (useBSBScheduling() && SrcInst->isPHI()) { - changePhiLatency(SrcInst, Dst, Dep); + changePhiLatency(*SrcInst, Dst, Dep); return; } @@ -358,12 +374,20 @@ void HexagonSubtarget::adjustSchedDependency(SUnit *Src, SUnit *Dst, DstInst = Dst->Succs[0].getSUnit()->getInstr(); // Try to schedule uses near definitions to generate .cur.
- if (EnableDotCurSched && QII->isToBeScheduledASAP(SrcInst, DstInst) && + if (EnableDotCurSched && QII->isToBeScheduledASAP(*SrcInst, *DstInst) && isBestZeroLatency(Src, Dst, QII)) { Dep.setLatency(0); return; } - updateLatency(SrcInst, DstInst, Dep); + updateLatency(*SrcInst, *DstInst, Dep); +} + +unsigned HexagonSubtarget::getL1CacheLineSize() const { + return 32; +} + +unsigned HexagonSubtarget::getL1PrefetchDistance() const { + return 32; } diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h index 9b40c13..f2b9cda 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h +++ b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h @@ -34,6 +34,7 @@ class HexagonSubtarget : public HexagonGenSubtargetInfo { virtual void anchor(); bool UseMemOps, UseHVXOps, UseHVXDblOps; + bool UseLongCalls; bool ModeIEEERndNear; public: @@ -101,6 +102,7 @@ public: bool useHVXOps() const { return UseHVXOps; } bool useHVXDblOps() const { return UseHVXOps && UseHVXDblOps; } bool useHVXSglOps() const { return UseHVXOps && !UseHVXDblOps; } + bool useLongCalls() const { return UseLongCalls; } bool useBSBScheduling() const { return UseBSBScheduling; } bool enableMachineScheduler() const override; @@ -128,19 +130,26 @@ public: std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const override; + void getSMSMutations( + std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) + const override; + /// \brief Perform target specific adjustments to the latency of a schedule /// dependency. void adjustSchedDependency(SUnit *def, SUnit *use, SDep& dep) const override; + unsigned getL1CacheLineSize() const; + unsigned getL1PrefetchDistance() const; + private: // Helper function responsible for increasing the latency only. 
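The two 32-byte values returned by getL1CacheLineSize and getL1PrefetchDistance above are consumed by the generic loop-data-prefetch pass through the TTI hooks added later in this change. Roughly, the pass prefetches enough iterations ahead to cover the prefetch distance; the formula below is an illustrative assumption, not the pass's actual code, and the subtarget's private latency helpers continue right after it:

    // Prefetch far enough ahead that PrefetchDistance (bytes of latency
    // to hide) is covered by LoopSize (estimated cost of one iteration),
    // always at least one iteration ahead.
    unsigned itersAhead(unsigned PrefetchDistance, unsigned LoopSize) {
      if (LoopSize == 0)
        return 1;
      unsigned Iters = PrefetchDistance / LoopSize;
      return Iters ? Iters : 1;
    }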
- void updateLatency(MachineInstr *SrcInst, MachineInstr *DstInst, SDep &Dep) + void updateLatency(MachineInstr &SrcInst, MachineInstr &DstInst, SDep &Dep) const; void changeLatency(SUnit *Src, SmallVector<SDep, 4> &Deps, SUnit *Dst, unsigned Lat) const; bool isBestZeroLatency(SUnit *Src, SUnit *Dst, const HexagonInstrInfo *TII) const; - void changePhiLatency(MachineInstr *SrcInst, SUnit *Dst, SDep &Dep) const; + void changePhiLatency(MachineInstr &SrcInst, SUnit *Dst, SDep &Dep) const; }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSystemInst.td b/contrib/llvm/lib/Target/Hexagon/HexagonSystemInst.td index 771498a..629a987 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonSystemInst.td +++ b/contrib/llvm/lib/Target/Hexagon/HexagonSystemInst.td @@ -118,7 +118,7 @@ def Y2_isync: JRInst <(outs), (ins), let hasSideEffects = 0, isSolo = 1 in class J2_MISC_TRAP_PAUSE<string mnemonic, bits<2> MajOp> : JRInst - <(outs), (ins u8Imm:$u8), + <(outs), (ins u8_0Imm:$u8), #mnemonic#"(#$u8)"> { bits<8> u8; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp index f964a66..132d12a 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -27,7 +27,6 @@ using namespace llvm; - static cl::opt<bool> EnableRDFOpt("rdf-opt", cl::Hidden, cl::ZeroOrMore, cl::init(true), cl::desc("Enable RDF-based optimizations")); @@ -42,6 +41,9 @@ static cl::opt<bool> DisableHexagonCFGOpt("disable-hexagon-cfgopt", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Disable Hexagon CFG Optimization")); +static cl::opt<bool> DisableHCP("disable-hcp", cl::init(false), cl::Hidden, + cl::ZeroOrMore, cl::desc("Disable Hexagon constant propagation")); + static cl::opt<bool> DisableStoreWidening("disable-store-widen", cl::Hidden, cl::init(false), cl::desc("Disable store widening")); @@ -68,6 +70,10 @@ static cl::opt<bool> EnableGenPred("hexagon-gen-pred", cl::init(true), cl::Hidden, cl::desc("Enable conversion of arithmetic operations to " "predicate instructions")); +static cl::opt<bool> EnableLoopPrefetch("hexagon-loop-prefetch", + cl::init(false), cl::Hidden, cl::ZeroOrMore, + cl::desc("Enable loop data prefetch on Hexagon")); + static cl::opt<bool> DisableHSDR("disable-hsdr", cl::init(false), cl::Hidden, cl::desc("Disable splitting double registers")); @@ -80,6 +86,10 @@ static cl::opt<bool> EnableLoopResched("hexagon-loop-resched", cl::init(true), static cl::opt<bool> HexagonNoOpt("hexagon-noopt", cl::init(false), cl::Hidden, cl::desc("Disable backend optimizations")); +static cl::opt<bool> EnableVectorPrint("enable-hexagon-vector-print", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Enable Hexagon Vector print instr pass")); + /// HexagonTargetMachineModule - Note that this is used on hosts that /// cannot link in a library unless there are references into the /// library. In particular, it seems that it is not possible to get @@ -90,7 +100,7 @@ int HexagonTargetMachineModule = 0; extern "C" void LLVMInitializeHexagonTarget() { // Register the target. 
- RegisterTargetMachine<HexagonTargetMachine> X(TheHexagonTarget); + RegisterTargetMachine<HexagonTargetMachine> X(getTheHexagonTarget()); } static ScheduleDAGInstrs *createVLIWMachineSched(MachineSchedContext *C) { @@ -102,14 +112,17 @@ SchedCustomRegistry("hexagon", "Run Hexagon's custom scheduler", createVLIWMachineSched); namespace llvm { + extern char &HexagonExpandCondsetsID; + void initializeHexagonExpandCondsetsPass(PassRegistry&); + FunctionPass *createHexagonBitSimplify(); FunctionPass *createHexagonBranchRelaxation(); FunctionPass *createHexagonCallFrameInformation(); FunctionPass *createHexagonCFGOptimizer(); FunctionPass *createHexagonCommonGEP(); + FunctionPass *createHexagonConstPropagationPass(); FunctionPass *createHexagonCopyToCombine(); FunctionPass *createHexagonEarlyIfConversion(); - FunctionPass *createHexagonExpandCondsets(); FunctionPass *createHexagonFixupHwLoops(); FunctionPass *createHexagonGenExtract(); FunctionPass *createHexagonGenInsert(); @@ -128,6 +141,7 @@ namespace llvm { FunctionPass *createHexagonSplitConst32AndConst64(); FunctionPass *createHexagonSplitDoubleRegs(); FunctionPass *createHexagonStoreWidening(); + FunctionPass *createHexagonVectorPrint(); } // end namespace llvm; static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) { @@ -152,6 +166,7 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT, TT, CPU, FS, Options, getEffectiveRelocModel(RM), CM, (HexagonNoOpt ? CodeGenOpt::None : OL)), TLOF(make_unique<HexagonTargetObjectFile>()) { + initializeHexagonExpandCondsetsPass(*PassRegistry::getPassRegistry()); initAsmInfo(); } @@ -225,6 +240,8 @@ void HexagonPassConfig::addIRPasses() { addPass(createAtomicExpandPass(TM)); if (!NoOpt) { + if (EnableLoopPrefetch) + addPass(createLoopDataPrefetchPass()); if (EnableCommGEP) addPass(createHexagonCommonGEP()); // Replace certain combinations of shifts and ands with extracts. @@ -257,6 +274,11 @@ bool HexagonPassConfig::addInstSelector() { addPass(createHexagonBitSimplify(), false); addPass(createHexagonPeephole()); printAndVerify("After hexagon peephole pass"); + // Constant propagation. + if (!DisableHCP) { + addPass(createHexagonConstPropagationPass(), false); + addPass(&UnreachableMachineBlockElimID, false); + } if (EnableGenInsert) addPass(createHexagonGenInsert(), false); if (EnableEarlyIf) @@ -268,15 +290,15 @@ bool HexagonPassConfig::addInstSelector() { void HexagonPassConfig::addPreRegAlloc() { if (getOptLevel() != CodeGenOpt::None) { - if (EnableExpandCondsets) { - Pass *Exp = createHexagonExpandCondsets(); - insertPass(&RegisterCoalescerID, IdentifyingPassPtr(Exp)); - } + if (EnableExpandCondsets) + insertPass(&RegisterCoalescerID, &HexagonExpandCondsetsID); if (!DisableStoreWidening) addPass(createHexagonStoreWidening(), false); if (!DisableHardwareLoops) addPass(createHexagonHardwareLoops(), false); } + if (TM->getOptLevel() >= CodeGenOpt::Default) + addPass(&MachinePipelinerID); } void HexagonPassConfig::addPostRegAlloc() { @@ -315,6 +337,8 @@ void HexagonPassConfig::addPreEmitPass() { addPass(createHexagonPacketizer(), false); } + if (EnableVectorPrint) + addPass(createHexagonVectorPrint(), false); // Add CFI instructions if necessary. 
addPass(createHexagonCallFrameInformation(), false); diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp index 82b437e..c9c4f95 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp @@ -10,17 +10,27 @@ // This file contains the declarations of the HexagonTargetAsmInfo properties. // //===----------------------------------------------------------------------===// + #define DEBUG_TYPE "hexagon-sdata" -#include "HexagonTargetMachine.h" #include "HexagonTargetObjectFile.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalObject.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Type.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/SectionKind.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ELF.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; @@ -44,13 +54,21 @@ static cl::opt<bool> TraceGVPlacement("trace-gv-placement", // (e.g. -debug and -debug-only=globallayout) #define TRACE_TO(s, X) s << X #ifdef NDEBUG -#define TRACE(X) do { if (TraceGVPlacement) { TRACE_TO(errs(), X); } } while (0) +#define TRACE(X) \ + do { \ + if (TraceGVPlacement) { \ + TRACE_TO(errs(), X); \ + } \ + } while (false) #else -#define TRACE(X) \ - do { \ - if (TraceGVPlacement) { TRACE_TO(errs(), X); } \ - else { DEBUG( TRACE_TO(dbgs(), X) ); } \ - } while (0) +#define TRACE(X) \ + do { \ + if (TraceGVPlacement) { \ + TRACE_TO(errs(), X); \ + } else { \ + DEBUG(TRACE_TO(dbgs(), X)); \ + } \ + } while (false) #endif // Returns true if the section name is such that the symbol will be put @@ -69,7 +87,6 @@ static bool isSmallDataSection(StringRef Sec) { Sec.find(".scommon.") != StringRef::npos; } - static const char *getSectionSuffixForSize(unsigned Size) { switch (Size) { default: @@ -100,25 +117,23 @@ void HexagonTargetObjectFile::Initialize(MCContext &Ctx, ELF::SHF_HEX_GPREL); } - MCSection *HexagonTargetObjectFile::SelectSectionForGlobal( - const GlobalValue *GV, SectionKind Kind, Mangler &Mang, - const TargetMachine &TM) const { - TRACE("[SelectSectionForGlobal] GV(" << GV->getName() << ") "); - TRACE("input section(" << GV->getSection() << ") "); - - TRACE((GV->hasPrivateLinkage() ? "private_linkage " : "") - << (GV->hasLocalLinkage() ? "local_linkage " : "") - << (GV->hasInternalLinkage() ? "internal " : "") - << (GV->hasExternalLinkage() ? "external " : "") - << (GV->hasCommonLinkage() ? "common_linkage " : "") - << (GV->hasCommonLinkage() ? "common " : "" ) + const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { + TRACE("[SelectSectionForGlobal] GO(" << GO->getName() << ") "); + TRACE("input section(" << GO->getSection() << ") "); + + TRACE((GO->hasPrivateLinkage() ? "private_linkage " : "") + << (GO->hasLocalLinkage() ? "local_linkage " : "") + << (GO->hasInternalLinkage() ? "internal " : "") + << (GO->hasExternalLinkage() ? "external " : "") + << (GO->hasCommonLinkage() ? "common_linkage " : "") + << (GO->hasCommonLinkage() ? "common " : "" ) << (Kind.isCommon() ? "kind_common " : "" ) << (Kind.isBSS() ? "kind_bss " : "" ) << (Kind.isBSSLocal() ? 
"kind_bss_local " : "" )); - if (isGlobalInSmallSection(GV, TM)) - return selectSmallSectionForGlobal(GV, Kind, Mang, TM); + if (isGlobalInSmallSection(GO, TM)) + return selectSmallSectionForGlobal(GO, Kind, TM); if (Kind.isCommon()) { // This is purely for LTO+Linker Script because commons don't really have a @@ -130,54 +145,49 @@ MCSection *HexagonTargetObjectFile::SelectSectionForGlobal( TRACE("default_ELF_section\n"); // Otherwise, we work the same as ELF. - return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, - Mang, TM); + return TargetLoweringObjectFileELF::SelectSectionForGlobal(GO, Kind, TM); } - MCSection *HexagonTargetObjectFile::getExplicitSectionGlobal( - const GlobalValue *GV, SectionKind Kind, Mangler &Mang, - const TargetMachine &TM) const { - TRACE("[getExplicitSectionGlobal] GV(" << GV->getName() << ") from(" - << GV->getSection() << ") "); - TRACE((GV->hasPrivateLinkage() ? "private_linkage " : "") - << (GV->hasLocalLinkage() ? "local_linkage " : "") - << (GV->hasInternalLinkage() ? "internal " : "") - << (GV->hasExternalLinkage() ? "external " : "") - << (GV->hasCommonLinkage() ? "common_linkage " : "") - << (GV->hasCommonLinkage() ? "common " : "" ) + const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { + TRACE("[getExplicitSectionGlobal] GO(" << GO->getName() << ") from(" + << GO->getSection() << ") "); + TRACE((GO->hasPrivateLinkage() ? "private_linkage " : "") + << (GO->hasLocalLinkage() ? "local_linkage " : "") + << (GO->hasInternalLinkage() ? "internal " : "") + << (GO->hasExternalLinkage() ? "external " : "") + << (GO->hasCommonLinkage() ? "common_linkage " : "") + << (GO->hasCommonLinkage() ? "common " : "" ) << (Kind.isCommon() ? "kind_common " : "" ) << (Kind.isBSS() ? "kind_bss " : "" ) << (Kind.isBSSLocal() ? "kind_bss_local " : "" )); - if (GV->hasSection()) { - StringRef Section = GV->getSection(); + if (GO->hasSection()) { + StringRef Section = GO->getSection(); if (Section.find(".access.text.group") != StringRef::npos) - return getContext().getELFSection(GV->getSection(), ELF::SHT_PROGBITS, + return getContext().getELFSection(GO->getSection(), ELF::SHT_PROGBITS, ELF::SHF_ALLOC | ELF::SHF_EXECINSTR); if (Section.find(".access.data.group") != StringRef::npos) - return getContext().getELFSection(GV->getSection(), ELF::SHT_PROGBITS, + return getContext().getELFSection(GO->getSection(), ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC); } - if (isGlobalInSmallSection(GV, TM)) - return selectSmallSectionForGlobal(GV, Kind, Mang, TM); + if (isGlobalInSmallSection(GO, TM)) + return selectSmallSectionForGlobal(GO, Kind, TM); // Otherwise, we work the same as ELF. TRACE("default_ELF_section\n"); - return TargetLoweringObjectFileELF::getExplicitSectionGlobal(GV, Kind, - Mang, TM); + return TargetLoweringObjectFileELF::getExplicitSectionGlobal(GO, Kind, TM); } - /// Return true if this global value should be placed into small data/bss /// section. -bool HexagonTargetObjectFile::isGlobalInSmallSection(const GlobalValue *GV, +bool HexagonTargetObjectFile::isGlobalInSmallSection(const GlobalObject *GO, const TargetMachine &TM) const { // Only global variables, not functions. 
DEBUG(dbgs() << "Checking if value is in small-data, -G" - << SmallDataThreshold << ": \"" << GV->getName() << "\": "); - const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV); + << SmallDataThreshold << ": \"" << GO->getName() << "\": "); + const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GO); if (!GVar) { DEBUG(dbgs() << "no, not a global variable\n"); return false; @@ -238,17 +248,14 @@ bool HexagonTargetObjectFile::isGlobalInSmallSection(const GlobalValue *GV, return true; } - bool HexagonTargetObjectFile::isSmallDataEnabled() const { return SmallDataThreshold > 0; } - unsigned HexagonTargetObjectFile::getSmallDataSize() const { return SmallDataThreshold; } - /// Descends any type down to "elementary" components, /// discovering the smallest addressable one. /// If zero is returned, declaration will not be modified. @@ -302,12 +309,10 @@ unsigned HexagonTargetObjectFile::getSmallestAddressableSize(const Type *Ty, return 0; } - MCSection *HexagonTargetObjectFile::selectSmallSectionForGlobal( - const GlobalValue *GV, SectionKind Kind, Mangler &Mang, - const TargetMachine &TM) const { - const Type *GTy = GV->getType()->getElementType(); - unsigned Size = getSmallestAddressableSize(GTy, GV, TM); + const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { + const Type *GTy = GO->getType()->getElementType(); + unsigned Size = getSmallestAddressableSize(GTy, GO, TM); // If we have -ffunction-section or -fdata-section then we should emit the // global value to a unique section specifically for it... even for sdata. @@ -333,7 +338,7 @@ MCSection *HexagonTargetObjectFile::selectSmallSectionForGlobal( if (EmitUniquedSection) { Name.append("."); - Name.append(GV->getName()); + Name.append(GO->getName()); } TRACE(" unique sbss(" << Name << ")\n"); return getContext().getELFSection(Name.str(), ELF::SHT_NOBITS, @@ -360,7 +365,7 @@ MCSection *HexagonTargetObjectFile::selectSmallSectionForGlobal( // case the Kind could be wrong for it. if (Kind.isMergeableConst()) { TRACE(" const_object_as_data "); - const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV); + const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GO); if (GVar->hasSection() && isSmallDataSection(GVar->getSection())) Kind = SectionKind::getData(); } @@ -377,7 +382,7 @@ MCSection *HexagonTargetObjectFile::selectSmallSectionForGlobal( if (EmitUniquedSection) { Name.append("."); - Name.append(GV->getName()); + Name.append(GO->getName()); } TRACE(" unique sdata(" << Name << ")\n"); return getContext().getELFSection(Name.str(), ELF::SHT_PROGBITS, @@ -386,6 +391,5 @@ MCSection *HexagonTargetObjectFile::selectSmallSectionForGlobal( TRACE("default ELF section\n"); // Otherwise, we work the same as ELF. 
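Before the ELF fallback below, note that the small-data placement above reduces to building a section name from three pieces: the .sbss/.sdata base, a size suffix from getSectionSuffixForSize, and, for unique sections, the global's own name. A compact sketch; the ".1"/".2"/".4"/".8" suffix values are an assumption, since the diff elides that switch body:

    #include <string>

    std::string smallSectionFor(bool IsBSS, unsigned Size, bool Unique,
                                const std::string &GlobalName) {
      std::string Name = IsBSS ? ".sbss" : ".sdata";
      switch (Size) {
      case 1: Name += ".1"; break;
      case 2: Name += ".2"; break;
      case 4: Name += ".4"; break;
      case 8: Name += ".8"; break;
      default: break; // other sizes get no suffix
      }
      if (Unique) // -ffunction-sections / -fdata-sections
        Name += "." + GlobalName;
      return Name;
    }

Under -fdata-sections, a 4-byte global x would land in .sdata.4.x, matching the Name.append calls above.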
- return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, - Mang, TM); + return TargetLoweringObjectFileELF::SelectSectionForGlobal(GO, Kind, TM); } diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h index cbc00da..58dff2b 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h +++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h @@ -19,14 +19,15 @@ namespace llvm { public: void Initialize(MCContext &Ctx, const TargetMachine &TM) override; - MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler &Mang, const TargetMachine &TM) const override; + MCSection *SelectSectionForGlobal(const GlobalObject *GO, SectionKind Kind, + const TargetMachine &TM) const override; - MCSection *getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler &Mang, const TargetMachine &TM) const override; + MCSection *getExplicitSectionGlobal(const GlobalObject *GO, + SectionKind Kind, + const TargetMachine &TM) const override; - bool isGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM) - const; + bool isGlobalInSmallSection(const GlobalObject *GO, + const TargetMachine &TM) const; bool isSmallDataEnabled() const; @@ -39,8 +40,9 @@ namespace llvm { unsigned getSmallestAddressableSize(const Type *Ty, const GlobalValue *GV, const TargetMachine &TM) const; - MCSection *selectSmallSectionForGlobal(const GlobalValue *GV, - SectionKind Kind, Mangler &Mang, const TargetMachine &TM) const; + MCSection *selectSmallSectionForGlobal(const GlobalObject *GO, + SectionKind Kind, + const TargetMachine &TM) const; }; } // namespace llvm diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp index a05443e..d578bfa 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp @@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// #include "HexagonTargetTransformInfo.h" +#include "llvm/IR/Instructions.h" #include "llvm/Support/Debug.h" using namespace llvm; @@ -36,3 +37,35 @@ void HexagonTTIImpl::getUnrollingPreferences(Loop *L, unsigned HexagonTTIImpl::getNumberOfRegisters(bool vector) const { return vector ? 0 : 32; } + +unsigned HexagonTTIImpl::getPrefetchDistance() const { + return getST()->getL1PrefetchDistance(); +} + +unsigned HexagonTTIImpl::getCacheLineSize() const { + return getST()->getL1CacheLineSize(); +} + +int HexagonTTIImpl::getUserCost(const User *U) { + auto isCastFoldedIntoLoad = [] (const CastInst *CI) -> bool { + if (!CI->isIntegerCast()) + return false; + const LoadInst *LI = dyn_cast<const LoadInst>(CI->getOperand(0)); + // Technically, this code could allow multiple uses of the load, and + // check if all the uses are the same extension operation, but this + // should be sufficient for most cases. + if (!LI || !LI->hasOneUse()) + return false; + + // Only extensions from an integer type shorter than 32-bit to i32 + // can be folded into the load. 
+ unsigned SBW = CI->getSrcTy()->getIntegerBitWidth(); + unsigned DBW = CI->getDestTy()->getIntegerBitWidth(); + return DBW == 32 && (SBW < DBW); + }; + + if (const CastInst *CI = dyn_cast<const CastInst>(U)) + if (isCastFoldedIntoLoad(CI)) + return TargetTransformInfo::TCC_Free; + return BaseT::getUserCost(U); +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h index 71ae17a..8414bfc 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h +++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h @@ -40,13 +40,6 @@ public: : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} - // Provide value semantics. MSVC requires that we spell all of these out. - HexagonTTIImpl(const HexagonTTIImpl &Arg) - : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {} - HexagonTTIImpl(HexagonTTIImpl &&Arg) - : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)), - TLI(std::move(Arg.TLI)) {} - /// \name Scalar TTI Implementations /// @{ @@ -55,6 +48,10 @@ public: // The Hexagon target can unroll loops with run-time trip counts. void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP); + // L1 cache prefetch. + unsigned getPrefetchDistance() const; + unsigned getCacheLineSize() const; + /// @} /// \name Vector TTI Implementations @@ -63,6 +60,8 @@ public: unsigned getNumberOfRegisters(bool vector) const; /// @} + + int getUserCost(const User *U); }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp index d326b94..7b1247d 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -22,7 +22,6 @@ #include "HexagonVLIWPacketizer.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -75,13 +74,11 @@ namespace { AU.addPreserved<MachineLoopInfo>(); MachineFunctionPass::getAnalysisUsage(AU); } - const char *getPassName() const override { - return "Hexagon Packetizer"; - } + StringRef getPassName() const override { return "Hexagon Packetizer"; } bool runOnMachineFunction(MachineFunction &Fn) override; MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( - MachineFunctionProperties::Property::AllVRegsAllocated); + MachineFunctionProperties::Property::NoVRegs); } private: @@ -101,7 +98,6 @@ INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(HexagonPacketizer, "packets", "Hexagon Packetizer", false, false) - HexagonPacketizerList::HexagonPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI, AliasAnalysis *AA, const MachineBranchProbabilityInfo *MBPI) @@ -127,7 +123,7 @@ static bool hasWriteToReadDep(const MachineInstr &FirstI, } -static MachineBasicBlock::iterator moveInstrOut(MachineInstr *MI, +static MachineBasicBlock::iterator moveInstrOut(MachineInstr &MI, MachineBasicBlock::iterator BundleIt, bool Before) { MachineBasicBlock::instr_iterator InsertPt; if (Before) @@ -135,20 +131,20 @@ static MachineBasicBlock::iterator moveInstrOut(MachineInstr *MI, else InsertPt = std::next(BundleIt).getInstrIterator(); - MachineBasicBlock &B = 
*MI->getParent(); + MachineBasicBlock &B = *MI.getParent(); // The instruction should at least be bundled with the preceding instruction // (there will always be one, i.e. BUNDLE, if nothing else). - assert(MI->isBundledWithPred()); - if (MI->isBundledWithSucc()) { - MI->clearFlag(MachineInstr::BundledSucc); - MI->clearFlag(MachineInstr::BundledPred); + assert(MI.isBundledWithPred()); + if (MI.isBundledWithSucc()) { + MI.clearFlag(MachineInstr::BundledSucc); + MI.clearFlag(MachineInstr::BundledPred); } else { // If it's not bundled with the successor (i.e. it is the last one // in the bundle), then we can simply unbundle it from the predecessor, // which will take care of updating the predecessor's flag. - MI->unbundleFromPred(); + MI.unbundleFromPred(); } - B.splice(InsertPt, &B, MI); + B.splice(InsertPt, &B, MI.getIterator()); // Get the size of the bundle without asserting. MachineBasicBlock::const_instr_iterator I = BundleIt.getInstrIterator(); @@ -164,9 +160,9 @@ static MachineBasicBlock::iterator moveInstrOut(MachineInstr *MI, // Otherwise, extract the single instruction out and delete the bundle. MachineBasicBlock::iterator NextIt = std::next(BundleIt); - MachineInstr *SingleI = BundleIt->getNextNode(); - SingleI->unbundleFromPred(); - assert(!SingleI->isBundledWithSucc()); + MachineInstr &SingleI = *BundleIt->getNextNode(); + SingleI.unbundleFromPred(); + assert(!SingleI.isBundledWithSucc()); BundleIt->eraseFromParent(); return NextIt; } @@ -267,7 +263,7 @@ bool HexagonPacketizerList::tryAllocateResourcesForConstExt(bool Reserve) { } -bool HexagonPacketizerList::isCallDependent(const MachineInstr* MI, +bool HexagonPacketizerList::isCallDependent(const MachineInstr &MI, SDep::Kind DepType, unsigned DepReg) { // Check for LR dependence. if (DepReg == HRI->getRARegister()) @@ -284,11 +280,18 @@ bool HexagonPacketizerList::isCallDependent(const MachineInstr* MI, // Assumes that the first operand of the CALLr is the function address. if (HII->isIndirectCall(MI) && (DepType == SDep::Data)) { - MachineOperand MO = MI->getOperand(0); + const MachineOperand MO = MI.getOperand(0); if (MO.isReg() && MO.isUse() && (MO.getReg() == DepReg)) return true; } + if (HII->isJumpR(MI)) { + const MachineOperand &MO = HII->isPredicated(MI) ? MI.getOperand(1) + : MI.getOperand(0); + assert(MO.isReg() && MO.isUse()); + if (MO.getReg() == DepReg) + return true; + } return false; } @@ -297,54 +300,60 @@ static bool isRegDependence(const SDep::Kind DepType) { DepType == SDep::Output; } -static bool isDirectJump(const MachineInstr* MI) { - return MI->getOpcode() == Hexagon::J2_jump; +static bool isDirectJump(const MachineInstr &MI) { + return MI.getOpcode() == Hexagon::J2_jump; } -static bool isSchedBarrier(const MachineInstr* MI) { - switch (MI->getOpcode()) { +static bool isSchedBarrier(const MachineInstr &MI) { + switch (MI.getOpcode()) { case Hexagon::Y2_barrier: return true; } return false; } -static bool isControlFlow(const MachineInstr* MI) { - return (MI->getDesc().isTerminator() || MI->getDesc().isCall()); +static bool isControlFlow(const MachineInstr &MI) { + return MI.getDesc().isTerminator() || MI.getDesc().isCall(); } /// Returns true if the instruction modifies a callee-saved register. 
-static bool doesModifyCalleeSavedReg(const MachineInstr *MI, +static bool doesModifyCalleeSavedReg(const MachineInstr &MI, const TargetRegisterInfo *TRI) { - const MachineFunction &MF = *MI->getParent()->getParent(); + const MachineFunction &MF = *MI.getParent()->getParent(); for (auto *CSR = TRI->getCalleeSavedRegs(&MF); CSR && *CSR; ++CSR) - if (MI->modifiesRegister(*CSR, TRI)) + if (MI.modifiesRegister(*CSR, TRI)) return true; return false; } -// TODO: MI->isIndirectBranch() and IsRegisterJump(MI) // Returns true if an instruction can be promoted to .new predicate or // new-value store. -bool HexagonPacketizerList::isNewifiable(const MachineInstr* MI) { - return HII->isCondInst(MI) || MI->isReturn() || HII->mayBeNewStore(MI); +bool HexagonPacketizerList::isNewifiable(const MachineInstr &MI, + const TargetRegisterClass *NewRC) { + // Vector stores can be predicated, and can be new-value stores, but + // they cannot be predicated on a .new predicate value. + if (NewRC == &Hexagon::PredRegsRegClass) + if (HII->isV60VectorInstruction(MI) && MI.mayStore()) + return false; + return HII->isCondInst(MI) || HII->isJumpR(MI) || MI.isReturn() || + HII->mayBeNewStore(MI); } // Promote an instruction to its .cur form. // At this time, we have already made a call to canPromoteToDotCur and made // sure that it can *indeed* be promoted. -bool HexagonPacketizerList::promoteToDotCur(MachineInstr* MI, +bool HexagonPacketizerList::promoteToDotCur(MachineInstr &MI, SDep::Kind DepType, MachineBasicBlock::iterator &MII, const TargetRegisterClass* RC) { assert(DepType == SDep::Data); int CurOpcode = HII->getDotCurOp(MI); - MI->setDesc(HII->get(CurOpcode)); + MI.setDesc(HII->get(CurOpcode)); return true; } void HexagonPacketizerList::cleanUpDotCur() { - MachineInstr *MI = NULL; + MachineInstr *MI = nullptr; for (auto BI : CurrentPacketMIs) { DEBUG(dbgs() << "Cleanup packet has "; BI->dump();); if (BI->getOpcode() == Hexagon::V6_vL32b_cur_ai) { @@ -365,12 +374,12 @@ void HexagonPacketizerList::cleanUpDotCur() { } // Check to see if an instruction can be dot cur. -bool HexagonPacketizerList::canPromoteToDotCur(const MachineInstr *MI, +bool HexagonPacketizerList::canPromoteToDotCur(const MachineInstr &MI, const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII, const TargetRegisterClass *RC) { if (!HII->isV60VectorInstruction(MI)) return false; - if (!HII->isV60VectorInstruction(&*MII)) + if (!HII->isV60VectorInstruction(*MII)) return false; // Already a dot new instruction. @@ -386,14 +395,14 @@ bool HexagonPacketizerList::canPromoteToDotCur(const MachineInstr *MI, // Make sure candidate instruction uses cur. DEBUG(dbgs() << "Can we DOT Cur Vector MI\n"; - MI->dump(); + MI.dump(); dbgs() << "in packet\n";); MachineInstr &MJ = *MII; DEBUG({ dbgs() << "Checking CUR against "; MJ.dump(); }); - unsigned DestReg = MI->getOperand(0).getReg(); + unsigned DestReg = MI.getOperand(0).getReg(); bool FoundMatch = false; for (auto &MO : MJ.operands()) if (MO.isReg() && MO.getReg() == DestReg) @@ -409,7 +418,7 @@ bool HexagonPacketizerList::canPromoteToDotCur(const MachineInstr *MI, return false; } - DEBUG(dbgs() << "Can Dot CUR MI\n"; MI->dump();); + DEBUG(dbgs() << "Can Dot CUR MI\n"; MI.dump();); // We can convert the opcode into a .cur. return true; } @@ -417,7 +426,7 @@ bool HexagonPacketizerList::canPromoteToDotCur(const MachineInstr *MI, // Promote an instruction to its .new form. At this time, we have already // made a call to canPromoteToDotNew and made sure that it can *indeed* be // promoted.
-bool HexagonPacketizerList::promoteToDotNew(MachineInstr* MI, +bool HexagonPacketizerList::promoteToDotNew(MachineInstr &MI, SDep::Kind DepType, MachineBasicBlock::iterator &MII, const TargetRegisterClass* RC) { assert (DepType == SDep::Data); @@ -426,16 +435,53 @@ NewOpcode = HII->getDotNewPredOp(MI, MBPI); else NewOpcode = HII->getDotNewOp(MI); - MI->setDesc(HII->get(NewOpcode)); + MI.setDesc(HII->get(NewOpcode)); return true; } -bool HexagonPacketizerList::demoteToDotOld(MachineInstr* MI) { - int NewOpcode = HII->getDotOldOp(MI->getOpcode()); - MI->setDesc(HII->get(NewOpcode)); +bool HexagonPacketizerList::demoteToDotOld(MachineInstr &MI) { + int NewOpcode = HII->getDotOldOp(MI.getOpcode()); + MI.setDesc(HII->get(NewOpcode)); return true; } +bool HexagonPacketizerList::useCallersSP(MachineInstr &MI) { + unsigned Opc = MI.getOpcode(); + switch (Opc) { + case Hexagon::S2_storerd_io: + case Hexagon::S2_storeri_io: + case Hexagon::S2_storerh_io: + case Hexagon::S2_storerb_io: + break; + default: + llvm_unreachable("Unexpected instruction"); + } + unsigned FrameSize = MF.getFrameInfo().getStackSize(); + MachineOperand &Off = MI.getOperand(1); + int64_t NewOff = Off.getImm() - (FrameSize + HEXAGON_LRFP_SIZE); + if (HII->isValidOffset(Opc, NewOff)) { + Off.setImm(NewOff); + return true; + } + return false; +} + +void HexagonPacketizerList::useCalleesSP(MachineInstr &MI) { + unsigned Opc = MI.getOpcode(); + switch (Opc) { + case Hexagon::S2_storerd_io: + case Hexagon::S2_storeri_io: + case Hexagon::S2_storerh_io: + case Hexagon::S2_storerb_io: + break; + default: + llvm_unreachable("Unexpected instruction"); + } + unsigned FrameSize = MF.getFrameInfo().getStackSize(); + MachineOperand &Off = MI.getOperand(1); + Off.setImm(Off.getImm() + FrameSize + HEXAGON_LRFP_SIZE); +} + enum PredicateKind { PK_False, PK_True, @@ -453,7 +499,7 @@ static PredicateKind getPredicateSense(const MachineInstr &MI, return PK_False; } -static const MachineOperand &getPostIncrementOperand(const MachineInstr *MI, +static const MachineOperand &getPostIncrementOperand(const MachineInstr &MI, const HexagonInstrInfo *HII) { assert(HII->isPostIncrement(MI) && "Not a post increment operation."); #ifndef NDEBUG @@ -461,22 +507,22 @@ // list. Caution: DenseMap initializes with the minimum of 64 buckets, // whereas there are at most 5 operands in the post increment. DenseSet<unsigned> DefRegsSet; - for (auto &MO : MI->operands()) + for (auto &MO : MI.operands()) if (MO.isReg() && MO.isDef()) DefRegsSet.insert(MO.getReg()); - for (auto &MO : MI->operands()) + for (auto &MO : MI.operands()) if (MO.isReg() && MO.isUse() && DefRegsSet.count(MO.getReg())) return MO; #else - if (MI->mayLoad()) { - const MachineOperand &Op1 = MI->getOperand(1); + if (MI.mayLoad()) { + const MachineOperand &Op1 = MI.getOperand(1); // The 2nd operand is always the post increment operand in load. assert(Op1.isReg() && "Post increment operand has to be a register."); return Op1; } - if (MI->getDesc().mayStore()) { - const MachineOperand &Op0 = MI->getOperand(0); + if (MI.getDesc().mayStore()) { + const MachineOperand &Op0 = MI.getOperand(0); // The 1st operand is always the post increment operand in store. assert(Op0.isReg() && "Post increment operand has to be a register."); return Op0; @@ -487,13 +533,13 @@ static const MachineOperand &getPostIncrementOperand(const MachineInstr *MI, } // Get the value being stored.
-static const MachineOperand& getStoreValueOperand(const MachineInstr *MI) { +static const MachineOperand& getStoreValueOperand(const MachineInstr &MI) { // value being stored is always the last operand. - return MI->getOperand(MI->getNumOperands()-1); + return MI.getOperand(MI.getNumOperands()-1); } -static bool isLoadAbsSet(const MachineInstr *MI) { - unsigned Opc = MI->getOpcode(); +static bool isLoadAbsSet(const MachineInstr &MI) { + unsigned Opc = MI.getOpcode(); switch (Opc) { case Hexagon::L4_loadrd_ap: case Hexagon::L4_loadrb_ap: @@ -506,9 +552,9 @@ static bool isLoadAbsSet(const MachineInstr *MI) { return false; } -static const MachineOperand &getAbsSetOperand(const MachineInstr *MI) { +static const MachineOperand &getAbsSetOperand(const MachineInstr &MI) { assert(isLoadAbsSet(MI)); - return MI->getOperand(1); + return MI.getOperand(1); } @@ -529,8 +575,8 @@ static const MachineOperand &getAbsSetOperand(const MachineInstr *MI) { // if there is a new value store in the packet. Corollary: if there is // already a store in a packet, there cannot be a new value store. // Arch Spec: 3.4.4.2 -bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr *MI, - const MachineInstr *PacketMI, unsigned DepReg) { +bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr &MI, + const MachineInstr &PacketMI, unsigned DepReg) { // Make sure we are looking at a store that can be promoted. if (!HII->mayBeNewStore(MI)) return false; @@ -540,7 +586,7 @@ bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr *MI, if (Val.isReg() && Val.getReg() != DepReg) return false; - const MCInstrDesc& MCID = PacketMI->getDesc(); + const MCInstrDesc& MCID = PacketMI.getDesc(); // First operand is always the result. const TargetRegisterClass *PacketRC = HII->getRegClass(MCID, 0, HRI, MF); @@ -563,7 +609,7 @@ bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr *MI, return false; } - if (HII->isPostIncrement(PacketMI) && PacketMI->mayLoad() && + if (HII->isPostIncrement(PacketMI) && PacketMI.mayLoad() && getPostIncrementOperand(PacketMI, HII).getReg() == DepReg) { // If source is post_inc, or absolute-set addressing, it cannot feed // into a new value store @@ -578,8 +624,8 @@ bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr *MI, // If the source that feeds the store is predicated, new value store must // also be predicated. - if (HII->isPredicated(*PacketMI)) { - if (!HII->isPredicated(*MI)) + if (HII->isPredicated(PacketMI)) { + if (!HII->isPredicated(MI)) return false; // Check to make sure that they both will have their predicates @@ -589,7 +635,7 @@ bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr *MI, const TargetRegisterClass* predRegClass = nullptr; // Get predicate register used in the source instruction. - for (auto &MO : PacketMI->operands()) { + for (auto &MO : PacketMI.operands()) { if (!MO.isReg()) continue; predRegNumSrc = MO.getReg(); @@ -601,7 +647,7 @@ bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr *MI, "predicate register not found in a predicated PacketMI instruction"); // Get predicate register used in new-value store instruction. - for (auto &MO : MI->operands()) { + for (auto &MO : MI.operands()) { if (!MO.isReg()) continue; predRegNumDst = MO.getReg(); @@ -622,7 +668,7 @@ bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr *MI, // sense, i.e., either both should be negated or both should be non-negated.
if (predRegNumDst != predRegNumSrc || HII->isDotNewInst(PacketMI) != HII->isDotNewInst(MI) || - getPredicateSense(*MI, HII) != getPredicateSense(*PacketMI, HII) + getPredicateSense(MI, HII) != getPredicateSense(PacketMI, HII)) return false; } @@ -638,19 +684,19 @@ bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr *MI, for (auto I : CurrentPacketMIs) { SUnit *TempSU = MIToSUnit.find(I)->second; - MachineInstr* TempMI = TempSU->getInstr(); + MachineInstr &TempMI = *TempSU->getInstr(); // The following condition is true for all the instructions until PacketMI is // reached (StartCheck is set to 0 before the for loop). // StartCheck flag is 1 for all the instructions after PacketMI. - if (TempMI != PacketMI && !StartCheck) // Start processing only after - continue; // encountering PacketMI. + if (&TempMI != &PacketMI && !StartCheck) // Start processing only after + continue; // encountering PacketMI. StartCheck = 1; - if (TempMI == PacketMI) // We don't want to check PacketMI for dependence. + if (&TempMI == &PacketMI) // We don't want to check PacketMI for dependence. continue; - for (auto &MO : MI->operands()) + for (auto &MO : MI.operands()) if (MO.isReg() && TempSU->getInstr()->modifiesRegister(MO.getReg(), HRI)) return false; } @@ -662,8 +708,8 @@ bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr *MI, // Eg. r0 = add(r0, #3) // memw(r1+r0<<#2) = r0 if (!HII->isPostIncrement(MI)) { - for (unsigned opNum = 0; opNum < MI->getNumOperands()-1; opNum++) { - const MachineOperand &MO = MI->getOperand(opNum); + for (unsigned opNum = 0; opNum < MI.getNumOperands()-1; opNum++) { + const MachineOperand &MO = MI.getOperand(opNum); if (MO.isReg() && MO.getReg() == DepReg) return false; } @@ -673,7 +719,7 @@ bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr *MI, // do not newify the store. Eg. // %R9<def> = ZXTH %R12, %D6<imp-use>, %R12<imp-def> // S2_storerh_io %R8, 2, %R12<kill>; mem:ST2[%scevgep343] - for (auto &MO : PacketMI->operands()) { + for (auto &MO : PacketMI.operands()) { if (!MO.isReg() || !MO.isDef() || !MO.isImplicit()) continue; unsigned R = MO.getReg(); @@ -686,7 +732,7 @@ bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr *MI, // just-in-case. For example, we cannot newify R2 in the following case: // %R3<def> = A2_tfrsi 0; // S2_storeri_io %R0<kill>, 0, %R2<kill>, %D1<imp-use,kill>; - for (auto &MO : MI->operands()) { + for (auto &MO : MI.operands()) { if (MO.isReg() && MO.isUse() && MO.isImplicit() && MO.getReg() == DepReg) return false; } @@ -696,14 +742,14 @@ bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr *MI, } // Can this MI be promoted to either a new value store or a new value jump. -bool HexagonPacketizerList::canPromoteToNewValue(const MachineInstr *MI, +bool HexagonPacketizerList::canPromoteToNewValue(const MachineInstr &MI, const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII) { if (!HII->mayBeNewStore(MI)) return false; // Check to see if the store can be new value'ed.
- MachineInstr *PacketMI = PacketSU->getInstr(); + MachineInstr &PacketMI = *PacketSU->getInstr(); if (canPromoteToNewValueStore(MI, PacketMI, DepReg)) return true; @@ -712,8 +758,8 @@ bool HexagonPacketizerList::canPromoteToNewValue(const MachineInstr *MI, return false; } -static bool isImplicitDependency(const MachineInstr *I, unsigned DepReg) { - for (auto &MO : I->operands()) +static bool isImplicitDependency(const MachineInstr &I, unsigned DepReg) { + for (auto &MO : I.operands()) if (MO.isReg() && MO.isDef() && (MO.getReg() == DepReg) && MO.isImplicit()) return true; return false; @@ -724,25 +770,25 @@ static bool isImplicitDependency(const MachineInstr *I, unsigned DepReg) { // 1. dot new on predicate - V2/V3/V4 // 2. dot new on stores NV/ST - V4 // 3. dot new on jump NV/J - V4 -- This is generated in a pass. -bool HexagonPacketizerList::canPromoteToDotNew(const MachineInstr *MI, +bool HexagonPacketizerList::canPromoteToDotNew(const MachineInstr &MI, const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII, const TargetRegisterClass* RC) { // Already a dot new instruction. if (HII->isDotNewInst(MI) && !HII->mayBeNewStore(MI)) return false; - if (!isNewifiable(MI)) + if (!isNewifiable(MI, RC)) return false; - const MachineInstr *PI = PacketSU->getInstr(); + const MachineInstr &PI = *PacketSU->getInstr(); // The "new value" cannot come from inline asm. - if (PI->isInlineAsm()) + if (PI.isInlineAsm()) return false; // IMPLICIT_DEFs won't materialize as real instructions, so .new makes no // sense. - if (PI->isImplicitDef()) + if (PI.isImplicitDef()) return false; // If dependency is through an implicitly defined register, we should not @@ -750,16 +796,14 @@ bool HexagonPacketizerList::canPromoteToDotNew(const MachineInstr *MI, if (isImplicitDependency(PI, DepReg)) return false; - const MCInstrDesc& MCID = PI->getDesc(); + const MCInstrDesc& MCID = PI.getDesc(); const TargetRegisterClass *VecRC = HII->getRegClass(MCID, 0, HRI, MF); if (DisableVecDblNVStores && VecRC == &Hexagon::VecDblRegsRegClass) return false; // predicate .new - // bug 5670: until that is fixed - // TODO: MI->isIndirectBranch() and IsRegisterJump(MI) if (RC == &Hexagon::PredRegsRegClass) - if (HII->isCondInst(MI) || MI->isReturn()) + if (HII->isCondInst(MI) || HII->isJumpR(MI) || MI.isReturn()) return HII->predCanBeUsedAsDotNew(PI, DepReg); if (RC != &Hexagon::PredRegsRegClass && !HII->mayBeNewStore(MI)) @@ -795,9 +839,9 @@ bool HexagonPacketizerList::canPromoteToDotNew(const MachineInstr *MI, // The P3 from a) and d) will be complements after // a)'s P3 is converted to .new form // Anti-dep between c) and b) is irrelevant for this case -bool HexagonPacketizerList::restrictingDepExistInPacket(MachineInstr* MI, +bool HexagonPacketizerList::restrictingDepExistInPacket(MachineInstr &MI, unsigned DepReg) { - SUnit *PacketSUDep = MIToSUnit.find(MI)->second; + SUnit *PacketSUDep = MIToSUnit.find(&MI)->second; for (auto I : CurrentPacketMIs) { // We only care about dependencies on predicated instructions @@ -889,7 +933,7 @@ bool HexagonPacketizerList::arePredicatesComplements(MachineInstr &MI1, // above example. Now I need to see if there is an anti dependency // from c) to any other instruction in the same packet on the pred // reg of interest.
- if (restrictingDepExistInPacket(I, Dep.getReg())) + if (restrictingDepExistInPacket(*I, Dep.getReg())) return false; } } @@ -906,7 +950,7 @@ bool HexagonPacketizerList::arePredicatesComplements(MachineInstr &MI1, Hexagon::PredRegsRegClass.contains(PReg1) && Hexagon::PredRegsRegClass.contains(PReg2) && getPredicateSense(MI1, HII) != getPredicateSense(MI2, HII) && - HII->isDotNewInst(&MI1) == HII->isDotNewInst(&MI2); + HII->isDotNewInst(MI1) == HII->isDotNewInst(MI2); } // Initialize packetizer flags. @@ -957,10 +1001,10 @@ bool HexagonPacketizerList::isSoloInstruction(const MachineInstr &MI) { // From Hexagon V4 Programmer's Reference Manual 3.4.4 Grouping constraints: // trap, pause, barrier, icinva, isync, and syncht are solo instructions. // They must not be grouped with other instructions in a packet. - if (isSchedBarrier(&MI)) + if (isSchedBarrier(MI)) return true; - if (HII->isSolo(&MI)) + if (HII->isSolo(MI)) return true; if (MI.getOpcode() == Hexagon::A2_nop) @@ -977,9 +1021,9 @@ bool HexagonPacketizerList::isSoloInstruction(const MachineInstr &MI) { // cannotCoexistAsymm(MI, MJ) || cannotCoexistAsymm(MJ, MI) // Doing the test only one way saves the amount of code in this function, // since every test would need to be repeated with the MI and MJ reversed. -static bool cannotCoexistAsymm(const MachineInstr *MI, const MachineInstr *MJ, +static bool cannotCoexistAsymm(const MachineInstr &MI, const MachineInstr &MJ, const HexagonInstrInfo &HII) { - const MachineFunction *MF = MI->getParent()->getParent(); + const MachineFunction *MF = MI.getParent()->getParent(); if (MF->getSubtarget<HexagonSubtarget>().hasV60TOpsOnly() && HII.isHVXMemWithAIndirect(MI, MJ)) return true; @@ -988,9 +1032,27 @@ static bool cannotCoexistAsymm(const MachineInstr *MI, const MachineInstr *MJ, // able to remove the asm out after packetizing (i.e. if the asm must be // moved past the bundle). Similarly, two asms cannot be together to avoid // complications when determining their relative order outside of a bundle. - if (MI->isInlineAsm()) - return MJ->isInlineAsm() || MJ->isBranch() || MJ->isBarrier() || - MJ->isCall() || MJ->isTerminator(); + if (MI.isInlineAsm()) + return MJ.isInlineAsm() || MJ.isBranch() || MJ.isBarrier() || + MJ.isCall() || MJ.isTerminator(); + + switch (MI.getOpcode()) { + case (Hexagon::S2_storew_locked): + case (Hexagon::S4_stored_locked): + case (Hexagon::L2_loadw_locked): + case (Hexagon::L4_loadd_locked): + case (Hexagon::Y4_l2fetch): { + // These instructions can only be grouped with ALU32 or non-floating-point + // XTYPE instructions. Since there is no convenient way of identifying fp + // XTYPE instructions, only allow grouping with ALU32 for now. + unsigned TJ = HII.getType(MJ); + if (TJ != HexagonII::TypeALU32) + return true; + break; + } + default: + break; + } // "False" really means that the quick check failed to determine if // I and J cannot coexist. @@ -999,8 +1061,8 @@ static bool cannotCoexistAsymm(const MachineInstr *MI, const MachineInstr *MJ, // Full, symmetric check. 
-bool HexagonPacketizerList::cannotCoexist(const MachineInstr *MI, - const MachineInstr *MJ) { +bool HexagonPacketizerList::cannotCoexist(const MachineInstr &MI, + const MachineInstr &MJ) { return cannotCoexistAsymm(MI, MJ, *HII) || cannotCoexistAsymm(MJ, MI, *HII); } @@ -1010,10 +1072,10 @@ void HexagonPacketizerList::unpacketizeSoloInstrs(MachineFunction &MF) { MachineBasicBlock::instr_iterator NextI; for (auto I = B.instr_begin(), E = B.instr_end(); I != E; I = NextI) { NextI = std::next(I); - MachineInstr *MI = &*I; - if (MI->isBundle()) + MachineInstr &MI = *I; + if (MI.isBundle()) BundleIt = I; - if (!MI->isInsideBundle()) + if (!MI.isInsideBundle()) continue; // Decide on where to insert the instruction that we are pulling out. @@ -1023,9 +1085,9 @@ void HexagonPacketizerList::unpacketizeSoloInstrs(MachineFunction &MF) { // other instructions in the bundle read, then we need to place it // after the bundle (to preserve the bundle semantics). bool InsertBeforeBundle; - if (MI->isInlineAsm()) - InsertBeforeBundle = !hasWriteToReadDep(*MI, *BundleIt, HRI); - else if (MI->isDebugValue()) + if (MI.isInlineAsm()) + InsertBeforeBundle = !hasWriteToReadDep(MI, *BundleIt, HRI); + else if (MI.isDebugValue()) InsertBeforeBundle = true; else continue; @@ -1036,8 +1098,8 @@ void HexagonPacketizerList::unpacketizeSoloInstrs(MachineFunction &MF) { } // Check if a given instruction is of class "system". -static bool isSystemInstr(const MachineInstr *MI) { - unsigned Opc = MI->getOpcode(); +static bool isSystemInstr(const MachineInstr &MI) { + unsigned Opc = MI.getOpcode(); switch (Opc) { case Hexagon::Y2_barrier: case Hexagon::Y2_dcfetchbo: @@ -1046,24 +1108,24 @@ static bool isSystemInstr(const MachineInstr *MI) { return false; } -bool HexagonPacketizerList::hasDeadDependence(const MachineInstr *I, - const MachineInstr *J) { +bool HexagonPacketizerList::hasDeadDependence(const MachineInstr &I, + const MachineInstr &J) { // The dependence graph may not include edges between dead definitions, // so without extra checks, we could end up packetizing two instructions // defining the same (dead) register. - if (I->isCall() || J->isCall()) + if (I.isCall() || J.isCall()) return false; - if (HII->isPredicated(*I) || HII->isPredicated(*J)) + if (HII->isPredicated(I) || HII->isPredicated(J)) return false; BitVector DeadDefs(Hexagon::NUM_TARGET_REGS); - for (auto &MO : I->operands()) { + for (auto &MO : I.operands()) { if (!MO.isReg() || !MO.isDef() || !MO.isDead()) continue; DeadDefs[MO.getReg()] = true; } - for (auto &MO : J->operands()) { + for (auto &MO : J.operands()) { if (!MO.isReg() || !MO.isDef() || !MO.isDead()) continue; unsigned R = MO.getReg(); @@ -1073,8 +1135,8 @@ bool HexagonPacketizerList::hasDeadDependence(const MachineInstr *I, return false; } -bool HexagonPacketizerList::hasControlDependence(const MachineInstr *I, - const MachineInstr *J) { +bool HexagonPacketizerList::hasControlDependence(const MachineInstr &I, + const MachineInstr &J) { // A save callee-save register function call can only be in a packet // with instructions that don't write to the callee-save registers. if ((HII->isSaveCalleeSavedRegsCall(I) && @@ -1090,10 +1152,10 @@ bool HexagonPacketizerList::hasControlDependence(const MachineInstr *I, // \ref-manual (7.3.4) A loop setup packet in loopN or spNloop0 cannot // contain a speculative indirect jump, // a new-value compare jump or a dealloc_return.
- auto isBadForLoopN = [this] (const MachineInstr *MI) -> bool { - if (MI->isCall() || HII->isDeallocRet(MI) || HII->isNewValueJump(MI)) + auto isBadForLoopN = [this] (const MachineInstr &MI) -> bool { + if (MI.isCall() || HII->isDeallocRet(MI) || HII->isNewValueJump(MI)) return true; - if (HII->isPredicated(*MI) && HII->isPredicatedNew(*MI) && HII->isJumpR(MI)) + if (HII->isPredicated(MI) && HII->isPredicatedNew(MI) && HII->isJumpR(MI)) return true; return false; }; @@ -1106,13 +1168,13 @@ bool HexagonPacketizerList::hasControlDependence(const MachineInstr *I, // dealloc_return cannot appear in the same packet as a conditional or // unconditional jump. return HII->isDeallocRet(I) && - (J->isBranch() || J->isCall() || J->isBarrier()); + (J.isBranch() || J.isCall() || J.isBarrier()); } -bool HexagonPacketizerList::hasV4SpecificDependence(const MachineInstr *I, - const MachineInstr *J) { +bool HexagonPacketizerList::hasV4SpecificDependence(const MachineInstr &I, + const MachineInstr &J) { bool SysI = isSystemInstr(I), SysJ = isSystemInstr(J); - bool StoreI = I->mayStore(), StoreJ = J->mayStore(); + bool StoreI = I.mayStore(), StoreJ = J.mayStore(); if ((SysI && StoreJ) || (SysJ && StoreI)) return true; @@ -1135,19 +1197,18 @@ bool HexagonPacketizerList::hasV4SpecificDependence(const MachineInstr *I, // SUJ is the current instruction inside the current packet against which that // SUI will be packetized. bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { - MachineInstr *I = SUI->getInstr(); - MachineInstr *J = SUJ->getInstr(); - assert(I && J && "Unable to packetize null instruction!"); + assert(SUI->getInstr() && SUJ->getInstr()); + MachineInstr &I = *SUI->getInstr(); + MachineInstr &J = *SUJ->getInstr(); // Clear IgnoreDepMIs when Packet starts. if (CurrentPacketMIs.size() == 1) IgnoreDepMIs.clear(); - MachineBasicBlock::iterator II = I; - const unsigned FrameSize = MF.getFrameInfo()->getStackSize(); + MachineBasicBlock::iterator II = I.getIterator(); // Solo instructions cannot go in the packet. - assert(!isSoloInstruction(*I) && "Unexpected solo instr!"); + assert(!isSoloInstruction(I) && "Unexpected solo instr!"); if (cannotCoexist(I, J)) return false; @@ -1164,23 +1225,23 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { return false; // If an instruction feeds new value jump, glue it. - MachineBasicBlock::iterator NextMII = I; + MachineBasicBlock::iterator NextMII = I.getIterator(); ++NextMII; - if (NextMII != I->getParent()->end() && HII->isNewValueJump(&*NextMII)) { + if (NextMII != I.getParent()->end() && HII->isNewValueJump(*NextMII)) { MachineInstr &NextMI = *NextMII; bool secondRegMatch = false; const MachineOperand &NOp0 = NextMI.getOperand(0); const MachineOperand &NOp1 = NextMI.getOperand(1); - if (NOp1.isReg() && I->getOperand(0).getReg() == NOp1.getReg()) + if (NOp1.isReg() && I.getOperand(0).getReg() == NOp1.getReg()) secondRegMatch = true; - for (auto I : CurrentPacketMIs) { - SUnit *PacketSU = MIToSUnit.find(I)->second; - MachineInstr *PI = PacketSU->getInstr(); + for (auto T : CurrentPacketMIs) { + SUnit *PacketSU = MIToSUnit.find(T)->second; + MachineInstr &PI = *PacketSU->getInstr(); // NVJ can not be part of the dual jump - Arch Spec: section 7.8. - if (PI->isCall()) { + if (PI.isCall()) { Dependence = true; break; } @@ -1192,14 +1253,14 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { // 3. 
If the second operand of the nvj is newified (which means // first operand is also a reg), first reg is not defined in // the same packet. - if (PI->getOpcode() == Hexagon::S2_allocframe || PI->mayStore() || + if (PI.getOpcode() == Hexagon::S2_allocframe || PI.mayStore() || HII->isLoopN(PI)) { Dependence = true; break; } // Check #2/#3. const MachineOperand &OpR = secondRegMatch ? NOp0 : NOp1; - if (OpR.isReg() && PI->modifiesRegister(OpR.getReg(), HRI)) { + if (OpR.isReg() && PI.modifiesRegister(OpR.getReg(), HRI)) { Dependence = true; break; } @@ -1237,12 +1298,6 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { // dealloc return unless we have dependencies on the explicit uses // of the registers used by jumpr (like r31) or dealloc return // (like r29 or r30). - // - // TODO: Currently, jumpr is handling only return of r31. So, the - // following logic (specificaly isCallDependent) is working fine. - // We need to enable jumpr for register other than r31 and then, - // we need to rework the last part, where it handles indirect call - // of that (isCallDependent) function. Bug 6216 is opened for this. unsigned DepReg = 0; const TargetRegisterClass *RC = nullptr; if (DepType == SDep::Data) { @@ -1250,7 +1305,7 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { RC = HRI->getMinimalPhysRegClass(DepReg); } - if (I->isCall() || I->isReturn() || HII->isTailCall(I)) { + if (I.isCall() || HII->isJumpR(I) || I.isReturn() || HII->isTailCall(I)) { if (!isRegDependence(DepType)) continue; if (!isCallDependent(I, DepType, SUJ->Succs[i].getReg())) @@ -1283,8 +1338,8 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { // For predicated instructions, if the predicates are complements then // there can be no dependence. - if (HII->isPredicated(*I) && HII->isPredicated(*J) && - arePredicatesComplements(*I, *J)) { + if (HII->isPredicated(I) && HII->isPredicated(J) && + arePredicatesComplements(I, J)) { // Not always safe to do this translation. // DAG Builder attempts to reduce dependence edges using transitive // nature of dependencies. Here is an example: @@ -1297,24 +1352,24 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { // However, there is no dependence edge between (1)->(3). This results // in all 3 instructions going in the same packet. We ignore dependence // only once to avoid this situation. - auto Itr = std::find(IgnoreDepMIs.begin(), IgnoreDepMIs.end(), J); + auto Itr = find(IgnoreDepMIs, &J); if (Itr != IgnoreDepMIs.end()) { Dependence = true; return false; } - IgnoreDepMIs.push_back(I); + IgnoreDepMIs.push_back(&I); continue; } // Ignore Order dependences between unconditional direct branches // and non-control-flow instructions. - if (isDirectJump(I) && !J->isBranch() && !J->isCall() && + if (isDirectJump(I) && !J.isBranch() && !J.isCall() && DepType == SDep::Order) continue; // Ignore all dependences for jumps except for true and output // dependences. - if (I->isConditionalBranch() && DepType != SDep::Data && + if (I.isConditionalBranch() && DepType != SDep::Data && DepType != SDep::Output) continue; @@ -1336,7 +1391,7 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { unsigned DepReg = SUJ->Succs[i].getReg(); // Check if I and J really define DepReg.
- if (!I->definesRegister(DepReg) && !J->definesRegister(DepReg)) + if (!I.definesRegister(DepReg) && !J.definesRegister(DepReg)) continue; FoundSequentialDependence = true; break; } @@ -1350,15 +1405,15 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { // 4. Load followed by any memory operation is allowed. if (DepType == SDep::Order) { if (!PacketizeVolatiles) { - bool OrdRefs = I->hasOrderedMemoryRef() || J->hasOrderedMemoryRef(); + bool OrdRefs = I.hasOrderedMemoryRef() || J.hasOrderedMemoryRef(); if (OrdRefs) { FoundSequentialDependence = true; break; } } // J is first, I is second. - bool LoadJ = J->mayLoad(), StoreJ = J->mayStore(); - bool LoadI = I->mayLoad(), StoreI = I->mayStore(); + bool LoadJ = J.mayLoad(), StoreJ = J.mayStore(); + bool LoadI = I.mayLoad(), StoreI = I.mayStore(); if (StoreJ) { // Two stores are only allowed on V4+. Load following store is never // allowed. @@ -1383,25 +1438,21 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { // between ALLOCFRAME and subsequent store, allow it to be packetized // in the same packet. This implies that the store is using the caller's // SP. Hence, offset needs to be updated accordingly. - if (DepType == SDep::Data && J->getOpcode() == Hexagon::S2_allocframe) { - unsigned Opc = I->getOpcode(); + if (DepType == SDep::Data && J.getOpcode() == Hexagon::S2_allocframe) { + unsigned Opc = I.getOpcode(); switch (Opc) { case Hexagon::S2_storerd_io: case Hexagon::S2_storeri_io: case Hexagon::S2_storerh_io: case Hexagon::S2_storerb_io: - if (I->getOperand(0).getReg() == HRI->getStackRegister()) { - int64_t Imm = I->getOperand(1).getImm(); - int64_t NewOff = Imm - (FrameSize + HEXAGON_LRFP_SIZE); - if (HII->isValidOffset(Opc, NewOff)) { - GlueAllocframeStore = true; - // Since this store is to be glued with allocframe in the same - // packet, it will use SP of the previous stack frame, i.e. - // caller's SP. Therefore, we need to recalculate offset - // according to this change. - I->getOperand(1).setImm(NewOff); + if (I.getOperand(0).getReg() == HRI->getStackRegister()) { + // Since this store is to be glued with allocframe in the same - // packet, it will use SP of the previous stack frame, i.e. + // packet, it will use SP of the previous stack frame, i.e. + // caller's SP. Therefore, we need to recalculate offset + // according to this change. + GlueAllocframeStore = useCallersSP(I); + if (GlueAllocframeStore) continue; - } } default: break; @@ -1414,12 +1465,12 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { // R0 = ... ; SUI // Those cannot be packetized together, since the call will observe // the effect of the assignment to R0. - if (DepType == SDep::Anti && J->isCall()) { + if (DepType == SDep::Anti && J.isCall()) { // Check if I defines any volatile register. We should also check // registers that the call may read, but these happen to be a // subset of the volatile register set.
- for (const MCPhysReg *P = J->getDesc().ImplicitDefs; P && *P; ++P) { - if (!I->modifiesRegister(*P, HRI)) + for (const MCPhysReg *P = J.getDesc().ImplicitDefs; P && *P; ++P) { + if (!I.modifiesRegister(*P, HRI)) continue; FoundSequentialDependence = true; break; @@ -1447,9 +1498,9 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { } bool HexagonPacketizerList::isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) { - MachineInstr *I = SUI->getInstr(); - MachineInstr *J = SUJ->getInstr(); - assert(I && J && "Unable to packetize null instruction!"); + assert(SUI->getInstr() && SUJ->getInstr()); + MachineInstr &I = *SUI->getInstr(); + MachineInstr &J = *SUJ->getInstr(); if (cannotCoexist(I, J)) return false; @@ -1467,16 +1518,15 @@ bool HexagonPacketizerList::isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) { // instruction. If so, restore its offset to its original value, i.e. use // current SP instead of caller's SP. if (GlueAllocframeStore) { - unsigned FrameSize = MF.getFrameInfo()->getStackSize(); - MachineOperand &MOff = I->getOperand(1); - MOff.setImm(MOff.getImm() + FrameSize + HEXAGON_LRFP_SIZE); + useCalleesSP(I); + GlueAllocframeStore = false; } return false; } MachineBasicBlock::iterator HexagonPacketizerList::addToPacket(MachineInstr &MI) { - MachineBasicBlock::iterator MII = MI; + MachineBasicBlock::iterator MII = MI.getIterator(); MachineBasicBlock *MBB = MI.getParent(); if (MI.isImplicitDef()) { unsigned R = MI.getOperand(0).getReg(); @@ -1488,7 +1538,7 @@ HexagonPacketizerList::addToPacket(MachineInstr &MI) { } assert(ResourceTracker->canReserveResources(MI)); - bool ExtMI = HII->isExtended(&MI) || HII->isConstExtended(&MI); + bool ExtMI = HII->isExtended(MI) || HII->isConstExtended(MI); bool Good = true; if (GlueToNewValueJump) { @@ -1501,7 +1551,7 @@ HexagonPacketizerList::addToPacket(MachineInstr &MI) { if (ExtMI) Good = tryAllocateResourcesForConstExt(true); - bool ExtNvjMI = HII->isExtended(&NvjMI) || HII->isConstExtended(&NvjMI); + bool ExtNvjMI = HII->isExtended(NvjMI) || HII->isConstExtended(NvjMI); if (Good) { if (ResourceTracker->canReserveResources(NvjMI)) ResourceTracker->reserveResources(NvjMI); @@ -1535,7 +1585,11 @@ HexagonPacketizerList::addToPacket(MachineInstr &MI) { if (ExtMI && !tryAllocateResourcesForConstExt(true)) { endPacket(MBB, MI); if (PromotedToDotNew) - demoteToDotOld(&MI); + demoteToDotOld(MI); + if (GlueAllocframeStore) { + useCalleesSP(MI); + GlueAllocframeStore = false; + } ResourceTracker->reserveResources(MI); reserveResourcesForConstExt(); } @@ -1551,18 +1605,18 @@ void HexagonPacketizerList::endPacket(MachineBasicBlock *MBB, } bool HexagonPacketizerList::shouldAddToPacket(const MachineInstr &MI) { - return !producesStall(&MI); + return !producesStall(MI); } // Return true when ConsMI uses a register defined by ProdMI. -static bool isDependent(const MachineInstr *ProdMI, - const MachineInstr *ConsMI) { - if (!ProdMI->getOperand(0).isReg()) +static bool isDependent(const MachineInstr &ProdMI, + const MachineInstr &ConsMI) { + if (!ProdMI.getOperand(0).isReg()) return false; - unsigned DstReg = ProdMI->getOperand(0).getReg(); + unsigned DstReg = ProdMI.getOperand(0).getReg(); - for (auto &Op : ConsMI->operands()) + for (auto &Op : ConsMI.operands()) if (Op.isReg() && Op.isUse() && Op.getReg() == DstReg) // The MIs depend on each other. return true; @@ -1571,7 +1625,7 @@ static bool isDependent(const MachineInstr *ProdMI, } // V60 forward scheduling. 
-bool HexagonPacketizerList::producesStall(const MachineInstr *I) { +bool HexagonPacketizerList::producesStall(const MachineInstr &I) { // Check whether the previous packet is in a different loop. If this is the // case, there is little point in trying to avoid a stall because that would // favor the rare case (loop entry) over the common case (loop iteration). @@ -1581,7 +1635,7 @@ bool HexagonPacketizerList::producesStall(const MachineInstr *I) { // backedge. if (!OldPacketMIs.empty()) { auto *OldBB = OldPacketMIs.front()->getParent(); - auto *ThisBB = I->getParent(); + auto *ThisBB = I.getParent(); if (MLI->getLoopFor(OldBB) != MLI->getLoopFor(ThisBB)) return false; } @@ -1589,9 +1643,9 @@ bool HexagonPacketizerList::producesStall(const MachineInstr *I) { // Check for stall between two vector instructions. if (HII->isV60VectorInstruction(I)) { for (auto J : OldPacketMIs) { - if (!HII->isV60VectorInstruction(J)) + if (!HII->isV60VectorInstruction(*J)) continue; - if (isDependent(J, I) && !HII->isVecUsableNextPacket(J, I)) + if (isDependent(*J, I) && !HII->isVecUsableNextPacket(*J, I)) return true; } return false; @@ -1601,17 +1655,17 @@ bool HexagonPacketizerList::producesStall(const MachineInstr *I) { // there is no definition of a use in the current packet, because it // may be a candidate for .new. for (auto J : CurrentPacketMIs) - if (!HII->isV60VectorInstruction(J) && isDependent(J, I)) + if (!HII->isV60VectorInstruction(*J) && isDependent(*J, I)) return false; // Check for stall between I and instructions in the previous packet. if (MF.getSubtarget<HexagonSubtarget>().useBSBScheduling()) { for (auto J : OldPacketMIs) { - if (HII->isV60VectorInstruction(J)) + if (HII->isV60VectorInstruction(*J)) continue; - if (!HII->isLateInstrFeedsEarlyInstr(J, I)) + if (!HII->isLateInstrFeedsEarlyInstr(*J, I)) continue; - if (isDependent(J, I) && !HII->canExecuteInBundle(J, I)) + if (isDependent(*J, I) && !HII->canExecuteInBundle(*J, I)) return true; } } diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h index 3f8ed5a..b28b926 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h +++ b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h @@ -73,42 +73,44 @@ public: void unpacketizeSoloInstrs(MachineFunction &MF); protected: - bool isCallDependent(const MachineInstr* MI, SDep::Kind DepType, + bool isCallDependent(const MachineInstr &MI, SDep::Kind DepType, unsigned DepReg); - bool promoteToDotCur(MachineInstr* MI, SDep::Kind DepType, + bool promoteToDotCur(MachineInstr &MI, SDep::Kind DepType, MachineBasicBlock::iterator &MII, - const TargetRegisterClass* RC); - bool canPromoteToDotCur(const MachineInstr* MI, const SUnit* PacketSU, + const TargetRegisterClass *RC); + bool canPromoteToDotCur(const MachineInstr &MI, const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII, - const TargetRegisterClass* RC); + const TargetRegisterClass *RC); void cleanUpDotCur(); - bool promoteToDotNew(MachineInstr* MI, SDep::Kind DepType, + bool promoteToDotNew(MachineInstr &MI, SDep::Kind DepType, MachineBasicBlock::iterator &MII, - const TargetRegisterClass* RC); - bool canPromoteToDotNew(const MachineInstr* MI, const SUnit* PacketSU, + const TargetRegisterClass *RC); + bool canPromoteToDotNew(const MachineInstr &MI, const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII, - const TargetRegisterClass* RC); - bool canPromoteToNewValue(const MachineInstr* MI, const SUnit* PacketSU, + const 
TargetRegisterClass *RC); + bool canPromoteToNewValue(const MachineInstr &MI, const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII); - bool canPromoteToNewValueStore(const MachineInstr* MI, - const MachineInstr* PacketMI, unsigned DepReg); - bool demoteToDotOld(MachineInstr* MI); + bool canPromoteToNewValueStore(const MachineInstr &MI, + const MachineInstr &PacketMI, unsigned DepReg); + bool demoteToDotOld(MachineInstr &MI); + bool useCallersSP(MachineInstr &MI); + void useCalleesSP(MachineInstr &MI); bool arePredicatesComplements(MachineInstr &MI1, MachineInstr &MI2); - bool restrictingDepExistInPacket(MachineInstr*, unsigned); - bool isNewifiable(const MachineInstr *MI); - bool isCurifiable(MachineInstr* MI); - bool cannotCoexist(const MachineInstr *MI, const MachineInstr *MJ); + bool restrictingDepExistInPacket(MachineInstr&, unsigned); + bool isNewifiable(const MachineInstr &MI, const TargetRegisterClass *NewRC); + bool isCurifiable(MachineInstr &MI); + bool cannotCoexist(const MachineInstr &MI, const MachineInstr &MJ); inline bool isPromotedToDotNew() const { return PromotedToDotNew; } bool tryAllocateResourcesForConstExt(bool Reserve); bool canReserveResourcesForConstExt(); void reserveResourcesForConstExt(); - bool hasDeadDependence(const MachineInstr *I, const MachineInstr *J); - bool hasControlDependence(const MachineInstr *I, const MachineInstr *J); - bool hasV4SpecificDependence(const MachineInstr *I, const MachineInstr *J); - bool producesStall(const MachineInstr *MI); + bool hasDeadDependence(const MachineInstr &I, const MachineInstr &J); + bool hasControlDependence(const MachineInstr &I, const MachineInstr &J); + bool hasV4SpecificDependence(const MachineInstr &I, const MachineInstr &J); + bool producesStall(const MachineInstr &MI); }; } // namespace llvm #endif // HEXAGONVLIWPACKETIZER_H diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonVectorPrint.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonVectorPrint.cpp new file mode 100644 index 0000000..085d464 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonVectorPrint.cpp @@ -0,0 +1,209 @@ +//===-- HexagonVectorPrint.cpp - Generate vector printing instructions -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass adds the capability to generate pseudo vector/predicate register +// printing instructions. These pseudo instructions should be used with the +// simulator, NEVER on hardware. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "hexagon-vector-print" + +#include "HexagonInstrInfo.h" +#include "HexagonSubtarget.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include <string> +#include <vector> + +using namespace llvm; + +static cl::opt<bool> TraceHexVectorStoresOnly("trace-hex-vector-stores-only", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Enables tracing of vector stores")); + +namespace llvm { + + FunctionPass *createHexagonVectorPrint(); + void initializeHexagonVectorPrintPass(PassRegistry&); + +} // end namespace llvm + +namespace { + +class HexagonVectorPrint : public MachineFunctionPass { + const HexagonSubtarget *QST; + const HexagonInstrInfo *QII; + const HexagonRegisterInfo *QRI; + +public: + static char ID; + + HexagonVectorPrint() + : MachineFunctionPass(ID), QST(nullptr), QII(nullptr), QRI(nullptr) { + initializeHexagonVectorPrintPass(*PassRegistry::getPassRegistry()); + } + + StringRef getPassName() const override { return "Hexagon VectorPrint pass"; } + + bool runOnMachineFunction(MachineFunction &Fn) override; +}; + +char HexagonVectorPrint::ID = 0; + +} // end anonymous namespace + +static bool isVecReg(unsigned Reg) { + return (Reg >= Hexagon::V0 && Reg <= Hexagon::V31) + || (Reg >= Hexagon::W0 && Reg <= Hexagon::W15) + || (Reg >= Hexagon::Q0 && Reg <= Hexagon::Q3); +} + +static std::string getStringReg(unsigned R) { + if (R >= Hexagon::V0 && R <= Hexagon::V31) { + static const char* S[] = { "20", "21", "22", "23", "24", "25", "26", "27", + "28", "29", "2a", "2b", "2c", "2d", "2e", "2f", + "30", "31", "32", "33", "34", "35", "36", "37", + "38", "39", "3a", "3b", "3c", "3d", "3e", "3f"}; + return S[R-Hexagon::V0]; + } + if (R >= Hexagon::Q0 && R <= Hexagon::Q3) { + static const char* S[] = { "00", "01", "02", "03"}; + return S[R-Hexagon::Q0]; + + } + llvm_unreachable("valid vreg"); +} + +static void addAsmInstr(MachineBasicBlock *MBB, unsigned Reg, + MachineBasicBlock::instr_iterator I, + const DebugLoc &DL, const HexagonInstrInfo *QII, + MachineFunction &Fn) { + + std::string VDescStr = ".long 0x1dffe0" + getStringReg(Reg); + const char *cstr = Fn.createExternalSymbolName(VDescStr); + unsigned ExtraInfo = InlineAsm::Extra_HasSideEffects; + BuildMI(*MBB, I, DL, QII->get(TargetOpcode::INLINEASM)) + .addExternalSymbol(cstr) + .addImm(ExtraInfo); +} + +static bool getInstrVecReg(const MachineInstr &MI, unsigned &Reg) { + if (MI.getNumOperands() < 1) return false; + // Vec load or compute. + if (MI.getOperand(0).isReg() && MI.getOperand(0).isDef()) { + Reg = MI.getOperand(0).getReg(); + if (isVecReg(Reg)) + return !TraceHexVectorStoresOnly; + } + // Vec store. + if (MI.mayStore() && MI.getNumOperands() >= 3 && MI.getOperand(2).isReg()) { + Reg = MI.getOperand(2).getReg(); + if (isVecReg(Reg)) + return true; + } + // Vec store post increment. 
+ if (MI.mayStore() && MI.getNumOperands() >= 4 && MI.getOperand(3).isReg()) { + Reg = MI.getOperand(3).getReg(); + if (isVecReg(Reg)) + return true; + } + return false; +} + +bool HexagonVectorPrint::runOnMachineFunction(MachineFunction &Fn) { + bool Changed = false; + QST = &Fn.getSubtarget<HexagonSubtarget>(); + QRI = QST->getRegisterInfo(); + QII = QST->getInstrInfo(); + std::vector<MachineInstr *> VecPrintList; + for (auto &MBB : Fn) + for (auto &MI : MBB) { + if (MI.isBundle()) { + MachineBasicBlock::instr_iterator MII = MI.getIterator(); + for (++MII; MII != MBB.instr_end() && MII->isInsideBundle(); ++MII) { + if (MII->getNumOperands() < 1) + continue; + unsigned Reg = 0; + if (getInstrVecReg(*MII, Reg)) { + VecPrintList.push_back((&*MII)); + DEBUG(dbgs() << "Found vector reg inside bundle \n"; MII->dump()); + } + } + } else { + unsigned Reg = 0; + if (getInstrVecReg(MI, Reg)) { + VecPrintList.push_back(&MI); + DEBUG(dbgs() << "Found vector reg \n"; MI.dump()); + } + } + } + + Changed = !VecPrintList.empty(); + if (!Changed) + return Changed; + + for (auto *I : VecPrintList) { + DebugLoc DL = I->getDebugLoc(); + MachineBasicBlock *MBB = I->getParent(); + DEBUG(dbgs() << "Evaluating V MI\n"; I->dump()); + unsigned Reg = 0; + if (!getInstrVecReg(*I, Reg)) + llvm_unreachable("Need a vector reg"); + MachineBasicBlock::instr_iterator MII = I->getIterator(); + if (I->isInsideBundle()) { + DEBUG(dbgs() << "add to end of bundle\n"; I->dump()); + while (MBB->instr_end() != MII && MII->isInsideBundle()) + MII++; + } else { + DEBUG(dbgs() << "add after instruction\n"; I->dump()); + MII++; + } + if (MBB->instr_end() == MII) + continue; + + if (Reg >= Hexagon::V0 && Reg <= Hexagon::V31) { + DEBUG(dbgs() << "adding dump for V" << Reg-Hexagon::V0 << '\n'); + addAsmInstr(MBB, Reg, MII, DL, QII, Fn); + } else if (Reg >= Hexagon::W0 && Reg <= Hexagon::W15) { + DEBUG(dbgs() << "adding dump for W" << Reg-Hexagon::W0 << '\n'); + addAsmInstr(MBB, Hexagon::V0 + (Reg - Hexagon::W0) * 2 + 1, + MII, DL, QII, Fn); + addAsmInstr(MBB, Hexagon::V0 + (Reg - Hexagon::W0) * 2, + MII, DL, QII, Fn); + } else if (Reg >= Hexagon::Q0 && Reg <= Hexagon::Q3) { + DEBUG(dbgs() << "adding dump for Q" << Reg-Hexagon::Q0 << '\n'); + addAsmInstr(MBB, Reg, MII, DL, QII, Fn); + } else + llvm_unreachable("Bad Vector reg"); + } + return Changed; +} + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// +INITIALIZE_PASS(HexagonVectorPrint, "hexagon-vector-print", + "Hexagon VectorPrint pass", false, false) + +FunctionPass *llvm::createHexagonVectorPrint() { + return new HexagonVectorPrint(); +} diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp index 2898b05..c140bd1 100644 --- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp @@ -569,8 +569,8 @@ public: if (!Resolved) { switch ((unsigned)Fixup.getKind()) { case fixup_Hexagon_B22_PCREL: - // GetFixupCount assumes B22 won't relax - // Fallthrough + // GetFixupCount assumes B22 won't relax + LLVM_FALLTHROUGH; default: return false; break; @@ -745,7 +745,8 @@ public: namespace llvm { MCAsmBackend *createHexagonAsmBackend(Target const &T, MCRegisterInfo const & /*MRI*/, - const Triple &TT, StringRef CPU) { + const Triple &TT, StringRef CPU, + const 
MCTargetOptions &Options) { uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS()); return new HexagonAsmBackend(T, OSABI, CPU); } diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h index c63f044..4292f6b 100644 --- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h @@ -41,7 +41,7 @@ namespace HexagonII { TypeST = 6, TypeSYSTEM = 7, TypeXTYPE = 8, - TypeMEMOP = 9, + TypeV4LDST = 9, TypeNV = 10, TypeDUPLEX = 11, TypeCOMPOUND = 12, @@ -92,7 +92,7 @@ namespace HexagonII { // MemAccessSize is represented as 1+log2(N) where N is size in bits. enum class MemAccessSize { - NoMemAccess = 0, // Not a memory acces instruction. + NoMemAccess = 0, // Not a memory access instruction. ByteAccess = 1, // Byte access instruction (memb). HalfWordAccess = 2, // Half word access instruction (memh). WordAccess = 3, // Word access instruction (memw). @@ -201,9 +201,12 @@ namespace HexagonII { AccumulatorPos = 54, AccumulatorMask = 0x1, - // Complex XU, prevent xu competition by prefering slot3 + // Complex XU, prevent xu competition by preferring slot3 PrefersSlot3Pos = 55, PrefersSlot3Mask = 0x1, + + CofMax1Pos = 60, + CofMax1Mask = 0x1 }; // *** The code above must match HexagonInstrFormat*.td *** // diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp index 9e2c280..c619c36 100644 --- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp @@ -22,7 +22,6 @@ HexagonMCAsmInfo::HexagonMCAsmInfo(const Triple &TT) { Data16bitsDirective = "\t.half\t"; Data32bitsDirective = "\t.word\t"; Data64bitsDirective = nullptr; // .xword is only supported by V9. 
- ZeroDirective = "\t.skip\t"; CommentString = "//"; LCOMMDirectiveAlignmentType = LCOMM::ByteAlignment; diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp index 39b828d..2645a17 100644 --- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp @@ -78,6 +78,9 @@ void HexagonMCCodeEmitter::encodeInstruction(MCInst const &MI, raw_ostream &OS, size_t Last = HexagonMCInstrInfo::bundleSize(HMB) - 1; for (auto &I : HexagonMCInstrInfo::bundleInstructions(HMB)) { MCInst &HMI = const_cast<MCInst &>(*I.getInst()); + verifyInstructionPredicates(HMI, + computeAvailableFeatures(STI.getFeatureBits())); + EncodeSingleInstruction(HMI, OS, Fixups, STI, parseBits(Instruction, Last, HMB, HMI), Instruction); @@ -817,4 +820,5 @@ MCCodeEmitter *llvm::createHexagonMCCodeEmitter(MCInstrInfo const &MII, return new HexagonMCCodeEmitter(MII, MCT); } +#define ENABLE_INSTR_PREDICATE_VERIFIER #include "HexagonGenMCCodeEmitter.inc" diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h index 2a154da..8e0667d 100644 --- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h @@ -63,6 +63,11 @@ public: unsigned getMachineOpValue(MCInst const &MI, MCOperand const &MO, SmallVectorImpl<MCFixup> &Fixups, MCSubtargetInfo const &STI) const; + +private: + uint64_t computeAvailableFeatures(const FeatureBitset &FB) const; + void verifyInstructionPredicates(const MCInst &MI, + uint64_t AvailableFeatures) const; }; // class HexagonMCCodeEmitter } // namespace llvm diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp index d194bea..9a09a17 100644 --- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp @@ -1,5 +1,4 @@ - -//=== HexagonMCCompound.cpp - Hexagon Compound checker -------===// +//=== HexagonMCCompound.cpp - Hexagon Compound checker -------------------===// // // The LLVM Compiler Infrastructure // @@ -11,18 +10,17 @@ // This file looks at a packet and tries to form compound insns // //===----------------------------------------------------------------------===// + #include "Hexagon.h" #include "MCTargetDesc/HexagonBaseInfo.h" -#include "MCTargetDesc/HexagonMCShuffler.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/MC/MCAssembler.h" +#include "MCTargetDesc/HexagonMCInstrInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInst.h" -#include "llvm/MC/MCSectionELF.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbol.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include <cassert> +#include <cstdint> using namespace llvm; using namespace Hexagon; @@ -79,8 +77,7 @@ static const unsigned cmpgtn1BitOpcode[8] = { }; // enum HexagonII::CompoundGroup -namespace { -unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) { +static unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) { unsigned DstReg, SrcReg, Src1Reg, Src2Reg; switch (MI.getOpcode()) { @@ -173,11 +170,9 @@ static const unsigned cmpgtn1BitOpcode[8] = { return
HexagonII::HCG_None; } -} /// getCompoundOp - Return the index from 0-7 into the above opcode lists. -namespace { -unsigned getCompoundOp(MCInst const &HMCI) { +static unsigned getCompoundOp(MCInst const &HMCI) { const MCOperand &Predicate = HMCI.getOperand(0); unsigned PredReg = Predicate.getReg(); @@ -198,11 +193,10 @@ unsigned getCompoundOp(MCInst const &HMCI) { return (PredReg == Hexagon::P0) ? tp0_jump_t : tp1_jump_t; } } -} -namespace { -MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) { - MCInst *CompoundInsn = 0; +static MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, + MCInst const &R) { + MCInst *CompoundInsn = nullptr; unsigned compoundOpcode; MCOperand Rs, Rt; int64_t Value; @@ -290,8 +284,7 @@ MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) { CompoundInsn = new (Context) MCInst; CompoundInsn->setOpcode(compoundOpcode); CompoundInsn->addOperand(Rs); - if (Value != -1) - CompoundInsn->addOperand(L.getOperand(2)); + CompoundInsn->addOperand(L.getOperand(2)); CompoundInsn->addOperand(R.getOperand(1)); break; @@ -309,8 +302,7 @@ MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) { CompoundInsn = new (Context) MCInst; CompoundInsn->setOpcode(compoundOpcode); CompoundInsn->addOperand(Rs); - if (Value != -1) - CompoundInsn->addOperand(L.getOperand(2)); + CompoundInsn->addOperand(L.getOperand(2)); CompoundInsn->addOperand(R.getOperand(1)); break; @@ -338,12 +330,10 @@ MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) { return CompoundInsn; } -} /// Non-Symmetrical. See if these two instructions are fit for a compound pair. -namespace { -bool isOrderedCompoundPair(MCInst const &MIa, bool IsExtendedA, - MCInst const &MIb, bool IsExtendedB) { +static bool isOrderedCompoundPair(MCInst const &MIa, bool IsExtendedA, + MCInst const &MIb, bool IsExtendedB) { unsigned MIaG = getCompoundCandidateGroup(MIa, IsExtendedA); unsigned MIbG = getCompoundCandidateGroup(MIb, IsExtendedB); // We have two candidates - check that this is the same register @@ -355,10 +345,9 @@ bool isOrderedCompoundPair(MCInst const &MIa, bool IsExtendedA, return ((MIaG == HexagonII::HCG_A && MIbG == HexagonII::HCG_B) && (MIa.getOperand(0).getReg() == MIb.getOperand(0).getReg())); } -} -namespace { -bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI) { +static bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context, + MCInst &MCI) { assert(HexagonMCInstrInfo::isBundle(MCI)); bool JExtended = false; for (MCInst::iterator J = @@ -369,8 +358,7 @@ bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI) { JExtended = true; continue; } - if (llvm::HexagonMCInstrInfo::getType(MCII, *JumpInst) == - HexagonII::TypeJ) { + if (HexagonMCInstrInfo::getType(MCII, *JumpInst) == HexagonII::TypeJ) { // Try to pair with another insn (B)undled with jump. bool BExtended = false; for (MCInst::iterator B = @@ -403,7 +391,6 @@ bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI) { } return false; } -} /// tryCompound - Given a bundle, check for compound insns; when one /// is found, update the contents of the bundle with the compound insn. @@ -422,6 +409,4 @@ void HexagonMCInstrInfo::tryCompound(MCInstrInfo const &MCII, // a compound is found.
while (lookForCompound(MCII, Context, MCI)) ; - - return; } diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp index 8833621..413f052 100644 --- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp @@ -27,58 +27,58 @@ using namespace Hexagon; // pair table of subInstructions with opcodes static const std::pair<unsigned, unsigned> opcodeData[] = { - std::make_pair((unsigned)V4_SA1_addi, 0), - std::make_pair((unsigned)V4_SA1_addrx, 6144), - std::make_pair((unsigned)V4_SA1_addsp, 3072), - std::make_pair((unsigned)V4_SA1_and1, 4608), - std::make_pair((unsigned)V4_SA1_clrf, 6768), - std::make_pair((unsigned)V4_SA1_clrfnew, 6736), - std::make_pair((unsigned)V4_SA1_clrt, 6752), - std::make_pair((unsigned)V4_SA1_clrtnew, 6720), - std::make_pair((unsigned)V4_SA1_cmpeqi, 6400), - std::make_pair((unsigned)V4_SA1_combine0i, 7168), - std::make_pair((unsigned)V4_SA1_combine1i, 7176), - std::make_pair((unsigned)V4_SA1_combine2i, 7184), - std::make_pair((unsigned)V4_SA1_combine3i, 7192), - std::make_pair((unsigned)V4_SA1_combinerz, 7432), - std::make_pair((unsigned)V4_SA1_combinezr, 7424), - std::make_pair((unsigned)V4_SA1_dec, 4864), - std::make_pair((unsigned)V4_SA1_inc, 4352), - std::make_pair((unsigned)V4_SA1_seti, 2048), - std::make_pair((unsigned)V4_SA1_setin1, 6656), - std::make_pair((unsigned)V4_SA1_sxtb, 5376), - std::make_pair((unsigned)V4_SA1_sxth, 5120), - std::make_pair((unsigned)V4_SA1_tfr, 4096), - std::make_pair((unsigned)V4_SA1_zxtb, 5888), - std::make_pair((unsigned)V4_SA1_zxth, 5632), - std::make_pair((unsigned)V4_SL1_loadri_io, 0), - std::make_pair((unsigned)V4_SL1_loadrub_io, 4096), - std::make_pair((unsigned)V4_SL2_deallocframe, 7936), - std::make_pair((unsigned)V4_SL2_jumpr31, 8128), - std::make_pair((unsigned)V4_SL2_jumpr31_f, 8133), - std::make_pair((unsigned)V4_SL2_jumpr31_fnew, 8135), - std::make_pair((unsigned)V4_SL2_jumpr31_t, 8132), - std::make_pair((unsigned)V4_SL2_jumpr31_tnew, 8134), - std::make_pair((unsigned)V4_SL2_loadrb_io, 4096), - std::make_pair((unsigned)V4_SL2_loadrd_sp, 7680), - std::make_pair((unsigned)V4_SL2_loadrh_io, 0), - std::make_pair((unsigned)V4_SL2_loadri_sp, 7168), - std::make_pair((unsigned)V4_SL2_loadruh_io, 2048), - std::make_pair((unsigned)V4_SL2_return, 8000), - std::make_pair((unsigned)V4_SL2_return_f, 8005), - std::make_pair((unsigned)V4_SL2_return_fnew, 8007), - std::make_pair((unsigned)V4_SL2_return_t, 8004), - std::make_pair((unsigned)V4_SL2_return_tnew, 8006), - std::make_pair((unsigned)V4_SS1_storeb_io, 4096), - std::make_pair((unsigned)V4_SS1_storew_io, 0), - std::make_pair((unsigned)V4_SS2_allocframe, 7168), - std::make_pair((unsigned)V4_SS2_storebi0, 4608), - std::make_pair((unsigned)V4_SS2_storebi1, 4864), - std::make_pair((unsigned)V4_SS2_stored_sp, 2560), - std::make_pair((unsigned)V4_SS2_storeh_io, 0), - std::make_pair((unsigned)V4_SS2_storew_sp, 2048), - std::make_pair((unsigned)V4_SS2_storewi0, 4096), - std::make_pair((unsigned)V4_SS2_storewi1, 4352)}; + std::make_pair((unsigned)SA1_addi, 0), + std::make_pair((unsigned)SA1_addrx, 6144), + std::make_pair((unsigned)SA1_addsp, 3072), + std::make_pair((unsigned)SA1_and1, 4608), + std::make_pair((unsigned)SA1_clrf, 6768), + std::make_pair((unsigned)SA1_clrfnew, 6736), + std::make_pair((unsigned)SA1_clrt, 6752), + std::make_pair((unsigned)SA1_clrtnew, 6720), + std::make_pair((unsigned)SA1_cmpeqi, 
6400), + std::make_pair((unsigned)SA1_combine0i, 7168), + std::make_pair((unsigned)SA1_combine1i, 7176), + std::make_pair((unsigned)SA1_combine2i, 7184), + std::make_pair((unsigned)SA1_combine3i, 7192), + std::make_pair((unsigned)SA1_combinerz, 7432), + std::make_pair((unsigned)SA1_combinezr, 7424), + std::make_pair((unsigned)SA1_dec, 4864), + std::make_pair((unsigned)SA1_inc, 4352), + std::make_pair((unsigned)SA1_seti, 2048), + std::make_pair((unsigned)SA1_setin1, 6656), + std::make_pair((unsigned)SA1_sxtb, 5376), + std::make_pair((unsigned)SA1_sxth, 5120), + std::make_pair((unsigned)SA1_tfr, 4096), + std::make_pair((unsigned)SA1_zxtb, 5888), + std::make_pair((unsigned)SA1_zxth, 5632), + std::make_pair((unsigned)SL1_loadri_io, 0), + std::make_pair((unsigned)SL1_loadrub_io, 4096), + std::make_pair((unsigned)SL2_deallocframe, 7936), + std::make_pair((unsigned)SL2_jumpr31, 8128), + std::make_pair((unsigned)SL2_jumpr31_f, 8133), + std::make_pair((unsigned)SL2_jumpr31_fnew, 8135), + std::make_pair((unsigned)SL2_jumpr31_t, 8132), + std::make_pair((unsigned)SL2_jumpr31_tnew, 8134), + std::make_pair((unsigned)SL2_loadrb_io, 4096), + std::make_pair((unsigned)SL2_loadrd_sp, 7680), + std::make_pair((unsigned)SL2_loadrh_io, 0), + std::make_pair((unsigned)SL2_loadri_sp, 7168), + std::make_pair((unsigned)SL2_loadruh_io, 2048), + std::make_pair((unsigned)SL2_return, 8000), + std::make_pair((unsigned)SL2_return_f, 8005), + std::make_pair((unsigned)SL2_return_fnew, 8007), + std::make_pair((unsigned)SL2_return_t, 8004), + std::make_pair((unsigned)SL2_return_tnew, 8006), + std::make_pair((unsigned)SS1_storeb_io, 4096), + std::make_pair((unsigned)SS1_storew_io, 0), + std::make_pair((unsigned)SS2_allocframe, 7168), + std::make_pair((unsigned)SS2_storebi0, 4608), + std::make_pair((unsigned)SS2_storebi1, 4864), + std::make_pair((unsigned)SS2_stored_sp, 2560), + std::make_pair((unsigned)SS2_storeh_io, 0), + std::make_pair((unsigned)SS2_storew_sp, 2048), + std::make_pair((unsigned)SS2_storewi0, 4096), + std::make_pair((unsigned)SS2_storewi1, 4352)}; bool HexagonMCInstrInfo::isDuplexPairMatch(unsigned Ga, unsigned Gb) { switch (Ga) { @@ -262,25 +262,19 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { case Hexagon::EH_RETURN_JMPR: case Hexagon::J2_jumpr: - case Hexagon::JMPret: // jumpr r31 // Actual form JMPR %PC<imp-def>, %R31<imp-use>, %R0<imp-use,internal>. 
DstReg = MCI.getOperand(0).getReg(); - if (Hexagon::R31 == DstReg) { + if (Hexagon::R31 == DstReg) return HexagonII::HSIG_L2; - } break; case Hexagon::J2_jumprt: case Hexagon::J2_jumprf: case Hexagon::J2_jumprtnew: case Hexagon::J2_jumprfnew: - case Hexagon::JMPrett: - case Hexagon::JMPretf: - case Hexagon::JMPrettnew: - case Hexagon::JMPretfnew: - case Hexagon::JMPrettnewpt: - case Hexagon::JMPretfnewpt: + case Hexagon::J2_jumprtnewpt: + case Hexagon::J2_jumprfnewpt: DstReg = MCI.getOperand(1).getReg(); SrcReg = MCI.getOperand(0).getReg(); // [if ([!]p0[.new])] jumpr r31 @@ -679,6 +673,7 @@ inline static void addOps(MCInst &subInstPtr, MCInst const &Inst, case Hexagon::D9: case Hexagon::D10: case Hexagon::D11: + case Hexagon::P0: subInstPtr.addOperand(Inst.getOperand(opNum)); break; } @@ -699,54 +694,54 @@ MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) { Absolute = Inst.getOperand(2).getExpr()->evaluateAsAbsolute(Value); assert(Absolute);(void)Absolute; if (Value == 1) { - Result.setOpcode(Hexagon::V4_SA1_inc); + Result.setOpcode(Hexagon::SA1_inc); addOps(Result, Inst, 0); addOps(Result, Inst, 1); break; } // 1,2 SUBInst $Rd = add($Rs, #1) else if (Value == -1) { - Result.setOpcode(Hexagon::V4_SA1_dec); + Result.setOpcode(Hexagon::SA1_dec); addOps(Result, Inst, 0); addOps(Result, Inst, 1); break; } // 1,2 SUBInst $Rd = add($Rs,#-1) else if (Inst.getOperand(1).getReg() == Hexagon::R29) { - Result.setOpcode(Hexagon::V4_SA1_addsp); + Result.setOpcode(Hexagon::SA1_addsp); addOps(Result, Inst, 0); addOps(Result, Inst, 2); break; } // 1,3 SUBInst $Rd = add(r29, #$u6_2) else { - Result.setOpcode(Hexagon::V4_SA1_addi); + Result.setOpcode(Hexagon::SA1_addi); addOps(Result, Inst, 0); addOps(Result, Inst, 1); addOps(Result, Inst, 2); break; } // 1,2,3 SUBInst $Rx = add($Rx, #$s7) case Hexagon::A2_add: - Result.setOpcode(Hexagon::V4_SA1_addrx); + Result.setOpcode(Hexagon::SA1_addrx); addOps(Result, Inst, 0); addOps(Result, Inst, 1); addOps(Result, Inst, 2); break; // 1,2,3 SUBInst $Rx = add($_src_, $Rs) case Hexagon::S2_allocframe: - Result.setOpcode(Hexagon::V4_SS2_allocframe); + Result.setOpcode(Hexagon::SS2_allocframe); addOps(Result, Inst, 0); break; // 1 SUBInst allocframe(#$u5_3) case Hexagon::A2_andir: if (minConstant(Inst, 2) == 255) { - Result.setOpcode(Hexagon::V4_SA1_zxtb); + Result.setOpcode(Hexagon::SA1_zxtb); addOps(Result, Inst, 0); addOps(Result, Inst, 1); break; // 1,2 $Rd = and($Rs, #255) } else { - Result.setOpcode(Hexagon::V4_SA1_and1); + Result.setOpcode(Hexagon::SA1_and1); addOps(Result, Inst, 0); addOps(Result, Inst, 1); break; // 1,2 SUBInst $Rd = and($Rs, #1) } case Hexagon::C2_cmpeqi: - Result.setOpcode(Hexagon::V4_SA1_cmpeqi); + Result.setOpcode(Hexagon::SA1_cmpeqi); addOps(Result, Inst, 1); addOps(Result, Inst, 2); break; // 2,3 SUBInst p0 = cmp.eq($Rs, #$u2) @@ -755,120 +750,115 @@ MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) { Absolute = Inst.getOperand(1).getExpr()->evaluateAsAbsolute(Value); assert(Absolute);(void)Absolute; if (Value == 1) { - Result.setOpcode(Hexagon::V4_SA1_combine1i); + Result.setOpcode(Hexagon::SA1_combine1i); addOps(Result, Inst, 0); addOps(Result, Inst, 2); break; // 1,3 SUBInst $Rdd = combine(#1, #$u2) } if (Value == 3) { - Result.setOpcode(Hexagon::V4_SA1_combine3i); + Result.setOpcode(Hexagon::SA1_combine3i); addOps(Result, Inst, 0); addOps(Result, Inst, 2); break; // 1,3 SUBInst $Rdd = combine(#3, #$u2) } if (Value == 0) { - Result.setOpcode(Hexagon::V4_SA1_combine0i); + Result.setOpcode(Hexagon::SA1_combine0i); 
addOps(Result, Inst, 0); addOps(Result, Inst, 2); break; // 1,3 SUBInst $Rdd = combine(#0, #$u2) } if (Value == 2) { - Result.setOpcode(Hexagon::V4_SA1_combine2i); + Result.setOpcode(Hexagon::SA1_combine2i); addOps(Result, Inst, 0); addOps(Result, Inst, 2); break; // 1,3 SUBInst $Rdd = combine(#2, #$u2) } case Hexagon::A4_combineir: - Result.setOpcode(Hexagon::V4_SA1_combinezr); + Result.setOpcode(Hexagon::SA1_combinezr); addOps(Result, Inst, 0); addOps(Result, Inst, 2); break; // 1,3 SUBInst $Rdd = combine(#0, $Rs) case Hexagon::A4_combineri: - Result.setOpcode(Hexagon::V4_SA1_combinerz); + Result.setOpcode(Hexagon::SA1_combinerz); addOps(Result, Inst, 0); addOps(Result, Inst, 1); break; // 1,2 SUBInst $Rdd = combine($Rs, #0) case Hexagon::L4_return_tnew_pnt: case Hexagon::L4_return_tnew_pt: - Result.setOpcode(Hexagon::V4_SL2_return_tnew); + Result.setOpcode(Hexagon::SL2_return_tnew); break; // none SUBInst if (p0.new) dealloc_return:nt case Hexagon::L4_return_fnew_pnt: case Hexagon::L4_return_fnew_pt: - Result.setOpcode(Hexagon::V4_SL2_return_fnew); + Result.setOpcode(Hexagon::SL2_return_fnew); break; // none SUBInst if (!p0.new) dealloc_return:nt case Hexagon::L4_return_f: - Result.setOpcode(Hexagon::V4_SL2_return_f); + Result.setOpcode(Hexagon::SL2_return_f); break; // none SUBInst if (!p0) dealloc_return case Hexagon::L4_return_t: - Result.setOpcode(Hexagon::V4_SL2_return_t); + Result.setOpcode(Hexagon::SL2_return_t); break; // none SUBInst if (p0) dealloc_return case Hexagon::L4_return: - Result.setOpcode(Hexagon::V4_SL2_return); + Result.setOpcode(Hexagon::SL2_return); break; // none SUBInst dealloc_return case Hexagon::L2_deallocframe: - Result.setOpcode(Hexagon::V4_SL2_deallocframe); + Result.setOpcode(Hexagon::SL2_deallocframe); break; // none SUBInst deallocframe case Hexagon::EH_RETURN_JMPR: case Hexagon::J2_jumpr: - case Hexagon::JMPret: - Result.setOpcode(Hexagon::V4_SL2_jumpr31); + Result.setOpcode(Hexagon::SL2_jumpr31); break; // none SUBInst jumpr r31 case Hexagon::J2_jumprf: - case Hexagon::JMPretf: - Result.setOpcode(Hexagon::V4_SL2_jumpr31_f); + Result.setOpcode(Hexagon::SL2_jumpr31_f); break; // none SUBInst if (!p0) jumpr r31 case Hexagon::J2_jumprfnew: - case Hexagon::JMPretfnewpt: - case Hexagon::JMPretfnew: - Result.setOpcode(Hexagon::V4_SL2_jumpr31_fnew); + case Hexagon::J2_jumprfnewpt: + Result.setOpcode(Hexagon::SL2_jumpr31_fnew); break; // none SUBInst if (!p0.new) jumpr:nt r31 case Hexagon::J2_jumprt: - case Hexagon::JMPrett: - Result.setOpcode(Hexagon::V4_SL2_jumpr31_t); + Result.setOpcode(Hexagon::SL2_jumpr31_t); break; // none SUBInst if (p0) jumpr r31 case Hexagon::J2_jumprtnew: - case Hexagon::JMPrettnewpt: - case Hexagon::JMPrettnew: - Result.setOpcode(Hexagon::V4_SL2_jumpr31_tnew); + case Hexagon::J2_jumprtnewpt: + Result.setOpcode(Hexagon::SL2_jumpr31_tnew); break; // none SUBInst if (p0.new) jumpr:nt r31 case Hexagon::L2_loadrb_io: - Result.setOpcode(Hexagon::V4_SL2_loadrb_io); + Result.setOpcode(Hexagon::SL2_loadrb_io); addOps(Result, Inst, 0); addOps(Result, Inst, 1); addOps(Result, Inst, 2); break; // 1,2,3 SUBInst $Rd = memb($Rs + #$u3_0) case Hexagon::L2_loadrd_io: - Result.setOpcode(Hexagon::V4_SL2_loadrd_sp); + Result.setOpcode(Hexagon::SL2_loadrd_sp); addOps(Result, Inst, 0); addOps(Result, Inst, 2); break; // 1,3 SUBInst $Rdd = memd(r29 + #$u5_3) case Hexagon::L2_loadrh_io: - Result.setOpcode(Hexagon::V4_SL2_loadrh_io); + Result.setOpcode(Hexagon::SL2_loadrh_io); addOps(Result, Inst, 0); addOps(Result, Inst, 1); addOps(Result, Inst, 2); 
break; // 1,2,3 SUBInst $Rd = memh($Rs + #$u3_1) case Hexagon::L2_loadrub_io: - Result.setOpcode(Hexagon::V4_SL1_loadrub_io); + Result.setOpcode(Hexagon::SL1_loadrub_io); addOps(Result, Inst, 0); addOps(Result, Inst, 1); addOps(Result, Inst, 2); break; // 1,2,3 SUBInst $Rd = memub($Rs + #$u4_0) case Hexagon::L2_loadruh_io: - Result.setOpcode(Hexagon::V4_SL2_loadruh_io); + Result.setOpcode(Hexagon::SL2_loadruh_io); addOps(Result, Inst, 0); addOps(Result, Inst, 1); addOps(Result, Inst, 2); break; // 1,2,3 SUBInst $Rd = memuh($Rs + #$u3_1) case Hexagon::L2_loadri_io: if (Inst.getOperand(1).getReg() == Hexagon::R29) { - Result.setOpcode(Hexagon::V4_SL2_loadri_sp); + Result.setOpcode(Hexagon::SL2_loadri_sp); addOps(Result, Inst, 0); addOps(Result, Inst, 2); break; // 2 1,3 SUBInst $Rd = memw(r29 + #$u5_2) } else { - Result.setOpcode(Hexagon::V4_SL1_loadri_io); + Result.setOpcode(Hexagon::SL1_loadri_io); addOps(Result, Inst, 0); addOps(Result, Inst, 1); addOps(Result, Inst, 2); @@ -878,29 +868,29 @@ MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) { Absolute = Inst.getOperand(2).getExpr()->evaluateAsAbsolute(Value); assert(Absolute);(void)Absolute; if (Value == 0) { - Result.setOpcode(Hexagon::V4_SS2_storebi0); + Result.setOpcode(Hexagon::SS2_storebi0); addOps(Result, Inst, 0); addOps(Result, Inst, 1); break; // 1,2 SUBInst memb($Rs + #$u4_0)=#0 } else if (Value == 1) { - Result.setOpcode(Hexagon::V4_SS2_storebi1); + Result.setOpcode(Hexagon::SS2_storebi1); addOps(Result, Inst, 0); addOps(Result, Inst, 1); break; // 2 1,2 SUBInst memb($Rs + #$u4_0)=#1 } case Hexagon::S2_storerb_io: - Result.setOpcode(Hexagon::V4_SS1_storeb_io); + Result.setOpcode(Hexagon::SS1_storeb_io); addOps(Result, Inst, 0); addOps(Result, Inst, 1); addOps(Result, Inst, 2); break; // 1,2,3 SUBInst memb($Rs + #$u4_0) = $Rt case Hexagon::S2_storerd_io: - Result.setOpcode(Hexagon::V4_SS2_stored_sp); + Result.setOpcode(Hexagon::SS2_stored_sp); addOps(Result, Inst, 1); addOps(Result, Inst, 2); break; // 2,3 SUBInst memd(r29 + #$s6_3) = $Rtt case Hexagon::S2_storerh_io: - Result.setOpcode(Hexagon::V4_SS2_storeh_io); + Result.setOpcode(Hexagon::SS2_storeh_io); addOps(Result, Inst, 0); addOps(Result, Inst, 1); addOps(Result, Inst, 2); @@ -909,84 +899,88 @@ MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) { Absolute = Inst.getOperand(2).getExpr()->evaluateAsAbsolute(Value); assert(Absolute);(void)Absolute; if (Value == 0) { - Result.setOpcode(Hexagon::V4_SS2_storewi0); + Result.setOpcode(Hexagon::SS2_storewi0); addOps(Result, Inst, 0); addOps(Result, Inst, 1); break; // 3 1,2 SUBInst memw($Rs + #$u4_2)=#0 } else if (Value == 1) { - Result.setOpcode(Hexagon::V4_SS2_storewi1); + Result.setOpcode(Hexagon::SS2_storewi1); addOps(Result, Inst, 0); addOps(Result, Inst, 1); break; // 3 1,2 SUBInst memw($Rs + #$u4_2)=#1 } else if (Inst.getOperand(0).getReg() == Hexagon::R29) { - Result.setOpcode(Hexagon::V4_SS2_storew_sp); + Result.setOpcode(Hexagon::SS2_storew_sp); addOps(Result, Inst, 1); addOps(Result, Inst, 2); break; // 1 2,3 SUBInst memw(r29 + #$u5_2) = $Rt } case Hexagon::S2_storeri_io: if (Inst.getOperand(0).getReg() == Hexagon::R29) { - Result.setOpcode(Hexagon::V4_SS2_storew_sp); + Result.setOpcode(Hexagon::SS2_storew_sp); addOps(Result, Inst, 1); addOps(Result, Inst, 2); // 1,2,3 SUBInst memw(sp + #$u5_2) = $Rt } else { - Result.setOpcode(Hexagon::V4_SS1_storew_io); + Result.setOpcode(Hexagon::SS1_storew_io); addOps(Result, Inst, 0); addOps(Result, Inst, 1); addOps(Result, Inst, 2); // 1,2,3 SUBInst memw($Rs 
+ #$u4_2) = $Rt } break; case Hexagon::A2_sxtb: - Result.setOpcode(Hexagon::V4_SA1_sxtb); + Result.setOpcode(Hexagon::SA1_sxtb); addOps(Result, Inst, 0); addOps(Result, Inst, 1); break; // 1,2 SUBInst $Rd = sxtb($Rs) case Hexagon::A2_sxth: - Result.setOpcode(Hexagon::V4_SA1_sxth); + Result.setOpcode(Hexagon::SA1_sxth); addOps(Result, Inst, 0); addOps(Result, Inst, 1); break; // 1,2 SUBInst $Rd = sxth($Rs) case Hexagon::A2_tfr: - Result.setOpcode(Hexagon::V4_SA1_tfr); + Result.setOpcode(Hexagon::SA1_tfr); addOps(Result, Inst, 0); addOps(Result, Inst, 1); break; // 1,2 SUBInst $Rd = $Rs case Hexagon::C2_cmovenewif: - Result.setOpcode(Hexagon::V4_SA1_clrfnew); + Result.setOpcode(Hexagon::SA1_clrfnew); addOps(Result, Inst, 0); + addOps(Result, Inst, 1); break; // 2 SUBInst if (!p0.new) $Rd = #0 case Hexagon::C2_cmovenewit: - Result.setOpcode(Hexagon::V4_SA1_clrtnew); + Result.setOpcode(Hexagon::SA1_clrtnew); addOps(Result, Inst, 0); + addOps(Result, Inst, 1); break; // 2 SUBInst if (p0.new) $Rd = #0 case Hexagon::C2_cmoveif: - Result.setOpcode(Hexagon::V4_SA1_clrf); + Result.setOpcode(Hexagon::SA1_clrf); addOps(Result, Inst, 0); + addOps(Result, Inst, 1); break; // 2 SUBInst if (!p0) $Rd = #0 case Hexagon::C2_cmoveit: - Result.setOpcode(Hexagon::V4_SA1_clrt); + Result.setOpcode(Hexagon::SA1_clrt); addOps(Result, Inst, 0); + addOps(Result, Inst, 1); break; // 2 SUBInst if (p0) $Rd = #0 case Hexagon::A2_tfrsi: Absolute = Inst.getOperand(1).getExpr()->evaluateAsAbsolute(Value); if (Absolute && Value == -1) { - Result.setOpcode(Hexagon::V4_SA1_setin1); + Result.setOpcode(Hexagon::SA1_setin1); addOps(Result, Inst, 0); break; // 2 1 SUBInst $Rd = #-1 } else { - Result.setOpcode(Hexagon::V4_SA1_seti); + Result.setOpcode(Hexagon::SA1_seti); addOps(Result, Inst, 0); addOps(Result, Inst, 1); break; // 1,2 SUBInst $Rd = #$u6 } case Hexagon::A2_zxtb: - Result.setOpcode(Hexagon::V4_SA1_zxtb); + Result.setOpcode(Hexagon::SA1_zxtb); addOps(Result, Inst, 0); addOps(Result, Inst, 1); break; // 1,2 $Rd = and($Rs, #255) case Hexagon::A2_zxth: - Result.setOpcode(Hexagon::V4_SA1_zxth); + Result.setOpcode(Hexagon::SA1_zxth); addOps(Result, Inst, 0); addOps(Result, Inst, 1); break; // 1,2 SUBInst $Rd = zxth($Rs) @@ -1022,7 +1016,7 @@ HexagonMCInstrInfo::getDuplexPossibilties(MCInstrInfo const &MCII, k = j + distance; (j < numInstrInPacket) && (k < numInstrInPacket); ++j, ++k) { - // Check if reversable. + // Check if reversible. 
bool bisReversable = true; if (isStoreInst(MCB.getOperand(j).getInst()->getOpcode()) && isStoreInst(MCB.getOperand(k).getInst()->getOpcode())) { diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp index 67dcb8f..226470c 100644 --- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp @@ -13,20 +13,27 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "hexagonmcelfstreamer" -#include "Hexagon.h" -#include "HexagonMCELFStreamer.h" -#include "MCTargetDesc/HexagonBaseInfo.h" +#include "MCTargetDesc/HexagonMCELFStreamer.h" +#include "MCTargetDesc/HexagonMCInstrInfo.h" #include "MCTargetDesc/HexagonMCShuffler.h" -#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCObjectStreamer.h" +#include "llvm/MC/MCSection.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCSymbolELF.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include <cassert> +#include <cstdint> using namespace llvm; @@ -148,8 +155,10 @@ void HexagonMCELFStreamer::HexagonMCEmitLocalCommonSymbol( } namespace llvm { + MCStreamer *createHexagonELFStreamer(MCContext &Context, MCAsmBackend &MAB, raw_pwrite_stream &OS, MCCodeEmitter *CE) { return new HexagonMCELFStreamer(Context, MAB, OS, CE); } -} + +} // end namespace llvm diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h index d77c0cd..0ac1a68 100644 --- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h @@ -7,14 +7,14 @@ // //===----------------------------------------------------------------------===// -#ifndef HEXAGONMCELFSTREAMER_H -#define HEXAGONMCELFSTREAMER_H +#ifndef LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCELFSTREAMER_H +#define LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCELFSTREAMER_H -#include "MCTargetDesc/HexagonMCCodeEmitter.h" -#include "MCTargetDesc/HexagonMCInstrInfo.h" #include "MCTargetDesc/HexagonMCTargetDesc.h" #include "llvm/MC/MCELFStreamer.h" -#include "HexagonTargetStreamer.h" +#include "llvm/MC/MCInstrInfo.h" +#include <cstdint> +#include <memory> namespace llvm { @@ -27,8 +27,7 @@ public: : MCELFStreamer(Context, TAB, OS, Emitter), MCII(createHexagonMCInstrInfo()) {} - virtual void EmitInstruction(const MCInst &Inst, - const MCSubtargetInfo &STI) override; + void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override; void EmitSymbol(const MCInst &Inst); void HexagonMCEmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment, @@ -40,6 +39,6 @@ public: MCStreamer *createHexagonELFStreamer(MCContext &Context, MCAsmBackend &MAB, raw_pwrite_stream &OS, MCCodeEmitter *CE); -} // namespace llvm +} // end namespace llvm -#endif +#endif // LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCELFSTREAMER_H diff --git 
a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp index 941cbd6..e627f02 100644 --- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp @@ -306,7 +306,7 @@ int HexagonMCInstrInfo::getMinValue(MCInstrInfo const &MCII, return 0; } -char const *HexagonMCInstrInfo::getName(MCInstrInfo const &MCII, +StringRef HexagonMCInstrInfo::getName(MCInstrInfo const &MCII, MCInst const &MCI) { return MCII.getName(MCI.getOpcode()); } @@ -431,6 +431,11 @@ bool HexagonMCInstrInfo::isCanon(MCInstrInfo const &MCII, MCInst const &MCI) { HexagonMCInstrInfo::getType(MCII, MCI) != HexagonII::TypeENDLOOP); } +bool HexagonMCInstrInfo::isCofMax1(MCInstrInfo const &MCII, MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::CofMax1Pos) & HexagonII::CofMax1Mask); +} + bool HexagonMCInstrInfo::isCompound(MCInstrInfo const &MCII, MCInst const &MCI) { return (getType(MCII, MCI) == HexagonII::TypeCOMPOUND); @@ -602,58 +607,58 @@ bool HexagonMCInstrInfo::isSubInstruction(MCInst const &MCI) { switch (MCI.getOpcode()) { default: return false; - case Hexagon::V4_SA1_addi: - case Hexagon::V4_SA1_addrx: - case Hexagon::V4_SA1_addsp: - case Hexagon::V4_SA1_and1: - case Hexagon::V4_SA1_clrf: - case Hexagon::V4_SA1_clrfnew: - case Hexagon::V4_SA1_clrt: - case Hexagon::V4_SA1_clrtnew: - case Hexagon::V4_SA1_cmpeqi: - case Hexagon::V4_SA1_combine0i: - case Hexagon::V4_SA1_combine1i: - case Hexagon::V4_SA1_combine2i: - case Hexagon::V4_SA1_combine3i: - case Hexagon::V4_SA1_combinerz: - case Hexagon::V4_SA1_combinezr: - case Hexagon::V4_SA1_dec: - case Hexagon::V4_SA1_inc: - case Hexagon::V4_SA1_seti: - case Hexagon::V4_SA1_setin1: - case Hexagon::V4_SA1_sxtb: - case Hexagon::V4_SA1_sxth: - case Hexagon::V4_SA1_tfr: - case Hexagon::V4_SA1_zxtb: - case Hexagon::V4_SA1_zxth: - case Hexagon::V4_SL1_loadri_io: - case Hexagon::V4_SL1_loadrub_io: - case Hexagon::V4_SL2_deallocframe: - case Hexagon::V4_SL2_jumpr31: - case Hexagon::V4_SL2_jumpr31_f: - case Hexagon::V4_SL2_jumpr31_fnew: - case Hexagon::V4_SL2_jumpr31_t: - case Hexagon::V4_SL2_jumpr31_tnew: - case Hexagon::V4_SL2_loadrb_io: - case Hexagon::V4_SL2_loadrd_sp: - case Hexagon::V4_SL2_loadrh_io: - case Hexagon::V4_SL2_loadri_sp: - case Hexagon::V4_SL2_loadruh_io: - case Hexagon::V4_SL2_return: - case Hexagon::V4_SL2_return_f: - case Hexagon::V4_SL2_return_fnew: - case Hexagon::V4_SL2_return_t: - case Hexagon::V4_SL2_return_tnew: - case Hexagon::V4_SS1_storeb_io: - case Hexagon::V4_SS1_storew_io: - case Hexagon::V4_SS2_allocframe: - case Hexagon::V4_SS2_storebi0: - case Hexagon::V4_SS2_storebi1: - case Hexagon::V4_SS2_stored_sp: - case Hexagon::V4_SS2_storeh_io: - case Hexagon::V4_SS2_storew_sp: - case Hexagon::V4_SS2_storewi0: - case Hexagon::V4_SS2_storewi1: + case Hexagon::SA1_addi: + case Hexagon::SA1_addrx: + case Hexagon::SA1_addsp: + case Hexagon::SA1_and1: + case Hexagon::SA1_clrf: + case Hexagon::SA1_clrfnew: + case Hexagon::SA1_clrt: + case Hexagon::SA1_clrtnew: + case Hexagon::SA1_cmpeqi: + case Hexagon::SA1_combine0i: + case Hexagon::SA1_combine1i: + case Hexagon::SA1_combine2i: + case Hexagon::SA1_combine3i: + case Hexagon::SA1_combinerz: + case Hexagon::SA1_combinezr: + case Hexagon::SA1_dec: + case Hexagon::SA1_inc: + case Hexagon::SA1_seti: + case Hexagon::SA1_setin1: + case Hexagon::SA1_sxtb: + case Hexagon::SA1_sxth: + case 
Hexagon::SA1_tfr: + case Hexagon::SA1_zxtb: + case Hexagon::SA1_zxth: + case Hexagon::SL1_loadri_io: + case Hexagon::SL1_loadrub_io: + case Hexagon::SL2_deallocframe: + case Hexagon::SL2_jumpr31: + case Hexagon::SL2_jumpr31_f: + case Hexagon::SL2_jumpr31_fnew: + case Hexagon::SL2_jumpr31_t: + case Hexagon::SL2_jumpr31_tnew: + case Hexagon::SL2_loadrb_io: + case Hexagon::SL2_loadrd_sp: + case Hexagon::SL2_loadrh_io: + case Hexagon::SL2_loadri_sp: + case Hexagon::SL2_loadruh_io: + case Hexagon::SL2_return: + case Hexagon::SL2_return_f: + case Hexagon::SL2_return_fnew: + case Hexagon::SL2_return_t: + case Hexagon::SL2_return_tnew: + case Hexagon::SS1_storeb_io: + case Hexagon::SS1_storew_io: + case Hexagon::SS2_allocframe: + case Hexagon::SS2_storebi0: + case Hexagon::SS2_storebi1: + case Hexagon::SS2_stored_sp: + case Hexagon::SS2_storeh_io: + case Hexagon::SS2_storew_sp: + case Hexagon::SS2_storewi0: + case Hexagon::SS2_storewi1: return true; } } diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h index 58a8f68..d701c3a 100644 --- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h @@ -133,7 +133,7 @@ int getMaxValue(MCInstrInfo const &MCII, MCInst const &MCI); int getMinValue(MCInstrInfo const &MCII, MCInst const &MCI); // Return instruction name -char const *getName(MCInstrInfo const &MCII, MCInst const &MCI); +StringRef getName(MCInstrInfo const &MCII, MCInst const &MCI); // Return the operand index for the new value. unsigned short getNewValueOp(MCInstrInfo const &MCII, MCInst const &MCI); @@ -170,6 +170,7 @@ bool isBundle(MCInst const &MCI); // Return whether the insn is an actual insn. 
bool isCanon(MCInstrInfo const &MCII, MCInst const &MCI); +bool isCofMax1(MCInstrInfo const &MCII, MCInst const &MCI); bool isCompound(MCInstrInfo const &MCII, MCInst const &MCI); // Return the duplex iclass given the two duplex classes diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp index 35a1a23..694cf58 100644 --- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -11,22 +11,29 @@ // //===----------------------------------------------------------------------===// -#include "HexagonMCTargetDesc.h" #include "Hexagon.h" -#include "HexagonMCAsmInfo.h" -#include "HexagonMCELFStreamer.h" +#include "HexagonTargetStreamer.h" #include "MCTargetDesc/HexagonInstPrinter.h" +#include "MCTargetDesc/HexagonMCAsmInfo.h" +#include "MCTargetDesc/HexagonMCELFStreamer.h" +#include "MCTargetDesc/HexagonMCInstrInfo.h" +#include "MCTargetDesc/HexagonMCTargetDesc.h" +#include "llvm/ADT/StringRef.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCELFStreamer.h" #include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCObjectStreamer.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/MachineLocation.h" #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" +#include <cassert> +#include <cstdint> +#include <new> +#include <string> using namespace llvm; @@ -59,7 +66,6 @@ static cl::opt<bool> HexagonV55ArchVariant("mv55", cl::Hidden, cl::init(false), static cl::opt<bool> HexagonV60ArchVariant("mv60", cl::Hidden, cl::init(false), cl::desc("Build for Hexagon V60")); - static StringRef DefaultArch = "hexagonv60"; static StringRef HexagonGetArchVariant() { @@ -74,7 +80,7 @@ static StringRef HexagonGetArchVariant() { return ""; } -StringRef HEXAGON_MC::selectHexagonCPU(const Triple &TT, StringRef CPU) { +StringRef Hexagon_MC::selectHexagonCPU(const Triple &TT, StringRef CPU) { StringRef ArchV = HexagonGetArchVariant(); if (!ArchV.empty() && !CPU.empty()) { if (ArchV != CPU) @@ -103,17 +109,19 @@ static MCRegisterInfo *createHexagonMCRegisterInfo(const Triple &TT) { static MCSubtargetInfo * createHexagonMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { - CPU = HEXAGON_MC::selectHexagonCPU(TT, CPU); + CPU = Hexagon_MC::selectHexagonCPU(TT, CPU); return createHexagonMCSubtargetInfoImpl(TT, CPU, FS); } namespace { + class HexagonTargetAsmStreamer : public HexagonTargetStreamer { public: HexagonTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &, bool, MCInstPrinter &) : HexagonTargetStreamer(S) {} + void prettyPrintAsm(MCInstPrinter &InstPrinter, raw_ostream &OS, const MCInst &Inst, const MCSubtargetInfo &STI) override { assert(HexagonMCInstrInfo::isBundle(Inst)); @@ -145,14 +153,9 @@ public: OS << "\t}" << PacketBundle.second; } }; -} -namespace { class HexagonTargetELFStreamer : public HexagonTargetStreamer { public: - MCELFStreamer &getStreamer() { - return static_cast<MCELFStreamer &>(Streamer); - } HexagonTargetELFStreamer(MCStreamer &S, MCSubtargetInfo const &STI) : HexagonTargetStreamer(S) { auto Bits = STI.getFeatureBits(); @@ -167,6 +170,11 @@ public: Flags = ELF::EF_HEXAGON_MACH_V4; getStreamer().getAssembler().setELFHeaderEFlags(Flags); } + + MCELFStreamer &getStreamer() { + return 
static_cast<MCELFStreamer &>(Streamer); + } + void EmitCommonSymbolSorted(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment, unsigned AccessSize) override { @@ -175,6 +183,7 @@ public: HexagonELFStreamer.HexagonMCEmitCommonSymbol(Symbol, Size, ByteAlignment, AccessSize); } + void EmitLocalCommonSymbolSorted(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment, unsigned AccessSize) override { @@ -184,7 +193,8 @@ public: Symbol, Size, ByteAlignment, AccessSize); } }; -} + +} // end anonymous namespace static MCAsmInfo *createHexagonMCAsmInfo(const MCRegisterInfo &MRI, const Triple &TT) { @@ -230,39 +240,39 @@ createHexagonObjectTargetStreamer(MCStreamer &S, MCSubtargetInfo const &STI) { // Force static initialization. extern "C" void LLVMInitializeHexagonTargetMC() { // Register the MC asm info. - RegisterMCAsmInfoFn X(TheHexagonTarget, createHexagonMCAsmInfo); + RegisterMCAsmInfoFn X(getTheHexagonTarget(), createHexagonMCAsmInfo); // Register the MC instruction info. - TargetRegistry::RegisterMCInstrInfo(TheHexagonTarget, + TargetRegistry::RegisterMCInstrInfo(getTheHexagonTarget(), createHexagonMCInstrInfo); // Register the MC register info. - TargetRegistry::RegisterMCRegInfo(TheHexagonTarget, + TargetRegistry::RegisterMCRegInfo(getTheHexagonTarget(), createHexagonMCRegisterInfo); // Register the MC subtarget info. - TargetRegistry::RegisterMCSubtargetInfo(TheHexagonTarget, + TargetRegistry::RegisterMCSubtargetInfo(getTheHexagonTarget(), createHexagonMCSubtargetInfo); // Register the MC Code Emitter - TargetRegistry::RegisterMCCodeEmitter(TheHexagonTarget, + TargetRegistry::RegisterMCCodeEmitter(getTheHexagonTarget(), createHexagonMCCodeEmitter); // Register the asm backend - TargetRegistry::RegisterMCAsmBackend(TheHexagonTarget, + TargetRegistry::RegisterMCAsmBackend(getTheHexagonTarget(), createHexagonAsmBackend); // Register the obj streamer - TargetRegistry::RegisterELFStreamer(TheHexagonTarget, createMCStreamer); + TargetRegistry::RegisterELFStreamer(getTheHexagonTarget(), createMCStreamer); // Register the asm streamer - TargetRegistry::RegisterAsmTargetStreamer(TheHexagonTarget, + TargetRegistry::RegisterAsmTargetStreamer(getTheHexagonTarget(), createMCAsmTargetStreamer); // Register the MC Inst Printer - TargetRegistry::RegisterMCInstPrinter(TheHexagonTarget, + TargetRegistry::RegisterMCInstPrinter(getTheHexagonTarget(), createHexagonMCInstPrinter); TargetRegistry::RegisterObjectTargetStreamer( - TheHexagonTarget, createHexagonObjectTargetStreamer); + getTheHexagonTarget(), createHexagonObjectTargetStreamer); } diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h index a005a01..6e677e9 100644 --- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h @@ -14,11 +14,11 @@ #ifndef LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCTARGETDESC_H #define LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCTARGETDESC_H -#include <cstdint> - #include "llvm/Support/CommandLine.h" +#include <cstdint> namespace llvm { + struct InstrItinerary; struct InstrStage; class MCAsmBackend; @@ -28,13 +28,14 @@ class MCInstrInfo; class MCObjectWriter; class MCRegisterInfo; class MCSubtargetInfo; +class MCTargetOptions; class Target; class Triple; class StringRef; class raw_ostream; class raw_pwrite_stream; -extern Target TheHexagonTarget; +Target &getTheHexagonTarget(); extern cl::opt<bool> HexagonDisableCompound; extern 
cl::opt<bool> HexagonDisableDuplex; extern const InstrStage HexagonStages[]; @@ -47,16 +48,19 @@ MCCodeEmitter *createHexagonMCCodeEmitter(const MCInstrInfo &MCII, MCAsmBackend *createHexagonAsmBackend(const Target &T, const MCRegisterInfo &MRI, - const Triple &TT, StringRef CPU); + const Triple &TT, StringRef CPU, + const MCTargetOptions &Options); MCObjectWriter *createHexagonELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI, StringRef CPU); -namespace HEXAGON_MC { +namespace Hexagon_MC { + StringRef selectHexagonCPU(const Triple &TT, StringRef CPU); -} -} // End llvm namespace +} // end namespace Hexagon_MC + +} // end namespace llvm // Define symbolic names for Hexagon registers. This defines a mapping from // register name to register number. @@ -72,4 +76,4 @@ namespace HEXAGON_MC { #define GET_SUBTARGETINFO_ENUM #include "HexagonGenSubtargetInfo.inc" -#endif +#endif // LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCTARGETDESC_H diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp index 4e1cce3..88f37d6 100644 --- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp @@ -171,7 +171,7 @@ bool HexagonShuffler::check() { unsigned slotJump = slotFirstJump; unsigned slotLoadStore = slotFirstLoadStore; // Number of branches, solo branches, indirect branches. - unsigned jumps = 0, jump1 = 0, jumpr = 0; + unsigned jumps = 0, jump1 = 0; // Number of memory operations, loads, solo loads, stores, solo stores, single // stores. unsigned memory = 0, loads = 0, load0 = 0, stores = 0, store0 = 0, store1 = 0; @@ -207,6 +207,8 @@ bool HexagonShuffler::check() { ++pSlot3Cnt; slot3ISJ = ISJ; } + if (HexagonMCInstrInfo::isCofMax1(MCII, *ID)) + ++jump1; switch (HexagonMCInstrInfo::getType(MCII, *ID)) { case HexagonII::TypeXTYPE: @@ -214,8 +216,6 @@ bool HexagonShuffler::check() { ++xtypeFloat; break; case HexagonII::TypeJR: - ++jumpr; - // Fall-through. case HexagonII::TypeJ: ++jumps; break; @@ -244,7 +244,7 @@ bool HexagonShuffler::check() { if (ISJ->Core.getUnits() == slotSingleStore) ++store0; break; - case HexagonII::TypeMEMOP: + case HexagonII::TypeV4LDST: ++loads; ++stores; ++store1; @@ -304,7 +304,7 @@ bool HexagonShuffler::check() { if (HexagonMCInstrInfo::getDesc(MCII, *ID).isBranch() || HexagonMCInstrInfo::getDesc(MCII, *ID).isCall()) if (jumps > 1) { - if (jumpr || slotJump < slotLastJump) { + if (slotJump < slotLastJump) { // Error if indirect branch with another branch or // no more slots available for branches. 
Error = SHUFFLE_ERROR_BRANCHES; diff --git a/contrib/llvm/lib/Target/Hexagon/RDFCopy.cpp b/contrib/llvm/lib/Target/Hexagon/RDFCopy.cpp index 61a83da..3928716 100644 --- a/contrib/llvm/lib/Target/Hexagon/RDFCopy.cpp +++ b/contrib/llvm/lib/Target/Hexagon/RDFCopy.cpp @@ -32,43 +32,19 @@ bool CopyPropagation::interpretAsCopy(const MachineInstr *MI, EqualityMap &EM) { case TargetOpcode::COPY: { const MachineOperand &Dst = MI->getOperand(0); const MachineOperand &Src = MI->getOperand(1); - RegisterRef DstR = { Dst.getReg(), Dst.getSubReg() }; - RegisterRef SrcR = { Src.getReg(), Src.getSubReg() }; - if (TargetRegisterInfo::isVirtualRegister(DstR.Reg)) { - if (!TargetRegisterInfo::isVirtualRegister(SrcR.Reg)) - return false; - MachineRegisterInfo &MRI = DFG.getMF().getRegInfo(); - if (MRI.getRegClass(DstR.Reg) != MRI.getRegClass(SrcR.Reg)) - return false; - } else if (TargetRegisterInfo::isPhysicalRegister(DstR.Reg)) { - if (!TargetRegisterInfo::isPhysicalRegister(SrcR.Reg)) - return false; - const TargetRegisterInfo &TRI = DFG.getTRI(); - if (TRI.getMinimalPhysRegClass(DstR.Reg) != - TRI.getMinimalPhysRegClass(SrcR.Reg)) - return false; - } else { - // Copy between some unknown objects. + RegisterRef DstR = DFG.makeRegRef(Dst.getReg(), Dst.getSubReg()); + RegisterRef SrcR = DFG.makeRegRef(Src.getReg(), Src.getSubReg()); + assert(TargetRegisterInfo::isPhysicalRegister(DstR.Reg)); + assert(TargetRegisterInfo::isPhysicalRegister(SrcR.Reg)); + const TargetRegisterInfo &TRI = DFG.getTRI(); + if (TRI.getMinimalPhysRegClass(DstR.Reg) != + TRI.getMinimalPhysRegClass(SrcR.Reg)) return false; - } EM.insert(std::make_pair(DstR, SrcR)); return true; } - case TargetOpcode::REG_SEQUENCE: { - const MachineOperand &Dst = MI->getOperand(0); - RegisterRef DefR = { Dst.getReg(), Dst.getSubReg() }; - SmallVector<TargetInstrInfo::RegSubRegPairAndIdx,2> Inputs; - const TargetInstrInfo &TII = DFG.getTII(); - if (!TII.getRegSequenceInputs(*MI, 0, Inputs)) - return false; - for (auto I : Inputs) { - unsigned S = DFG.getTRI().composeSubRegIndices(DefR.Sub, I.SubIdx); - RegisterRef DR = { DefR.Reg, S }; - RegisterRef SR = { I.Reg, I.SubReg }; - EM.insert(std::make_pair(DR, SR)); - } - return true; - } + case TargetOpcode::REG_SEQUENCE: + llvm_unreachable("Unexpected REG_SEQUENCE"); } return false; } @@ -79,7 +55,7 @@ void CopyPropagation::recordCopy(NodeAddr<StmtNode*> SA, EqualityMap &EM) { Copies.push_back(SA.Id); for (auto I : EM) { - auto FS = DefM.find(I.second); + auto FS = DefM.find(I.second.Reg); if (FS == DefM.end() || FS->second.empty()) continue; // Undefined source RDefMap[I.second][SA.Id] = FS->second.top()->Id; @@ -92,7 +68,7 @@ void CopyPropagation::recordCopy(NodeAddr<StmtNode*> SA, EqualityMap &EM) { void CopyPropagation::updateMap(NodeAddr<InstrNode*> IA) { RegisterSet RRs; for (NodeAddr<RefNode*> RA : IA.Addr->members(DFG)) - RRs.insert(RA.Addr->getRegRef()); + RRs.insert(RA.Addr->getRegRef(DFG)); bool Common = false; for (auto &R : RDefMap) { if (!RRs.count(R.first)) @@ -106,7 +82,7 @@ void CopyPropagation::updateMap(NodeAddr<InstrNode*> IA) { for (auto &R : RDefMap) { if (!RRs.count(R.first)) continue; - auto F = DefM.find(R.first); + auto F = DefM.find(R.first.Reg); if (F == DefM.end() || F->second.empty()) continue; R.second[IA.Id] = F->second.top()->Id; @@ -168,6 +144,18 @@ bool CopyPropagation::run() { bool HasLimit = CpLimit.getNumOccurrences() > 0; #endif + auto MinPhysReg = [this] (RegisterRef RR) -> unsigned { + const TargetRegisterInfo &TRI = DFG.getTRI(); + const TargetRegisterClass &RC = 
*TRI.getMinimalPhysRegClass(RR.Reg); + if ((RC.LaneMask & RR.Mask) == RC.LaneMask) + return RR.Reg; + for (MCSubRegIndexIterator S(RR.Reg, &TRI); S.isValid(); ++S) + if (RR.Mask == TRI.getSubRegIndexLaneMask(S.getSubRegIndex())) + return S.getSubReg(); + llvm_unreachable("Should have found a register"); + return 0; + }; + for (auto C : Copies) { #ifndef NDEBUG if (HasLimit && CpCount >= CpLimit) @@ -180,7 +168,7 @@ bool CopyPropagation::run() { EqualityMap &EM = FS->second; for (NodeAddr<DefNode*> DA : SA.Addr->members_if(DFG.IsDef, DFG)) { - RegisterRef DR = DA.Addr->getRegRef(); + RegisterRef DR = DA.Addr->getRegRef(DFG); auto FR = EM.find(DR); if (FR == EM.end()) continue; @@ -197,7 +185,7 @@ bool CopyPropagation::run() { uint16_t F = UA.Addr->getFlags(); if ((F & NodeAttrs::PhiRef) || (F & NodeAttrs::Fixed)) continue; - if (UA.Addr->getRegRef() != DR) + if (UA.Addr->getRegRef(DFG) != DR) continue; NodeAddr<InstrNode*> IA = UA.Addr->getOwner(DFG); @@ -214,8 +202,9 @@ bool CopyPropagation::run() { << *NodeAddr<StmtNode*>(IA).Addr->getCode(); } - Op.setReg(SR.Reg); - Op.setSubReg(SR.Sub); + unsigned NewReg = MinPhysReg(SR); + Op.setReg(NewReg); + Op.setSubReg(0); DFG.unlinkUse(UA, false); if (RDefSR_SA != 0) { UA.Addr->linkToDef(UA.Id, DFG.addr<DefNode*>(RDefSR_SA)); diff --git a/contrib/llvm/lib/Target/Hexagon/RDFCopy.h b/contrib/llvm/lib/Target/Hexagon/RDFCopy.h index e8a576c..5ece11b 100644 --- a/contrib/llvm/lib/Target/Hexagon/RDFCopy.h +++ b/contrib/llvm/lib/Target/Hexagon/RDFCopy.h @@ -1,4 +1,4 @@ -//===--- RDFCopy.h --------------------------------------------------------===// +//===--- RDFCopy.h ----------------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,27 +7,31 @@ // //===----------------------------------------------------------------------===// -#ifndef RDF_COPY_H -#define RDF_COPY_H +#ifndef LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H +#define LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H #include "RDFGraph.h" #include <map> #include <vector> namespace llvm { + class MachineBasicBlock; class MachineDominatorTree; class MachineInstr; namespace rdf { + struct CopyPropagation { CopyPropagation(DataFlowGraph &dfg) : MDT(dfg.getDT()), DFG(dfg), Trace(false) {} - virtual ~CopyPropagation() {} + + virtual ~CopyPropagation() = default; bool run(); void trace(bool On) { Trace = On; } bool trace() const { return Trace; } + DataFlowGraph &getDFG() { return DFG; } typedef std::map<RegisterRef, RegisterRef> EqualityMap; virtual bool interpretAsCopy(const MachineInstr *MI, EqualityMap &EM); @@ -48,7 +52,9 @@ namespace rdf { void updateMap(NodeAddr<InstrNode*> IA); bool scanBlock(MachineBasicBlock *B); }; -} // namespace rdf -} // namespace llvm -#endif +} // end namespace rdf + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H diff --git a/contrib/llvm/lib/Target/Hexagon/RDFGraph.cpp b/contrib/llvm/lib/Target/Hexagon/RDFGraph.cpp index 273d6b7..fa272ea 100644 --- a/contrib/llvm/lib/Target/Hexagon/RDFGraph.cpp +++ b/contrib/llvm/lib/Target/Hexagon/RDFGraph.cpp @@ -10,15 +10,31 @@ // Target-independent, SSA-based data flow graph for register data flow (RDF). 
// #include "RDFGraph.h" - #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominanceFrontier.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/MC/LaneBitmask.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetRegisterInfo.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <cstring> +#include <iterator> +#include <utility> +#include <vector> using namespace llvm; using namespace rdf; @@ -28,6 +44,12 @@ using namespace rdf; namespace llvm { namespace rdf { +raw_ostream &operator<< (raw_ostream &OS, const PrintLaneMaskOpt &P) { + if (!P.Mask.all()) + OS << ':' << PrintLaneMask(P.Mask); + return OS; +} + template<> raw_ostream &operator<< (raw_ostream &OS, const Print<RegisterRef> &P) { auto &TRI = P.G.getTRI(); @@ -35,13 +57,7 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<RegisterRef> &P) { OS << TRI.getName(P.Obj.Reg); else OS << '#' << P.Obj.Reg; - if (P.Obj.Sub > 0) { - OS << ':'; - if (P.Obj.Sub < TRI.getNumSubRegIndices()) - OS << TRI.getSubRegIndexName(P.Obj.Sub); - else - OS << '#' << P.Obj.Sub; - } + OS << PrintLaneMaskOpt(P.Obj.Mask); return OS; } @@ -62,6 +78,10 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<NodeId> &P) { } break; case NodeAttrs::Ref: + if (Flags & NodeAttrs::Undef) + OS << '/'; + if (Flags & NodeAttrs::Dead) + OS << '\\'; if (Flags & NodeAttrs::Preserving) OS << '+'; if (Flags & NodeAttrs::Clobbering) @@ -83,14 +103,12 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<NodeId> &P) { return OS; } -namespace { - void printRefHeader(raw_ostream &OS, const NodeAddr<RefNode*> RA, - const DataFlowGraph &G) { - OS << Print<NodeId>(RA.Id, G) << '<' - << Print<RegisterRef>(RA.Addr->getRegRef(), G) << '>'; - if (RA.Addr->getFlags() & NodeAttrs::Fixed) - OS << '!'; - } +static void printRefHeader(raw_ostream &OS, const NodeAddr<RefNode*> RA, + const DataFlowGraph &G) { + OS << Print<NodeId>(RA.Id, G) << '<' + << Print<RegisterRef>(RA.Addr->getRegRef(G), G) << '>'; + if (RA.Addr->getFlags() & NodeAttrs::Fixed) + OS << '!'; } template<> @@ -178,9 +196,11 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<NodeSet> &P) { } namespace { + template <typename T> struct PrintListV { PrintListV(const NodeList &L, const DataFlowGraph &G) : List(L), G(G) {} + typedef T Type; const NodeList &List; const DataFlowGraph &G; @@ -196,7 +216,8 @@ namespace { } return OS; } -} + +} // end anonymous namespace template<> raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<PhiNode*>> &P) { @@ -208,9 +229,27 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<PhiNode*>> &P) { template<> raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<StmtNode*>> &P) { - unsigned Opc = P.Obj.Addr->getCode()->getOpcode(); - OS << Print<NodeId>(P.Obj.Id, P.G) << ": " << P.G.getTII().getName(Opc) - << " [" << PrintListV<RefNode*>(P.Obj.Addr->members(P.G), P.G) << ']'; + const MachineInstr &MI = *P.Obj.Addr->getCode(); + unsigned Opc = MI.getOpcode(); + OS << Print<NodeId>(P.Obj.Id, P.G) << ": " << 
P.G.getTII().getName(Opc); + // Print the target for calls and branches (for readability). + if (MI.isCall() || MI.isBranch()) { + MachineInstr::const_mop_iterator T = + llvm::find_if(MI.operands(), + [] (const MachineOperand &Op) -> bool { + return Op.isMBB() || Op.isGlobal() || Op.isSymbol(); + }); + if (T != MI.operands_end()) { + OS << ' '; + if (T->isMBB()) + OS << "BB#" << T->getMBB()->getNumber(); + else if (T->isGlobal()) + OS << T->getGlobal()->getName(); + else if (T->isSymbol()) + OS << T->getSymbolName(); + } + } + OS << " [" << PrintListV<RefNode*>(P.Obj.Addr->members(P.G), P.G) << ']'; return OS; } @@ -234,29 +273,29 @@ raw_ostream &operator<< (raw_ostream &OS, template<> raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<BlockNode*>> &P) { - auto *BB = P.Obj.Addr->getCode(); + MachineBasicBlock *BB = P.Obj.Addr->getCode(); unsigned NP = BB->pred_size(); std::vector<int> Ns; auto PrintBBs = [&OS,&P] (std::vector<int> Ns) -> void { unsigned N = Ns.size(); - for (auto I : Ns) { + for (int I : Ns) { OS << "BB#" << I; if (--N) OS << ", "; } }; - OS << Print<NodeId>(P.Obj.Id, P.G) << ": === BB#" << BB->getNumber() - << " === preds(" << NP << "): "; - for (auto I : BB->predecessors()) - Ns.push_back(I->getNumber()); + OS << Print<NodeId>(P.Obj.Id, P.G) << ": --- BB#" << BB->getNumber() + << " --- preds(" << NP << "): "; + for (MachineBasicBlock *B : BB->predecessors()) + Ns.push_back(B->getNumber()); PrintBBs(Ns); unsigned NS = BB->succ_size(); OS << " succs(" << NS << "): "; Ns.clear(); - for (auto I : BB->successors()) - Ns.push_back(I->getNumber()); + for (MachineBasicBlock *B : BB->successors()) + Ns.push_back(B->getNumber()); PrintBBs(Ns); OS << '\n'; @@ -286,11 +325,17 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<RegisterSet> &P) { } template<> +raw_ostream &operator<< (raw_ostream &OS, const Print<RegisterAggr> &P) { + P.Obj.print(OS); + return OS; +} + +template<> raw_ostream &operator<< (raw_ostream &OS, const Print<DataFlowGraph::DefStack> &P) { for (auto I = P.Obj.top(), E = P.Obj.bottom(); I != E; ) { OS << Print<NodeId>(I->Id, P.G) - << '<' << Print<RegisterRef>(I->Addr->getRegRef(), P.G) << '>'; + << '<' << Print<RegisterRef>(I->Addr->getRegRef(P.G), P.G) << '>'; I.down(); if (I != E) OS << ' '; @@ -298,8 +343,8 @@ raw_ostream &operator<< (raw_ostream &OS, return OS; } -} // namespace rdf -} // namespace llvm +} // end namespace rdf +} // end namespace llvm // Node allocation functions. // @@ -361,7 +406,6 @@ void NodeAllocator::clear() { ActiveEnd = nullptr; } - // Insert node NA after "this" in the circular chain. void NodeBase::append(NodeAddr<NodeBase*> NA) { NodeId Nx = Next; @@ -372,31 +416,31 @@ void NodeBase::append(NodeAddr<NodeBase*> NA) { } } - // Fundamental node manipulator functions. // Obtain the register reference from a reference node. -RegisterRef RefNode::getRegRef() const { +RegisterRef RefNode::getRegRef(const DataFlowGraph &G) const { assert(NodeAttrs::type(Attrs) == NodeAttrs::Ref); if (NodeAttrs::flags(Attrs) & NodeAttrs::PhiRef) - return Ref.RR; + return G.unpack(Ref.PR); assert(Ref.Op != nullptr); - return { Ref.Op->getReg(), Ref.Op->getSubReg() }; + return G.makeRegRef(Ref.Op->getReg(), Ref.Op->getSubReg()); } // Set the register reference in the reference node directly (for references // in phi nodes). 
-void RefNode::setRegRef(RegisterRef RR) { +void RefNode::setRegRef(RegisterRef RR, DataFlowGraph &G) { assert(NodeAttrs::type(Attrs) == NodeAttrs::Ref); assert(NodeAttrs::flags(Attrs) & NodeAttrs::PhiRef); - Ref.RR = RR; + Ref.PR = G.pack(RR); } // Set the register reference in the reference node based on a machine // operand (for references in statement nodes). -void RefNode::setRegRef(MachineOperand *Op) { +void RefNode::setRegRef(MachineOperand *Op, DataFlowGraph &G) { assert(NodeAttrs::type(Attrs) == NodeAttrs::Ref); assert(!(NodeAttrs::flags(Attrs) & NodeAttrs::PhiRef)); + (void)G; Ref.Op = Op; } @@ -442,7 +486,7 @@ NodeAddr<NodeBase*> CodeNode::getLastMember(const DataFlowGraph &G) const { // Add node NA at the end of the member list of the given code node. void CodeNode::addMember(NodeAddr<NodeBase*> NA, const DataFlowGraph &G) { - auto ML = getLastMember(G); + NodeAddr<NodeBase*> ML = getLastMember(G); if (ML.Id != 0) { ML.Addr->append(NA); } else { @@ -463,7 +507,7 @@ void CodeNode::addMemberAfter(NodeAddr<NodeBase*> MA, NodeAddr<NodeBase*> NA, // Remove member node NA from the given code node. void CodeNode::removeMember(NodeAddr<NodeBase*> NA, const DataFlowGraph &G) { - auto MA = getFirstMember(G); + NodeAddr<NodeBase*> MA = getFirstMember(G); assert(MA.Id != 0); // Special handling if the member to remove is the first member. @@ -514,7 +558,7 @@ NodeAddr<NodeBase*> InstrNode::getOwner(const DataFlowGraph &G) { // Add the phi node PA to the given block node. void BlockNode::addPhi(NodeAddr<PhiNode*> PA, const DataFlowGraph &G) { - auto M = getFirstMember(G); + NodeAddr<NodeBase*> M = getFirstMember(G); if (M.Id == 0) { addMember(PA, G); return; @@ -560,115 +604,6 @@ NodeAddr<BlockNode*> FuncNode::getEntryBlock(const DataFlowGraph &G) { return findBlock(EntryB, G); } - -// Register aliasing information. -// -// In theory, the lane information could be used to determine register -// covering (and aliasing), but depending on the sub-register structure, -// the lane mask information may be missing. The covering information -// must be available for this framework to work, so relying solely on -// the lane data is not sufficient. - -// Determine whether RA covers RB. -bool RegisterAliasInfo::covers(RegisterRef RA, RegisterRef RB) const { - if (RA == RB) - return true; - if (TargetRegisterInfo::isVirtualRegister(RA.Reg)) { - assert(TargetRegisterInfo::isVirtualRegister(RB.Reg)); - if (RA.Reg != RB.Reg) - return false; - if (RA.Sub == 0) - return true; - return TRI.composeSubRegIndices(RA.Sub, RB.Sub) == RA.Sub; - } - - assert(TargetRegisterInfo::isPhysicalRegister(RA.Reg) && - TargetRegisterInfo::isPhysicalRegister(RB.Reg)); - unsigned A = RA.Sub != 0 ? TRI.getSubReg(RA.Reg, RA.Sub) : RA.Reg; - unsigned B = RB.Sub != 0 ? TRI.getSubReg(RB.Reg, RB.Sub) : RB.Reg; - return TRI.isSubRegister(A, B); -} - -// Determine whether RR is covered by the set of references RRs. -bool RegisterAliasInfo::covers(const RegisterSet &RRs, RegisterRef RR) const { - if (RRs.count(RR)) - return true; - - // For virtual registers, we cannot accurately determine covering based - // on subregisters. If RR itself is not present in RRs, but it has a sub- - // register reference, check for the super-register alone. Otherwise, - // assume non-covering. - if (TargetRegisterInfo::isVirtualRegister(RR.Reg)) { - if (RR.Sub != 0) - return RRs.count({RR.Reg, 0}); - return false; - } - - // If any super-register of RR is present, then RR is covered. - unsigned Reg = RR.Sub == 0 ? 
RR.Reg : TRI.getSubReg(RR.Reg, RR.Sub); - for (MCSuperRegIterator SR(Reg, &TRI); SR.isValid(); ++SR) - if (RRs.count({*SR, 0})) - return true; - - return false; -} - -// Get the list of references aliased to RR. -std::vector<RegisterRef> RegisterAliasInfo::getAliasSet(RegisterRef RR) const { - // Do not include RR in the alias set. For virtual registers return an - // empty set. - std::vector<RegisterRef> AS; - if (TargetRegisterInfo::isVirtualRegister(RR.Reg)) - return AS; - assert(TargetRegisterInfo::isPhysicalRegister(RR.Reg)); - unsigned R = RR.Reg; - if (RR.Sub) - R = TRI.getSubReg(RR.Reg, RR.Sub); - - for (MCRegAliasIterator AI(R, &TRI, false); AI.isValid(); ++AI) - AS.push_back(RegisterRef({*AI, 0})); - return AS; -} - -// Check whether RA and RB are aliased. -bool RegisterAliasInfo::alias(RegisterRef RA, RegisterRef RB) const { - bool VirtA = TargetRegisterInfo::isVirtualRegister(RA.Reg); - bool VirtB = TargetRegisterInfo::isVirtualRegister(RB.Reg); - bool PhysA = TargetRegisterInfo::isPhysicalRegister(RA.Reg); - bool PhysB = TargetRegisterInfo::isPhysicalRegister(RB.Reg); - - if (VirtA != VirtB) - return false; - - if (VirtA) { - if (RA.Reg != RB.Reg) - return false; - // RA and RB refer to the same register. If any of them refer to the - // whole register, they must be aliased. - if (RA.Sub == 0 || RB.Sub == 0) - return true; - unsigned SA = TRI.getSubRegIdxSize(RA.Sub); - unsigned OA = TRI.getSubRegIdxOffset(RA.Sub); - unsigned SB = TRI.getSubRegIdxSize(RB.Sub); - unsigned OB = TRI.getSubRegIdxOffset(RB.Sub); - if (OA <= OB && OA+SA > OB) - return true; - if (OB <= OA && OB+SB > OA) - return true; - return false; - } - - assert(PhysA && PhysB); - (void)PhysA, (void)PhysB; - unsigned A = RA.Sub ? TRI.getSubReg(RA.Reg, RA.Sub) : RA.Reg; - unsigned B = RB.Sub ? TRI.getSubReg(RB.Reg, RB.Sub) : RB.Reg; - for (MCRegAliasIterator I(A, &TRI, true); I.isValid(); ++I) - if (B == *I) - return true; - return false; -} - - // Target operand information. // @@ -695,7 +630,7 @@ bool TargetOperandInfo::isFixedReg(const MachineInstr &In, unsigned OpNum) return true; // Check for a tail call. if (In.isBranch()) - for (auto &O : In.operands()) + for (const MachineOperand &O : In.operands()) if (O.isGlobal() || O.isSymbol()) return true; @@ -708,7 +643,7 @@ bool TargetOperandInfo::isFixedReg(const MachineInstr &In, unsigned OpNum) // uses or defs, and those lists do not allow sub-registers. if (Op.getSubReg() != 0) return false; - unsigned Reg = Op.getReg(); + RegisterId Reg = Op.getReg(); const MCPhysReg *ImpR = Op.isDef() ? 
D.getImplicitDefs() : D.getImplicitUses(); if (!ImpR) @@ -719,6 +654,108 @@ bool TargetOperandInfo::isFixedReg(const MachineInstr &In, unsigned OpNum) return false; } +RegisterRef RegisterAggr::normalize(RegisterRef RR) const { + RegisterId SuperReg = RR.Reg; + while (true) { + MCSuperRegIterator SR(SuperReg, &TRI, false); + if (!SR.isValid()) + break; + SuperReg = *SR; + } + + const TargetRegisterClass &RC = *TRI.getMinimalPhysRegClass(RR.Reg); + LaneBitmask Common = RR.Mask & RC.LaneMask; + uint32_t Sub = TRI.getSubRegIndex(SuperReg, RR.Reg); + LaneBitmask SuperMask = TRI.composeSubRegIndexLaneMask(Sub, Common); + return RegisterRef(SuperReg, SuperMask); +} + +bool RegisterAggr::hasAliasOf(RegisterRef RR) const { + RegisterRef NR = normalize(RR); + auto F = Masks.find(NR.Reg); + if (F != Masks.end()) { + if ((F->second & NR.Mask).any()) + return true; + } + if (CheckUnits) { + for (MCRegUnitIterator U(RR.Reg, &TRI); U.isValid(); ++U) + if (ExpAliasUnits.test(*U)) + return true; + } + return false; +} + +bool RegisterAggr::hasCoverOf(RegisterRef RR) const { + // Always have a cover for empty lane mask. + RegisterRef NR = normalize(RR); + if (NR.Mask.none()) + return true; + auto F = Masks.find(NR.Reg); + if (F == Masks.end()) + return false; + return (NR.Mask & F->second) == NR.Mask; +} + +RegisterAggr &RegisterAggr::insert(RegisterRef RR) { + RegisterRef NR = normalize(RR); + auto F = Masks.find(NR.Reg); + if (F == Masks.end()) + Masks.insert({NR.Reg, NR.Mask}); + else + F->second |= NR.Mask; + + // Visit all register units to see if there are any that were created + // by explicit aliases. Add those that were to the bit vector. + for (MCRegUnitIterator U(RR.Reg, &TRI); U.isValid(); ++U) { + MCRegUnitRootIterator R(*U, &TRI); + ++R; + if (!R.isValid()) + continue; + ExpAliasUnits.set(*U); + CheckUnits = true; + } + return *this; +} + +RegisterAggr &RegisterAggr::insert(const RegisterAggr &RG) { + for (std::pair<RegisterId,LaneBitmask> P : RG.Masks) + insert(RegisterRef(P.first, P.second)); + return *this; +} + +RegisterAggr &RegisterAggr::clear(RegisterRef RR) { + RegisterRef NR = normalize(RR); + auto F = Masks.find(NR.Reg); + if (F == Masks.end()) + return *this; + LaneBitmask NewM = F->second & ~NR.Mask; + if (NewM.none()) + Masks.erase(F); + else + F->second = NewM; + return *this; +} + +RegisterAggr &RegisterAggr::clear(const RegisterAggr &RG) { + for (std::pair<RegisterId,LaneBitmask> P : RG.Masks) + clear(RegisterRef(P.first, P.second)); + return *this; +} + +RegisterRef RegisterAggr::clearIn(RegisterRef RR) const { + RegisterAggr T(TRI); + T.insert(RR).clear(*this); + if (T.empty()) + return RegisterRef(); + return RegisterRef(T.begin()->first, T.begin()->second); +} + +void RegisterAggr::print(raw_ostream &OS) const { + OS << '{'; + for (auto I : Masks) + OS << ' ' << PrintReg(I.first, &TRI) << PrintLaneMaskOpt(I.second); + OS << " }"; +} // // The data flow graph construction. @@ -726,13 +763,10 @@ bool TargetOperandInfo::isFixedReg(const MachineInstr &In, unsigned OpNum) DataFlowGraph::DataFlowGraph(MachineFunction &mf, const TargetInstrInfo &tii, const TargetRegisterInfo &tri, const MachineDominatorTree &mdt, - const MachineDominanceFrontier &mdf, const RegisterAliasInfo &rai, - const TargetOperandInfo &toi) - : TimeG("rdf"), MF(mf), TII(tii), TRI(tri), MDT(mdt), MDF(mdf), RAI(rai), - TOI(toi) { + const MachineDominanceFrontier &mdf, const TargetOperandInfo &toi) + : MF(mf), TII(tii), TRI(tri), MDT(mdt), MDF(mdf), TOI(toi) { } - // The implementation of the definition stack. 
// Each register reference has its own definition stack. In particular, // for a register references "Reg" and "Reg:subreg" will each have their @@ -821,6 +855,32 @@ unsigned DataFlowGraph::DefStack::nextDown(unsigned P) const { return P; } +// Register information. + +// Get the list of references aliased to RR. Lane masks are ignored. +RegisterSet DataFlowGraph::getAliasSet(RegisterId Reg) const { + // Do not include RR in the alias set. + RegisterSet AS; + assert(TargetRegisterInfo::isPhysicalRegister(Reg)); + + for (MCRegAliasIterator AI(Reg, &TRI, false); AI.isValid(); ++AI) + AS.insert(RegisterRef(*AI)); + return AS; +} + +RegisterSet DataFlowGraph::getLandingPadLiveIns() const { + RegisterSet LR; + const Function &F = *MF.getFunction(); + const Constant *PF = F.hasPersonalityFn() ? F.getPersonalityFn() + : nullptr; + const TargetLowering &TLI = *MF.getSubtarget().getTargetLowering(); + if (RegisterId R = TLI.getExceptionPointerRegister(PF)) + LR.insert(RegisterRef(R)); + if (RegisterId R = TLI.getExceptionSelectorRegister(PF)) + LR.insert(RegisterRef(R)); + return LR; +} + // Node management functions. // Get the pointer to the node with the id N. @@ -864,13 +924,12 @@ NodeAddr<NodeBase*> DataFlowGraph::cloneNode(const NodeAddr<NodeBase*> B) { return NA; } - // Allocation routines for specific node types/kinds. NodeAddr<UseNode*> DataFlowGraph::newUse(NodeAddr<InstrNode*> Owner, MachineOperand &Op, uint16_t Flags) { NodeAddr<UseNode*> UA = newNode(NodeAttrs::Ref | NodeAttrs::Use | Flags); - UA.Addr->setRegRef(&Op); + UA.Addr->setRegRef(&Op, *this); return UA; } @@ -878,7 +937,7 @@ NodeAddr<PhiUseNode*> DataFlowGraph::newPhiUse(NodeAddr<PhiNode*> Owner, RegisterRef RR, NodeAddr<BlockNode*> PredB, uint16_t Flags) { NodeAddr<PhiUseNode*> PUA = newNode(NodeAttrs::Ref | NodeAttrs::Use | Flags); assert(Flags & NodeAttrs::PhiRef); - PUA.Addr->setRegRef(RR); + PUA.Addr->setRegRef(RR, *this); PUA.Addr->setPredecessor(PredB.Id); return PUA; } @@ -886,7 +945,7 @@ NodeAddr<PhiUseNode*> DataFlowGraph::newPhiUse(NodeAddr<PhiNode*> Owner, NodeAddr<DefNode*> DataFlowGraph::newDef(NodeAddr<InstrNode*> Owner, MachineOperand &Op, uint16_t Flags) { NodeAddr<DefNode*> DA = newNode(NodeAttrs::Ref | NodeAttrs::Def | Flags); - DA.Addr->setRegRef(&Op); + DA.Addr->setRegRef(&Op, *this); return DA; } @@ -894,7 +953,7 @@ NodeAddr<DefNode*> DataFlowGraph::newDef(NodeAddr<InstrNode*> Owner, RegisterRef RR, uint16_t Flags) { NodeAddr<DefNode*> DA = newNode(NodeAttrs::Ref | NodeAttrs::Def | Flags); assert(Flags & NodeAttrs::PhiRef); - DA.Addr->setRegRef(RR); + DA.Addr->setRegRef(RR, *this); return DA; } @@ -934,17 +993,20 @@ void DataFlowGraph::build(unsigned Options) { if (MF.empty()) return; - for (auto &B : MF) { - auto BA = newBlock(Func, &B); - for (auto &I : B) { + for (MachineBasicBlock &B : MF) { + NodeAddr<BlockNode*> BA = newBlock(Func, &B); + BlockNodes.insert(std::make_pair(&B, BA)); + for (MachineInstr &I : B) { if (I.isDebugValue()) continue; buildStmt(BA, I); } } - // Collect information about block references. NodeAddr<BlockNode*> EA = Func.Addr->getEntryBlock(*this); + NodeList Blocks = Func.Addr->members(*this); + + // Collect information about block references. 
   BlockRefsMap RefM;
   buildBlockRefs(EA, RefM);
 
@@ -952,16 +1014,48 @@ void DataFlowGraph::build(unsigned Options) {
   MachineRegisterInfo &MRI = MF.getRegInfo();
   for (auto I = MRI.livein_begin(), E = MRI.livein_end(); I != E; ++I) {
     NodeAddr<PhiNode*> PA = newPhi(EA);
-    RegisterRef RR = { I->first, 0 };
+    RegisterRef RR = RegisterRef(I->first);
     uint16_t PhiFlags = NodeAttrs::PhiRef | NodeAttrs::Preserving;
     NodeAddr<DefNode*> DA = newDef(PA, RR, PhiFlags);
     PA.Addr->addMember(DA, *this);
   }
 
+  // Add phis for landing pads.
+  // Landing pads, unlike usual basic blocks, are not entered through
+  // branches in the program, or fall-throughs from other blocks. They
+  // are entered from the exception handling runtime and the target's ABI
+  // may define certain registers as defined on entry to such a block.
+  RegisterSet EHRegs = getLandingPadLiveIns();
+  if (!EHRegs.empty()) {
+    for (NodeAddr<BlockNode*> BA : Blocks) {
+      const MachineBasicBlock &B = *BA.Addr->getCode();
+      if (!B.isEHPad())
+        continue;
+
+      // Prepare a list of NodeIds of the block's predecessors.
+      NodeList Preds;
+      for (MachineBasicBlock *PB : B.predecessors())
+        Preds.push_back(findBlock(PB));
+
+      // Build phi nodes for each live-in.
+      for (RegisterRef RR : EHRegs) {
+        NodeAddr<PhiNode*> PA = newPhi(BA);
+        uint16_t PhiFlags = NodeAttrs::PhiRef | NodeAttrs::Preserving;
+        // Add def:
+        NodeAddr<DefNode*> DA = newDef(PA, RR, PhiFlags);
+        PA.Addr->addMember(DA, *this);
+        // Add uses (no reaching defs for phi uses):
+        for (NodeAddr<BlockNode*> PBA : Preds) {
+          NodeAddr<PhiUseNode*> PUA = newPhiUse(PA, RR, PBA);
+          PA.Addr->addMember(PUA, *this);
+        }
+      }
+    }
+  }
+
   // Build a map "PhiM" which will contain, for each block, the set
   // of references that will require phi definitions in that block.
   BlockRefsMap PhiM;
-  auto Blocks = Func.Addr->members(*this);
   for (NodeAddr<BlockNode*> BA : Blocks)
     recordDefsForDF(PhiM, RefM, BA);
   for (NodeAddr<BlockNode*> BA : Blocks)
@@ -976,6 +1070,47 @@ void DataFlowGraph::build(unsigned Options) {
   removeUnusedPhis();
 }
 
+RegisterRef DataFlowGraph::makeRegRef(unsigned Reg, unsigned Sub) const {
+  assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+  if (Sub != 0)
+    Reg = TRI.getSubReg(Reg, Sub);
+  return RegisterRef(Reg);
+}
+
+RegisterRef DataFlowGraph::normalizeRef(RegisterRef RR) const {
+  // FIXME copied from RegisterAggr
+  RegisterId SuperReg = RR.Reg;
+  while (true) {
+    MCSuperRegIterator SR(SuperReg, &TRI, false);
+    if (!SR.isValid())
+      break;
+    SuperReg = *SR;
+  }
+
+  uint32_t Sub = TRI.getSubRegIndex(SuperReg, RR.Reg);
+  const TargetRegisterClass &RC = *TRI.getMinimalPhysRegClass(RR.Reg);
+  LaneBitmask SuperMask = RR.Mask &
+                          TRI.composeSubRegIndexLaneMask(Sub, RC.LaneMask);
+  return RegisterRef(SuperReg, SuperMask);
+}
+
+RegisterRef DataFlowGraph::restrictRef(RegisterRef AR, RegisterRef BR) const {
+  if (AR.Reg == BR.Reg) {
+    LaneBitmask M = AR.Mask & BR.Mask;
+    return M.any() ? RegisterRef(AR.Reg, M) : RegisterRef();
+  }
+#ifndef NDEBUG
+  RegisterRef NAR = normalizeRef(AR);
+  RegisterRef NBR = normalizeRef(BR);
+  assert(NAR.Reg != NBR.Reg);
+#endif
+  // This isn't strictly correct, because the overlap may happen in the
+  // part masked out.
+  if (TRI.regsOverlap(AR.Reg, BR.Reg))
+    return AR;
+  return RegisterRef();
+}
+
 // For each stack in the map DefM, push the delimiter for block B on it.
 void DataFlowGraph::markBlock(NodeId B, DefStackMap &DefM) {
   // Push block delimiters.
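A toy illustration of the landing-pad rule above, using made-up block and register names: each ABI-defined live-in gets one phi with a def in the pad and one use per predecessor, and those phi uses deliberately get no reaching defs linked.

#include <iostream>
#include <map>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> Preds = {"bb.1", "bb.2"};  // predecessors of the pad
  std::vector<std::string> EHRegs = {"R0", "R1"};     // e.g. exc. pointer/selector
  // phi -> predecessor arms; the def in the pad is implicit per phi
  std::map<std::string, std::vector<std::string>> Phis;
  for (const std::string &R : EHRegs)
    for (const std::string &P : Preds)
      Phis["phi(" + R + ")"].push_back(P);
  for (auto &Ph : Phis) {
    std::cout << Ph.first << ": def in pad; uses from";
    for (auto &P : Ph.second)
      std::cout << ' ' << P;      // these uses stay without reaching defs
    std::cout << '\n';
  }
}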
@@ -1024,28 +1159,31 @@ void DataFlowGraph::pushDefs(NodeAddr<InstrNode*> IA, DefStackMap &DefM) {
   for (NodeAddr<DefNode*> DA : Defs) {
     if (Visited.count(DA.Id))
       continue;
+
     NodeList Rel = getRelatedRefs(IA, DA);
     NodeAddr<DefNode*> PDA = Rel.front();
-    // Push the definition on the stack for the register and all aliases.
-    RegisterRef RR = PDA.Addr->getRegRef();
+    RegisterRef RR = PDA.Addr->getRegRef(*this);
 #ifndef NDEBUG
     // Assert if the register is defined in two or more unrelated defs.
     // This could happen if there are two or more def operands defining it.
     if (!Defined.insert(RR).second) {
-      auto *MI = NodeAddr<StmtNode*>(IA).Addr->getCode();
+      MachineInstr *MI = NodeAddr<StmtNode*>(IA).Addr->getCode();
       dbgs() << "Multiple definitions of register: "
              << Print<RegisterRef>(RR, *this) << " in\n  " << *MI
              << "in BB#" << MI->getParent()->getNumber() << '\n';
       llvm_unreachable(nullptr);
     }
 #endif
-    DefM[RR].push(DA);
-    for (auto A : RAI.getAliasSet(RR)) {
+    // Push the definition on the stack for the register and all aliases.
+    // The def stack traversal in linkNodeUp will check the exact aliasing.
+    DefM[RR.Reg].push(DA);
+    for (RegisterRef A : getAliasSet(RR.Reg /*FIXME? use RegisterRef*/)) {
+      // Check that we don't push the same def twice.
       assert(A != RR);
-      DefM[A].push(DA);
+      DefM[A.Reg].push(DA);
     }
     // Mark all the related defs as visited.
-    for (auto T : Rel)
+    for (NodeAddr<NodeBase*> T : Rel)
       Visited.insert(T.Id);
   }
 }
@@ -1065,14 +1203,66 @@ NodeList DataFlowGraph::getRelatedRefs(NodeAddr<InstrNode*> IA,
   return Refs;
 }
 
+// Return true if RA and RB overlap, false otherwise.
+bool DataFlowGraph::alias(RegisterRef RA, RegisterRef RB) const {
+  assert(TargetRegisterInfo::isPhysicalRegister(RA.Reg));
+  assert(TargetRegisterInfo::isPhysicalRegister(RB.Reg));
+
+  MCRegUnitMaskIterator UMA(RA.Reg, &TRI);
+  MCRegUnitMaskIterator UMB(RB.Reg, &TRI);
+  // Reg units are returned in the numerical order.
+  while (UMA.isValid() && UMB.isValid()) {
+    std::pair<uint32_t,LaneBitmask> PA = *UMA;
+    std::pair<uint32_t,LaneBitmask> PB = *UMB;
+    if (PA.first == PB.first) {
+      // Lane mask of 0 (given by the iterator) should be treated as "full".
+      // This can happen when the register has only one unit, or when the
+      // unit corresponds to explicit aliasing. In such cases, the lane mask
+      // from RegisterRef should be ignored.
+      if (PA.second.none() || PB.second.none())
+        return true;
+
+      // At this point the common unit corresponds to a subregister. The lane
+      // masks correspond to the lane mask of that unit within the original
+      // register, for example assuming register quadruple q0 = r3:0, and
+      // a register pair d1 = r3:2, the lane mask of r2 in q0 may be 0b0100,
+      // while the lane mask of r2 in d1 may be 0b0001.
+      LaneBitmask LA = PA.second & RA.Mask;
+      LaneBitmask LB = PB.second & RB.Mask;
+      if (LA.any() && LB.any()) {
+        unsigned Root = *MCRegUnitRootIterator(PA.first, &TRI);
+        // If register units were guaranteed to only have 1 bit in any lane
+        // mask, the code below would not be necessary. This is because LA
+        // and LB would have at most 1 bit set each, and that bit would be
+        // guaranteed to correspond to the given register unit.
+        uint32_t SubA = TRI.getSubRegIndex(RA.Reg, Root);
+        uint32_t SubB = TRI.getSubRegIndex(RB.Reg, Root);
+        const TargetRegisterClass &RC = *TRI.getMinimalPhysRegClass(Root);
+        LaneBitmask MaskA = TRI.reverseComposeSubRegIndexLaneMask(SubA, LA);
+        LaneBitmask MaskB = TRI.reverseComposeSubRegIndexLaneMask(SubB, LB);
+        if ((MaskA & MaskB & RC.LaneMask).any())
+          return true;
+      }
+
+      ++UMA;
+      ++UMB;
+      continue;
+    }
+    if (PA.first < PB.first)
+      ++UMA;
+    else if (PB.first < PA.first)
+      ++UMB;
+  }
+  return false;
+}
 
 // Clear all information in the graph.
 void DataFlowGraph::reset() {
   Memory.clear();
+  BlockNodes.clear();
   Func = NodeAddr<FuncNode*>();
 }
 
-
 // Return the next reference node in the instruction node IA that is related
 // to RA. Conceptually, two reference nodes are related if they refer to the
 // same instance of a register access, but differ in flags or other minor
@@ -1083,10 +1273,10 @@ NodeAddr<RefNode*> DataFlowGraph::getNextRelated(NodeAddr<InstrNode*> IA,
       NodeAddr<RefNode*> RA) const {
   assert(IA.Id != 0 && RA.Id != 0);
 
-  auto Related = [RA](NodeAddr<RefNode*> TA) -> bool {
+  auto Related = [this,RA](NodeAddr<RefNode*> TA) -> bool {
     if (TA.Addr->getKind() != RA.Addr->getKind())
       return false;
-    if (TA.Addr->getRegRef() != RA.Addr->getRegRef())
+    if (TA.Addr->getRegRef(*this) != RA.Addr->getRegRef(*this))
       return false;
     return true;
   };
@@ -1105,7 +1295,7 @@ NodeAddr<RefNode*> DataFlowGraph::getNextRelated(NodeAddr<InstrNode*> IA,
     return TUA.Addr->getPredecessor() == RUA.Addr->getPredecessor();
   };
 
-  RegisterRef RR = RA.Addr->getRegRef();
+  RegisterRef RR = RA.Addr->getRegRef(*this);
   if (IA.Addr->getKind() == NodeAttrs::Stmt)
     return RA.Addr->getNextRef(RR, RelatedStmt, true, *this);
   return RA.Addr->getNextRef(RR, RelatedPhi, true, *this);
@@ -1174,31 +1364,45 @@ NodeAddr<RefNode*> DataFlowGraph::getNextShadow(NodeAddr<InstrNode*> IA,
 // Create a new statement node in the block node BA that corresponds to
 // the machine instruction MI.
 void DataFlowGraph::buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In) {
-  auto SA = newStmt(BA, &In);
+  NodeAddr<StmtNode*> SA = newStmt(BA, &In);
 
   auto isCall = [] (const MachineInstr &In) -> bool {
     if (In.isCall())
       return true;
     // Is tail call?
     if (In.isBranch())
-      for (auto &Op : In.operands())
+      for (const MachineOperand &Op : In.operands())
         if (Op.isGlobal() || Op.isSymbol())
          return true;
     return false;
  };
 
+  auto isDefUndef = [this] (const MachineInstr &In, RegisterRef DR) -> bool {
+    // This instruction defines DR. Check if there is a use operand that
+    // would make DR live on entry to the instruction.
+    for (const MachineOperand &UseOp : In.operands()) {
+      if (!UseOp.isReg() || !UseOp.isUse() || UseOp.isUndef())
+        continue;
+      RegisterRef UR = makeRegRef(UseOp.getReg(), UseOp.getSubReg());
+      if (alias(DR, UR))
+        return false;
+    }
+    return true;
+  };
+
   // Collect a set of registers that this instruction implicitly uses
   // or defines. Implicit operands from an instruction will be ignored
   // unless they are listed here.
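The lane-mask composition in alias() above, reduced to plain bit arithmetic on the comment's own example (q0 = r3:0, d1 = r3:2 sharing the unit of r2); all mask values here are invented for illustration.

#include <cassert>
#include <cstdint>

int main() {
  // Lane of r2 as seen from q0 (third lane) and from d1 (first lane).
  const uint32_t LaneR2InQ0 = 0b0100;
  const uint32_t LaneR2InD1 = 0b0001;

  uint32_t MaskQ0 = 0b1111;  // RegisterRef mask: all of q0
  uint32_t MaskD1 = 0b0001;  // RegisterRef mask: only the low lane of d1 (r2)

  // Both references keep some lane of the common unit -> the refs alias.
  bool LA = (LaneR2InQ0 & MaskQ0) != 0;
  bool LB = (LaneR2InD1 & MaskD1) != 0;
  assert(LA && LB);

  // With q0 restricted to r1:0, the common unit is masked out: no alias.
  uint32_t MaskQ0Low = 0b0011;
  assert((LaneR2InQ0 & MaskQ0Low) == 0);
}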
   RegisterSet ImpUses, ImpDefs;
   if (const uint16_t *ImpD = In.getDesc().getImplicitDefs())
     while (uint16_t R = *ImpD++)
-      ImpDefs.insert({R, 0});
+      ImpDefs.insert(RegisterRef(R));
   if (const uint16_t *ImpU = In.getDesc().getImplicitUses())
     while (uint16_t R = *ImpU++)
-      ImpUses.insert({R, 0});
+      ImpUses.insert(RegisterRef(R));
 
-  bool NeedsImplicit = isCall(In) || In.isInlineAsm() || In.isReturn();
+  bool IsCall = isCall(In);
+  bool NeedsImplicit = IsCall || In.isInlineAsm() || In.isReturn();
   bool IsPredicated = TII.isPredicated(In);
   unsigned NumOps = In.getNumOperands();
 
@@ -1212,14 +1416,20 @@ void DataFlowGraph::buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In) {
     MachineOperand &Op = In.getOperand(OpN);
     if (!Op.isReg() || !Op.isDef() || Op.isImplicit())
       continue;
-    RegisterRef RR = { Op.getReg(), Op.getSubReg() };
+    RegisterRef RR = makeRegRef(Op.getReg(), Op.getSubReg());
     uint16_t Flags = NodeAttrs::None;
-    if (TOI.isPreserving(In, OpN))
+    if (TOI.isPreserving(In, OpN)) {
       Flags |= NodeAttrs::Preserving;
+      // If the def is preserving, check if it is also undefined.
+      if (isDefUndef(In, RR))
+        Flags |= NodeAttrs::Undef;
+    }
     if (TOI.isClobbering(In, OpN))
       Flags |= NodeAttrs::Clobbering;
     if (TOI.isFixedReg(In, OpN))
       Flags |= NodeAttrs::Fixed;
+    if (IsCall && Op.isDead())
+      Flags |= NodeAttrs::Dead;
     NodeAddr<DefNode*> DA = newDef(SA, Op, Flags);
     SA.Addr->addMember(DA, *this);
     DoneDefs.insert(RR);
@@ -1231,18 +1441,24 @@ void DataFlowGraph::buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In) {
     MachineOperand &Op = In.getOperand(OpN);
     if (!Op.isReg() || !Op.isDef() || !Op.isImplicit())
       continue;
-    RegisterRef RR = { Op.getReg(), Op.getSubReg() };
+    RegisterRef RR = makeRegRef(Op.getReg(), Op.getSubReg());
     if (!NeedsImplicit && !ImpDefs.count(RR))
       continue;
     if (DoneDefs.count(RR))
       continue;
     uint16_t Flags = NodeAttrs::None;
-    if (TOI.isPreserving(In, OpN))
+    if (TOI.isPreserving(In, OpN)) {
       Flags |= NodeAttrs::Preserving;
+      // If the def is preserving, check if it is also undefined.
+      if (isDefUndef(In, RR))
+        Flags |= NodeAttrs::Undef;
+    }
     if (TOI.isClobbering(In, OpN))
       Flags |= NodeAttrs::Clobbering;
     if (TOI.isFixedReg(In, OpN))
       Flags |= NodeAttrs::Fixed;
+    if (IsCall && Op.isDead())
+      Flags |= NodeAttrs::Dead;
     NodeAddr<DefNode*> DA = newDef(SA, Op, Flags);
     SA.Addr->addMember(DA, *this);
     DoneDefs.insert(RR);
@@ -1252,7 +1468,7 @@ void DataFlowGraph::buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In) {
     MachineOperand &Op = In.getOperand(OpN);
     if (!Op.isReg() || !Op.isUse())
       continue;
-    RegisterRef RR = { Op.getReg(), Op.getSubReg() };
+    RegisterRef RR = makeRegRef(Op.getReg(), Op.getSubReg());
     // Add implicit uses on return and call instructions, and on predicated
     // instructions regardless of whether or not they appear in the instruction
     // descriptor's list.
@@ -1261,6 +1477,8 @@ void DataFlowGraph::buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In) {
     if (Implicit && !TakeImplicit && !ImpUses.count(RR))
       continue;
     uint16_t Flags = NodeAttrs::None;
+    if (Op.isUndef())
+      Flags |= NodeAttrs::Undef;
     if (TOI.isFixedReg(In, OpN))
       Flags |= NodeAttrs::Fixed;
     NodeAddr<UseNode*> UA = newUse(SA, Op, Flags);
@@ -1272,20 +1490,20 @@ void DataFlowGraph::buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In) {
 // that block, and from all blocks dominated by it.
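A sketch of the isDefUndef decision above on a toy operand list (simplified stand-ins, not the MachineOperand API): a preserving def is marked Undef only when no live use operand aliases it.

#include <cassert>
#include <vector>

struct Op { unsigned Reg; bool IsDef; bool IsUndef; };

static bool isDefUndef(const std::vector<Op> &Ops, unsigned DefReg) {
  for (const Op &O : Ops)
    if (!O.IsDef && !O.IsUndef && O.Reg == DefReg) // a live use of DefReg
      return false;
  return true;
}

int main() {
  // r0 = add-if-false cond, r12, r13, r0<imp-use>: the imp-use keeps r0 alive.
  std::vector<Op> PredAdd = {{0, true, false}, {12, false, false},
                             {13, false, false}, {0, false, false}};
  assert(!isDefUndef(PredAdd, 0));
  // Without the implicit use, the preserving def would get the Undef flag.
  std::vector<Op> NoUse = {{0, true, false}, {12, false, false},
                           {13, false, false}};
  assert(isDefUndef(NoUse, 0));
}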
 void DataFlowGraph::buildBlockRefs(NodeAddr<BlockNode*> BA,
       BlockRefsMap &RefM) {
-  auto &Refs = RefM[BA.Id];
+  RegisterSet &Refs = RefM[BA.Id];
   MachineDomTreeNode *N = MDT.getNode(BA.Addr->getCode());
   assert(N);
   for (auto I : *N) {
     MachineBasicBlock *SB = I->getBlock();
-    auto SBA = Func.Addr->findBlock(SB, *this);
+    NodeAddr<BlockNode*> SBA = findBlock(SB);
     buildBlockRefs(SBA, RefM);
-    const auto &SRs = RefM[SBA.Id];
-    Refs.insert(SRs.begin(), SRs.end());
+    const RegisterSet &RefsS = RefM[SBA.Id];
+    Refs.insert(RefsS.begin(), RefsS.end());
   }
 
   for (NodeAddr<InstrNode*> IA : BA.Addr->members(*this))
     for (NodeAddr<RefNode*> RA : IA.Addr->members(*this))
-      Refs.insert(RA.Addr->getRegRef());
+      Refs.insert(RA.Addr->getRegRef(*this));
 }
 
 // Scan all defs in the block node BA and record in PhiM the locations of
@@ -1307,17 +1525,11 @@ void DataFlowGraph::recordDefsForDF(BlockRefsMap &PhiM, BlockRefsMap &RefM,
   // This is done to make sure that each defined reference gets only one
   // phi node, even if it is defined multiple times.
   RegisterSet Defs;
-  for (auto I : BA.Addr->members(*this)) {
-    assert(I.Addr->getType() == NodeAttrs::Code);
-    assert(I.Addr->getKind() == NodeAttrs::Phi ||
-           I.Addr->getKind() == NodeAttrs::Stmt);
-    NodeAddr<InstrNode*> IA = I;
+  for (NodeAddr<InstrNode*> IA : BA.Addr->members(*this))
     for (NodeAddr<RefNode*> RA : IA.Addr->members_if(IsDef, *this))
-      Defs.insert(RA.Addr->getRegRef());
-  }
+      Defs.insert(RA.Addr->getRegRef(*this));
 
-  // Finally, add the set of defs to each block in the iterated dominance
-  // frontier.
+  // Calculate the iterated dominance frontier of BB.
   const MachineDominanceFrontier::DomSetType &DF = DFLoc->second;
   SetVector<MachineBasicBlock*> IDF(DF.begin(), DF.end());
   for (unsigned i = 0; i < IDF.size(); ++i) {
@@ -1329,13 +1541,15 @@ void DataFlowGraph::recordDefsForDF(BlockRefsMap &PhiM, BlockRefsMap &RefM,
   // Get the register references that are reachable from this block.
   RegisterSet &Refs = RefM[BA.Id];
   for (auto DB : IDF) {
-    auto DBA = Func.Addr->findBlock(DB, *this);
-    const auto &Rs = RefM[DBA.Id];
-    Refs.insert(Rs.begin(), Rs.end());
+    NodeAddr<BlockNode*> DBA = findBlock(DB);
+    const RegisterSet &RefsD = RefM[DBA.Id];
+    Refs.insert(RefsD.begin(), RefsD.end());
   }
 
+  // Finally, add the set of defs to each block in the iterated dominance
+  // frontier.
   for (auto DB : IDF) {
-    auto DBA = Func.Addr->findBlock(DB, *this);
+    NodeAddr<BlockNode*> DBA = findBlock(DB);
     PhiM[DBA.Id].insert(Defs.begin(), Defs.end());
   }
 }
@@ -1355,19 +1569,19 @@ void DataFlowGraph::buildPhis(BlockRefsMap &PhiM, BlockRefsMap &RefM,
   // are not covered by another ref (i.e. maximal with respect to covering).
 
   auto MaxCoverIn = [this] (RegisterRef RR, RegisterSet &RRs) -> RegisterRef {
-    for (auto I : RRs)
-      if (I != RR && RAI.covers(I, RR))
+    for (RegisterRef I : RRs)
+      if (I != RR && RegisterAggr::isCoverOf(I, RR, TRI))
         RR = I;
     return RR;
   };
 
   RegisterSet MaxDF;
-  for (auto I : HasDF->second)
+  for (RegisterRef I : HasDF->second)
     MaxDF.insert(MaxCoverIn(I, HasDF->second));
 
   std::vector<RegisterRef> MaxRefs;
-  auto &RefB = RefM[BA.Id];
-  for (auto I : MaxDF)
+  RegisterSet &RefB = RefM[BA.Id];
+  for (RegisterRef I : MaxDF)
     MaxRefs.push_back(MaxCoverIn(I, RefB));
 
   // Now, for each R in MaxRefs, get the alias closure of R. If the closure
@@ -1382,19 +1596,17 @@ void DataFlowGraph::buildPhis(BlockRefsMap &PhiM, BlockRefsMap &RefM,
   auto Aliased = [this,&MaxRefs](RegisterRef RR,
                                  std::vector<unsigned> &Closure) -> bool {
-    for (auto I : Closure)
-      if (RAI.alias(RR, MaxRefs[I]))
+    for (unsigned I : Closure)
+      if (alias(RR, MaxRefs[I]))
         return true;
     return false;
   };
 
   // Prepare a list of NodeIds of the block's predecessors.
-  std::vector<NodeId> PredList;
+  NodeList Preds;
   const MachineBasicBlock *MBB = BA.Addr->getCode();
-  for (auto PB : MBB->predecessors()) {
-    auto B = Func.Addr->findBlock(PB, *this);
-    PredList.push_back(B.Id);
-  }
+  for (MachineBasicBlock *PB : MBB->predecessors())
+    Preds.push_back(findBlock(PB));
 
   while (!MaxRefs.empty()) {
     // Put the first element in the closure, and then add all subsequent
@@ -1418,8 +1630,7 @@ void DataFlowGraph::buildPhis(BlockRefsMap &PhiM, BlockRefsMap &RefM,
       PA.Addr->addMember(DA, *this);
     }
     // Add phi uses.
-    for (auto P : PredList) {
-      auto PBA = addr<BlockNode*>(P);
+    for (NodeAddr<BlockNode*> PBA : Preds) {
       for (unsigned X = 0; X != CS; ++X) {
         RegisterRef RR = MaxRefs[ClosureIdx[X]];
         NodeAddr<PhiUseNode*> PUA = newPhiUse(PA, RR, PBA);
@@ -1449,7 +1660,7 @@ void DataFlowGraph::removeUnusedPhis() {
   }
 
   static auto HasUsedDef = [](NodeList &Ms) -> bool {
-    for (auto M : Ms) {
+    for (NodeAddr<NodeBase*> M : Ms) {
       if (M.Addr->getKind() != NodeAttrs::Def)
         continue;
       NodeAddr<DefNode*> DA = M;
@@ -1493,25 +1704,25 @@ void DataFlowGraph::linkRefUp(NodeAddr<InstrNode*> IA, NodeAddr<T> TA,
       DefStack &DS) {
   if (DS.empty())
     return;
-  RegisterRef RR = TA.Addr->getRegRef();
+  RegisterRef RR = TA.Addr->getRegRef(*this);
   NodeAddr<T> TAP;
 
   // References from the def stack that have been examined so far.
-  RegisterSet Defs;
+  RegisterAggr Defs(TRI);
 
   for (auto I = DS.top(), E = DS.bottom(); I != E; I.down()) {
-    RegisterRef QR = I->Addr->getRegRef();
-    auto AliasQR = [QR,this] (RegisterRef RR) -> bool {
-      return RAI.alias(QR, RR);
-    };
-    bool PrecUp = RAI.covers(QR, RR);
+    RegisterRef QR = I->Addr->getRegRef(*this);
+
     // Skip all defs that are aliased to any of the defs that we have already
-    // seen. If we encounter a covering def, stop the stack traversal early.
-    if (std::any_of(Defs.begin(), Defs.end(), AliasQR)) {
-      if (PrecUp)
+    // seen. If this completes a cover of RR, stop the stack traversal.
+    bool Alias = Defs.hasAliasOf(QR);
+    bool Cover = Defs.insert(QR).hasCoverOf(RR);
+    if (Alias) {
+      if (Cover)
         break;
       continue;
    }
+
     // The reaching def.
     NodeAddr<DefNode*> RDA = *I;
@@ -1527,27 +1738,29 @@ void DataFlowGraph::linkRefUp(NodeAddr<InstrNode*> IA, NodeAddr<T> TA,
 
     // Create the link.
     TAP.Addr->linkToDef(TAP.Id, RDA);
 
-    if (PrecUp)
+    if (Cover)
       break;
-    Defs.insert(QR);
   }
 }
 
 // Create data-flow links for all reference nodes in the statement node SA.
 void DataFlowGraph::linkStmtRefs(DefStackMap &DefM, NodeAddr<StmtNode*> SA) {
+#ifndef NDEBUG
   RegisterSet Defs;
+#endif
 
   // Link all nodes (upwards in the data-flow) with their reaching defs.
   for (NodeAddr<RefNode*> RA : SA.Addr->members(*this)) {
     uint16_t Kind = RA.Addr->getKind();
     assert(Kind == NodeAttrs::Def || Kind == NodeAttrs::Use);
-    RegisterRef RR = RA.Addr->getRegRef();
-    // Do not process multiple defs of the same reference.
-    if (Kind == NodeAttrs::Def && Defs.count(RR))
-      continue;
+    RegisterRef RR = RA.Addr->getRegRef(*this);
+#ifndef NDEBUG
+    // Do not expect multiple defs of the same reference.
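The def-stack walk in linkRefUp above, as a standalone model: already-seen defs accumulate, an aliased def is skipped, and the walk stops once the accumulated set covers the reference. Lane masks are plain bits and the stack contents are invented.

#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  const uint32_t RefMask = 0b11;                    // reference RR: both lanes
  std::vector<uint32_t> Stack = {0b10, 0b01, 0b11}; // defs, top to bottom
  uint32_t Seen = 0;
  for (uint32_t QR : Stack) {
    bool Alias = (Seen & QR) != 0;                  // Defs.hasAliasOf(QR)
    Seen |= QR;                                     // Defs.insert(QR)
    bool Cover = (Seen & RefMask) == RefMask;       // ...hasCoverOf(RR)
    if (Alias) { if (Cover) break; continue; }
    std::cout << "link to def with mask " << QR << '\n';
    if (Cover)
      break;  // 0b10 then 0b01 complete a cover; 0b11 is never visited
  }
}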
+    assert(Kind != NodeAttrs::Def || !Defs.count(RR));
     Defs.insert(RR);
+#endif
 
-    auto F = DefM.find(RR);
+    auto F = DefM.find(RR.Reg);
     if (F == DefM.end())
       continue;
     DefStack &DS = F->second;
@@ -1584,7 +1797,7 @@ void DataFlowGraph::linkBlockRefs(DefStackMap &DefM, NodeAddr<BlockNode*> BA) {
   MachineDomTreeNode *N = MDT.getNode(BA.Addr->getCode());
   for (auto I : *N) {
     MachineBasicBlock *SB = I->getBlock();
-    auto SBA = Func.Addr->findBlock(SB, *this);
+    NodeAddr<BlockNode*> SBA = findBlock(SB);
     linkBlockRefs(DefM, SBA);
   }
 
@@ -1596,15 +1809,27 @@ void DataFlowGraph::linkBlockRefs(DefStackMap &DefM, NodeAddr<BlockNode*> BA) {
     NodeAddr<PhiUseNode*> PUA = NA;
     return PUA.Addr->getPredecessor() == BA.Id;
   };
+
+  RegisterSet EHLiveIns = getLandingPadLiveIns();
   MachineBasicBlock *MBB = BA.Addr->getCode();
-  for (auto SB : MBB->successors()) {
-    auto SBA = Func.Addr->findBlock(SB, *this);
+
+  for (MachineBasicBlock *SB : MBB->successors()) {
+    bool IsEHPad = SB->isEHPad();
+    NodeAddr<BlockNode*> SBA = findBlock(SB);
     for (NodeAddr<InstrNode*> IA : SBA.Addr->members_if(IsPhi, *this)) {
+      // Do not link phi uses for landing pad live-ins.
+      if (IsEHPad) {
+        // Find what register this phi is for.
+        NodeAddr<RefNode*> RA = IA.Addr->getFirstMember(*this);
+        assert(RA.Id != 0);
+        if (EHLiveIns.count(RA.Addr->getRegRef(*this)))
+          continue;
+      }
       // Go over each phi use associated with MBB, and link it.
       for (auto U : IA.Addr->members_if(IsUseForBA, *this)) {
         NodeAddr<PhiUseNode*> PUA = U;
-        RegisterRef RR = PUA.Addr->getRegRef();
-        linkRefUp<UseNode*>(IA, PUA, DefM[RR]);
+        RegisterRef RR = PUA.Addr->getRegRef(*this);
+        linkRefUp<UseNode*>(IA, PUA, DefM[RR.Reg]);
       }
     }
   }
diff --git a/contrib/llvm/lib/Target/Hexagon/RDFGraph.h b/contrib/llvm/lib/Target/Hexagon/RDFGraph.h
index 49b0537..49d78a8 100644
--- a/contrib/llvm/lib/Target/Hexagon/RDFGraph.h
+++ b/contrib/llvm/lib/Target/Hexagon/RDFGraph.h
@@ -1,4 +1,4 @@
-//===--- RDFGraph.h -------------------------------------------------------===//
+//===--- RDFGraph.h ---------------------------------------------*- C++ -*-===//
 //
 // The LLVM Compiler Infrastructure
 //
@@ -175,7 +175,29 @@
 // - Clobbering: applied only to defs, indicates that the value generated
 //   by this def is unspecified. A typical example would be volatile registers
 //   after function calls.
-//
+// - Fixed: the register in this def/use cannot be replaced with any other
+//   register. A typical case would be a parameter register to a call, or
+//   the register with the return value from a function.
+// - Undef: the register in this reference is assumed to have no pre-existing
+//   value, even if it appears to be reached by some def. This is typically
+//   used to prevent keeping registers artificially live in cases when they
+//   are defined via predicated instructions. For example:
+//     r0 = add-if-true cond, r10, r11                (1)
+//     r0 = add-if-false cond, r12, r13, r0<imp-use>  (2)
+//     ... = r0                                       (3)
+//   Before (1), r0 is not intended to be live, and the use of r0 in (3) is
+//   not meant to be reached by any def preceding (1). However, since the
+//   defs in (1) and (2) are both preserving, these properties alone would
+//   imply that the use in (3) may indeed be reached by some prior def.
+//   Adding the Undef flag to the def in (1) prevents that. The Undef flag
+//   may be applied to both defs and uses.
+// - Dead: applies only to defs. The value coming out of a "dead" def is
+//   assumed to be unused, even if the def appears to be reaching other defs
+//   or uses. The motivation for this flag comes from dead defs on function
+//   calls: there is no way to determine if such a def is dead without
+//   analyzing the target's ABI. Hence the graph should contain this info,
+//   as it is unavailable otherwise. On the other hand, a def without any
+//   uses on a typical instruction is not the intended target for this flag.
 //
 // *** Shadow references
 //
@@ -199,20 +221,34 @@
 // The statement s5 has two use nodes for t0: u7" and u9". The quotation
 // mark " indicates that the node is a shadow.
 //
-#ifndef RDF_GRAPH_H
-#define RDF_GRAPH_H
+#ifndef LLVM_LIB_TARGET_HEXAGON_RDFGRAPH_H
+#define LLVM_LIB_TARGET_HEXAGON_RDFGRAPH_H
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/MC/LaneBitmask.h"
 #include "llvm/Support/Allocator.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Timer.h"
-
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <cassert>
+#include <cstdint>
+#include <cstring>
 #include <functional>
 #include <map>
 #include <set>
+#include <unordered_map>
+#include <utility>
 #include <vector>
 
+// RDF uses uint32_t to refer to registers. This is to ensure that the type
+// size remains specific. In other places, registers are often stored using
+// unsigned.
+static_assert(sizeof(uint32_t) == sizeof(unsigned), "Those should be equal");
+
 namespace llvm {
+
   class MachineBasicBlock;
   class MachineFunction;
   class MachineInstr;
@@ -220,10 +256,13 @@ namespace llvm {
   class MachineDominanceFrontier;
   class MachineDominatorTree;
   class TargetInstrInfo;
-  class TargetRegisterInfo;
 
 namespace rdf {
+
   typedef uint32_t NodeId;
+  typedef uint32_t RegisterId;
+
+  struct DataFlowGraph;
 
   struct NodeAttrs {
     enum : uint16_t {
@@ -243,13 +282,15 @@ namespace rdf {
       Block  = 0x0005 << 2,  // 101
       Func   = 0x0006 << 2,  // 110
 
-      // Flags: 5 bits for now
-      FlagMask   = 0x001F << 5,
-      Shadow     = 0x0001 << 5,  // 00001, Has extra reaching defs.
-      Clobbering = 0x0002 << 5,  // 00010, Produces unspecified values.
-      PhiRef     = 0x0004 << 5,  // 00100, Member of PhiNode.
-      Preserving = 0x0008 << 5,  // 01000, Def can keep original bits.
-      Fixed      = 0x0010 << 5,  // 10000, Fixed register.
+      // Flags: 7 bits for now
+      FlagMask   = 0x007F << 5,
+      Shadow     = 0x0001 << 5,  // 0000001, Has extra reaching defs.
+      Clobbering = 0x0002 << 5,  // 0000010, Produces unspecified values.
+      PhiRef     = 0x0004 << 5,  // 0000100, Member of PhiNode.
+      Preserving = 0x0008 << 5,  // 0001000, Def can keep original bits.
+      Fixed      = 0x0010 << 5,  // 0010000, Fixed register.
+      Undef      = 0x0020 << 5,  // 0100000, Has no pre-existing value.
+      Dead       = 0x0040 << 5,  // 1000000, Does not define a value.
     };
 
     static uint16_t type(uint16_t T)  { return T & TypeMask; }
@@ -259,9 +300,11 @@ namespace rdf {
     static uint16_t set_type(uint16_t A, uint16_t T) {
       return (A & ~TypeMask) | T;
     }
+
     static uint16_t set_kind(uint16_t A, uint16_t K) {
       return (A & ~KindMask) | K;
    }
+
     static uint16_t set_flags(uint16_t A, uint16_t F) {
       return (A & ~FlagMask) | F;
     }
@@ -292,10 +335,13 @@ namespace rdf {
   };
 
   template <typename T> struct NodeAddr {
-    NodeAddr() : Addr(nullptr), Id(0) {}
+    NodeAddr() : Addr(nullptr) {}
     NodeAddr(T A, NodeId I) : Addr(A), Id(I) {}
-    NodeAddr(const NodeAddr&) = default;
-    NodeAddr &operator= (const NodeAddr&) = default;
+
+    // Type cast (casting constructor). The reason for having this class
+    // instead of std::pair.
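How the widened flag field above packs into the 16-bit attribute word: 2 bits of type, 3 bits of kind, and now 7 bits of flags starting at bit 5. A minimal check of the arithmetic, assuming the usual values Ref = 0x0002 and Def = 0x0001 << 2 from the unchanged part of the enum.

#include <cassert>
#include <cstdint>

int main() {
  const uint16_t TypeMask = 0x0003, KindMask = 0x0007 << 2;
  const uint16_t FlagMask = 0x007F << 5;
  const uint16_t Ref = 0x0002, Def = 0x0001 << 2;      // assumed enum values
  const uint16_t Preserving = 0x0008 << 5, Undef = 0x0020 << 5;

  uint16_t Attrs = Ref | Def | Preserving | Undef;     // a preserving, undef def
  assert((Attrs & TypeMask) == Ref);
  assert((Attrs & KindMask) == Def);
  assert((Attrs & FlagMask) == (Preserving | Undef));
  // IsPreservingDef from the patch: preserving but not undef - false here.
  bool IsPreservingDef = (Attrs & Preserving) && !(Attrs & Undef);
  assert(!IsPreservingDef);
}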
+    template <typename S> NodeAddr(const NodeAddr<S> &NA)
+      : Addr(static_cast<T>(NA.Addr)), Id(NA.Id) {}
 
     bool operator== (const NodeAddr<T> &NA) const {
       assert((Addr == NA.Addr) == (Id == NA.Id));
@@ -304,13 +350,9 @@ namespace rdf {
     bool operator!= (const NodeAddr<T> &NA) const {
       return !operator==(NA);
     }
-    // Type cast (casting constructor). The reason for having this class
-    // instead of std::pair.
-    template <typename S> NodeAddr(const NodeAddr<S> &NA)
-      : Addr(static_cast<T>(NA.Addr)), Id(NA.Id) {}
 
     T Addr;
-    NodeId Id;
+    NodeId Id = 0;
   };
 
   struct NodeBase;
@@ -334,17 +376,20 @@ namespace rdf {
   struct NodeAllocator {
     // Amount of storage for a single node.
     enum { NodeMemSize = 32 };
+
     NodeAllocator(uint32_t NPB = 4096)
         : NodesPerBlock(NPB), BitsPerIndex(Log2_32(NPB)),
-          IndexMask((1 << BitsPerIndex)-1), ActiveEnd(nullptr) {
+          IndexMask((1 << BitsPerIndex)-1) {
       assert(isPowerOf2_32(NPB));
     }
+
     NodeBase *ptr(NodeId N) const {
       uint32_t N1 = N-1;
       uint32_t BlockN = N1 >> BitsPerIndex;
       uint32_t Offset = (N1 & IndexMask) * NodeMemSize;
       return reinterpret_cast<NodeBase*>(Blocks[BlockN]+Offset);
     }
+
     NodeId id(const NodeBase *P) const;
     NodeAddr<NodeBase*> New();
     void clear();
@@ -352,6 +397,7 @@ namespace rdf {
   private:
     void startNewBlock();
     bool needNewBlock();
+
     uint32_t makeId(uint32_t Block, uint32_t Index) const {
       // Add 1 to the id, to avoid the id of 0, which is treated as "null".
       return ((Block << BitsPerIndex) | Index) + 1;
@@ -360,46 +406,37 @@ namespace rdf {
     const uint32_t NodesPerBlock;
     const uint32_t BitsPerIndex;
     const uint32_t IndexMask;
-    char *ActiveEnd;
+    char *ActiveEnd = nullptr;
     std::vector<char*> Blocks;
     typedef BumpPtrAllocatorImpl<MallocAllocator, 65536> AllocatorTy;
     AllocatorTy MemPool;
   };
 
   struct RegisterRef {
-    unsigned Reg, Sub;
+    RegisterId Reg;
+    LaneBitmask Mask;
 
-    // No non-trivial constructors, since this will be a member of a union.
-    RegisterRef() = default;
-    RegisterRef(const RegisterRef &RR) = default;
-    RegisterRef &operator= (const RegisterRef &RR) = default;
+    RegisterRef() : RegisterRef(0) {}
+    explicit RegisterRef(RegisterId R, LaneBitmask M = LaneBitmask::getAll())
+      : Reg(R), Mask(R != 0 ? M : LaneBitmask::getNone()) {}
+
+    operator bool() const { return Reg != 0 && Mask.any(); }
 
     bool operator== (const RegisterRef &RR) const {
-      return Reg == RR.Reg && Sub == RR.Sub;
+      return Reg == RR.Reg && Mask == RR.Mask;
     }
     bool operator!= (const RegisterRef &RR) const {
       return !operator==(RR);
     }
     bool operator< (const RegisterRef &RR) const {
-      return Reg < RR.Reg || (Reg == RR.Reg && Sub < RR.Sub);
+      return Reg < RR.Reg || (Reg == RR.Reg && Mask < RR.Mask);
    }
   };
   typedef std::set<RegisterRef> RegisterSet;
 
-  struct RegisterAliasInfo {
-    RegisterAliasInfo(const TargetRegisterInfo &tri) : TRI(tri) {}
-    virtual ~RegisterAliasInfo() {}
-
-    virtual std::vector<RegisterRef> getAliasSet(RegisterRef RR) const;
-    virtual bool alias(RegisterRef RA, RegisterRef RB) const;
-    virtual bool covers(RegisterRef RA, RegisterRef RB) const;
-    virtual bool covers(const RegisterSet &RRs, RegisterRef RR) const;
-
-    const TargetRegisterInfo &TRI;
-  };
-
   struct TargetOperandInfo {
     TargetOperandInfo(const TargetInstrInfo &tii) : TII(tii) {}
-    virtual ~TargetOperandInfo() {}
+    virtual ~TargetOperandInfo() = default;
+
     virtual bool isPreserving(const MachineInstr &In, unsigned OpNum) const;
     virtual bool isClobbering(const MachineInstr &In, unsigned OpNum) const;
     virtual bool isFixedReg(const MachineInstr &In, unsigned OpNum) const;
@@ -407,13 +444,115 @@ namespace rdf {
     const TargetInstrInfo &TII;
   };
 
+  // Packed register reference. Only used for storage.
+  struct PackedRegisterRef {
+    RegisterId Reg;
+    uint32_t MaskId;
+  };
 
-  struct DataFlowGraph;
+  // Template class for a map translating uint32_t into arbitrary types.
+  // The map will act like an indexed set: upon insertion of a new object,
+  // it will automatically assign a new index to it. Index of 0 is treated
+  // as invalid and is never allocated.
+  template <typename T, unsigned N = 32>
+  struct IndexedSet {
+    IndexedSet() : Map() { Map.reserve(N); }
+
+    T get(uint32_t Idx) const {
+      // Index Idx corresponds to Map[Idx-1].
+      assert(Idx != 0 && !Map.empty() && Idx-1 < Map.size());
+      return Map[Idx-1];
+    }
+
+    uint32_t insert(T Val) {
+      // Linear search.
+      auto F = llvm::find(Map, Val);
+      if (F != Map.end())
+        return F - Map.begin() + 1;
+      Map.push_back(Val);
+      return Map.size();  // Return actual_index + 1.
+    }
+
+    uint32_t find(T Val) const {
+      auto F = llvm::find(Map, Val);
+      assert(F != Map.end());
+      return F - Map.begin();
+    }
+
+  private:
+    std::vector<T> Map;
+  };
+
+  struct LaneMaskIndex : private IndexedSet<LaneBitmask> {
+    LaneMaskIndex() = default;
+
+    LaneBitmask getLaneMaskForIndex(uint32_t K) const {
+      return K == 0 ? LaneBitmask::getAll() : get(K);
+    }
+    uint32_t getIndexForLaneMask(LaneBitmask LM) {
+      assert(LM.any());
+      return LM.all() ? 0 : insert(LM);
+    }
+    uint32_t getIndexForLaneMask(LaneBitmask LM) const {
+      assert(LM.any());
+      return LM.all() ? 0 : find(LM);
+    }
+
+    PackedRegisterRef pack(RegisterRef RR) {
+      return { RR.Reg, getIndexForLaneMask(RR.Mask) };
+    }
+    PackedRegisterRef pack(RegisterRef RR) const {
+      return { RR.Reg, getIndexForLaneMask(RR.Mask) };
+    }
+
+    RegisterRef unpack(PackedRegisterRef PR) const {
+      return RegisterRef(PR.Reg, getLaneMaskForIndex(PR.MaskId));
+    }
+  };
+
+  struct RegisterAggr {
+    RegisterAggr(const TargetRegisterInfo &tri)
+      : ExpAliasUnits(tri.getNumRegUnits()), CheckUnits(false), TRI(tri) {}
+    RegisterAggr(const RegisterAggr &RG) = default;
+
+    bool empty() const { return Masks.empty(); }
+    bool hasAliasOf(RegisterRef RR) const;
+    bool hasCoverOf(RegisterRef RR) const;
+    static bool isCoverOf(RegisterRef RA, RegisterRef RB,
+                          const TargetRegisterInfo &TRI) {
+      return RegisterAggr(TRI).insert(RA).hasCoverOf(RB);
+    }
+
+    RegisterAggr &insert(RegisterRef RR);
+    RegisterAggr &insert(const RegisterAggr &RG);
+    RegisterAggr &clear(RegisterRef RR);
+    RegisterAggr &clear(const RegisterAggr &RG);
+
+    RegisterRef clearIn(RegisterRef RR) const;
+
+    void print(raw_ostream &OS) const;
+
+  private:
+    typedef std::unordered_map<RegisterId, LaneBitmask> MapType;
+
+  public:
+    typedef MapType::const_iterator iterator;
+    iterator begin() const { return Masks.begin(); }
+    iterator end() const { return Masks.end(); }
+    RegisterRef normalize(RegisterRef RR) const;
+
+  private:
+    MapType Masks;
+    BitVector ExpAliasUnits;  // Register units for explicit aliases.
+    bool CheckUnits;
+    const TargetRegisterInfo &TRI;
+  };
 
   struct NodeBase {
   public:
     // Make sure this is a POD.
     NodeBase() = default;
+
     uint16_t getType() const  { return NodeAttrs::type(Attrs); }
     uint16_t getKind() const  { return NodeAttrs::kind(Attrs); }
     uint16_t getFlags() const { return NodeAttrs::flags(Attrs); }
@@ -454,7 +593,7 @@ namespace rdf {
     };
     union {
       MachineOperand *Op;    // Non-phi refs point to a machine operand.
-      RegisterRef RR;        // Phi refs store register info directly.
+      PackedRegisterRef PR;  // Phi refs store register info directly.
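The IndexedSet/LaneMaskIndex trick above, reduced to plain C++: index 0 is reserved for the all-lanes mask, so most references pack with MaskId == 0 and unpack back to a full mask without ever touching the table.

#include <cassert>
#include <cstdint>
#include <vector>

struct MiniIndex {
  std::vector<uint32_t> Map;                 // MaskId-1 -> stored mask
  uint32_t insert(uint32_t M) {
    for (size_t i = 0; i < Map.size(); ++i)  // linear search, as in the patch
      if (Map[i] == M)
        return i + 1;
    Map.push_back(M);
    return Map.size();                       // actual index + 1
  }
  uint32_t pack(uint32_t Mask) { return Mask == ~0u ? 0 : insert(Mask); }
  uint32_t unpack(uint32_t Id) const { return Id == 0 ? ~0u : Map[Id - 1]; }
};

int main() {
  MiniIndex LMI;
  uint32_t Full = LMI.pack(~0u);    // full mask: no table entry needed
  uint32_t Low = LMI.pack(0x0003);  // partial mask: gets index 1
  assert(Full == 0 && Low == 1);
  assert(LMI.unpack(Full) == ~0u && LMI.unpack(Low) == 0x0003);
  assert(LMI.pack(0x0003) == Low);  // re-inserting finds the same slot
}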
     };
@@ -475,29 +614,36 @@ namespace rdf {
 
   struct RefNode : public NodeBase {
     RefNode() = default;
-    RegisterRef getRegRef() const;
+
+    RegisterRef getRegRef(const DataFlowGraph &G) const;
+
     MachineOperand &getOp() {
       assert(!(getFlags() & NodeAttrs::PhiRef));
       return *Ref.Op;
     }
-    void setRegRef(RegisterRef RR);
-    void setRegRef(MachineOperand *Op);
+
+    void setRegRef(RegisterRef RR, DataFlowGraph &G);
+    void setRegRef(MachineOperand *Op, DataFlowGraph &G);
+
     NodeId getReachingDef() const {
       return Ref.RD;
     }
     void setReachingDef(NodeId RD) {
       Ref.RD = RD;
     }
+
     NodeId getSibling() const {
       return Ref.Sib;
     }
     void setSibling(NodeId Sib) {
       Ref.Sib = Sib;
     }
+
     bool isUse() const {
       assert(getType() == NodeAttrs::Ref);
       return getKind() == NodeAttrs::Use;
     }
+
     bool isDef() const {
       assert(getType() == NodeAttrs::Ref);
       return getKind() == NodeAttrs::Def;
@@ -581,6 +727,7 @@ namespace rdf {
     MachineBasicBlock *getCode() const {
       return CodeNode::getCode<MachineBasicBlock*>();
     }
+
     void addPhi(NodeAddr<PhiNode*> PA, const DataFlowGraph &G);
   };
 
@@ -588,6 +735,7 @@ namespace rdf {
     MachineFunction *getCode() const {
       return CodeNode::getCode<MachineFunction*>();
     }
+
     NodeAddr<BlockNode*> findBlock(const MachineBasicBlock *BB,
         const DataFlowGraph &G) const;
     NodeAddr<BlockNode*> getEntryBlock(const DataFlowGraph &G);
@@ -596,50 +744,39 @@ namespace rdf {
   struct DataFlowGraph {
     DataFlowGraph(MachineFunction &mf, const TargetInstrInfo &tii,
         const TargetRegisterInfo &tri, const MachineDominatorTree &mdt,
-        const MachineDominanceFrontier &mdf, const RegisterAliasInfo &rai,
-        const TargetOperandInfo &toi);
+        const MachineDominanceFrontier &mdf, const TargetOperandInfo &toi);
 
     NodeBase *ptr(NodeId N) const;
     template <typename T> T ptr(NodeId N) const {
       return static_cast<T>(ptr(N));
     }
+
     NodeId id(const NodeBase *P) const;
 
     template <typename T> NodeAddr<T> addr(NodeId N) const {
       return { ptr<T>(N), N };
     }
 
-    NodeAddr<FuncNode*> getFunc() const {
-      return Func;
-    }
-    MachineFunction &getMF() const {
-      return MF;
-    }
-    const TargetInstrInfo &getTII() const {
-      return TII;
-    }
-    const TargetRegisterInfo &getTRI() const {
-      return TRI;
-    }
-    const MachineDominatorTree &getDT() const {
-      return MDT;
-    }
-    const MachineDominanceFrontier &getDF() const {
-      return MDF;
-    }
-    const RegisterAliasInfo &getRAI() const {
-      return RAI;
-    }
+    NodeAddr<FuncNode*> getFunc() const { return Func; }
+    MachineFunction &getMF() const { return MF; }
+    const TargetInstrInfo &getTII() const { return TII; }
+    const TargetRegisterInfo &getTRI() const { return TRI; }
+    const MachineDominatorTree &getDT() const { return MDT; }
+    const MachineDominanceFrontier &getDF() const { return MDF; }
 
     struct DefStack {
       DefStack() = default;
+
       bool empty() const { return Stack.empty() || top() == bottom(); }
+
     private:
       typedef NodeAddr<DefNode*> value_type;
       struct Iterator {
         typedef DefStack::value_type value_type;
+
         Iterator &up() { Pos = DS.nextUp(Pos); return *this; }
         Iterator &down() { Pos = DS.nextDown(Pos); return *this; }
+
         value_type operator*() const {
           assert(Pos >= 1);
           return DS.Stack[Pos-1];
@@ -650,14 +787,17 @@ namespace rdf {
         }
         bool operator==(const Iterator &It) const { return Pos == It.Pos; }
         bool operator!=(const Iterator &It) const { return Pos != It.Pos; }
+
       private:
         Iterator(const DefStack &S, bool Top);
+
         // Pos-1 is the index in the StorageType object that corresponds to
         // the top of the DefStack.
         const DefStack &DS;
         unsigned Pos;
         friend struct DefStack;
       };
+
     public:
       typedef Iterator iterator;
       iterator top() const { return Iterator(*this, true); }
@@ -668,24 +808,37 @@ namespace rdf {
       void pop();
       void start_block(NodeId N);
       void clear_block(NodeId N);
+
     private:
       friend struct Iterator;
       typedef std::vector<value_type> StorageType;
+
       bool isDelimiter(const StorageType::value_type &P, NodeId N = 0) const {
         return (P.Addr == nullptr) && (N == 0 || P.Id == N);
       }
+
       unsigned nextUp(unsigned P) const;
       unsigned nextDown(unsigned P) const;
+
       StorageType Stack;
     };
 
-    typedef std::map<RegisterRef,DefStack> DefStackMap;
+    // Make this std::unordered_map for speed of accessing elements.
+    // Map: Register (physical or virtual) -> DefStack
+    typedef std::unordered_map<RegisterId,DefStack> DefStackMap;
 
     void build(unsigned Options = BuildOptions::None);
     void pushDefs(NodeAddr<InstrNode*> IA, DefStackMap &DM);
     void markBlock(NodeId B, DefStackMap &DefM);
     void releaseBlock(NodeId B, DefStackMap &DefM);
 
+    PackedRegisterRef pack(RegisterRef RR)       { return LMI.pack(RR); }
+    PackedRegisterRef pack(RegisterRef RR) const { return LMI.pack(RR); }
+    RegisterRef unpack(PackedRegisterRef PR) const { return LMI.unpack(PR); }
+    RegisterRef makeRegRef(unsigned Reg, unsigned Sub) const;
+    RegisterRef normalizeRef(RegisterRef RR) const;
+    RegisterRef restrictRef(RegisterRef AR, RegisterRef BR) const;
+
     NodeAddr<RefNode*> getNextRelated(NodeAddr<InstrNode*> IA,
         NodeAddr<RefNode*> RA) const;
     NodeAddr<RefNode*> getNextImp(NodeAddr<InstrNode*> IA,
@@ -705,6 +858,7 @@ namespace rdf {
       if (RemoveFromOwner)
         removeFromOwner(UA);
     }
+
     void unlinkDef(NodeAddr<DefNode*> DA, bool RemoveFromOwner) {
       unlinkDefDF(DA);
       if (RemoveFromOwner)
@@ -717,27 +871,42 @@ namespace rdf {
       return BA.Addr->getType() == NodeAttrs::Ref &&
             BA.Addr->getKind() == Kind;
     }
+
     template <uint16_t Kind>
     static bool IsCode(const NodeAddr<NodeBase*> BA) {
       return BA.Addr->getType() == NodeAttrs::Code &&
             BA.Addr->getKind() == Kind;
    }
+
     static bool IsDef(const NodeAddr<NodeBase*> BA) {
       return BA.Addr->getType() == NodeAttrs::Ref &&
             BA.Addr->getKind() == NodeAttrs::Def;
     }
+
     static bool IsUse(const NodeAddr<NodeBase*> BA) {
       return BA.Addr->getType() == NodeAttrs::Ref &&
             BA.Addr->getKind() == NodeAttrs::Use;
     }
+
     static bool IsPhi(const NodeAddr<NodeBase*> BA) {
       return BA.Addr->getType() == NodeAttrs::Code &&
             BA.Addr->getKind() == NodeAttrs::Phi;
     }
 
+    static bool IsPreservingDef(const NodeAddr<DefNode*> DA) {
+      uint16_t Flags = DA.Addr->getFlags();
+      return (Flags & NodeAttrs::Preserving) && !(Flags & NodeAttrs::Undef);
+    }
+
+    // Register aliasing.
+    bool alias(RegisterRef RA, RegisterRef RB) const;
+
   private:
     void reset();
 
+    RegisterSet getAliasSet(RegisterId Reg) const;
+    RegisterSet getLandingPadLiveIns() const;
+
     NodeAddr<NodeBase*> newNode(uint16_t Attrs);
     NodeAddr<NodeBase*> cloneNode(const NodeAddr<NodeBase*> B);
     NodeAddr<UseNode*> newUse(NodeAddr<InstrNode*> Owner,
@@ -778,21 +947,28 @@ namespace rdf {
     void unlinkUseDF(NodeAddr<UseNode*> UA);
     void unlinkDefDF(NodeAddr<DefNode*> DA);
+
     void removeFromOwner(NodeAddr<RefNode*> RA) {
       NodeAddr<InstrNode*> IA = RA.Addr->getOwner(*this);
       IA.Addr->removeMember(RA, *this);
     }
 
-    TimerGroup TimeG;
+    NodeAddr<BlockNode*> findBlock(MachineBasicBlock *BB) {
+      return BlockNodes[BB];
+    }
+
     NodeAddr<FuncNode*> Func;
     NodeAllocator Memory;
+    // Local map: MachineBasicBlock -> NodeAddr<BlockNode*>
+    std::map<MachineBasicBlock*,NodeAddr<BlockNode*>> BlockNodes;
+    // Lane mask map.
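A standalone model of the DefStack above: block delimiters are null entries carrying the block id, so one stack can hold defs from a whole dominator-tree path, and clear_block() pops back to the matching delimiter. Values are invented.

#include <cassert>
#include <cstdint>
#include <vector>

struct Entry { const void *Addr; uint32_t Id; }; // Addr == nullptr -> delimiter

int main() {
  int D1, D2;                       // stand-ins for def nodes
  std::vector<Entry> Stack;
  Stack.push_back({nullptr, 10});   // start_block(10)
  Stack.push_back({&D1, 101});      // def pushed in block 10
  Stack.push_back({nullptr, 20});   // start_block(20), dominated by 10
  Stack.push_back({&D2, 201});

  // Walking down from the top skips delimiters, visiting D2 then D1.
  int Seen = 0;
  for (size_t P = Stack.size(); P > 0; --P)
    if (Stack[P-1].Addr != nullptr)
      ++Seen;
  assert(Seen == 2);

  // clear_block(20): pop until the delimiter for block 20 is removed.
  while (!(Stack.back().Addr == nullptr && Stack.back().Id == 20))
    Stack.pop_back();
  Stack.pop_back();
  assert(Stack.size() == 2);        // block 10 and its def remain
}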
+    LaneMaskIndex LMI;
 
     MachineFunction &MF;
     const TargetInstrInfo &TII;
     const TargetRegisterInfo &TRI;
     const MachineDominatorTree &MDT;
     const MachineDominanceFrontier &MDF;
-    const RegisterAliasInfo &RAI;
     const TargetOperandInfo &TOI;
   };  // struct DataFlowGraph
 
@@ -806,7 +982,7 @@ namespace rdf {
     while (NA.Addr != this) {
       if (NA.Addr->getType() == NodeAttrs::Ref) {
         NodeAddr<RefNode*> RA = NA;
-        if (RA.Addr->getRegRef() == RR && P(NA))
+        if (RA.Addr->getRegRef(G) == RR && P(NA))
           return NA;
         if (NextOnly)
           break;
@@ -837,6 +1013,12 @@ namespace rdf {
     return MM;
   }
 
+  // Optionally print the lane mask, if it is not ~0.
+  struct PrintLaneMaskOpt {
+    PrintLaneMaskOpt(LaneBitmask M) : Mask(M) {}
+    LaneBitmask Mask;
+  };
+  raw_ostream &operator<< (raw_ostream &OS, const PrintLaneMaskOpt &P);
 
   template <typename T> struct Print;
   template <typename T>
@@ -854,7 +1036,9 @@ namespace rdf {
     PrintNode(const NodeAddr<T> &x, const DataFlowGraph &g)
       : Print<NodeAddr<T>>(x, g) {}
   };
-} // namespace rdf
-} // namespace llvm
 
-#endif // RDF_GRAPH_H
+} // end namespace rdf
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_HEXAGON_RDFGRAPH_H
diff --git a/contrib/llvm/lib/Target/Hexagon/RDFLiveness.cpp b/contrib/llvm/lib/Target/Hexagon/RDFLiveness.cpp
index 641f014..e74c4bf 100644
--- a/contrib/llvm/lib/Target/Hexagon/RDFLiveness.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/RDFLiveness.cpp
@@ -41,10 +41,10 @@ namespace rdf {
   template<>
   raw_ostream &operator<< (raw_ostream &OS, const Print<Liveness::RefMap> &P) {
     OS << '{';
-    for (auto I : P.Obj) {
-      OS << ' ' << Print<RegisterRef>(I.first, P.G) << '{';
+    for (auto &I : P.Obj) {
+      OS << ' ' << PrintReg(I.first, &P.G.getTRI()) << '{';
       for (auto J = I.second.begin(), E = I.second.end(); J != E; ) {
-        OS << Print<NodeId>(*J, P.G);
+        OS << Print<NodeId>(J->first, P.G) << PrintLaneMaskOpt(J->second);
         if (++J != E)
           OS << ',';
       }
@@ -85,10 +85,19 @@ namespace rdf {
 // the data-flow.
 
 NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
-      NodeAddr<RefNode*> RefA, bool FullChain, const RegisterSet &DefRRs) {
+      NodeAddr<RefNode*> RefA, bool FullChain, const RegisterAggr &DefRRs) {
+  NodeList RDefs; // Return value.
   SetVector<NodeId> DefQ;
   SetVector<NodeId> Owners;
 
+  // Dead defs will be treated as if they were live, since they are actually
+  // on the data-flow path. They cannot be ignored because even though they
+  // do not generate meaningful values, they still modify registers.
+
+  // If the reference is undefined, there is nothing to do.
+  if (RefA.Addr->getFlags() & NodeAttrs::Undef)
+    return RDefs;
+
   // The initial queue should not have reaching defs for shadows. The
   // whole point of a shadow is that it will have a reaching def that
   // is not aliased to the reaching defs of the related shadows.
@@ -108,26 +117,24 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
     if (TA.Addr->getFlags() & NodeAttrs::PhiRef)
       continue;
     // Stop at the covering/overwriting def of the initial register reference.
-    RegisterRef RR = TA.Addr->getRegRef();
-    if (RAI.covers(RR, RefRR)) {
-      uint16_t Flags = TA.Addr->getFlags();
-      if (!(Flags & NodeAttrs::Preserving))
+    RegisterRef RR = TA.Addr->getRegRef(DFG);
+    if (!DFG.IsPreservingDef(TA))
+      if (RegisterAggr::isCoverOf(RR, RefRR, TRI))
         continue;
-    }
     // Get the next level of reaching defs. This will include multiple
     // reaching defs for shadows.
     for (auto S : DFG.getRelatedRefs(TA.Addr->getOwner(DFG), TA))
-      if (auto RD = NodeAddr<RefNode*>(S).Addr->getReachingDef())
+      if (NodeId RD = NodeAddr<RefNode*>(S).Addr->getReachingDef())
         DefQ.insert(RD);
   }
 
   // Remove all non-phi defs that are not aliased to RefRR, and collect
   // the owners of the remaining defs.
   SetVector<NodeId> Defs;
-  for (auto N : DefQ) {
+  for (NodeId N : DefQ) {
     auto TA = DFG.addr<DefNode*>(N);
     bool IsPhi = TA.Addr->getFlags() & NodeAttrs::PhiRef;
-    if (!IsPhi && !RAI.alias(RefRR, TA.Addr->getRegRef()))
+    if (!IsPhi && !DFG.alias(RefRR, TA.Addr->getRegRef(DFG)))
       continue;
     Defs.insert(TA.Id);
     Owners.insert(TA.Addr->getOwner(DFG).Id);
@@ -156,8 +163,8 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
     if (StmtA) {
       if (!StmtB)   // OB is a phi and phis dominate statements.
         return true;
-      auto CA = NodeAddr<StmtNode*>(OA).Addr->getCode();
-      auto CB = NodeAddr<StmtNode*>(OB).Addr->getCode();
+      MachineInstr *CA = NodeAddr<StmtNode*>(OA).Addr->getCode();
+      MachineInstr *CB = NodeAddr<StmtNode*>(OB).Addr->getCode();
       // The order must be linear, so tie-break such equalities.
       if (CA == CB)
         return A < B;
@@ -189,21 +196,20 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
   //   covered if we added A first, and A would be covered
   //   if we added B first.
 
-  NodeList RDefs;
-  RegisterSet RRs = DefRRs;
+  RegisterAggr RRs(DefRRs);
 
   auto DefInSet = [&Defs] (NodeAddr<RefNode*> TA) -> bool {
     return TA.Addr->getKind() == NodeAttrs::Def &&
            Defs.count(TA.Id);
   };
-  for (auto T : Tmp) {
-    if (!FullChain && RAI.covers(RRs, RefRR))
+  for (NodeId T : Tmp) {
+    if (!FullChain && RRs.hasCoverOf(RefRR))
       break;
     auto TA = DFG.addr<InstrNode*>(T);
     bool IsPhi = DFG.IsCode<NodeAttrs::Phi>(TA);
     NodeList Ds;
     for (NodeAddr<DefNode*> DA : TA.Addr->members_if(DefInSet, DFG)) {
-      auto QR = DA.Addr->getRegRef();
+      RegisterRef QR = DA.Addr->getRegRef(DFG);
       // Add phi defs even if they are covered by subsequent defs. This is
       // for cases where the reached use is not covered by any of the defs
       // encountered so far: the phi def is needed to expose the liveness
       // of that use to the entry of the block.
       // Example:
       //   phi d1<R3>(,d2,), ...  Phi def d1 is covered by d2.
       //   d2<R3>(d1,,u3), ...
       //   ..., u3<D1>(d2)        This use needs to be live on entry.
-      if (FullChain || IsPhi || !RAI.covers(RRs, QR))
+      if (FullChain || IsPhi || !RRs.hasCoverOf(QR))
         Ds.push_back(DA);
     }
     RDefs.insert(RDefs.end(), Ds.begin(), Ds.end());
@@ -221,19 +227,17 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
       // defs to actually define a register.
       uint16_t Flags = DA.Addr->getFlags();
       if (!FullChain || !(Flags & NodeAttrs::PhiRef))
-        if (!(Flags & NodeAttrs::Preserving))
-          RRs.insert(DA.Addr->getRegRef());
+        if (!(Flags & NodeAttrs::Preserving)) // Don't care about Undef here.
+          RRs.insert(DA.Addr->getRegRef(DFG));
     }
   }
 
-  return RDefs;
-}
-
-
-static const RegisterSet NoRegs;
+  auto DeadP = [](const NodeAddr<DefNode*> DA) -> bool {
+    return DA.Addr->getFlags() & NodeAttrs::Dead;
+  };
+  RDefs.resize(std::distance(RDefs.begin(), remove_if(RDefs, DeadP)));
 
-NodeList Liveness::getAllReachingDefs(NodeAddr<RefNode*> RefA) {
-  return getAllReachingDefs(RefA.Addr->getRegRef(), RefA, false, NoRegs);
+  return RDefs;
 }
 
 
@@ -241,20 +245,20 @@ NodeSet Liveness::getAllReachingDefsRec(RegisterRef RefRR,
       NodeAddr<RefNode*> RefA, NodeSet &Visited, const NodeSet &Defs) {
   // Collect all defined registers. Do not consider phis to be defining
   // anything, only collect "real" definitions.
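The termination rule in getAllReachingDefs above, as plain data: walk the chain of reaching defs and stop once a non-preserving def covers the queried mask. The chain and masks are invented inputs.

#include <cstdint>
#include <iostream>
#include <vector>

struct Def { uint32_t Mask; bool Preserving; };

int main() {
  const uint32_t RefMask = 0b11;
  // Reaching-def chain from nearest to furthest.
  std::vector<Def> Chain = {{0b01, true}, {0b01, false}, {0b11, false}};
  for (const Def &D : Chain) {
    std::cout << "visit def with mask " << D.Mask << '\n';
    bool Covers = (D.Mask & RefMask) == RefMask;
    if (!D.Preserving && Covers)  // covering, non-preserving def ends the walk
      break;                      // here only the {0b11, false} def stops it
  }
}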
-  RegisterSet DefRRs;
-  for (const auto D : Defs) {
+  RegisterAggr DefRRs(TRI);
+  for (NodeId D : Defs) {
     const auto DA = DFG.addr<const DefNode*>(D);
     if (!(DA.Addr->getFlags() & NodeAttrs::PhiRef))
-      DefRRs.insert(DA.Addr->getRegRef());
+      DefRRs.insert(DA.Addr->getRegRef(DFG));
   }
 
-  auto RDs = getAllReachingDefs(RefRR, RefA, true, DefRRs);
+  NodeList RDs = getAllReachingDefs(RefRR, RefA, true, DefRRs);
   if (RDs.empty())
     return Defs;
 
   // Make a copy of the preexisting definitions and add the newly found ones.
   NodeSet TmpDefs = Defs;
-  for (auto R : RDs)
+  for (NodeAddr<NodeBase*> R : RDs)
     TmpDefs.insert(R.Id);
 
   NodeSet Result = Defs;
@@ -279,39 +283,43 @@ NodeSet Liveness::getAllReachingDefsRec(RegisterRef RefRR,
 
 NodeSet Liveness::getAllReachedUses(RegisterRef RefRR,
-      NodeAddr<DefNode*> DefA, const RegisterSet &DefRRs) {
+      NodeAddr<DefNode*> DefA, const RegisterAggr &DefRRs) {
   NodeSet Uses;
 
   // If the original register is already covered by all the intervening
   // defs, no more uses can be reached.
-  if (RAI.covers(DefRRs, RefRR))
+  if (DefRRs.hasCoverOf(RefRR))
     return Uses;
 
   // Add all directly reached uses.
-  NodeId U = DefA.Addr->getReachedUse();
+  // If the def is dead, it does not provide a value for any use.
+  bool IsDead = DefA.Addr->getFlags() & NodeAttrs::Dead;
+  NodeId U = !IsDead ? DefA.Addr->getReachedUse() : 0;
   while (U != 0) {
     auto UA = DFG.addr<UseNode*>(U);
-    auto UR = UA.Addr->getRegRef();
-    if (RAI.alias(RefRR, UR) && !RAI.covers(DefRRs, UR))
-      Uses.insert(U);
+    if (!(UA.Addr->getFlags() & NodeAttrs::Undef)) {
+      RegisterRef UR = UA.Addr->getRegRef(DFG);
+      if (DFG.alias(RefRR, UR) && !DefRRs.hasCoverOf(UR))
+        Uses.insert(U);
+    }
     U = UA.Addr->getSibling();
   }
 
-  // Traverse all reached defs.
+  // Traverse all reached defs. This time dead defs cannot be ignored.
   for (NodeId D = DefA.Addr->getReachedDef(), NextD; D != 0; D = NextD) {
     auto DA = DFG.addr<DefNode*>(D);
     NextD = DA.Addr->getSibling();
-    auto DR = DA.Addr->getRegRef();
+    RegisterRef DR = DA.Addr->getRegRef(DFG);
     // If this def is already covered, it cannot reach anything new.
     // Similarly, skip it if it is not aliased to the interesting register.
-    if (RAI.covers(DefRRs, DR) || !RAI.alias(RefRR, DR))
+    if (DefRRs.hasCoverOf(DR) || !DFG.alias(RefRR, DR))
       continue;
     NodeSet T;
-    if (DA.Addr->getFlags() & NodeAttrs::Preserving) {
+    if (DFG.IsPreservingDef(DA)) {
       // If it is a preserving def, do not update the set of intervening defs.
       T = getAllReachedUses(RefRR, DA, DefRRs);
     } else {
-      RegisterSet NewDefRRs = DefRRs;
+      RegisterAggr NewDefRRs = DefRRs;
       NewDefRRs.insert(DR);
       T = getAllReachedUses(RefRR, DA, NewDefRRs);
     }
@@ -326,42 +334,57 @@ void Liveness::computePhiInfo() {
   NodeList Phis;
   NodeAddr<FuncNode*> FA = DFG.getFunc();
-  auto Blocks = FA.Addr->members(DFG);
+  NodeList Blocks = FA.Addr->members(DFG);
   for (NodeAddr<BlockNode*> BA : Blocks) {
     auto Ps = BA.Addr->members_if(DFG.IsCode<NodeAttrs::Phi>, DFG);
     Phis.insert(Phis.end(), Ps.begin(), Ps.end());
   }
 
   // phi use -> (map: reaching phi -> set of registers defined in between)
-  std::map<NodeId,std::map<NodeId,RegisterSet>> PhiUp;
+  std::map<NodeId,std::map<NodeId,RegisterAggr>> PhiUp;
   std::vector<NodeId> PhiUQ;  // Work list of phis for upward propagation.
 
   // Go over all phis.
   for (NodeAddr<PhiNode*> PhiA : Phis) {
     // Go over all defs and collect the reached uses that are non-phi uses
     // (i.e. the "real uses").
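The downward reachability test in getAllReachedUses above, reduced to masks on a two-lane toy register: a use is still reached while some of its lanes are not covered by the intervening defs.

#include <cassert>
#include <cstdint>

static bool reached(uint32_t UseMask, uint32_t Covered) {
  return (UseMask & ~Covered) != 0;  // some lane still flows through
}

int main() {
  const uint32_t Covered = 0b01;     // lanes in DefRRs collected so far
  assert(!reached(0b01, Covered));   // fully overwritten in between
  assert(reached(0b10, Covered));    // the high lane still reaches
  assert(reached(0b11, Covered));    // partially reached still counts
}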
-    auto &RealUses = RealUseMap[PhiA.Id];
-    auto PhiRefs = PhiA.Addr->members(DFG);
+    RefMap &RealUses = RealUseMap[PhiA.Id];
+    NodeList PhiRefs = PhiA.Addr->members(DFG);
 
     // Have a work queue of defs whose reached uses need to be found.
     // For each def, add to the queue all reached (non-phi) defs.
     SetVector<NodeId> DefQ;
     NodeSet PhiDefs;
-    for (auto R : PhiRefs) {
+    for (NodeAddr<RefNode*> R : PhiRefs) {
       if (!DFG.IsRef<NodeAttrs::Def>(R))
         continue;
       DefQ.insert(R.Id);
       PhiDefs.insert(R.Id);
     }
+
+    // Collect the super-set of all possible reached uses. This set will
+    // contain all uses reached from this phi, either directly from the
+    // phi defs, or (recursively) via non-phi defs reached by the phi defs.
+    // This set of uses will later be trimmed to only contain these uses that
+    // are actually reached by the phi defs.
     for (unsigned i = 0; i < DefQ.size(); ++i) {
       NodeAddr<DefNode*> DA = DFG.addr<DefNode*>(DefQ[i]);
-      NodeId UN = DA.Addr->getReachedUse();
+      // Visit all reached uses. Phi defs should not really have the "dead"
+      // flag set, but check it anyway for consistency.
+      bool IsDead = DA.Addr->getFlags() & NodeAttrs::Dead;
+      NodeId UN = !IsDead ? DA.Addr->getReachedUse() : 0;
       while (UN != 0) {
         NodeAddr<UseNode*> A = DFG.addr<UseNode*>(UN);
-        if (!(A.Addr->getFlags() & NodeAttrs::PhiRef))
-          RealUses[getRestrictedRegRef(A)].insert(A.Id);
+        uint16_t F = A.Addr->getFlags();
+        if ((F & (NodeAttrs::Undef | NodeAttrs::PhiRef)) == 0) {
+          RegisterRef R = DFG.normalizeRef(getRestrictedRegRef(A));
+          RealUses[R.Reg].insert({A.Id,R.Mask});
+        }
         UN = A.Addr->getSibling();
       }
+      // Visit all reached defs, and add them to the queue. These defs may
+      // override some of the uses collected here, but that will be handled
+      // later.
       NodeId DN = DA.Addr->getReachedDef();
       while (DN != 0) {
         NodeAddr<DefNode*> A = DFG.addr<DefNode*>(DN);
@@ -388,7 +411,7 @@ void Liveness::computePhiInfo() {
     //   = R1:0     u6     Not reached by d1 (covered collectively
     //                     by d3 and d5), but following reached
     //                     defs and uses from d1 will lead here.
-    auto HasDef = [&PhiDefs] (NodeAddr<DefNode*> DA) -> bool {
+    auto InPhiDefs = [&PhiDefs] (NodeAddr<DefNode*> DA) -> bool {
       return PhiDefs.count(DA.Id);
     };
     for (auto UI = RealUses.begin(), UE = RealUses.end(); UI != UE; ) {
       // For each reached register UI->first, there is a set UI->second, of
       // uses of it. For each such use, check if it is reached by this phi,
       // i.e. check if the set of its reaching uses intersects the set of
       // this phi's defs.
-      auto &Uses = UI->second;
+      NodeRefSet &Uses = UI->second;
       for (auto I = Uses.begin(), E = Uses.end(); I != E; ) {
-        auto UA = DFG.addr<UseNode*>(*I);
-        NodeList RDs = getAllReachingDefs(UI->first, UA);
-        if (std::any_of(RDs.begin(), RDs.end(), HasDef))
+        auto UA = DFG.addr<UseNode*>(I->first);
+        // Undef flag is checked above.
+        assert((UA.Addr->getFlags() & NodeAttrs::Undef) == 0);
+        RegisterRef R(UI->first, I->second);
+        NodeList RDs = getAllReachingDefs(R, UA);
+        if (any_of(RDs, InPhiDefs))
          ++I;
        else
          I = Uses.erase(I);
@@ -418,31 +444,50 @@ void Liveness::computePhiInfo() {
 
     // Go over all phi uses and check if the reaching def is another phi.
     // Collect the phis that are among the reaching defs of these uses.
-    // While traversing the list of reaching defs for each phi use, collect
-    // the set of registers defined between this phi (Phi) and the owner phi
+    // While traversing the list of reaching defs for each phi use, accumulate
+    // the set of registers defined between this phi (PhiA) and the owner phi
     // of the reaching def.
+    NodeSet SeenUses;
+
     for (auto I : PhiRefs) {
-      if (!DFG.IsRef<NodeAttrs::Use>(I))
+      if (!DFG.IsRef<NodeAttrs::Use>(I) || SeenUses.count(I.Id))
         continue;
       NodeAddr<UseNode*> UA = I;
-      auto &UpMap = PhiUp[UA.Id];
-      RegisterSet DefRRs;
-      for (NodeAddr<DefNode*> DA : getAllReachingDefs(UA)) {
-        if (DA.Addr->getFlags() & NodeAttrs::PhiRef)
-          UpMap[DA.Addr->getOwner(DFG).Id] = DefRRs;
-        else
-          DefRRs.insert(DA.Addr->getRegRef());
+
+      // Given a phi use UA, traverse all related phi uses (including UA).
+      // The related phi uses may reach different phi nodes or may reach the
+      // same phi node. If multiple uses reach the same phi P, the intervening
+      // defs must be accumulated for all such uses. To group all such uses
+      // into one set, map their node ids to the first use id that reaches P.
+      std::map<NodeId,NodeId> FirstUse; // Phi reached up -> first phi use.
+
+      for (NodeAddr<UseNode*> VA : DFG.getRelatedRefs(PhiA, UA)) {
+        SeenUses.insert(VA.Id);
+        RegisterAggr DefRRs(TRI);
+        for (NodeAddr<DefNode*> DA : getAllReachingDefs(VA)) {
+          if (DA.Addr->getFlags() & NodeAttrs::PhiRef) {
+            NodeId RP = DA.Addr->getOwner(DFG).Id;
+            NodeId FU = FirstUse.insert({RP,VA.Id}).first->second;
+            std::map<NodeId,RegisterAggr> &M = PhiUp[FU];
+            auto F = M.find(RP);
+            if (F == M.end())
+              M.insert(std::make_pair(RP, DefRRs));
+            else
+              F->second.insert(DefRRs);
+          }
+          DefRRs.insert(DA.Addr->getRegRef(DFG));
+        }
       }
     }
   }
 
   if (Trace) {
-    dbgs() << "Phi-up-to-phi map:\n";
+    dbgs() << "Phi-up-to-phi map with intervening defs:\n";
     for (auto I : PhiUp) {
       dbgs() << "phi " << Print<NodeId>(I.first, DFG) << " -> {";
       for (auto R : I.second)
         dbgs() << ' ' << Print<NodeId>(R.first, DFG)
-               << Print<RegisterSet>(R.second, DFG);
+               << Print<RegisterAggr>(R.second, DFG);
       dbgs() << " }\n";
     }
   }
@@ -467,40 +512,50 @@ void Liveness::computePhiInfo() {
   //
   // When propagating uses up the phi chains, get all the reaching defs
   // for a given phi use, and traverse the list until the propagated ref
-  // is covered, or until or until reaching the final phi. Only assume
-  // that the reference reaches the phi in the latter case.
+  // is covered, or until reaching the final phi. Only assume that the
+  // reference reaches the phi in the latter case.
 
   for (unsigned i = 0; i < PhiUQ.size(); ++i) {
     auto PA = DFG.addr<PhiNode*>(PhiUQ[i]);
-    auto &RealUses = RealUseMap[PA.Id];
-    for (auto U : PA.Addr->members_if(DFG.IsRef<NodeAttrs::Use>, DFG)) {
-      NodeAddr<UseNode*> UA = U;
-      auto &UpPhis = PhiUp[UA.Id];
-      for (auto UP : UpPhis) {
+    NodeList PUs = PA.Addr->members_if(DFG.IsRef<NodeAttrs::Use>, DFG);
+    RefMap &RUM = RealUseMap[PA.Id];
+
+    for (NodeAddr<UseNode*> UA : PUs) {
+      std::map<NodeId,RegisterAggr> &PUM = PhiUp[UA.Id];
+      RegisterRef UR = DFG.normalizeRef(getRestrictedRegRef(UA));
+      for (const std::pair<NodeId,RegisterAggr> &P : PUM) {
         bool Changed = false;
-        auto &MidDefs = UP.second;
-        // Collect the set UpReached of uses that are reached by the current
-        // phi PA, and are not covered by any intervening def between PA and
-        // the upward phi UP.
-        RegisterSet UpReached;
-        for (auto T : RealUses) {
-          if (!isRestricted(PA, UA, T.first))
-            continue;
-          if (!RAI.covers(MidDefs, T.first))
-            UpReached.insert(T.first);
-        }
-        if (UpReached.empty())
+        const RegisterAggr &MidDefs = P.second;
+
+        // Collect the set PropUp of uses that are reached by the current
+        // phi PA, and are not covered by any intervening def between the
+        // currently visited use UA and the upward phi P.
+ + if (MidDefs.hasCoverOf(UR)) continue; - // Update the set PRUs of real uses reached by the upward phi UP with - // the actual set of uses (UpReached) that the UP phi reaches. - auto &PRUs = RealUseMap[UP.first]; - for (auto R : UpReached) { - unsigned Z = PRUs[R].size(); - PRUs[R].insert(RealUses[R].begin(), RealUses[R].end()); - Changed |= (PRUs[R].size() != Z); + + // General algorithm: + // for each (R,U) : U is use node of R, U is reached by PA + // if MidDefs does not cover (R,U) + // then add (R-MidDefs,U) to RealUseMap[P] + // + for (const std::pair<RegisterId,NodeRefSet> &T : RUM) { + RegisterRef R = DFG.restrictRef(RegisterRef(T.first), UR); + if (!R) + continue; + for (std::pair<NodeId,LaneBitmask> V : T.second) { + RegisterRef S = DFG.restrictRef(RegisterRef(R.Reg, V.second), R); + if (!S) + continue; + if (RegisterRef SS = MidDefs.clearIn(S)) { + NodeRefSet &RS = RealUseMap[P.first][SS.Reg]; + Changed |= RS.insert({V.first,SS.Mask}).second; + } + } } + if (Changed) - PhiUQ.push_back(UP.first); + PhiUQ.push_back(P.first); } } } @@ -512,7 +567,7 @@ void Liveness::computePhiInfo() { NodeAddr<PhiNode*> PA = DFG.addr<PhiNode*>(I.first); NodeList Ds = PA.Addr->members_if(DFG.IsRef<NodeAttrs::Def>, DFG); if (!Ds.empty()) { - RegisterRef RR = NodeAddr<DefNode*>(Ds[0]).Addr->getRegRef(); + RegisterRef RR = NodeAddr<DefNode*>(Ds[0]).Addr->getRegRef(DFG); dbgs() << '<' << Print<RegisterRef>(RR, DFG) << '>'; } else { dbgs() << "<noreg>"; @@ -540,7 +595,7 @@ void Liveness::computeLiveIns() { // Compute IDF first, then the inverse. decltype(IIDF) IDF; - for (auto &B : MF) { + for (MachineBasicBlock &B : MF) { auto F1 = MDF.find(&B); if (F1 == MDF.end()) continue; @@ -562,20 +617,20 @@ void Liveness::computeLiveIns() { computePhiInfo(); NodeAddr<FuncNode*> FA = DFG.getFunc(); - auto Blocks = FA.Addr->members(DFG); + NodeList Blocks = FA.Addr->members(DFG); // Build the phi live-on-entry map. for (NodeAddr<BlockNode*> BA : Blocks) { MachineBasicBlock *MB = BA.Addr->getCode(); - auto &LON = PhiLON[MB]; + RefMap &LON = PhiLON[MB]; for (auto P : BA.Addr->members_if(DFG.IsCode<NodeAttrs::Phi>, DFG)) - for (auto S : RealUseMap[P.Id]) + for (const RefMap::value_type &S : RealUseMap[P.Id]) LON[S.first].insert(S.second.begin(), S.second.end()); } if (Trace) { dbgs() << "Phi live-on-entry map:\n"; - for (auto I : PhiLON) + for (auto &I : PhiLON) dbgs() << "block #" << I.first->getNumber() << " -> " << Print<RefMap>(I.second, DFG) << '\n'; } @@ -584,33 +639,35 @@ void Liveness::computeLiveIns() { // "real" uses. Propagate this set backwards into the block predecessors // through the reaching defs of the corresponding phi uses. for (NodeAddr<BlockNode*> BA : Blocks) { - auto Phis = BA.Addr->members_if(DFG.IsCode<NodeAttrs::Phi>, DFG); + NodeList Phis = BA.Addr->members_if(DFG.IsCode<NodeAttrs::Phi>, DFG); for (NodeAddr<PhiNode*> PA : Phis) { - auto &RUs = RealUseMap[PA.Id]; + RefMap &RUs = RealUseMap[PA.Id]; if (RUs.empty()) continue; for (auto U : PA.Addr->members_if(DFG.IsRef<NodeAttrs::Use>, DFG)) { - NodeAddr<PhiUseNode*> UA = U; - if (UA.Addr->getReachingDef() == 0) + NodeAddr<PhiUseNode*> PUA = U; + if (PUA.Addr->getReachingDef() == 0) continue; // Mark all reached "real" uses of P as live on exit in the // predecessor. // Remap all the RUs so that they have a correct reaching def. 
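The restrict-and-clear steps in this loop can be pictured with RegisterRef reduced to a (register, lane mask) pair. The sketch below ignores register aliasing; restrictLanes and clearLanes are simplified stand-ins for DFG.restrictRef and RegisterAggr::clearIn, not their real signatures:

    #include <cstdint>

    // RegisterRef reduced to (register, lane mask); a null mask means "no ref".
    struct Ref {
      uint32_t Reg = 0;
      uint32_t Mask = 0;
      explicit operator bool() const { return Reg != 0 && Mask != 0; }
    };

    // Simplified DFG.restrictRef: intersect two refs of the same register.
    Ref restrictLanes(Ref A, Ref B) {
      if (A.Reg != B.Reg)
        return Ref{};
      return Ref{A.Reg, A.Mask & B.Mask};
    }

    // Simplified RegisterAggr::clearIn: drop the lanes covered by MidDefs;
    // whatever survives is the part of the use propagated up to phi P.
    Ref clearLanes(Ref S, uint32_t MidDefLanes) {
      return Ref{S.Reg, S.Mask & ~MidDefLanes};
    }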
- auto PrA = DFG.addr<BlockNode*>(UA.Addr->getPredecessor()); - auto &LOX = PhiLOX[PrA.Addr->getCode()]; - for (auto R : RUs) { - RegisterRef RR = R.first; - if (!isRestricted(PA, UA, RR)) - RR = getRestrictedRegRef(UA); - // The restricted ref may be different from the ref that was - // accessed in the "real use". This means that this phi use - // is not the one that carries this reference, so skip it. - if (!RAI.alias(R.first, RR)) + auto PrA = DFG.addr<BlockNode*>(PUA.Addr->getPredecessor()); + RefMap &LOX = PhiLOX[PrA.Addr->getCode()]; + + RegisterRef UR = DFG.normalizeRef(getRestrictedRegRef(PUA)); + for (const std::pair<RegisterId,NodeRefSet> &T : RUs) { + // Check if T.first aliases UR? + LaneBitmask M; + for (std::pair<NodeId,LaneBitmask> P : T.second) + M |= P.second; + + RegisterRef S = DFG.restrictRef(RegisterRef(T.first, M), UR); + if (!S) continue; - for (auto D : getAllReachingDefs(RR, UA)) - LOX[RR].insert(D.Id); + for (NodeAddr<DefNode*> D : getAllReachingDefs(S, PUA)) + LOX[S.Reg].insert({D.Id, S.Mask}); } } // for U : phi uses } // for P : Phis @@ -618,7 +675,7 @@ void Liveness::computeLiveIns() { if (Trace) { dbgs() << "Phi live-on-exit map:\n"; - for (auto I : PhiLOX) + for (auto &I : PhiLOX) dbgs() << "block #" << I.first->getNumber() << " -> " << Print<RefMap>(I.second, DFG) << '\n'; } @@ -629,19 +686,41 @@ void Liveness::computeLiveIns() { // Add function live-ins to the live-in set of the function entry block. auto &EntryIn = LiveMap[&MF.front()]; for (auto I = MRI.livein_begin(), E = MRI.livein_end(); I != E; ++I) - EntryIn.insert({I->first,0}); + EntryIn.insert(RegisterRef(I->first)); if (Trace) { // Dump the liveness map - for (auto &B : MF) { - BitVector LV(TRI.getNumRegs()); + for (MachineBasicBlock &B : MF) { + std::vector<RegisterRef> LV; for (auto I = B.livein_begin(), E = B.livein_end(); I != E; ++I) - LV.set(I->PhysReg); + LV.push_back(RegisterRef(I->PhysReg, I->LaneMask)); + std::sort(LV.begin(), LV.end()); dbgs() << "BB#" << B.getNumber() << "\t rec = {"; - for (int x = LV.find_first(); x >= 0; x = LV.find_next(x)) - dbgs() << ' ' << Print<RegisterRef>({unsigned(x),0}, DFG); + for (auto I : LV) + dbgs() << ' ' << Print<RegisterRef>(I, DFG); dbgs() << " }\n"; - dbgs() << "\tcomp = " << Print<RegisterSet>(LiveMap[&B], DFG) << '\n'; + //dbgs() << "\tcomp = " << Print<RegisterAggr>(LiveMap[&B], DFG) << '\n'; + + LV.clear(); + for (std::pair<RegisterId,LaneBitmask> P : LiveMap[&B]) { + MCSubRegIndexIterator S(P.first, &TRI); + if (!S.isValid()) { + LV.push_back(RegisterRef(P.first)); + continue; + } + do { + LaneBitmask M = TRI.getSubRegIndexLaneMask(S.getSubRegIndex()); + if ((M & P.second).any()) + LV.push_back(RegisterRef(S.getSubReg())); + ++S; + } while (S.isValid()); + } + std::sort(LV.begin(), LV.end()); + dbgs() << "\tcomp = {"; + for (auto I : LV) + dbgs() << ' ' << Print<RegisterRef>(I, DFG); + dbgs() << " }\n"; + } } } @@ -658,8 +737,7 @@ void Liveness::resetLiveIns() { // Add the newly computed live-ins. 
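The alias pre-check above first folds every lane mask recorded for T.first into a single mask M before restricting against UR. A minimal sketch of that accumulation, reusing the (NodeId, mask) pairing of the NodeRef typedef from this patch, with uint32_t standing in for LaneBitmask:

    #include <cstdint>
    #include <set>
    #include <utility>

    using NodeId = uint32_t;
    using NodeRef = std::pair<NodeId, uint32_t>; // (node id, lane mask)
    using NodeRefSet = std::set<NodeRef>;

    // Fold all lane masks recorded for one register into a single mask.
    uint32_t accumulatedLanes(const NodeRefSet &Refs) {
      uint32_t M = 0;
      for (const NodeRef &R : Refs)
        M |= R.second;
      return M;
    }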
auto &LiveIns = LiveMap[&B]; for (auto I : LiveIns) { - assert(I.Sub == 0); - B.addLiveIn(I.Reg); + B.addLiveIn({MCPhysReg(I.first), I.second}); } } } @@ -672,9 +750,20 @@ void Liveness::resetKills() { void Liveness::resetKills(MachineBasicBlock *B) { - auto CopyLiveIns = [] (MachineBasicBlock *B, BitVector &LV) -> void { - for (auto I = B->livein_begin(), E = B->livein_end(); I != E; ++I) - LV.set(I->PhysReg); + auto CopyLiveIns = [this] (MachineBasicBlock *B, BitVector &LV) -> void { + for (auto I : B->liveins()) { + MCSubRegIndexIterator S(I.PhysReg, &TRI); + if (!S.isValid()) { + LV.set(I.PhysReg); + continue; + } + do { + LaneBitmask M = TRI.getSubRegIndexLaneMask(S.getSubRegIndex()); + if ((M & I.LaneMask).any()) + LV.set(S.getSubReg()); + ++S; + } while (S.isValid()); + } }; BitVector LiveIn(TRI.getNumRegs()), Live(TRI.getNumRegs()); @@ -724,26 +813,6 @@ void Liveness::resetKills(MachineBasicBlock *B) { } -// For shadows, determine if RR is aliased to a reaching def of any other -// shadow associated with RA. If it is not, then RR is "restricted" to RA, -// and so it can be considered a value specific to RA. This is important -// for accurately determining values associated with phi uses. -// For non-shadows, this function returns "true". -bool Liveness::isRestricted(NodeAddr<InstrNode*> IA, NodeAddr<RefNode*> RA, - RegisterRef RR) const { - NodeId Start = RA.Id; - for (NodeAddr<RefNode*> TA = DFG.getNextShadow(IA, RA); - TA.Id != 0 && TA.Id != Start; TA = DFG.getNextShadow(IA, TA)) { - NodeId RD = TA.Addr->getReachingDef(); - if (RD == 0) - continue; - if (RAI.alias(RR, DFG.addr<DefNode*>(RD).Addr->getRegRef())) - return false; - } - return true; -} - - RegisterRef Liveness::getRestrictedRegRef(NodeAddr<RefNode*> RA) const { assert(DFG.IsRef<NodeAttrs::Use>(RA)); if (RA.Addr->getFlags() & NodeAttrs::Shadow) { @@ -751,14 +820,7 @@ RegisterRef Liveness::getRestrictedRegRef(NodeAddr<RefNode*> RA) const { assert(RD); RA = DFG.addr<DefNode*>(RD); } - return RA.Addr->getRegRef(); -} - - -unsigned Liveness::getPhysReg(RegisterRef RR) const { - if (!TargetRegisterInfo::isPhysicalRegister(RR.Reg)) - return 0; - return RR.Sub ? TRI.getSubReg(RR.Reg, RR.Sub) : RR.Reg; + return RA.Addr->getRegRef(DFG); } @@ -808,77 +870,99 @@ void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) { } if (Trace) { - dbgs() << LLVM_FUNCTION_NAME << " in BB#" << B->getNumber() - << " after recursion into"; + dbgs() << "\n-- BB#" << B->getNumber() << ": " << __func__ + << " after recursion into: {"; for (auto I : *N) dbgs() << ' ' << I->getBlock()->getNumber(); - dbgs() << "\n LiveIn: " << Print<RefMap>(LiveIn, DFG); - dbgs() << "\n Local: " << Print<RegisterSet>(LiveMap[B], DFG) << '\n'; + dbgs() << " }\n"; + dbgs() << " LiveIn: " << Print<RefMap>(LiveIn, DFG) << '\n'; + dbgs() << " Local: " << Print<RegisterAggr>(LiveMap[B], DFG) << '\n'; } - // Add phi uses that are live on exit from this block. + // Add reaching defs of phi uses that are live on exit from this block. RefMap &PUs = PhiLOX[B]; - for (auto S : PUs) + for (auto &S : PUs) LiveIn[S.first].insert(S.second.begin(), S.second.end()); if (Trace) { dbgs() << "after LOX\n"; dbgs() << " LiveIn: " << Print<RefMap>(LiveIn, DFG) << '\n'; - dbgs() << " Local: " << Print<RegisterSet>(LiveMap[B], DFG) << '\n'; + dbgs() << " Local: " << Print<RegisterAggr>(LiveMap[B], DFG) << '\n'; } - // Stop tracking all uses defined in this block: erase those records - // where the reaching def is located in B and which cover all reached - // uses. 
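The CopyLiveIns lambda above expands a live-in register carrying a partial lane mask into the concrete sub-registers whose lanes intersect that mask. A toy version of the same decomposition, with a plain table standing in for MCSubRegIndexIterator and getSubRegIndexLaneMask:

    #include <cstdint>
    #include <vector>

    struct SubRegInfo {
      unsigned SubReg;    // concrete sub-register
      uint32_t LaneMask;  // lanes of the super-register it covers
    };

    // Expand (Reg, LiveLanes) into the sub-registers whose lanes intersect
    // the live mask; with no sub-registers the whole register is live.
    std::vector<unsigned> expandToSubRegs(unsigned Reg, uint32_t LiveLanes,
                                          const std::vector<SubRegInfo> &Subs) {
      std::vector<unsigned> Out;
      if (Subs.empty()) {
        Out.push_back(Reg);
        return Out;
      }
      for (const SubRegInfo &S : Subs)
        if ((S.LaneMask & LiveLanes) != 0)
          Out.push_back(S.SubReg);
      return Out;
    }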
-    auto Copy = LiveIn;
+  // The LiveIn map at this point has all defs that are live-on-exit from B,
+  // as if they were live-on-entry to B. First, we need to filter out all
+  // defs that are present in this block. Then we will add reaching defs of
+  // all upward-exposed uses.
+
+  // To filter out the defs, first make a copy of LiveIn, and then re-populate
+  // LiveIn with the defs that should remain.
+  RefMap LiveInCopy = LiveIn;
   LiveIn.clear();
 
-  for (auto I : Copy) {
-    auto &Defs = LiveIn[I.first];
-    NodeSet Rest;
-    for (auto R : I.second) {
-      auto DA = DFG.addr<DefNode*>(R);
-      RegisterRef DDR = DA.Addr->getRegRef();
+  for (const std::pair<RegisterId,NodeRefSet> &LE : LiveInCopy) {
+    RegisterRef LRef(LE.first);
+    NodeRefSet &NewDefs = LiveIn[LRef.Reg]; // To be filled.
+    const NodeRefSet &OldDefs = LE.second;
+    for (NodeRef OR : OldDefs) {
+      // OR is a def node that was live-on-exit.
+      auto DA = DFG.addr<DefNode*>(OR.first);
       NodeAddr<InstrNode*> IA = DA.Addr->getOwner(DFG);
       NodeAddr<BlockNode*> BA = IA.Addr->getOwner(DFG);
-      // Defs from a different block need to be preserved. Defs from this
-      // block will need to be processed further, except for phi defs, the
-      // liveness of which is handled through the PhiLON/PhiLOX maps.
-      if (B != BA.Addr->getCode())
-        Defs.insert(R);
-      else {
-        bool IsPreserving = DA.Addr->getFlags() & NodeAttrs::Preserving;
-        if (IA.Addr->getKind() != NodeAttrs::Phi && !IsPreserving) {
-          bool Covering = RAI.covers(DDR, I.first);
-          NodeId U = DA.Addr->getReachedUse();
-          while (U && Covering) {
-            auto DUA = DFG.addr<UseNode*>(U);
-            RegisterRef Q = DUA.Addr->getRegRef();
-            Covering = RAI.covers(DA.Addr->getRegRef(), Q);
-            U = DUA.Addr->getSibling();
-          }
-          if (!Covering)
-            Rest.insert(R);
-        }
+      if (B != BA.Addr->getCode()) {
+        // Defs from a different block need to be preserved. Defs from this
+        // block will need to be processed further, except for phi defs, the
+        // liveness of which is handled through the PhiLON/PhiLOX maps.
+        NewDefs.insert(OR);
+        continue;
+      }
+
+      // Defs from this block need to stop the liveness from being
+      // propagated upwards. This only applies to non-preserving defs,
+      // and to the parts of the register actually covered by those defs.
+      // (Note that phi defs should always be preserving.)
+      RegisterAggr RRs(TRI);
+      LRef.Mask = OR.second;
+
+      if (!DFG.IsPreservingDef(DA)) {
+        assert(!(IA.Addr->getFlags() & NodeAttrs::Phi));
+        // DA is a non-phi def that is live-on-exit from this block, and
+        // that is also located in this block. LRef is a register ref
+        // whose use this def reaches. If DA covers LRef, then no part
+        // of LRef is exposed upwards.
+        if (RRs.insert(DA.Addr->getRegRef(DFG)).hasCoverOf(LRef))
+          continue;
       }
-    }
 
-    // Non-covering defs from B.
-    for (auto R : Rest) {
-      auto DA = DFG.addr<DefNode*>(R);
-      RegisterRef DRR = DA.Addr->getRegRef();
-      RegisterSet RRs;
+      // DA itself was not sufficient to cover LRef. In general, it is
+      // the last in a chain of aliased defs before the exit from this block.
+      // There could be other defs in this block that are a part of that
+      // chain. Check that now: accumulate the registers from these defs,
+      // and if they all together cover LRef, it is not live-on-entry.
       for (NodeAddr<DefNode*> TA : getAllReachingDefs(DA)) {
-        NodeAddr<InstrNode*> IA = TA.Addr->getOwner(DFG);
-        NodeAddr<BlockNode*> BA = IA.Addr->getOwner(DFG);
-        // Preserving defs do not count towards covering.
+        // DefNode -> InstrNode -> BlockNode.
+ NodeAddr<InstrNode*> ITA = TA.Addr->getOwner(DFG); + NodeAddr<BlockNode*> BTA = ITA.Addr->getOwner(DFG); + // Reaching defs are ordered in the upward direction. + if (BTA.Addr->getCode() != B) { + // We have reached past the beginning of B, and the accumulated + // registers are not covering LRef. The first def from the + // upward chain will be live. + // Subtract all accumulated defs (RRs) from LRef. + RegisterAggr L(TRI); + L.insert(LRef).clear(RRs); + assert(!L.empty()); + NewDefs.insert({TA.Id,L.begin()->second}); + break; + } + + // TA is in B. Only add this def to the accumulated cover if it is + // not preserving. if (!(TA.Addr->getFlags() & NodeAttrs::Preserving)) - RRs.insert(TA.Addr->getRegRef()); - if (BA.Addr->getCode() == B) - continue; - if (RAI.covers(RRs, DRR)) + RRs.insert(TA.Addr->getRegRef(DFG)); + // If this is enough to cover LRef, then stop. + if (RRs.hasCoverOf(LRef)) break; - Defs.insert(TA.Id); } } } @@ -888,7 +972,7 @@ void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) { if (Trace) { dbgs() << "after defs in block\n"; dbgs() << " LiveIn: " << Print<RefMap>(LiveIn, DFG) << '\n'; - dbgs() << " Local: " << Print<RegisterSet>(LiveMap[B], DFG) << '\n'; + dbgs() << " Local: " << Print<RegisterAggr>(LiveMap[B], DFG) << '\n'; } // Scan the block for upward-exposed uses and add them to the tracking set. @@ -897,38 +981,44 @@ void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) { if (IA.Addr->getKind() != NodeAttrs::Stmt) continue; for (NodeAddr<UseNode*> UA : IA.Addr->members_if(DFG.IsUse, DFG)) { - RegisterRef RR = UA.Addr->getRegRef(); - for (auto D : getAllReachingDefs(UA)) + if (UA.Addr->getFlags() & NodeAttrs::Undef) + continue; + RegisterRef RR = DFG.normalizeRef(UA.Addr->getRegRef(DFG)); + for (NodeAddr<DefNode*> D : getAllReachingDefs(UA)) if (getBlockWithRef(D.Id) != B) - LiveIn[RR].insert(D.Id); + LiveIn[RR.Reg].insert({D.Id,RR.Mask}); } } if (Trace) { dbgs() << "after uses in block\n"; dbgs() << " LiveIn: " << Print<RefMap>(LiveIn, DFG) << '\n'; - dbgs() << " Local: " << Print<RegisterSet>(LiveMap[B], DFG) << '\n'; + dbgs() << " Local: " << Print<RegisterAggr>(LiveMap[B], DFG) << '\n'; } // Phi uses should not be propagated up the dominator tree, since they // are not dominated by their corresponding reaching defs. 
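The reaching-def walk above has two exits: either the accumulated non-preserving defs cover LRef, so nothing is exposed upward, or the chain leaves the block first and the remainder is live-on-entry. A condensed sketch of that decision, with lane masks as plain integers and aliasing ignored:

    #include <cstdint>
    #include <vector>

    struct ChainDef {
      uint32_t Lanes;   // lanes written (same register assumed)
      bool Preserving;  // preserving defs do not count toward the cover
      bool InBlock;     // whether the def sits in the block being processed
    };

    // Walk the reaching-def chain upward: accumulate lanes of non-preserving
    // defs from this block, and report covered as soon as nothing of
    // RefLanes remains exposed. Leaving the block before that means the ref
    // is live-on-entry.
    bool coveredWithinBlock(uint32_t RefLanes,
                            const std::vector<ChainDef> &Chain) {
      uint32_t Covered = 0;
      for (const ChainDef &D : Chain) {
        if (!D.InBlock)
          return false; // chain left the block with lanes still exposed
        if (!D.Preserving)
          Covered |= D.Lanes;
        if ((RefLanes & ~Covered) == 0)
          return true;
      }
      return false;
    }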
- auto &Local = LiveMap[B]; - auto &LON = PhiLON[B]; - for (auto R : LON) - Local.insert(R.first); + RegisterAggr &Local = LiveMap[B]; + RefMap &LON = PhiLON[B]; + for (auto &R : LON) { + LaneBitmask M; + for (auto P : R.second) + M |= P.second; + Local.insert(RegisterRef(R.first,M)); + } if (Trace) { dbgs() << "after phi uses in block\n"; dbgs() << " LiveIn: " << Print<RefMap>(LiveIn, DFG) << '\n'; - dbgs() << " Local: " << Print<RegisterSet>(Local, DFG) << '\n'; + dbgs() << " Local: " << Print<RegisterAggr>(Local, DFG) << '\n'; } for (auto C : IIDF[B]) { - auto &LiveC = LiveMap[C]; - for (auto S : LiveIn) + RegisterAggr &LiveC = LiveMap[C]; + for (const std::pair<RegisterId,NodeRefSet> &S : LiveIn) for (auto R : S.second) - if (MDT.properlyDominates(getBlockWithRef(R), C)) - LiveC.insert(S.first); + if (MDT.properlyDominates(getBlockWithRef(R.first), C)) + LiveC.insert(RegisterRef(S.first, R.second)); } } diff --git a/contrib/llvm/lib/Target/Hexagon/RDFLiveness.h b/contrib/llvm/lib/Target/Hexagon/RDFLiveness.h index 2b49c74..c88396f 100644 --- a/contrib/llvm/lib/Target/Hexagon/RDFLiveness.h +++ b/contrib/llvm/lib/Target/Hexagon/RDFLiveness.h @@ -30,20 +30,44 @@ namespace llvm { namespace rdf { struct Liveness { public: - typedef std::map<MachineBasicBlock*,RegisterSet> LiveMapType; - typedef std::map<RegisterRef,NodeSet> RefMap; + // This is really a std::map, except that it provides a non-trivial + // default constructor to the element accessed via []. + struct LiveMapType { + LiveMapType(const TargetRegisterInfo &tri) : Empty(tri) {} + + RegisterAggr &operator[] (MachineBasicBlock *B) { + return Map.emplace(B, Empty).first->second; + } + private: + RegisterAggr Empty; + std::map<MachineBasicBlock*,RegisterAggr> Map; + }; + + typedef std::pair<NodeId,LaneBitmask> NodeRef; + typedef std::set<NodeRef> NodeRefSet; + // RegisterId in RefMap must be normalized. 
+    typedef std::map<RegisterId,NodeRefSet> RefMap;
 
     Liveness(MachineRegisterInfo &mri, const DataFlowGraph &g)
       : DFG(g), TRI(g.getTRI()), MDT(g.getDT()), MDF(g.getDF()),
-        RAI(g.getRAI()), MRI(mri), Empty(), Trace(false) {}
+        MRI(mri), LiveMap(g.getTRI()), Empty(), NoRegs(g.getTRI()),
+        Trace(false) {}
 
     NodeList getAllReachingDefs(RegisterRef RefRR, NodeAddr<RefNode*> RefA,
-        bool FullChain = false, const RegisterSet &DefRRs = RegisterSet());
-    NodeList getAllReachingDefs(NodeAddr<RefNode*> RefA);
+        bool FullChain, const RegisterAggr &DefRRs);
+    NodeList getAllReachingDefs(NodeAddr<RefNode*> RefA) {
+      return getAllReachingDefs(RefA.Addr->getRegRef(DFG), RefA, false, NoRegs);
+    }
+    NodeList getAllReachingDefs(RegisterRef RefRR, NodeAddr<RefNode*> RefA) {
+      return getAllReachingDefs(RefRR, RefA, false, NoRegs);
+    }
     NodeSet getAllReachingDefsRec(RegisterRef RefRR, NodeAddr<RefNode*> RefA,
         NodeSet &Visited, const NodeSet &Defs);
     NodeSet getAllReachedUses(RegisterRef RefRR, NodeAddr<DefNode*> DefA,
-        const RegisterSet &DefRRs = RegisterSet());
+        const RegisterAggr &DefRRs);
+    NodeSet getAllReachedUses(RegisterRef RefRR, NodeAddr<DefNode*> DefA) {
+      return getAllReachedUses(RefRR, DefA, NoRegs);
+    }
 
     LiveMapType &getLiveMap() { return LiveMap; }
     const LiveMapType &getLiveMap() const { return LiveMap; }
@@ -65,10 +89,10 @@ namespace rdf {
     const TargetRegisterInfo &TRI;
     const MachineDominatorTree &MDT;
     const MachineDominanceFrontier &MDF;
-    const RegisterAliasInfo &RAI;
     MachineRegisterInfo &MRI;
     LiveMapType LiveMap;
     const RefMap Empty;
+    const RegisterAggr NoRegs;
     bool Trace;
 
     // Cache of mapping from node ids (for RefNodes) to the containing
@@ -79,7 +103,8 @@ namespace rdf {
 
     // Phi information:
     //
-    // map: NodeId -> (map: RegisterRef -> NodeSet)
+    // RealUseMap
+    // map: NodeId -> (map: RegisterId -> NodeRefSet)
     // phi id -> (map: register -> set of reached non-phi uses)
     std::map<NodeId, RefMap> RealUseMap;
 
@@ -96,10 +121,9 @@ namespace rdf {
     // the dominator tree), create a map: block -> set of uses live on exit.
     std::map<MachineBasicBlock*,RefMap> PhiLOX;
 
-    bool isRestricted(NodeAddr<InstrNode*> IA, NodeAddr<RefNode*> RA,
+    bool isRestrictedToRef(NodeAddr<InstrNode*> IA, NodeAddr<RefNode*> RA,
         RegisterRef RR) const;
     RegisterRef getRestrictedRegRef(NodeAddr<RefNode*> RA) const;
-    unsigned getPhysReg(RegisterRef RR) const;
     MachineBasicBlock *getBlockWithRef(NodeId RN) const;
     void traverse(MachineBasicBlock *B, RefMap &LiveIn);
     void emptify(RefMap &M);
diff --git a/contrib/llvm/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp b/contrib/llvm/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp
index 40f6c8d..0554646 100644
--- a/contrib/llvm/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp
@@ -12,8 +12,12 @@
 #include "llvm/Support/TargetRegistry.h"
 using namespace llvm;
 
-Target llvm::TheHexagonTarget;
+Target &llvm::getTheHexagonTarget() {
+  static Target TheHexagonTarget;
+  return TheHexagonTarget;
+}
 
 extern "C" void LLVMInitializeHexagonTargetInfo() {
-  RegisterTarget<Triple::hexagon, /*HasJIT=*/false> X(TheHexagonTarget, "hexagon", "Hexagon");
+  RegisterTarget<Triple::hexagon, /*HasJIT=*/false> X(getTheHexagonTarget(),
+                                                      "hexagon", "Hexagon");
 }
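The LiveMapType wrapper in RDFLiveness.h above exists because RegisterAggr requires a TargetRegisterInfo at construction, so std::map::operator[], which default-constructs absent values, cannot be used directly. The same idea in generic form, assuming only a copyable mapped type:

    #include <map>

    // operator[] for a mapped type without a default constructor: absent
    // keys are seeded from a prototype value, present keys are returned
    // untouched (emplace does nothing when the key already exists).
    template <typename Key, typename Value>
    class DefaultedMap {
      Value Proto;
      std::map<Key, Value> Map;

    public:
      explicit DefaultedMap(const Value &P) : Proto(P) {}
      Value &operator[](const Key &K) {
        return Map.emplace(K, Proto).first->second;
      }
    };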
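The TargetInfo change replaces a global Target object with a function-local static, the construct-on-first-use pattern that sidesteps static initialization order across translation units; since C++11 such initialization is also thread-safe. A generic sketch (ExampleTarget is a placeholder, not LLVM's Target):

    struct ExampleTarget {
      // ... registration state ...
    };

    // Construct-on-first-use: the object is built the first time the
    // function runs, avoiding static-initialization-order problems
    // between translation units.
    ExampleTarget &getTheExampleTarget() {
      static ExampleTarget T; // thread-safe "magic static" since C++11
      return T;
    }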