Diffstat (limited to 'contrib/llvm/lib/Target/Hexagon')
109 files changed, 66628 insertions, 0 deletions
diff --git a/contrib/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/contrib/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp new file mode 100644 index 0000000..a8622a9 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp @@ -0,0 +1,2152 @@ +//===-- HexagonAsmParser.cpp - Parse Hexagon asm to MCInst instructions----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mcasmparser" + +#include "Hexagon.h" +#include "HexagonRegisterInfo.h" +#include "HexagonTargetStreamer.h" +#include "MCTargetDesc/HexagonBaseInfo.h" +#include "MCTargetDesc/HexagonMCELFStreamer.h" +#include "MCTargetDesc/HexagonMCChecker.h" +#include "MCTargetDesc/HexagonMCExpr.h" +#include "MCTargetDesc/HexagonMCShuffler.h" +#include "MCTargetDesc/HexagonMCTargetDesc.h" +#include "MCTargetDesc/HexagonMCAsmInfo.h" +#include "MCTargetDesc/HexagonShuffler.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCELFStreamer.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCTargetAsmParser.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" +#include <sstream> + +using namespace llvm; + +static cl::opt<bool> EnableFutureRegs("mfuture-regs", + cl::desc("Enable future registers")); + +static cl::opt<bool> WarnMissingParenthesis("mwarn-missing-parenthesis", +cl::desc("Warn for missing parenthesis around predicate registers"), +cl::init(true)); +static cl::opt<bool> ErrorMissingParenthesis("merror-missing-parenthesis", +cl::desc("Error for missing parenthesis around predicate registers"), +cl::init(false)); +static cl::opt<bool> WarnSignedMismatch("mwarn-sign-mismatch", +cl::desc("Warn for mismatching a signed and unsigned value"), +cl::init(true)); +static cl::opt<bool> WarnNoncontigiousRegister("mwarn-noncontigious-register", +cl::desc("Warn for register names that arent contigious"), +cl::init(true)); +static cl::opt<bool> ErrorNoncontigiousRegister("merror-noncontigious-register", +cl::desc("Error for register names that aren't contigious"), +cl::init(false)); + + +namespace { +struct HexagonOperand; + +class HexagonAsmParser : public MCTargetAsmParser { + + HexagonTargetStreamer &getTargetStreamer() { + MCTargetStreamer &TS = *Parser.getStreamer().getTargetStreamer(); + return static_cast<HexagonTargetStreamer &>(TS); + } + + MCAsmParser &Parser; + MCAssembler *Assembler; + MCInstrInfo const &MCII; + MCInst MCB; + bool InBrackets; + + MCAsmParser &getParser() const { return Parser; } + MCAssembler *getAssembler() const { return Assembler; } + MCAsmLexer &getLexer() const { return Parser.getLexer(); } + + bool equalIsAsmAssignment() override { return false; } + bool isLabel(AsmToken &Token) override; + + void Warning(SMLoc L, 
const Twine &Msg) { Parser.Warning(L, Msg); } + bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); } + bool ParseDirectiveFalign(unsigned Size, SMLoc L); + + virtual bool ParseRegister(unsigned &RegNo, + SMLoc &StartLoc, + SMLoc &EndLoc) override; + bool ParseDirectiveSubsection(SMLoc L); + bool ParseDirectiveValue(unsigned Size, SMLoc L); + bool ParseDirectiveComm(bool IsLocal, SMLoc L); + bool RegisterMatchesArch(unsigned MatchNum) const; + + bool matchBundleOptions(); + bool handleNoncontigiousRegister(bool Contigious, SMLoc &Loc); + bool finishBundle(SMLoc IDLoc, MCStreamer &Out); + void canonicalizeImmediates(MCInst &MCI); + bool matchOneInstruction(MCInst &MCB, SMLoc IDLoc, + OperandVector &InstOperands, uint64_t &ErrorInfo, + bool MatchingInlineAsm, bool &MustExtend); + + bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + OperandVector &Operands, MCStreamer &Out, + uint64_t &ErrorInfo, bool MatchingInlineAsm) override; + + unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, unsigned Kind) override; + void OutOfRange(SMLoc IDLoc, long long Val, long long Max); + int processInstruction(MCInst &Inst, OperandVector const &Operands, + SMLoc IDLoc, bool &MustExtend); + + // Check if we have an assembler and, if so, set the ELF e_header flags. + void chksetELFHeaderEFlags(unsigned flags) { + if (getAssembler()) + getAssembler()->setELFHeaderEFlags(flags); + } + +/// @name Auto-generated Match Functions +/// { + +#define GET_ASSEMBLER_HEADER +#include "HexagonGenAsmMatcher.inc" + + /// } + +public: + HexagonAsmParser(const MCSubtargetInfo &_STI, MCAsmParser &_Parser, + const MCInstrInfo &MII, const MCTargetOptions &Options) + : MCTargetAsmParser(Options, _STI), Parser(_Parser), + MCII (MII), MCB(HexagonMCInstrInfo::createBundle()), InBrackets(false) { + setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits())); + + MCAsmParserExtension::Initialize(_Parser); + + Assembler = nullptr; + // FIXME: need better way to detect AsmStreamer (upstream removed getKind()) + if (!Parser.getStreamer().hasRawTextSupport()) { + MCELFStreamer *MES = static_cast<MCELFStreamer *>(&Parser.getStreamer()); + Assembler = &MES->getAssembler(); + } + } + + bool mustExtend(OperandVector &Operands); + bool splitIdentifier(OperandVector &Operands); + bool parseOperand(OperandVector &Operands); + bool parseInstruction(OperandVector &Operands); + bool implicitExpressionLocation(OperandVector &Operands); + bool parseExpressionOrOperand(OperandVector &Operands); + bool parseExpression(MCExpr const *& Expr); + virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + SMLoc NameLoc, OperandVector &Operands) override + { + llvm_unreachable("Unimplemented"); + } + virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + AsmToken ID, OperandVector &Operands) override; + + virtual bool ParseDirective(AsmToken DirectiveID) override; +}; + +/// HexagonOperand - Instances of this class represent a parsed Hexagon machine +/// instruction. 
+struct HexagonOperand : public MCParsedAsmOperand { + enum KindTy { Token, Immediate, Register } Kind; + + SMLoc StartLoc, EndLoc; + + struct TokTy { + const char *Data; + unsigned Length; + }; + + struct RegTy { + unsigned RegNum; + }; + + struct ImmTy { + const MCExpr *Val; + bool MustExtend; + }; + + struct InstTy { + OperandVector *SubInsts; + }; + + union { + struct TokTy Tok; + struct RegTy Reg; + struct ImmTy Imm; + }; + + HexagonOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} + +public: + HexagonOperand(const HexagonOperand &o) : MCParsedAsmOperand() { + Kind = o.Kind; + StartLoc = o.StartLoc; + EndLoc = o.EndLoc; + switch (Kind) { + case Register: + Reg = o.Reg; + break; + case Immediate: + Imm = o.Imm; + break; + case Token: + Tok = o.Tok; + break; + } + } + + /// getStartLoc - Get the location of the first token of this operand. + SMLoc getStartLoc() const { return StartLoc; } + + /// getEndLoc - Get the location of the last token of this operand. + SMLoc getEndLoc() const { return EndLoc; } + + unsigned getReg() const { + assert(Kind == Register && "Invalid access!"); + return Reg.RegNum; + } + + const MCExpr *getImm() const { + assert(Kind == Immediate && "Invalid access!"); + return Imm.Val; + } + + bool isToken() const { return Kind == Token; } + bool isImm() const { return Kind == Immediate; } + bool isMem() const { llvm_unreachable("No isMem"); } + bool isReg() const { return Kind == Register; } + + bool CheckImmRange(int immBits, int zeroBits, bool isSigned, + bool isRelocatable, bool Extendable) const { + if (Kind == Immediate) { + const MCExpr *myMCExpr = getImm(); + if (Imm.MustExtend && !Extendable) + return false; + int64_t Res; + if (myMCExpr->evaluateAsAbsolute(Res)) { + int bits = immBits + zeroBits; + // Field bit range is zerobits + bits + // zeroBits must be 0 + if (Res & ((1 << zeroBits) - 1)) + return false; + if (isSigned) { + if (Res < (1LL << (bits - 1)) && Res >= -(1LL << (bits - 1))) + return true; + } else { + if (bits == 64) + return true; + if (Res >= 0) + return ((uint64_t)Res < (uint64_t)(1ULL << bits)) ? true : false; + else { + const int64_t high_bit_set = 1ULL << 63; + const uint64_t mask = (high_bit_set >> (63 - bits)); + return (((uint64_t)Res & mask) == mask) ? 
true : false; + } + } + } else if (myMCExpr->getKind() == MCExpr::SymbolRef && isRelocatable) + return true; + else if (myMCExpr->getKind() == MCExpr::Binary || + myMCExpr->getKind() == MCExpr::Unary) + return true; + } + return false; + } + + bool isf32Ext() const { return false; } + bool iss32Imm() const { return CheckImmRange(32, 0, true, true, false); } + bool iss8Imm() const { return CheckImmRange(8, 0, true, false, false); } + bool iss8Imm64() const { return CheckImmRange(8, 0, true, true, false); } + bool iss7Imm() const { return CheckImmRange(7, 0, true, false, false); } + bool iss6Imm() const { return CheckImmRange(6, 0, true, false, false); } + bool iss4Imm() const { return CheckImmRange(4, 0, true, false, false); } + bool iss4_0Imm() const { return CheckImmRange(4, 0, true, false, false); } + bool iss4_1Imm() const { return CheckImmRange(4, 1, true, false, false); } + bool iss4_2Imm() const { return CheckImmRange(4, 2, true, false, false); } + bool iss4_3Imm() const { return CheckImmRange(4, 3, true, false, false); } + bool iss4_6Imm() const { return CheckImmRange(4, 0, true, false, false); } + bool iss3_6Imm() const { return CheckImmRange(3, 0, true, false, false); } + bool iss3Imm() const { return CheckImmRange(3, 0, true, false, false); } + + bool isu64Imm() const { return CheckImmRange(64, 0, false, true, true); } + bool isu32Imm() const { return CheckImmRange(32, 0, false, true, false); } + bool isu26_6Imm() const { return CheckImmRange(26, 6, false, true, false); } + bool isu16Imm() const { return CheckImmRange(16, 0, false, true, false); } + bool isu16_0Imm() const { return CheckImmRange(16, 0, false, true, false); } + bool isu16_1Imm() const { return CheckImmRange(16, 1, false, true, false); } + bool isu16_2Imm() const { return CheckImmRange(16, 2, false, true, false); } + bool isu16_3Imm() const { return CheckImmRange(16, 3, false, true, false); } + bool isu11_3Imm() const { return CheckImmRange(11, 3, false, false, false); } + bool isu6_0Imm() const { return CheckImmRange(6, 0, false, false, false); } + bool isu6_1Imm() const { return CheckImmRange(6, 1, false, false, false); } + bool isu6_2Imm() const { return CheckImmRange(6, 2, false, false, false); } + bool isu6_3Imm() const { return CheckImmRange(6, 3, false, false, false); } + bool isu10Imm() const { return CheckImmRange(10, 0, false, false, false); } + bool isu9Imm() const { return CheckImmRange(9, 0, false, false, false); } + bool isu8Imm() const { return CheckImmRange(8, 0, false, false, false); } + bool isu7Imm() const { return CheckImmRange(7, 0, false, false, false); } + bool isu6Imm() const { return CheckImmRange(6, 0, false, false, false); } + bool isu5Imm() const { return CheckImmRange(5, 0, false, false, false); } + bool isu4Imm() const { return CheckImmRange(4, 0, false, false, false); } + bool isu3Imm() const { return CheckImmRange(3, 0, false, false, false); } + bool isu2Imm() const { return CheckImmRange(2, 0, false, false, false); } + bool isu1Imm() const { return CheckImmRange(1, 0, false, false, false); } + + bool ism6Imm() const { return CheckImmRange(6, 0, false, false, false); } + bool isn8Imm() const { return CheckImmRange(8, 0, false, false, false); } + + bool iss16Ext() const { return CheckImmRange(16 + 26, 0, true, true, true); } + bool iss12Ext() const { return CheckImmRange(12 + 26, 0, true, true, true); } + bool iss10Ext() const { return CheckImmRange(10 + 26, 0, true, true, true); } + bool iss9Ext() const { return CheckImmRange(9 + 26, 0, true, true, true); } + bool iss8Ext() const { 
return CheckImmRange(8 + 26, 0, true, true, true); } + bool iss7Ext() const { return CheckImmRange(7 + 26, 0, true, true, true); } + bool iss6Ext() const { return CheckImmRange(6 + 26, 0, true, true, true); } + bool iss11_0Ext() const { + return CheckImmRange(11 + 26, 0, true, true, true); + } + bool iss11_1Ext() const { + return CheckImmRange(11 + 26, 1, true, true, true); + } + bool iss11_2Ext() const { + return CheckImmRange(11 + 26, 2, true, true, true); + } + bool iss11_3Ext() const { + return CheckImmRange(11 + 26, 3, true, true, true); + } + + bool isu6Ext() const { return CheckImmRange(6 + 26, 0, false, true, true); } + bool isu7Ext() const { return CheckImmRange(7 + 26, 0, false, true, true); } + bool isu8Ext() const { return CheckImmRange(8 + 26, 0, false, true, true); } + bool isu9Ext() const { return CheckImmRange(9 + 26, 0, false, true, true); } + bool isu10Ext() const { return CheckImmRange(10 + 26, 0, false, true, true); } + bool isu6_0Ext() const { return CheckImmRange(6 + 26, 0, false, true, true); } + bool isu6_1Ext() const { return CheckImmRange(6 + 26, 1, false, true, true); } + bool isu6_2Ext() const { return CheckImmRange(6 + 26, 2, false, true, true); } + bool isu6_3Ext() const { return CheckImmRange(6 + 26, 3, false, true, true); } + bool isu32MustExt() const { return isImm() && Imm.MustExtend; } + + void addRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(getReg())); + } + + void addImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createExpr(getImm())); + } + + void addSignedImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + MCExpr const *Expr = getImm(); + int64_t Value; + if (!Expr->evaluateAsAbsolute(Value)) { + Inst.addOperand(MCOperand::createExpr(Expr)); + return; + } + int64_t Extended = SignExtend64 (Value, 32); + if ((Extended < 0) == (Value < 0)) { + Inst.addOperand(MCOperand::createExpr(Expr)); + return; + } + // Flip bit 33 to signal signed unsigned mismatch + Extended ^= 0x100000000; + Inst.addOperand(MCOperand::createImm(Extended)); + } + + void addf32ExtOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + + void adds32ImmOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds8ImmOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds8Imm64Operands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds6ImmOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds4ImmOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds4_0ImmOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds4_1ImmOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds4_2ImmOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds4_3ImmOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds3ImmOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + + void addu64ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu32ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu26_6ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + 
void addu16ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu16_0ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu16_1ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu16_2ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu16_3ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu11_3ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu10ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu9ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu8ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu7ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu6ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu6_0ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu6_1ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu6_2ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu6_3ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu5ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu4ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu3ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu2ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu1ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + + void addm6ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addn8ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + + void adds16ExtOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds12ExtOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds10ExtOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds9ExtOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds8ExtOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds6ExtOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds11_0ExtOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds11_1ExtOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds11_2ExtOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds11_3ExtOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + + void addu6ExtOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu7ExtOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu8ExtOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu9ExtOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu10ExtOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu6_0ExtOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu6_1ExtOperands(MCInst &Inst, unsigned N) const { + 
addImmOperands(Inst, N); + } + void addu6_2ExtOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu6_3ExtOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu32MustExtOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + + void adds4_6ImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + Inst.addOperand(MCOperand::createImm(CE->getValue() * 64)); + } + + void adds3_6ImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + Inst.addOperand(MCOperand::createImm(CE->getValue() * 64)); + } + + StringRef getToken() const { + assert(Kind == Token && "Invalid access!"); + return StringRef(Tok.Data, Tok.Length); + } + + virtual void print(raw_ostream &OS) const; + + static std::unique_ptr<HexagonOperand> CreateToken(StringRef Str, SMLoc S) { + HexagonOperand *Op = new HexagonOperand(Token); + Op->Tok.Data = Str.data(); + Op->Tok.Length = Str.size(); + Op->StartLoc = S; + Op->EndLoc = S; + return std::unique_ptr<HexagonOperand>(Op); + } + + static std::unique_ptr<HexagonOperand> CreateReg(unsigned RegNum, SMLoc S, + SMLoc E) { + HexagonOperand *Op = new HexagonOperand(Register); + Op->Reg.RegNum = RegNum; + Op->StartLoc = S; + Op->EndLoc = E; + return std::unique_ptr<HexagonOperand>(Op); + } + + static std::unique_ptr<HexagonOperand> CreateImm(const MCExpr *Val, SMLoc S, + SMLoc E) { + HexagonOperand *Op = new HexagonOperand(Immediate); + Op->Imm.Val = Val; + Op->Imm.MustExtend = false; + Op->StartLoc = S; + Op->EndLoc = E; + return std::unique_ptr<HexagonOperand>(Op); + } +}; + +} // end anonymous namespace. + +void HexagonOperand::print(raw_ostream &OS) const { + switch (Kind) { + case Immediate: + getImm()->print(OS, nullptr); + break; + case Register: + OS << "<register R"; + OS << getReg() << ">"; + break; + case Token: + OS << "'" << getToken() << "'"; + break; + } +} + +/// @name Auto-generated Match Functions +static unsigned MatchRegisterName(StringRef Name); + +bool HexagonAsmParser::finishBundle(SMLoc IDLoc, MCStreamer &Out) { + DEBUG(dbgs() << "Bundle:"); + DEBUG(MCB.dump_pretty(dbgs())); + DEBUG(dbgs() << "--\n"); + + // Check the bundle for errors. 
+ const MCRegisterInfo *RI = getContext().getRegisterInfo(); + HexagonMCChecker Check(MCII, getSTI(), MCB, MCB, *RI); + + bool CheckOk = HexagonMCInstrInfo::canonicalizePacket(MCII, getSTI(), + getContext(), MCB, + &Check); + + while (Check.getNextErrInfo() == true) { + unsigned Reg = Check.getErrRegister(); + Twine R(RI->getName(Reg)); + + uint64_t Err = Check.getError(); + if (Err != HexagonMCErrInfo::CHECK_SUCCESS) { + if (HexagonMCErrInfo::CHECK_ERROR_BRANCHES & Err) + Error(IDLoc, + "unconditional branch cannot precede another branch in packet"); + + if (HexagonMCErrInfo::CHECK_ERROR_NEWP & Err || + HexagonMCErrInfo::CHECK_ERROR_NEWV & Err) + Error(IDLoc, "register `" + R + + "' used with `.new' " + "but not validly modified in the same packet"); + + if (HexagonMCErrInfo::CHECK_ERROR_REGISTERS & Err) + Error(IDLoc, "register `" + R + "' modified more than once"); + + if (HexagonMCErrInfo::CHECK_ERROR_READONLY & Err) + Error(IDLoc, "cannot write to read-only register `" + R + "'"); + + if (HexagonMCErrInfo::CHECK_ERROR_LOOP & Err) + Error(IDLoc, "loop-setup and some branch instructions " + "cannot be in the same packet"); + + if (HexagonMCErrInfo::CHECK_ERROR_ENDLOOP & Err) { + Twine N(HexagonMCInstrInfo::isInnerLoop(MCB) ? '0' : '1'); + Error(IDLoc, "packet marked with `:endloop" + N + "' " + + "cannot contain instructions that modify register " + + "`" + R + "'"); + } + + if (HexagonMCErrInfo::CHECK_ERROR_SOLO & Err) + Error(IDLoc, + "instruction cannot appear in packet with other instructions"); + + if (HexagonMCErrInfo::CHECK_ERROR_NOSLOTS & Err) + Error(IDLoc, "too many slots used in packet"); + + if (Err & HexagonMCErrInfo::CHECK_ERROR_SHUFFLE) { + uint64_t Erm = Check.getShuffleError(); + + if (HexagonShuffler::SHUFFLE_ERROR_INVALID == Erm) + Error(IDLoc, "invalid instruction packet"); + else if (HexagonShuffler::SHUFFLE_ERROR_STORES == Erm) + Error(IDLoc, "invalid instruction packet: too many stores"); + else if (HexagonShuffler::SHUFFLE_ERROR_LOADS == Erm) + Error(IDLoc, "invalid instruction packet: too many loads"); + else if (HexagonShuffler::SHUFFLE_ERROR_BRANCHES == Erm) + Error(IDLoc, "too many branches in packet"); + else if (HexagonShuffler::SHUFFLE_ERROR_NOSLOTS == Erm) + Error(IDLoc, "invalid instruction packet: out of slots"); + else if (HexagonShuffler::SHUFFLE_ERROR_SLOTS == Erm) + Error(IDLoc, "invalid instruction packet: slot error"); + else if (HexagonShuffler::SHUFFLE_ERROR_ERRATA2 == Erm) + Error(IDLoc, "v60 packet violation"); + else if (HexagonShuffler::SHUFFLE_ERROR_STORE_LOAD_CONFLICT == Erm) + Error(IDLoc, "slot 0 instruction does not allow slot 1 store"); + else + Error(IDLoc, "unknown error in instruction packet"); + } + } + + unsigned Warn = Check.getWarning(); + if (Warn != HexagonMCErrInfo::CHECK_SUCCESS) { + if (HexagonMCErrInfo::CHECK_WARN_CURRENT & Warn) + Warning(IDLoc, "register `" + R + "' used with `.cur' " + "but not used in the same packet"); + else if (HexagonMCErrInfo::CHECK_WARN_TEMPORARY & Warn) + Warning(IDLoc, "register `" + R + "' used with `.tmp' " + "but not used in the same packet"); + } + } + + if (CheckOk) { + MCB.setLoc(IDLoc); + if (HexagonMCInstrInfo::bundleSize(MCB) == 0) { + assert(!HexagonMCInstrInfo::isInnerLoop(MCB)); + assert(!HexagonMCInstrInfo::isOuterLoop(MCB)); + // Empty packets are valid yet aren't emitted + return false; + } + Out.EmitInstruction(MCB, getSTI()); + } else { + // If compounding and duplexing didn't reduce the size below + // 4 or less we have a packet that is too big. 
+ if (HexagonMCInstrInfo::bundleSize(MCB) > HEXAGON_PACKET_SIZE) { + Error(IDLoc, "invalid instruction packet: out of slots"); + return true; // Error + } + } + + return false; // No error +} + +bool HexagonAsmParser::matchBundleOptions() { + MCAsmParser &Parser = getParser(); + MCAsmLexer &Lexer = getLexer(); + while (true) { + if (!Parser.getTok().is(AsmToken::Colon)) + return false; + Lexer.Lex(); + StringRef Option = Parser.getTok().getString(); + if (Option.compare_lower("endloop0") == 0) + HexagonMCInstrInfo::setInnerLoop(MCB); + else if (Option.compare_lower("endloop1") == 0) + HexagonMCInstrInfo::setOuterLoop(MCB); + else if (Option.compare_lower("mem_noshuf") == 0) + HexagonMCInstrInfo::setMemReorderDisabled(MCB); + else if (Option.compare_lower("mem_shuf") == 0) + HexagonMCInstrInfo::setMemStoreReorderEnabled(MCB); + else + return true; + Lexer.Lex(); + } +} + +// For instruction aliases, immediates are generated rather than +// MCConstantExpr. Convert them for uniform MCExpr. +// Also check for signed/unsigned mismatches and warn +void HexagonAsmParser::canonicalizeImmediates(MCInst &MCI) { + MCInst NewInst; + NewInst.setOpcode(MCI.getOpcode()); + for (MCOperand &I : MCI) + if (I.isImm()) { + int64_t Value (I.getImm()); + if ((Value & 0x100000000) != (Value & 0x80000000)) { + // Detect flipped bit 33 wrt bit 32 and signal warning + Value ^= 0x100000000; + if (WarnSignedMismatch) + Warning (MCI.getLoc(), "Signed/Unsigned mismatch"); + } + NewInst.addOperand(MCOperand::createExpr( + MCConstantExpr::create(Value, getContext()))); + } + else + NewInst.addOperand(I); + MCI = NewInst; +} + +bool HexagonAsmParser::matchOneInstruction(MCInst &MCI, SMLoc IDLoc, + OperandVector &InstOperands, + uint64_t &ErrorInfo, + bool MatchingInlineAsm, + bool &MustExtend) { + // Perform matching with tablegen asmmatcher generated function + int result = + MatchInstructionImpl(InstOperands, MCI, ErrorInfo, MatchingInlineAsm); + if (result == Match_Success) { + MCI.setLoc(IDLoc); + MustExtend = mustExtend(InstOperands); + canonicalizeImmediates(MCI); + result = processInstruction(MCI, InstOperands, IDLoc, MustExtend); + + DEBUG(dbgs() << "Insn:"); + DEBUG(MCI.dump_pretty(dbgs())); + DEBUG(dbgs() << "\n\n"); + + MCI.setLoc(IDLoc); + } + + // Create instruction operand for bundle instruction + // Break this into a separate function Code here is less readable + // Think about how to get an instruction error to report correctly. + // SMLoc will return the "{" + switch (result) { + default: + break; + case Match_Success: + return false; + case Match_MissingFeature: + return Error(IDLoc, "invalid instruction"); + case Match_MnemonicFail: + return Error(IDLoc, "unrecognized instruction"); + case Match_InvalidOperand: + SMLoc ErrorLoc = IDLoc; + if (ErrorInfo != ~0U) { + if (ErrorInfo >= InstOperands.size()) + return Error(IDLoc, "too few operands for instruction"); + + ErrorLoc = (static_cast<HexagonOperand *>(InstOperands[ErrorInfo].get())) + ->getStartLoc(); + if (ErrorLoc == SMLoc()) + ErrorLoc = IDLoc; + } + return Error(ErrorLoc, "invalid operand for instruction"); + } + llvm_unreachable("Implement any new match types added!"); +} + +bool HexagonAsmParser::mustExtend(OperandVector &Operands) { + unsigned Count = 0; + for (std::unique_ptr<MCParsedAsmOperand> &i : Operands) + if (i->isImm()) + if (static_cast<HexagonOperand *>(i.get())->Imm.MustExtend) + ++Count; + // Multiple extenders should have been filtered by iss9Ext et. al. 
+ assert(Count < 2 && "Multiple extenders"); + return Count == 1; +} + +bool HexagonAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + OperandVector &Operands, + MCStreamer &Out, + uint64_t &ErrorInfo, + bool MatchingInlineAsm) { + if (!InBrackets) { + MCB.clear(); + MCB.addOperand(MCOperand::createImm(0)); + } + HexagonOperand &FirstOperand = static_cast<HexagonOperand &>(*Operands[0]); + if (FirstOperand.isToken() && FirstOperand.getToken() == "{") { + assert(Operands.size() == 1 && "Brackets should be by themselves"); + if (InBrackets) { + getParser().Error(IDLoc, "Already in a packet"); + return true; + } + InBrackets = true; + return false; + } + if (FirstOperand.isToken() && FirstOperand.getToken() == "}") { + assert(Operands.size() == 1 && "Brackets should be by themselves"); + if (!InBrackets) { + getParser().Error(IDLoc, "Not in a packet"); + return true; + } + InBrackets = false; + if (matchBundleOptions()) + return true; + return finishBundle(IDLoc, Out); + } + MCInst *SubInst = new (getParser().getContext()) MCInst; + bool MustExtend = false; + if (matchOneInstruction(*SubInst, IDLoc, Operands, ErrorInfo, + MatchingInlineAsm, MustExtend)) + return true; + HexagonMCInstrInfo::extendIfNeeded( + getParser().getContext(), MCII, MCB, *SubInst, + HexagonMCInstrInfo::isExtended(MCII, *SubInst) || MustExtend); + MCB.addOperand(MCOperand::createInst(SubInst)); + if (!InBrackets) + return finishBundle(IDLoc, Out); + return false; +} + +/// ParseDirective parses the Hexagon specific directives +bool HexagonAsmParser::ParseDirective(AsmToken DirectiveID) { + StringRef IDVal = DirectiveID.getIdentifier(); + if ((IDVal.lower() == ".word") || (IDVal.lower() == ".4byte")) + return ParseDirectiveValue(4, DirectiveID.getLoc()); + if (IDVal.lower() == ".short" || IDVal.lower() == ".hword" || + IDVal.lower() == ".half") + return ParseDirectiveValue(2, DirectiveID.getLoc()); + if (IDVal.lower() == ".falign") + return ParseDirectiveFalign(256, DirectiveID.getLoc()); + if ((IDVal.lower() == ".lcomm") || (IDVal.lower() == ".lcommon")) + return ParseDirectiveComm(true, DirectiveID.getLoc()); + if ((IDVal.lower() == ".comm") || (IDVal.lower() == ".common")) + return ParseDirectiveComm(false, DirectiveID.getLoc()); + if (IDVal.lower() == ".subsection") + return ParseDirectiveSubsection(DirectiveID.getLoc()); + + return true; +} +bool HexagonAsmParser::ParseDirectiveSubsection(SMLoc L) { + const MCExpr *Subsection = 0; + int64_t Res; + + assert((getLexer().isNot(AsmToken::EndOfStatement)) && + "Invalid subsection directive"); + getParser().parseExpression(Subsection); + + if (!Subsection->evaluateAsAbsolute(Res)) + return Error(L, "Cannot evaluate subsection number"); + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in directive"); + + // 0-8192 is the hard-coded range in MCObjectStreamper.cpp, this keeps the + // negative subsections together and in the same order but at the opposite + // end of the section. Only legacy hexagon-gcc created assembly code + // used negative subsections. 
+ if ((Res < 0) && (Res > -8193)) + Subsection = MCConstantExpr::create(8192 + Res, this->getContext()); + + getStreamer().SubSection(Subsection); + return false; +} + +/// ::= .falign [expression] +bool HexagonAsmParser::ParseDirectiveFalign(unsigned Size, SMLoc L) { + + int64_t MaxBytesToFill = 15; + + // if there is an arguement + if (getLexer().isNot(AsmToken::EndOfStatement)) { + const MCExpr *Value; + SMLoc ExprLoc = L; + + // Make sure we have a number (false is returned if expression is a number) + if (getParser().parseExpression(Value) == false) { + // Make sure this is a number that is in range + const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value); + uint64_t IntValue = MCE->getValue(); + if (!isUIntN(Size, IntValue) && !isIntN(Size, IntValue)) + return Error(ExprLoc, "literal value out of range (256) for falign"); + MaxBytesToFill = IntValue; + Lex(); + } else { + return Error(ExprLoc, "not a valid expression for falign directive"); + } + } + + getTargetStreamer().emitFAlign(16, MaxBytesToFill); + Lex(); + + return false; +} + +/// ::= .word [ expression (, expression)* ] +bool HexagonAsmParser::ParseDirectiveValue(unsigned Size, SMLoc L) { + if (getLexer().isNot(AsmToken::EndOfStatement)) { + + for (;;) { + const MCExpr *Value; + SMLoc ExprLoc = L; + if (getParser().parseExpression(Value)) + return true; + + // Special case constant expressions to match code generator. + if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) { + assert(Size <= 8 && "Invalid size"); + uint64_t IntValue = MCE->getValue(); + if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue)) + return Error(ExprLoc, "literal value out of range for directive"); + getStreamer().EmitIntValue(IntValue, Size); + } else + getStreamer().EmitValue(Value, Size); + + if (getLexer().is(AsmToken::EndOfStatement)) + break; + + // FIXME: Improve diagnostic. + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + } + } + + Lex(); + return false; +} + +// This is largely a copy of AsmParser's ParseDirectiveComm extended to +// accept a 3rd argument, AccessAlignment which indicates the smallest +// memory access made to the symbol, expressed in bytes. If no +// AccessAlignment is specified it defaults to the Alignment Value. +// Hexagon's .lcomm: +// .lcomm Symbol, Length, Alignment, AccessAlignment +bool HexagonAsmParser::ParseDirectiveComm(bool IsLocal, SMLoc Loc) { + // FIXME: need better way to detect if AsmStreamer (upstream removed + // getKind()) + if (getStreamer().hasRawTextSupport()) + return true; // Only object file output requires special treatment. + + StringRef Name; + if (getParser().parseIdentifier(Name)) + return TokError("expected identifier in directive"); + // Handle the identifier as the key symbol. 
+ MCSymbol *Sym = getContext().getOrCreateSymbol(Name); + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + + int64_t Size; + SMLoc SizeLoc = getLexer().getLoc(); + if (getParser().parseAbsoluteExpression(Size)) + return true; + + int64_t ByteAlignment = 1; + SMLoc ByteAlignmentLoc; + if (getLexer().is(AsmToken::Comma)) { + Lex(); + ByteAlignmentLoc = getLexer().getLoc(); + if (getParser().parseAbsoluteExpression(ByteAlignment)) + return true; + if (!isPowerOf2_64(ByteAlignment)) + return Error(ByteAlignmentLoc, "alignment must be a power of 2"); + } + + int64_t AccessAlignment = 0; + if (getLexer().is(AsmToken::Comma)) { + // The optional access argument specifies the size of the smallest memory + // access to be made to the symbol, expressed in bytes. + SMLoc AccessAlignmentLoc; + Lex(); + AccessAlignmentLoc = getLexer().getLoc(); + if (getParser().parseAbsoluteExpression(AccessAlignment)) + return true; + + if (!isPowerOf2_64(AccessAlignment)) + return Error(AccessAlignmentLoc, "access alignment must be a power of 2"); + } + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.comm' or '.lcomm' directive"); + + Lex(); + + // NOTE: a size of zero for a .comm should create a undefined symbol + // but a size of .lcomm creates a bss symbol of size zero. + if (Size < 0) + return Error(SizeLoc, "invalid '.comm' or '.lcomm' directive size, can't " + "be less than zero"); + + // NOTE: The alignment in the directive is a power of 2 value, the assembler + // may internally end up wanting an alignment in bytes. + // FIXME: Diagnose overflow. + if (ByteAlignment < 0) + return Error(ByteAlignmentLoc, "invalid '.comm' or '.lcomm' directive " + "alignment, can't be less than zero"); + + if (!Sym->isUndefined()) + return Error(Loc, "invalid symbol redefinition"); + + HexagonMCELFStreamer &HexagonELFStreamer = + static_cast<HexagonMCELFStreamer &>(getStreamer()); + if (IsLocal) { + HexagonELFStreamer.HexagonMCEmitLocalCommonSymbol(Sym, Size, ByteAlignment, + AccessAlignment); + return false; + } + + HexagonELFStreamer.HexagonMCEmitCommonSymbol(Sym, Size, ByteAlignment, + AccessAlignment); + return false; +} + +// validate register against architecture +bool HexagonAsmParser::RegisterMatchesArch(unsigned MatchNum) const { + return true; +} + +// extern "C" void LLVMInitializeHexagonAsmLexer(); + +/// Force static initialization. 
+extern "C" void LLVMInitializeHexagonAsmParser() { + RegisterMCAsmParser<HexagonAsmParser> X(TheHexagonTarget); +} + +#define GET_MATCHER_IMPLEMENTATION +#define GET_REGISTER_MATCHER +#include "HexagonGenAsmMatcher.inc" + +namespace { +bool previousEqual(OperandVector &Operands, size_t Index, StringRef String) { + if (Index >= Operands.size()) + return false; + MCParsedAsmOperand &Operand = *Operands[Operands.size() - Index - 1]; + if (!Operand.isToken()) + return false; + return static_cast<HexagonOperand &>(Operand).getToken().equals_lower(String); +} +bool previousIsLoop(OperandVector &Operands, size_t Index) { + return previousEqual(Operands, Index, "loop0") || + previousEqual(Operands, Index, "loop1") || + previousEqual(Operands, Index, "sp1loop0") || + previousEqual(Operands, Index, "sp2loop0") || + previousEqual(Operands, Index, "sp3loop0"); +} +} + +bool HexagonAsmParser::splitIdentifier(OperandVector &Operands) { + AsmToken const &Token = getParser().getTok(); + StringRef String = Token.getString(); + SMLoc Loc = Token.getLoc(); + getLexer().Lex(); + do { + std::pair<StringRef, StringRef> HeadTail = String.split('.'); + if (!HeadTail.first.empty()) + Operands.push_back(HexagonOperand::CreateToken(HeadTail.first, Loc)); + if (!HeadTail.second.empty()) + Operands.push_back(HexagonOperand::CreateToken( + String.substr(HeadTail.first.size(), 1), Loc)); + String = HeadTail.second; + } while (!String.empty()); + return false; +} + +bool HexagonAsmParser::parseOperand(OperandVector &Operands) { + unsigned Register; + SMLoc Begin; + SMLoc End; + MCAsmLexer &Lexer = getLexer(); + if (!ParseRegister(Register, Begin, End)) { + if (!ErrorMissingParenthesis) + switch (Register) { + default: + break; + case Hexagon::P0: + case Hexagon::P1: + case Hexagon::P2: + case Hexagon::P3: + if (previousEqual(Operands, 0, "if")) { + if (WarnMissingParenthesis) + Warning (Begin, "Missing parenthesis around predicate register"); + static char const *LParen = "("; + static char const *RParen = ")"; + Operands.push_back(HexagonOperand::CreateToken(LParen, Begin)); + Operands.push_back(HexagonOperand::CreateReg(Register, Begin, End)); + AsmToken MaybeDotNew = Lexer.getTok(); + if (MaybeDotNew.is(AsmToken::TokenKind::Identifier) && + MaybeDotNew.getString().equals_lower(".new")) + splitIdentifier(Operands); + Operands.push_back(HexagonOperand::CreateToken(RParen, Begin)); + return false; + } + if (previousEqual(Operands, 0, "!") && + previousEqual(Operands, 1, "if")) { + if (WarnMissingParenthesis) + Warning (Begin, "Missing parenthesis around predicate register"); + static char const *LParen = "("; + static char const *RParen = ")"; + Operands.insert(Operands.end () - 1, + HexagonOperand::CreateToken(LParen, Begin)); + Operands.push_back(HexagonOperand::CreateReg(Register, Begin, End)); + AsmToken MaybeDotNew = Lexer.getTok(); + if (MaybeDotNew.is(AsmToken::TokenKind::Identifier) && + MaybeDotNew.getString().equals_lower(".new")) + splitIdentifier(Operands); + Operands.push_back(HexagonOperand::CreateToken(RParen, Begin)); + return false; + } + break; + } + Operands.push_back(HexagonOperand::CreateReg( + Register, Begin, End)); + return false; + } + return splitIdentifier(Operands); +} + +bool HexagonAsmParser::isLabel(AsmToken &Token) { + MCAsmLexer &Lexer = getLexer(); + AsmToken const &Second = Lexer.getTok(); + AsmToken Third = Lexer.peekTok(); + StringRef String = Token.getString(); + if (Token.is(AsmToken::TokenKind::LCurly) || + Token.is(AsmToken::TokenKind::RCurly)) + return false; + if 
(!Token.is(AsmToken::TokenKind::Identifier)) + return true; + if (!MatchRegisterName(String.lower())) + return true; + (void)Second; + assert(Second.is(AsmToken::Colon)); + StringRef Raw (String.data(), Third.getString().data() - String.data() + + Third.getString().size()); + std::string Collapsed = Raw; + Collapsed.erase(std::remove_if(Collapsed.begin(), Collapsed.end(), isspace), + Collapsed.end()); + StringRef Whole = Collapsed; + std::pair<StringRef, StringRef> DotSplit = Whole.split('.'); + if (!MatchRegisterName(DotSplit.first.lower())) + return true; + return false; +} + +bool HexagonAsmParser::handleNoncontigiousRegister(bool Contigious, SMLoc &Loc) { + if (!Contigious && ErrorNoncontigiousRegister) { + Error(Loc, "Register name is not contigious"); + return true; + } + if (!Contigious && WarnNoncontigiousRegister) + Warning(Loc, "Register name is not contigious"); + return false; +} + +bool HexagonAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { + MCAsmLexer &Lexer = getLexer(); + StartLoc = getLexer().getLoc(); + SmallVector<AsmToken, 5> Lookahead; + StringRef RawString(Lexer.getTok().getString().data(), 0); + bool Again = Lexer.is(AsmToken::Identifier); + bool NeededWorkaround = false; + while (Again) { + AsmToken const &Token = Lexer.getTok(); + RawString = StringRef(RawString.data(), + Token.getString().data() - RawString.data () + + Token.getString().size()); + Lookahead.push_back(Token); + Lexer.Lex(); + bool Contigious = Lexer.getTok().getString().data() == + Lookahead.back().getString().data() + + Lookahead.back().getString().size(); + bool Type = Lexer.is(AsmToken::Identifier) || Lexer.is(AsmToken::Dot) || + Lexer.is(AsmToken::Integer) || Lexer.is(AsmToken::Real) || + Lexer.is(AsmToken::Colon); + bool Workaround = Lexer.is(AsmToken::Colon) || + Lookahead.back().is(AsmToken::Colon); + Again = (Contigious && Type) || (Workaround && Type); + NeededWorkaround = NeededWorkaround || (Again && !(Contigious && Type)); + } + std::string Collapsed = RawString; + Collapsed.erase(std::remove_if(Collapsed.begin(), Collapsed.end(), isspace), + Collapsed.end()); + StringRef FullString = Collapsed; + std::pair<StringRef, StringRef> DotSplit = FullString.split('.'); + unsigned DotReg = MatchRegisterName(DotSplit.first.lower()); + if (DotReg != Hexagon::NoRegister && RegisterMatchesArch(DotReg)) { + if (DotSplit.second.empty()) { + RegNo = DotReg; + EndLoc = Lexer.getLoc(); + if (handleNoncontigiousRegister(!NeededWorkaround, StartLoc)) + return true; + return false; + } else { + RegNo = DotReg; + size_t First = RawString.find('.'); + StringRef DotString (RawString.data() + First, RawString.size() - First); + Lexer.UnLex(AsmToken(AsmToken::Identifier, DotString)); + EndLoc = Lexer.getLoc(); + if (handleNoncontigiousRegister(!NeededWorkaround, StartLoc)) + return true; + return false; + } + } + std::pair<StringRef, StringRef> ColonSplit = StringRef(FullString).split(':'); + unsigned ColonReg = MatchRegisterName(ColonSplit.first.lower()); + if (ColonReg != Hexagon::NoRegister && RegisterMatchesArch(DotReg)) { + Lexer.UnLex(Lookahead.back()); + Lookahead.pop_back(); + Lexer.UnLex(Lookahead.back()); + Lookahead.pop_back(); + RegNo = ColonReg; + EndLoc = Lexer.getLoc(); + if (handleNoncontigiousRegister(!NeededWorkaround, StartLoc)) + return true; + return false; + } + while (!Lookahead.empty()) { + Lexer.UnLex(Lookahead.back()); + Lookahead.pop_back(); + } + return true; +} + +bool HexagonAsmParser::implicitExpressionLocation(OperandVector &Operands) { + if 
(previousEqual(Operands, 0, "call")) + return true; + if (previousEqual(Operands, 0, "jump")) + if (!getLexer().getTok().is(AsmToken::Colon)) + return true; + if (previousEqual(Operands, 0, "(") && previousIsLoop(Operands, 1)) + return true; + if (previousEqual(Operands, 1, ":") && previousEqual(Operands, 2, "jump") && + (previousEqual(Operands, 0, "nt") || previousEqual(Operands, 0, "t"))) + return true; + return false; +} + +bool HexagonAsmParser::parseExpression(MCExpr const *& Expr) { + llvm::SmallVector<AsmToken, 4> Tokens; + MCAsmLexer &Lexer = getLexer(); + bool Done = false; + static char const * Comma = ","; + do { + Tokens.emplace_back (Lexer.getTok()); + Lexer.Lex(); + switch (Tokens.back().getKind()) + { + case AsmToken::TokenKind::Hash: + if (Tokens.size () > 1) + if ((Tokens.end () - 2)->getKind() == AsmToken::TokenKind::Plus) { + Tokens.insert(Tokens.end() - 2, + AsmToken(AsmToken::TokenKind::Comma, Comma)); + Done = true; + } + break; + case AsmToken::TokenKind::RCurly: + case AsmToken::TokenKind::EndOfStatement: + case AsmToken::TokenKind::Eof: + Done = true; + break; + default: + break; + } + } while (!Done); + while (!Tokens.empty()) { + Lexer.UnLex(Tokens.back()); + Tokens.pop_back(); + } + return getParser().parseExpression(Expr); +} + +bool HexagonAsmParser::parseExpressionOrOperand(OperandVector &Operands) { + if (implicitExpressionLocation(Operands)) { + MCAsmParser &Parser = getParser(); + SMLoc Loc = Parser.getLexer().getLoc(); + std::unique_ptr<HexagonOperand> Expr = + HexagonOperand::CreateImm(nullptr, Loc, Loc); + MCExpr const *& Val = Expr->Imm.Val; + Operands.push_back(std::move(Expr)); + return parseExpression(Val); + } + return parseOperand(Operands); +} + +/// Parse an instruction. +bool HexagonAsmParser::parseInstruction(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); + MCAsmLexer &Lexer = getLexer(); + while (true) { + AsmToken const &Token = Parser.getTok(); + switch (Token.getKind()) { + case AsmToken::EndOfStatement: { + Lexer.Lex(); + return false; + } + case AsmToken::LCurly: { + if (!Operands.empty()) + return true; + Operands.push_back( + HexagonOperand::CreateToken(Token.getString(), Token.getLoc())); + Lexer.Lex(); + return false; + } + case AsmToken::RCurly: { + if (Operands.empty()) { + Operands.push_back( + HexagonOperand::CreateToken(Token.getString(), Token.getLoc())); + Lexer.Lex(); + } + return false; + } + case AsmToken::Comma: { + Lexer.Lex(); + continue; + } + case AsmToken::EqualEqual: + case AsmToken::ExclaimEqual: + case AsmToken::GreaterEqual: + case AsmToken::GreaterGreater: + case AsmToken::LessEqual: + case AsmToken::LessLess: { + Operands.push_back(HexagonOperand::CreateToken( + Token.getString().substr(0, 1), Token.getLoc())); + Operands.push_back(HexagonOperand::CreateToken( + Token.getString().substr(1, 1), Token.getLoc())); + Lexer.Lex(); + continue; + } + case AsmToken::Hash: { + bool MustNotExtend = false; + bool ImplicitExpression = implicitExpressionLocation(Operands); + std::unique_ptr<HexagonOperand> Expr = HexagonOperand::CreateImm( + nullptr, Lexer.getLoc(), Lexer.getLoc()); + if (!ImplicitExpression) + Operands.push_back( + HexagonOperand::CreateToken(Token.getString(), Token.getLoc())); + Lexer.Lex(); + bool MustExtend = false; + bool HiOnly = false; + bool LoOnly = false; + if (Lexer.is(AsmToken::Hash)) { + Lexer.Lex(); + MustExtend = true; + } else if (ImplicitExpression) + MustNotExtend = true; + AsmToken const &Token = Parser.getTok(); + if (Token.is(AsmToken::Identifier)) { + StringRef String 
= Token.getString(); + AsmToken IDToken = Token; + if (String.lower() == "hi") { + HiOnly = true; + } else if (String.lower() == "lo") { + LoOnly = true; + } + if (HiOnly || LoOnly) { + AsmToken LParen = Lexer.peekTok(); + if (!LParen.is(AsmToken::LParen)) { + HiOnly = false; + LoOnly = false; + } else { + Lexer.Lex(); + } + } + } + if (parseExpression(Expr->Imm.Val)) + return true; + int64_t Value; + MCContext &Context = Parser.getContext(); + assert(Expr->Imm.Val != nullptr); + if (Expr->Imm.Val->evaluateAsAbsolute(Value)) { + if (HiOnly) + Expr->Imm.Val = MCBinaryExpr::createLShr( + Expr->Imm.Val, MCConstantExpr::create(16, Context), Context); + if (HiOnly || LoOnly) + Expr->Imm.Val = MCBinaryExpr::createAnd( + Expr->Imm.Val, MCConstantExpr::create(0xffff, Context), Context); + } + if (MustNotExtend) + Expr->Imm.Val = HexagonNoExtendOperand::Create(Expr->Imm.Val, Context); + Expr->Imm.MustExtend = MustExtend; + Operands.push_back(std::move(Expr)); + continue; + } + default: + break; + } + if (parseExpressionOrOperand(Operands)) + return true; + } +} + +bool HexagonAsmParser::ParseInstruction(ParseInstructionInfo &Info, + StringRef Name, + AsmToken ID, + OperandVector &Operands) { + getLexer().UnLex(ID); + return parseInstruction(Operands); +} + +namespace { +MCInst makeCombineInst(int opCode, MCOperand &Rdd, + MCOperand &MO1, MCOperand &MO2) { + MCInst TmpInst; + TmpInst.setOpcode(opCode); + TmpInst.addOperand(Rdd); + TmpInst.addOperand(MO1); + TmpInst.addOperand(MO2); + + return TmpInst; +} +} + +// Define this matcher function after the auto-generated include so we +// have the match class enum definitions. +unsigned HexagonAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, + unsigned Kind) { + HexagonOperand *Op = static_cast<HexagonOperand *>(&AsmOp); + + switch (Kind) { + case MCK_0: { + int64_t Value; + return Op->isImm() && Op->Imm.Val->evaluateAsAbsolute(Value) && Value == 0 + ? Match_Success + : Match_InvalidOperand; + } + case MCK_1: { + int64_t Value; + return Op->isImm() && Op->Imm.Val->evaluateAsAbsolute(Value) && Value == 1 + ? Match_Success + : Match_InvalidOperand; + } + case MCK__MINUS_1: { + int64_t Value; + return Op->isImm() && Op->Imm.Val->evaluateAsAbsolute(Value) && Value == -1 + ? 
Match_Success + : Match_InvalidOperand; + } + } + if (Op->Kind == HexagonOperand::Token && Kind != InvalidMatchClass) { + StringRef myStringRef = StringRef(Op->Tok.Data, Op->Tok.Length); + if (matchTokenString(myStringRef.lower()) == (MatchClassKind)Kind) + return Match_Success; + if (matchTokenString(myStringRef.upper()) == (MatchClassKind)Kind) + return Match_Success; + } + + DEBUG(dbgs() << "Unmatched Operand:"); + DEBUG(Op->dump()); + DEBUG(dbgs() << "\n"); + + return Match_InvalidOperand; +} + +void HexagonAsmParser::OutOfRange(SMLoc IDLoc, long long Val, long long Max) { + std::string errStr; + raw_string_ostream ES(errStr); + ES << "value " << Val << "(" << format_hex(Val, 0) << ") out of range: "; + if (Max >= 0) + ES << "0-" << Max; + else + ES << Max << "-" << (-Max - 1); + Error(IDLoc, ES.str().c_str()); +} + +int HexagonAsmParser::processInstruction(MCInst &Inst, + OperandVector const &Operands, + SMLoc IDLoc, bool &MustExtend) { + MCContext &Context = getParser().getContext(); + const MCRegisterInfo *RI = getContext().getRegisterInfo(); + std::string r = "r"; + std::string v = "v"; + std::string Colon = ":"; + + bool is32bit = false; // used to distinguish between CONST32 and CONST64 + switch (Inst.getOpcode()) { + default: + break; + + case Hexagon::M4_mpyrr_addr: + case Hexagon::S4_addi_asl_ri: + case Hexagon::S4_addi_lsr_ri: + case Hexagon::S4_andi_asl_ri: + case Hexagon::S4_andi_lsr_ri: + case Hexagon::S4_ori_asl_ri: + case Hexagon::S4_ori_lsr_ri: + case Hexagon::S4_or_andix: + case Hexagon::S4_subi_asl_ri: + case Hexagon::S4_subi_lsr_ri: { + MCOperand &Ry = Inst.getOperand(0); + MCOperand &src = Inst.getOperand(2); + if (RI->getEncodingValue(Ry.getReg()) != RI->getEncodingValue(src.getReg())) + return Match_InvalidOperand; + break; + } + + case Hexagon::C2_cmpgei: { + MCOperand &MO = Inst.getOperand(2); + MO.setExpr(MCBinaryExpr::createSub( + MO.getExpr(), MCConstantExpr::create(1, Context), Context)); + Inst.setOpcode(Hexagon::C2_cmpgti); + break; + } + + case Hexagon::C2_cmpgeui: { + MCOperand &MO = Inst.getOperand(2); + int64_t Value; + bool Success = MO.getExpr()->evaluateAsAbsolute(Value); + (void)Success; + assert(Success && "Assured by matcher"); + if (Value == 0) { + MCInst TmpInst; + MCOperand &Pd = Inst.getOperand(0); + MCOperand &Rt = Inst.getOperand(1); + TmpInst.setOpcode(Hexagon::C2_cmpeq); + TmpInst.addOperand(Pd); + TmpInst.addOperand(Rt); + TmpInst.addOperand(Rt); + Inst = TmpInst; + } else { + MO.setExpr(MCBinaryExpr::createSub( + MO.getExpr(), MCConstantExpr::create(1, Context), Context)); + Inst.setOpcode(Hexagon::C2_cmpgtui); + } + break; + } + case Hexagon::J2_loop1r: + case Hexagon::J2_loop1i: + case Hexagon::J2_loop0r: + case Hexagon::J2_loop0i: { + MCOperand &MO = Inst.getOperand(0); + // Loop has different opcodes for extended vs not extended, but we should + // not use the other opcode as it is a legacy artifact of TD files. + int64_t Value; + if (MO.getExpr()->evaluateAsAbsolute(Value)) { + // if the operand can fit within a 7:2 field + if (Value < (1 << 8) && Value >= -(1 << 8)) { + SMLoc myLoc = Operands[2]->getStartLoc(); + // # is left in startLoc in the case of ## + // If '##' found then force extension. + if (*myLoc.getPointer() == '#') { + MustExtend = true; + break; + } + } else { + // If immediate and out of 7:2 range. 
+ MustExtend = true; + } + } + break; + } + + // Translate a "$Rdd = $Rss" to "$Rdd = combine($Rs, $Rt)" + case Hexagon::A2_tfrp: { + MCOperand &MO = Inst.getOperand(1); + unsigned int RegPairNum = RI->getEncodingValue(MO.getReg()); + std::string R1 = r + llvm::utostr_32(RegPairNum + 1); + StringRef Reg1(R1); + MO.setReg(MatchRegisterName(Reg1)); + // Add a new operand for the second register in the pair. + std::string R2 = r + llvm::utostr_32(RegPairNum); + StringRef Reg2(R2); + Inst.addOperand(MCOperand::createReg(MatchRegisterName(Reg2))); + Inst.setOpcode(Hexagon::A2_combinew); + break; + } + + case Hexagon::A2_tfrpt: + case Hexagon::A2_tfrpf: { + MCOperand &MO = Inst.getOperand(2); + unsigned int RegPairNum = RI->getEncodingValue(MO.getReg()); + std::string R1 = r + llvm::utostr_32(RegPairNum + 1); + StringRef Reg1(R1); + MO.setReg(MatchRegisterName(Reg1)); + // Add a new operand for the second register in the pair. + std::string R2 = r + llvm::utostr_32(RegPairNum); + StringRef Reg2(R2); + Inst.addOperand(MCOperand::createReg(MatchRegisterName(Reg2))); + Inst.setOpcode((Inst.getOpcode() == Hexagon::A2_tfrpt) + ? Hexagon::C2_ccombinewt + : Hexagon::C2_ccombinewf); + break; + } + case Hexagon::A2_tfrptnew: + case Hexagon::A2_tfrpfnew: { + MCOperand &MO = Inst.getOperand(2); + unsigned int RegPairNum = RI->getEncodingValue(MO.getReg()); + std::string R1 = r + llvm::utostr_32(RegPairNum + 1); + StringRef Reg1(R1); + MO.setReg(MatchRegisterName(Reg1)); + // Add a new operand for the second register in the pair. + std::string R2 = r + llvm::utostr_32(RegPairNum); + StringRef Reg2(R2); + Inst.addOperand(MCOperand::createReg(MatchRegisterName(Reg2))); + Inst.setOpcode((Inst.getOpcode() == Hexagon::A2_tfrptnew) + ? Hexagon::C2_ccombinewnewt + : Hexagon::C2_ccombinewnewf); + break; + } + + // Translate a "$Rx = CONST32(#imm)" to "$Rx = memw(gp+#LABEL) " + case Hexagon::CONST32: + case Hexagon::CONST32_Float_Real: + case Hexagon::CONST32_Int_Real: + case Hexagon::FCONST32_nsdata: + is32bit = true; + // Translate a "$Rx:y = CONST64(#imm)" to "$Rx:y = memd(gp+#LABEL) " + case Hexagon::CONST64_Float_Real: + case Hexagon::CONST64_Int_Real: + + // FIXME: need better way to detect AsmStreamer (upstream removed getKind()) + if (!Parser.getStreamer().hasRawTextSupport()) { + MCELFStreamer *MES = static_cast<MCELFStreamer *>(&Parser.getStreamer()); + MCOperand &MO_1 = Inst.getOperand(1); + MCOperand &MO_0 = Inst.getOperand(0); + + // push section onto section stack + MES->PushSection(); + + std::string myCharStr; + MCSectionELF *mySection; + + // check if this as an immediate or a symbol + int64_t Value; + bool Absolute = MO_1.getExpr()->evaluateAsAbsolute(Value); + if (Absolute) { + // Create a new section - one for each constant + // Some or all of the zeros are replaced with the given immediate. 
+ if (is32bit) { + std::string myImmStr = utohexstr(static_cast<uint32_t>(Value)); + myCharStr = StringRef(".gnu.linkonce.l4.CONST_00000000") + .drop_back(myImmStr.size()) + .str() + + myImmStr; + } else { + std::string myImmStr = utohexstr(Value); + myCharStr = StringRef(".gnu.linkonce.l8.CONST_0000000000000000") + .drop_back(myImmStr.size()) + .str() + + myImmStr; + } + + mySection = getContext().getELFSection(myCharStr, ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_WRITE); + } else if (MO_1.isExpr()) { + // .lita - for expressions + myCharStr = ".lita"; + mySection = getContext().getELFSection(myCharStr, ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_WRITE); + } else + llvm_unreachable("unexpected type of machine operand!"); + + MES->SwitchSection(mySection); + unsigned byteSize = is32bit ? 4 : 8; + getStreamer().EmitCodeAlignment(byteSize, byteSize); + + MCSymbol *Sym; + + // for symbols, get rid of prepended ".gnu.linkonce.lx." + + // emit symbol if needed + if (Absolute) { + Sym = getContext().getOrCreateSymbol(StringRef(myCharStr.c_str() + 16)); + if (Sym->isUndefined()) { + getStreamer().EmitLabel(Sym); + getStreamer().EmitSymbolAttribute(Sym, MCSA_Global); + getStreamer().EmitIntValue(Value, byteSize); + } + } else if (MO_1.isExpr()) { + const char *StringStart = 0; + const char *StringEnd = 0; + if (*Operands[4]->getStartLoc().getPointer() == '#') { + StringStart = Operands[5]->getStartLoc().getPointer(); + StringEnd = Operands[6]->getStartLoc().getPointer(); + } else { // no pound + StringStart = Operands[4]->getStartLoc().getPointer(); + StringEnd = Operands[5]->getStartLoc().getPointer(); + } + + unsigned size = StringEnd - StringStart; + std::string DotConst = ".CONST_"; + Sym = getContext().getOrCreateSymbol(DotConst + + StringRef(StringStart, size)); + + if (Sym->isUndefined()) { + // case where symbol is not yet defined: emit symbol + getStreamer().EmitLabel(Sym); + getStreamer().EmitSymbolAttribute(Sym, MCSA_Local); + getStreamer().EmitValue(MO_1.getExpr(), 4); + } + } else + llvm_unreachable("unexpected type of machine operand!"); + + MES->PopSection(); + + if (Sym) { + MCInst TmpInst; + if (is32bit) // 32 bit + TmpInst.setOpcode(Hexagon::L2_loadrigp); + else // 64 bit + TmpInst.setOpcode(Hexagon::L2_loadrdgp); + + TmpInst.addOperand(MO_0); + TmpInst.addOperand( + MCOperand::createExpr(MCSymbolRefExpr::create(Sym, getContext()))); + Inst = TmpInst; + } + } + break; + + // Translate a "$Rdd = #-imm" to "$Rdd = combine(#[-1,0], #-imm)" + case Hexagon::A2_tfrpi: { + MCOperand &Rdd = Inst.getOperand(0); + MCOperand &MO = Inst.getOperand(1); + int64_t Value; + int sVal = (MO.getExpr()->evaluateAsAbsolute(Value) && Value < 0) ? 
-1 : 0; + MCOperand imm(MCOperand::createExpr(MCConstantExpr::create(sVal, Context))); + Inst = makeCombineInst(Hexagon::A2_combineii, Rdd, imm, MO); + break; + } + + // Translate a "$Rdd = [#]#imm" to "$Rdd = combine(#, [#]#imm)" + case Hexagon::TFRI64_V4: { + MCOperand &Rdd = Inst.getOperand(0); + MCOperand &MO = Inst.getOperand(1); + int64_t Value; + if (MO.getExpr()->evaluateAsAbsolute(Value)) { + unsigned long long u64 = Value; + signed int s8 = (u64 >> 32) & 0xFFFFFFFF; + if (s8 < -128 || s8 > 127) + OutOfRange(IDLoc, s8, -128); + MCOperand imm(MCOperand::createExpr( + MCConstantExpr::create(s8, Context))); // upper 32 + MCOperand imm2(MCOperand::createExpr( + MCConstantExpr::create(u64 & 0xFFFFFFFF, Context))); // lower 32 + Inst = makeCombineInst(Hexagon::A4_combineii, Rdd, imm, imm2); + } else { + MCOperand imm(MCOperand::createExpr( + MCConstantExpr::create(0, Context))); // upper 32 + Inst = makeCombineInst(Hexagon::A4_combineii, Rdd, imm, MO); + } + break; + } + + // Handle $Rdd = combine(##imm, #imm)" + case Hexagon::TFRI64_V2_ext: { + MCOperand &Rdd = Inst.getOperand(0); + MCOperand &MO1 = Inst.getOperand(1); + MCOperand &MO2 = Inst.getOperand(2); + int64_t Value; + if (MO2.getExpr()->evaluateAsAbsolute(Value)) { + int s8 = Value; + if (s8 < -128 || s8 > 127) + OutOfRange(IDLoc, s8, -128); + } + Inst = makeCombineInst(Hexagon::A2_combineii, Rdd, MO1, MO2); + break; + } + + // Handle $Rdd = combine(#imm, ##imm)" + case Hexagon::A4_combineii: { + MCOperand &Rdd = Inst.getOperand(0); + MCOperand &MO1 = Inst.getOperand(1); + int64_t Value; + if (MO1.getExpr()->evaluateAsAbsolute(Value)) { + int s8 = Value; + if (s8 < -128 || s8 > 127) + OutOfRange(IDLoc, s8, -128); + } + MCOperand &MO2 = Inst.getOperand(2); + Inst = makeCombineInst(Hexagon::A4_combineii, Rdd, MO1, MO2); + break; + } + + case Hexagon::S2_tableidxb_goodsyntax: { + Inst.setOpcode(Hexagon::S2_tableidxb); + break; + } + + case Hexagon::S2_tableidxh_goodsyntax: { + MCInst TmpInst; + MCOperand &Rx = Inst.getOperand(0); + MCOperand &_dst_ = Inst.getOperand(1); + MCOperand &Rs = Inst.getOperand(2); + MCOperand &Imm4 = Inst.getOperand(3); + MCOperand &Imm6 = Inst.getOperand(4); + Imm6.setExpr(MCBinaryExpr::createSub( + Imm6.getExpr(), MCConstantExpr::create(1, Context), Context)); + TmpInst.setOpcode(Hexagon::S2_tableidxh); + TmpInst.addOperand(Rx); + TmpInst.addOperand(_dst_); + TmpInst.addOperand(Rs); + TmpInst.addOperand(Imm4); + TmpInst.addOperand(Imm6); + Inst = TmpInst; + break; + } + + case Hexagon::S2_tableidxw_goodsyntax: { + MCInst TmpInst; + MCOperand &Rx = Inst.getOperand(0); + MCOperand &_dst_ = Inst.getOperand(1); + MCOperand &Rs = Inst.getOperand(2); + MCOperand &Imm4 = Inst.getOperand(3); + MCOperand &Imm6 = Inst.getOperand(4); + Imm6.setExpr(MCBinaryExpr::createSub( + Imm6.getExpr(), MCConstantExpr::create(2, Context), Context)); + TmpInst.setOpcode(Hexagon::S2_tableidxw); + TmpInst.addOperand(Rx); + TmpInst.addOperand(_dst_); + TmpInst.addOperand(Rs); + TmpInst.addOperand(Imm4); + TmpInst.addOperand(Imm6); + Inst = TmpInst; + break; + } + + case Hexagon::S2_tableidxd_goodsyntax: { + MCInst TmpInst; + MCOperand &Rx = Inst.getOperand(0); + MCOperand &_dst_ = Inst.getOperand(1); + MCOperand &Rs = Inst.getOperand(2); + MCOperand &Imm4 = Inst.getOperand(3); + MCOperand &Imm6 = Inst.getOperand(4); + Imm6.setExpr(MCBinaryExpr::createSub( + Imm6.getExpr(), MCConstantExpr::create(3, Context), Context)); + TmpInst.setOpcode(Hexagon::S2_tableidxd); + TmpInst.addOperand(Rx); + TmpInst.addOperand(_dst_); + 
TmpInst.addOperand(Rs); + TmpInst.addOperand(Imm4); + TmpInst.addOperand(Imm6); + Inst = TmpInst; + break; + } + + case Hexagon::M2_mpyui: { + Inst.setOpcode(Hexagon::M2_mpyi); + break; + } + case Hexagon::M2_mpysmi: { + MCInst TmpInst; + MCOperand &Rd = Inst.getOperand(0); + MCOperand &Rs = Inst.getOperand(1); + MCOperand &Imm = Inst.getOperand(2); + int64_t Value; + bool Absolute = Imm.getExpr()->evaluateAsAbsolute(Value); + assert(Absolute); + (void)Absolute; + if (!MustExtend) { + if (Value < 0 && Value > -256) { + Imm.setExpr(MCConstantExpr::create(Value * -1, Context)); + TmpInst.setOpcode(Hexagon::M2_mpysin); + } else if (Value < 256 && Value >= 0) + TmpInst.setOpcode(Hexagon::M2_mpysip); + else + return Match_InvalidOperand; + } else { + if (Value >= 0) + TmpInst.setOpcode(Hexagon::M2_mpysip); + else + return Match_InvalidOperand; + } + TmpInst.addOperand(Rd); + TmpInst.addOperand(Rs); + TmpInst.addOperand(Imm); + Inst = TmpInst; + break; + } + + case Hexagon::S2_asr_i_r_rnd_goodsyntax: { + MCOperand &Imm = Inst.getOperand(2); + MCInst TmpInst; + int64_t Value; + bool Absolute = Imm.getExpr()->evaluateAsAbsolute(Value); + assert(Absolute); + (void)Absolute; + if (Value == 0) { // convert to $Rd = $Rs + TmpInst.setOpcode(Hexagon::A2_tfr); + MCOperand &Rd = Inst.getOperand(0); + MCOperand &Rs = Inst.getOperand(1); + TmpInst.addOperand(Rd); + TmpInst.addOperand(Rs); + } else { + Imm.setExpr(MCBinaryExpr::createSub( + Imm.getExpr(), MCConstantExpr::create(1, Context), Context)); + TmpInst.setOpcode(Hexagon::S2_asr_i_r_rnd); + MCOperand &Rd = Inst.getOperand(0); + MCOperand &Rs = Inst.getOperand(1); + TmpInst.addOperand(Rd); + TmpInst.addOperand(Rs); + TmpInst.addOperand(Imm); + } + Inst = TmpInst; + break; + } + + case Hexagon::S2_asr_i_p_rnd_goodsyntax: { + MCOperand &Rdd = Inst.getOperand(0); + MCOperand &Rss = Inst.getOperand(1); + MCOperand &Imm = Inst.getOperand(2); + int64_t Value; + bool Absolute = Imm.getExpr()->evaluateAsAbsolute(Value); + assert(Absolute); + (void)Absolute; + if (Value == 0) { // convert to $Rdd = combine ($Rs[0], $Rs[1]) + MCInst TmpInst; + unsigned int RegPairNum = RI->getEncodingValue(Rss.getReg()); + std::string R1 = r + llvm::utostr_32(RegPairNum + 1); + StringRef Reg1(R1); + Rss.setReg(MatchRegisterName(Reg1)); + // Add a new operand for the second register in the pair. 
+ std::string R2 = r + llvm::utostr_32(RegPairNum); + StringRef Reg2(R2); + TmpInst.setOpcode(Hexagon::A2_combinew); + TmpInst.addOperand(Rdd); + TmpInst.addOperand(Rss); + TmpInst.addOperand(MCOperand::createReg(MatchRegisterName(Reg2))); + Inst = TmpInst; + } else { + Imm.setExpr(MCBinaryExpr::createSub( + Imm.getExpr(), MCConstantExpr::create(1, Context), Context)); + Inst.setOpcode(Hexagon::S2_asr_i_p_rnd); + } + break; + } + + case Hexagon::A4_boundscheck: { + MCOperand &Rs = Inst.getOperand(1); + unsigned int RegNum = RI->getEncodingValue(Rs.getReg()); + if (RegNum & 1) { // Odd mapped to raw:hi, regpair is rodd:odd-1, like r3:2 + Inst.setOpcode(Hexagon::A4_boundscheck_hi); + std::string Name = + r + llvm::utostr_32(RegNum) + Colon + llvm::utostr_32(RegNum - 1); + StringRef RegPair = Name; + Rs.setReg(MatchRegisterName(RegPair)); + } else { // raw:lo + Inst.setOpcode(Hexagon::A4_boundscheck_lo); + std::string Name = + r + llvm::utostr_32(RegNum + 1) + Colon + llvm::utostr_32(RegNum); + StringRef RegPair = Name; + Rs.setReg(MatchRegisterName(RegPair)); + } + break; + } + + case Hexagon::A2_addsp: { + MCOperand &Rs = Inst.getOperand(1); + unsigned int RegNum = RI->getEncodingValue(Rs.getReg()); + if (RegNum & 1) { // Odd mapped to raw:hi + Inst.setOpcode(Hexagon::A2_addsph); + std::string Name = + r + llvm::utostr_32(RegNum) + Colon + llvm::utostr_32(RegNum - 1); + StringRef RegPair = Name; + Rs.setReg(MatchRegisterName(RegPair)); + } else { // Even mapped raw:lo + Inst.setOpcode(Hexagon::A2_addspl); + std::string Name = + r + llvm::utostr_32(RegNum + 1) + Colon + llvm::utostr_32(RegNum); + StringRef RegPair = Name; + Rs.setReg(MatchRegisterName(RegPair)); + } + break; + } + + case Hexagon::M2_vrcmpys_s1: { + MCOperand &Rt = Inst.getOperand(2); + unsigned int RegNum = RI->getEncodingValue(Rt.getReg()); + if (RegNum & 1) { // Odd mapped to sat:raw:hi + Inst.setOpcode(Hexagon::M2_vrcmpys_s1_h); + std::string Name = + r + llvm::utostr_32(RegNum) + Colon + llvm::utostr_32(RegNum - 1); + StringRef RegPair = Name; + Rt.setReg(MatchRegisterName(RegPair)); + } else { // Even mapped sat:raw:lo + Inst.setOpcode(Hexagon::M2_vrcmpys_s1_l); + std::string Name = + r + llvm::utostr_32(RegNum + 1) + Colon + llvm::utostr_32(RegNum); + StringRef RegPair = Name; + Rt.setReg(MatchRegisterName(RegPair)); + } + break; + } + + case Hexagon::M2_vrcmpys_acc_s1: { + MCInst TmpInst; + MCOperand &Rxx = Inst.getOperand(0); + MCOperand &Rss = Inst.getOperand(2); + MCOperand &Rt = Inst.getOperand(3); + unsigned int RegNum = RI->getEncodingValue(Rt.getReg()); + if (RegNum & 1) { // Odd mapped to sat:raw:hi + TmpInst.setOpcode(Hexagon::M2_vrcmpys_acc_s1_h); + std::string Name = + r + llvm::utostr_32(RegNum) + Colon + llvm::utostr_32(RegNum - 1); + StringRef RegPair = Name; + Rt.setReg(MatchRegisterName(RegPair)); + } else { // Even mapped sat:raw:lo + TmpInst.setOpcode(Hexagon::M2_vrcmpys_acc_s1_l); + std::string Name = + r + llvm::utostr_32(RegNum + 1) + Colon + llvm::utostr_32(RegNum); + StringRef RegPair = Name; + Rt.setReg(MatchRegisterName(RegPair)); + } + // Registers are in different positions + TmpInst.addOperand(Rxx); + TmpInst.addOperand(Rxx); + TmpInst.addOperand(Rss); + TmpInst.addOperand(Rt); + Inst = TmpInst; + break; + } + + case Hexagon::M2_vrcmpys_s1rp: { + MCOperand &Rt = Inst.getOperand(2); + unsigned int RegNum = RI->getEncodingValue(Rt.getReg()); + if (RegNum & 1) { // Odd mapped to rnd:sat:raw:hi + Inst.setOpcode(Hexagon::M2_vrcmpys_s1rp_h); + std::string Name = + r + llvm::utostr_32(RegNum) + 
Colon + llvm::utostr_32(RegNum - 1); + StringRef RegPair = Name; + Rt.setReg(MatchRegisterName(RegPair)); + } else { // Even mapped rnd:sat:raw:lo + Inst.setOpcode(Hexagon::M2_vrcmpys_s1rp_l); + std::string Name = + r + llvm::utostr_32(RegNum + 1) + Colon + llvm::utostr_32(RegNum); + StringRef RegPair = Name; + Rt.setReg(MatchRegisterName(RegPair)); + } + break; + } + + case Hexagon::S5_asrhub_rnd_sat_goodsyntax: { + MCOperand &Imm = Inst.getOperand(2); + int64_t Value; + bool Absolute = Imm.getExpr()->evaluateAsAbsolute(Value); + assert(Absolute); + (void)Absolute; + if (Value == 0) + Inst.setOpcode(Hexagon::S2_vsathub); + else { + Imm.setExpr(MCBinaryExpr::createSub( + Imm.getExpr(), MCConstantExpr::create(1, Context), Context)); + Inst.setOpcode(Hexagon::S5_asrhub_rnd_sat); + } + break; + } + + case Hexagon::S5_vasrhrnd_goodsyntax: { + MCOperand &Rdd = Inst.getOperand(0); + MCOperand &Rss = Inst.getOperand(1); + MCOperand &Imm = Inst.getOperand(2); + int64_t Value; + bool Absolute = Imm.getExpr()->evaluateAsAbsolute(Value); + assert(Absolute); + (void)Absolute; + if (Value == 0) { + MCInst TmpInst; + unsigned int RegPairNum = RI->getEncodingValue(Rss.getReg()); + std::string R1 = r + llvm::utostr_32(RegPairNum + 1); + StringRef Reg1(R1); + Rss.setReg(MatchRegisterName(Reg1)); + // Add a new operand for the second register in the pair. + std::string R2 = r + llvm::utostr_32(RegPairNum); + StringRef Reg2(R2); + TmpInst.setOpcode(Hexagon::A2_combinew); + TmpInst.addOperand(Rdd); + TmpInst.addOperand(Rss); + TmpInst.addOperand(MCOperand::createReg(MatchRegisterName(Reg2))); + Inst = TmpInst; + } else { + Imm.setExpr(MCBinaryExpr::createSub( + Imm.getExpr(), MCConstantExpr::create(1, Context), Context)); + Inst.setOpcode(Hexagon::S5_vasrhrnd); + } + break; + } + + case Hexagon::A2_not: { + MCInst TmpInst; + MCOperand &Rd = Inst.getOperand(0); + MCOperand &Rs = Inst.getOperand(1); + TmpInst.setOpcode(Hexagon::A2_subri); + TmpInst.addOperand(Rd); + TmpInst.addOperand( + MCOperand::createExpr(MCConstantExpr::create(-1, Context))); + TmpInst.addOperand(Rs); + Inst = TmpInst; + break; + } + } // switch + + return Match_Success; +} diff --git a/contrib/llvm/lib/Target/Hexagon/BitTracker.cpp b/contrib/llvm/lib/Target/Hexagon/BitTracker.cpp new file mode 100644 index 0000000..ea96eb0 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/BitTracker.cpp @@ -0,0 +1,1127 @@ +//===--- BitTracker.cpp ---------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// SSA-based bit propagation. +// +// The purpose of this code is, for a given virtual register, to provide +// information about the value of each bit in the register. The values +// of bits are represented by the class BitValue, and take one of four +// cases: 0, 1, "ref" and "bottom". The 0 and 1 are rather clear, the +// "ref" value means that the bit is a copy of another bit (which itself +// cannot be a copy of yet another bit---such chains are not allowed). +// A "ref" value is associated with a BitRef structure, which indicates +// which virtual register, and which bit in that register is the origin +// of the value. For example, given an instruction +// vreg2 = ASL vreg1, 1 +// assuming that nothing is known about bits of vreg1, bit 1 of vreg2 +// will be a "ref" to (vreg1, 0). 
If there is a subsequent instruction +// vreg3 = ASL vreg2, 2 +// then bit 3 of vreg3 will be a "ref" to (vreg1, 0) as well. +// The "bottom" case means that the bit's value cannot be determined, +// and that this virtual register actually defines it. The "bottom" case +// is discussed in detail in BitTracker.h. In fact, "bottom" is a "ref +// to self", so for the vreg1 above, the bit 0 of it will be a "ref" to +// (vreg1, 0), bit 1 will be a "ref" to (vreg1, 1), etc. +// +// The tracker implements the Wegman-Zadeck algorithm, originally developed +// for SSA-based constant propagation. Each register is represented as +// a sequence of bits, with the convention that bit 0 is the least signi- +// ficant bit. Each bit is propagated individually. The class RegisterCell +// implements the register's representation, and is also the subject of +// the lattice operations in the tracker. +// +// The intended usage of the bit tracker is to create a target-specific +// machine instruction evaluator, pass the evaluator to the BitTracker +// object, and run the tracker. The tracker will then collect the bit +// value information for a given machine function. After that, it can be +// queried for the cells for each virtual register. +// Sample code: +// const TargetSpecificEvaluator TSE(TRI, MRI); +// BitTracker BT(TSE, MF); +// BT.run(); +// ... +// unsigned Reg = interestingRegister(); +// RegisterCell RC = BT.get(Reg); +// if (RC[3].is(1)) +// Reg0bit3 = 1; +// +// The code below is intended to be fully target-independent. + +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegisterInfo.h" + +#include "BitTracker.h" + +using namespace llvm; + +typedef BitTracker BT; + +namespace { + // Local trickery to pretty print a register (without the whole "%vreg" + // business). + struct printv { + printv(unsigned r) : R(r) {} + unsigned R; + }; + raw_ostream &operator<< (raw_ostream &OS, const printv &PV) { + if (PV.R) + OS << 'v' << TargetRegisterInfo::virtReg2Index(PV.R); + else + OS << 's'; + return OS; + } +} + +raw_ostream &llvm::operator<<(raw_ostream &OS, const BT::BitValue &BV) { + switch (BV.Type) { + case BT::BitValue::Top: + OS << 'T'; + break; + case BT::BitValue::Zero: + OS << '0'; + break; + case BT::BitValue::One: + OS << '1'; + break; + case BT::BitValue::Ref: + OS << printv(BV.RefI.Reg) << '[' << BV.RefI.Pos << ']'; + break; + } + return OS; +} + +raw_ostream &llvm::operator<<(raw_ostream &OS, const BT::RegisterCell &RC) { + unsigned n = RC.Bits.size(); + OS << "{ w:" << n; + // Instead of printing each bit value individually, try to group them + // into logical segments, such as sequences of 0 or 1 bits or references + // to consecutive bits (e.g. "bits 3-5 are same as bits 7-9 of reg xyz"). + // "Start" will be the index of the beginning of the most recent segment. + unsigned Start = 0; + bool SeqRef = false; // A sequence of refs to consecutive bits. + bool ConstRef = false; // A sequence of refs to the same bit. + + for (unsigned i = 1, n = RC.Bits.size(); i < n; ++i) { + const BT::BitValue &V = RC[i]; + const BT::BitValue &SV = RC[Start]; + bool IsRef = (V.Type == BT::BitValue::Ref); + // If the current value is the same as Start, skip to the next one. 
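+    // (Grouped this way, a 32-bit cell of all zeros prints roughly as
+    // "{ w:32 [0-31]:0 }", and a cell that mirrors bits 0-31 of virtual
+    // register v5 prints roughly as "{ w:32 [0-31]:v5[0-31] }"; both
+    // outputs are illustrative.)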
+      if (!IsRef && V == SV)
+        continue;
+      if (IsRef && SV.Type == BT::BitValue::Ref && V.RefI.Reg == SV.RefI.Reg) {
+        if (Start+1 == i) {
+          SeqRef = (V.RefI.Pos == SV.RefI.Pos+1);
+          ConstRef = (V.RefI.Pos == SV.RefI.Pos);
+        }
+        if (SeqRef && V.RefI.Pos == SV.RefI.Pos+(i-Start))
+          continue;
+        if (ConstRef && V.RefI.Pos == SV.RefI.Pos)
+          continue;
+      }
+
+      // The current value is different. Print the previous one and reset
+      // the Start.
+      OS << " [" << Start;
+      unsigned Count = i - Start;
+      if (Count == 1) {
+        OS << "]:" << SV;
+      } else {
+        OS << '-' << i-1 << "]:";
+        if (SV.Type == BT::BitValue::Ref && SeqRef)
+          OS << printv(SV.RefI.Reg) << '[' << SV.RefI.Pos << '-'
+             << SV.RefI.Pos+(Count-1) << ']';
+        else
+          OS << SV;
+      }
+      Start = i;
+      SeqRef = ConstRef = false;
+    }
+
+    OS << " [" << Start;
+    unsigned Count = n - Start;
+    if (n-Start == 1) {
+      OS << "]:" << RC[Start];
+    } else {
+      OS << '-' << n-1 << "]:";
+      const BT::BitValue &SV = RC[Start];
+      if (SV.Type == BT::BitValue::Ref && SeqRef)
+        OS << printv(SV.RefI.Reg) << '[' << SV.RefI.Pos << '-'
+           << SV.RefI.Pos+(Count-1) << ']';
+      else
+        OS << SV;
+    }
+    OS << " }";
+
+  return OS;
+}
+
+BitTracker::BitTracker(const MachineEvaluator &E, MachineFunction &F)
+    : Trace(false), ME(E), MF(F), MRI(F.getRegInfo()), Map(*new CellMapType) {}
+
+BitTracker::~BitTracker() {
+  delete &Map;
+}
+
+
+// If we were allowed to update a cell for a part of a register, the meet
+// operation would need to be parametrized by the register number and the
+// exact part of the register, so that the computed BitRefs correspond to
+// the actual bits of the "self" register.
+// While this cannot happen in the current implementation, I'm not sure
+// if this should be ruled out in the future.
+bool BT::RegisterCell::meet(const RegisterCell &RC, unsigned SelfR) {
+  // An example when "meet" can be invoked with SelfR == 0 is a phi node
+  // with a physical register as an operand.
+  assert(SelfR == 0 || TargetRegisterInfo::isVirtualRegister(SelfR));
+  bool Changed = false;
+  for (uint16_t i = 0, n = Bits.size(); i < n; ++i) {
+    const BitValue &RCV = RC[i];
+    Changed |= Bits[i].meet(RCV, BitRef(SelfR, i));
+  }
+  return Changed;
+}
+
+
+// Insert the entire cell RC into the current cell at position given by M.
+BT::RegisterCell &BT::RegisterCell::insert(const BT::RegisterCell &RC,
+      const BitMask &M) {
+  uint16_t B = M.first(), E = M.last(), W = width();
+  // Sanity: M must be a valid mask for *this.
+  assert(B < W && E < W);
+  // Sanity: the masked part of *this must have the same number of bits
+  // as the source.
+  assert(B > E || E-B+1 == RC.width());      // B <= E  =>  E-B+1 = |RC|.
+  assert(B <= E || E+(W-B)+1 == RC.width()); // E < B   =>  E+(W-B)+1 = |RC|.
+  if (B <= E) {
+    for (uint16_t i = 0; i <= E-B; ++i)
+      Bits[i+B] = RC[i];
+  } else {
+    for (uint16_t i = 0; i < W-B; ++i)
+      Bits[i+B] = RC[i];
+    for (uint16_t i = 0; i <= E; ++i)
+      Bits[i] = RC[i+(W-B)];
+  }
+  return *this;
+}
+
+
+BT::RegisterCell BT::RegisterCell::extract(const BitMask &M) const {
+  uint16_t B = M.first(), E = M.last(), W = width();
+  assert(B < W && E < W);
+  if (B <= E) {
+    RegisterCell RC(E-B+1);
+    for (uint16_t i = B; i <= E; ++i)
+      RC.Bits[i-B] = Bits[i];
+    return RC;
+  }
+
+  RegisterCell RC(E+(W-B)+1);
+  for (uint16_t i = 0; i < W-B; ++i)
+    RC.Bits[i] = Bits[i+B];
+  for (uint16_t i = 0; i <= E; ++i)
+    RC.Bits[i+(W-B)] = Bits[i];
+  return RC;
+}
+
+
+BT::RegisterCell &BT::RegisterCell::rol(uint16_t Sh) {
+  // Rotate left (i.e. towards increasing bit indices).
+ // Swap the two parts: [0..W-Sh-1] [W-Sh..W-1] + uint16_t W = width(); + Sh = Sh % W; + if (Sh == 0) + return *this; + + RegisterCell Tmp(W-Sh); + // Tmp = [0..W-Sh-1]. + for (uint16_t i = 0; i < W-Sh; ++i) + Tmp[i] = Bits[i]; + // Shift [W-Sh..W-1] to [0..Sh-1]. + for (uint16_t i = 0; i < Sh; ++i) + Bits[i] = Bits[W-Sh+i]; + // Copy Tmp to [Sh..W-1]. + for (uint16_t i = 0; i < W-Sh; ++i) + Bits[i+Sh] = Tmp.Bits[i]; + return *this; +} + + +BT::RegisterCell &BT::RegisterCell::fill(uint16_t B, uint16_t E, + const BitValue &V) { + assert(B <= E); + while (B < E) + Bits[B++] = V; + return *this; +} + + +BT::RegisterCell &BT::RegisterCell::cat(const RegisterCell &RC) { + // Append the cell given as the argument to the "this" cell. + // Bit 0 of RC becomes bit W of the result, where W is this->width(). + uint16_t W = width(), WRC = RC.width(); + Bits.resize(W+WRC); + for (uint16_t i = 0; i < WRC; ++i) + Bits[i+W] = RC.Bits[i]; + return *this; +} + + +uint16_t BT::RegisterCell::ct(bool B) const { + uint16_t W = width(); + uint16_t C = 0; + BitValue V = B; + while (C < W && Bits[C] == V) + C++; + return C; +} + + +uint16_t BT::RegisterCell::cl(bool B) const { + uint16_t W = width(); + uint16_t C = 0; + BitValue V = B; + while (C < W && Bits[W-(C+1)] == V) + C++; + return C; +} + + +bool BT::RegisterCell::operator== (const RegisterCell &RC) const { + uint16_t W = Bits.size(); + if (RC.Bits.size() != W) + return false; + for (uint16_t i = 0; i < W; ++i) + if (Bits[i] != RC[i]) + return false; + return true; +} + + +uint16_t BT::MachineEvaluator::getRegBitWidth(const RegisterRef &RR) const { + // The general problem is with finding a register class that corresponds + // to a given reference reg:sub. There can be several such classes, and + // since we only care about the register size, it does not matter which + // such class we would find. + // The easiest way to accomplish what we want is to + // 1. find a physical register PhysR from the same class as RR.Reg, + // 2. find a physical register PhysS that corresponds to PhysR:RR.Sub, + // 3. find a register class that contains PhysS. + unsigned PhysR; + if (TargetRegisterInfo::isVirtualRegister(RR.Reg)) { + const TargetRegisterClass *VC = MRI.getRegClass(RR.Reg); + assert(VC->begin() != VC->end() && "Empty register class"); + PhysR = *VC->begin(); + } else { + assert(TargetRegisterInfo::isPhysicalRegister(RR.Reg)); + PhysR = RR.Reg; + } + + unsigned PhysS = (RR.Sub == 0) ? PhysR : TRI.getSubReg(PhysR, RR.Sub); + const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(PhysS); + uint16_t BW = RC->getSize()*8; + return BW; +} + + +BT::RegisterCell BT::MachineEvaluator::getCell(const RegisterRef &RR, + const CellMapType &M) const { + uint16_t BW = getRegBitWidth(RR); + + // Physical registers are assumed to be present in the map with an unknown + // value. Don't actually insert anything in the map, just return the cell. + if (TargetRegisterInfo::isPhysicalRegister(RR.Reg)) + return RegisterCell::self(0, BW); + + assert(TargetRegisterInfo::isVirtualRegister(RR.Reg)); + // For virtual registers that belong to a class that is not tracked, + // generate an "unknown" value as well. + const TargetRegisterClass *C = MRI.getRegClass(RR.Reg); + if (!track(C)) + return RegisterCell::self(0, BW); + + CellMapType::const_iterator F = M.find(RR.Reg); + if (F != M.end()) { + if (!RR.Sub) + return F->second; + BitMask M = mask(RR.Reg, RR.Sub); + return F->second.extract(M); + } + // If not found, create a "top" entry, but do not insert it in the map. 
+ return RegisterCell::top(BW); +} + + +void BT::MachineEvaluator::putCell(const RegisterRef &RR, RegisterCell RC, + CellMapType &M) const { + // While updating the cell map can be done in a meaningful way for + // a part of a register, it makes little sense to implement it as the + // SSA representation would never contain such "partial definitions". + if (!TargetRegisterInfo::isVirtualRegister(RR.Reg)) + return; + assert(RR.Sub == 0 && "Unexpected sub-register in definition"); + // Eliminate all ref-to-reg-0 bit values: replace them with "self". + for (unsigned i = 0, n = RC.width(); i < n; ++i) { + const BitValue &V = RC[i]; + if (V.Type == BitValue::Ref && V.RefI.Reg == 0) + RC[i].RefI = BitRef(RR.Reg, i); + } + M[RR.Reg] = RC; +} + + +// Check if the cell represents a compile-time integer value. +bool BT::MachineEvaluator::isInt(const RegisterCell &A) const { + uint16_t W = A.width(); + for (uint16_t i = 0; i < W; ++i) + if (!A[i].is(0) && !A[i].is(1)) + return false; + return true; +} + + +// Convert a cell to the integer value. The result must fit in uint64_t. +uint64_t BT::MachineEvaluator::toInt(const RegisterCell &A) const { + assert(isInt(A)); + uint64_t Val = 0; + uint16_t W = A.width(); + for (uint16_t i = 0; i < W; ++i) { + Val <<= 1; + Val |= A[i].is(1); + } + return Val; +} + + +// Evaluator helper functions. These implement some common operation on +// register cells that can be used to implement target-specific instructions +// in a target-specific evaluator. + +BT::RegisterCell BT::MachineEvaluator::eIMM(int64_t V, uint16_t W) const { + RegisterCell Res(W); + // For bits beyond the 63rd, this will generate the sign bit of V. + for (uint16_t i = 0; i < W; ++i) { + Res[i] = BitValue(V & 1); + V >>= 1; + } + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eIMM(const ConstantInt *CI) const { + APInt A = CI->getValue(); + uint16_t BW = A.getBitWidth(); + assert((unsigned)BW == A.getBitWidth() && "BitWidth overflow"); + RegisterCell Res(BW); + for (uint16_t i = 0; i < BW; ++i) + Res[i] = A[i]; + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eADD(const RegisterCell &A1, + const RegisterCell &A2) const { + uint16_t W = A1.width(); + assert(W == A2.width()); + RegisterCell Res(W); + bool Carry = false; + uint16_t I; + for (I = 0; I < W; ++I) { + const BitValue &V1 = A1[I]; + const BitValue &V2 = A2[I]; + if (!V1.num() || !V2.num()) + break; + unsigned S = bool(V1) + bool(V2) + Carry; + Res[I] = BitValue(S & 1); + Carry = (S > 1); + } + for (; I < W; ++I) { + const BitValue &V1 = A1[I]; + const BitValue &V2 = A2[I]; + // If the next bit is same as Carry, the result will be 0 plus the + // other bit. The Carry bit will remain unchanged. 
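+    // For instance (illustrative): adding a known 0 bit to an unknown bit x
+    // with no pending carry gives x and leaves the carry at 0; likewise
+    // 1 + x with a pending carry gives x (mod 2) and keeps the carry at 1.
+    // Both cases are the "V1.is(Carry)" test below, which makes the result
+    // bit a ref to the other operand's bit.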
+ if (V1.is(Carry)) + Res[I] = BitValue::ref(V2); + else if (V2.is(Carry)) + Res[I] = BitValue::ref(V1); + else + break; + } + for (; I < W; ++I) + Res[I] = BitValue::self(); + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eSUB(const RegisterCell &A1, + const RegisterCell &A2) const { + uint16_t W = A1.width(); + assert(W == A2.width()); + RegisterCell Res(W); + bool Borrow = false; + uint16_t I; + for (I = 0; I < W; ++I) { + const BitValue &V1 = A1[I]; + const BitValue &V2 = A2[I]; + if (!V1.num() || !V2.num()) + break; + unsigned S = bool(V1) - bool(V2) - Borrow; + Res[I] = BitValue(S & 1); + Borrow = (S > 1); + } + for (; I < W; ++I) { + const BitValue &V1 = A1[I]; + const BitValue &V2 = A2[I]; + if (V1.is(Borrow)) { + Res[I] = BitValue::ref(V2); + break; + } + if (V2.is(Borrow)) + Res[I] = BitValue::ref(V1); + else + break; + } + for (; I < W; ++I) + Res[I] = BitValue::self(); + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eMLS(const RegisterCell &A1, + const RegisterCell &A2) const { + uint16_t W = A1.width() + A2.width(); + uint16_t Z = A1.ct(0) + A2.ct(0); + RegisterCell Res(W); + Res.fill(0, Z, BitValue::Zero); + Res.fill(Z, W, BitValue::self()); + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eMLU(const RegisterCell &A1, + const RegisterCell &A2) const { + uint16_t W = A1.width() + A2.width(); + uint16_t Z = A1.ct(0) + A2.ct(0); + RegisterCell Res(W); + Res.fill(0, Z, BitValue::Zero); + Res.fill(Z, W, BitValue::self()); + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eASL(const RegisterCell &A1, + uint16_t Sh) const { + assert(Sh <= A1.width()); + RegisterCell Res = RegisterCell::ref(A1); + Res.rol(Sh); + Res.fill(0, Sh, BitValue::Zero); + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eLSR(const RegisterCell &A1, + uint16_t Sh) const { + uint16_t W = A1.width(); + assert(Sh <= W); + RegisterCell Res = RegisterCell::ref(A1); + Res.rol(W-Sh); + Res.fill(W-Sh, W, BitValue::Zero); + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eASR(const RegisterCell &A1, + uint16_t Sh) const { + uint16_t W = A1.width(); + assert(Sh <= W); + RegisterCell Res = RegisterCell::ref(A1); + BitValue Sign = Res[W-1]; + Res.rol(W-Sh); + Res.fill(W-Sh, W, Sign); + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eAND(const RegisterCell &A1, + const RegisterCell &A2) const { + uint16_t W = A1.width(); + assert(W == A2.width()); + RegisterCell Res(W); + for (uint16_t i = 0; i < W; ++i) { + const BitValue &V1 = A1[i]; + const BitValue &V2 = A2[i]; + if (V1.is(1)) + Res[i] = BitValue::ref(V2); + else if (V2.is(1)) + Res[i] = BitValue::ref(V1); + else if (V1.is(0) || V2.is(0)) + Res[i] = BitValue::Zero; + else if (V1 == V2) + Res[i] = V1; + else + Res[i] = BitValue::self(); + } + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eORL(const RegisterCell &A1, + const RegisterCell &A2) const { + uint16_t W = A1.width(); + assert(W == A2.width()); + RegisterCell Res(W); + for (uint16_t i = 0; i < W; ++i) { + const BitValue &V1 = A1[i]; + const BitValue &V2 = A2[i]; + if (V1.is(1) || V2.is(1)) + Res[i] = BitValue::One; + else if (V1.is(0)) + Res[i] = BitValue::ref(V2); + else if (V2.is(0)) + Res[i] = BitValue::ref(V1); + else if (V1 == V2) + Res[i] = V1; + else + Res[i] = BitValue::self(); + } + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eXOR(const RegisterCell &A1, + const RegisterCell &A2) const { + uint16_t W = A1.width(); + assert(W == A2.width()); + RegisterCell Res(W); + for (uint16_t i = 0; i < W; 
++i) { + const BitValue &V1 = A1[i]; + const BitValue &V2 = A2[i]; + if (V1.is(0)) + Res[i] = BitValue::ref(V2); + else if (V2.is(0)) + Res[i] = BitValue::ref(V1); + else if (V1 == V2) + Res[i] = BitValue::Zero; + else + Res[i] = BitValue::self(); + } + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eNOT(const RegisterCell &A1) const { + uint16_t W = A1.width(); + RegisterCell Res(W); + for (uint16_t i = 0; i < W; ++i) { + const BitValue &V = A1[i]; + if (V.is(0)) + Res[i] = BitValue::One; + else if (V.is(1)) + Res[i] = BitValue::Zero; + else + Res[i] = BitValue::self(); + } + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eSET(const RegisterCell &A1, + uint16_t BitN) const { + assert(BitN < A1.width()); + RegisterCell Res = RegisterCell::ref(A1); + Res[BitN] = BitValue::One; + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eCLR(const RegisterCell &A1, + uint16_t BitN) const { + assert(BitN < A1.width()); + RegisterCell Res = RegisterCell::ref(A1); + Res[BitN] = BitValue::Zero; + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eCLB(const RegisterCell &A1, bool B, + uint16_t W) const { + uint16_t C = A1.cl(B), AW = A1.width(); + // If the last leading non-B bit is not a constant, then we don't know + // the real count. + if ((C < AW && A1[AW-1-C].num()) || C == AW) + return eIMM(C, W); + return RegisterCell::self(0, W); +} + + +BT::RegisterCell BT::MachineEvaluator::eCTB(const RegisterCell &A1, bool B, + uint16_t W) const { + uint16_t C = A1.ct(B), AW = A1.width(); + // If the last trailing non-B bit is not a constant, then we don't know + // the real count. + if ((C < AW && A1[C].num()) || C == AW) + return eIMM(C, W); + return RegisterCell::self(0, W); +} + + +BT::RegisterCell BT::MachineEvaluator::eSXT(const RegisterCell &A1, + uint16_t FromN) const { + uint16_t W = A1.width(); + assert(FromN <= W); + RegisterCell Res = RegisterCell::ref(A1); + BitValue Sign = Res[FromN-1]; + // Sign-extend "inreg". + Res.fill(FromN, W, Sign); + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eZXT(const RegisterCell &A1, + uint16_t FromN) const { + uint16_t W = A1.width(); + assert(FromN <= W); + RegisterCell Res = RegisterCell::ref(A1); + Res.fill(FromN, W, BitValue::Zero); + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eXTR(const RegisterCell &A1, + uint16_t B, uint16_t E) const { + uint16_t W = A1.width(); + assert(B < W && E <= W); + if (B == E) + return RegisterCell(0); + uint16_t Last = (E > 0) ? E-1 : W-1; + RegisterCell Res = RegisterCell::ref(A1).extract(BT::BitMask(B, Last)); + // Return shorter cell. + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eINS(const RegisterCell &A1, + const RegisterCell &A2, uint16_t AtN) const { + uint16_t W1 = A1.width(), W2 = A2.width(); + (void)W1; + assert(AtN < W1 && AtN+W2 <= W1); + // Copy bits from A1, insert A2 at position AtN. 
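+  // For example (illustrative widths): with A1 32 bits wide and A2 8 bits
+  // wide, AtN == 16 produces a copy of A1 whose bits 16-23 are refs to
+  // A2[0]..A2[7].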
+ RegisterCell Res = RegisterCell::ref(A1); + if (W2 > 0) + Res.insert(RegisterCell::ref(A2), BT::BitMask(AtN, AtN+W2-1)); + return Res; +} + + +BT::BitMask BT::MachineEvaluator::mask(unsigned Reg, unsigned Sub) const { + assert(Sub == 0 && "Generic BitTracker::mask called for Sub != 0"); + uint16_t W = getRegBitWidth(Reg); + assert(W > 0 && "Cannot generate mask for empty register"); + return BitMask(0, W-1); +} + + +bool BT::MachineEvaluator::evaluate(const MachineInstr *MI, + const CellMapType &Inputs, CellMapType &Outputs) const { + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case TargetOpcode::REG_SEQUENCE: { + RegisterRef RD = MI->getOperand(0); + assert(RD.Sub == 0); + RegisterRef RS = MI->getOperand(1); + unsigned SS = MI->getOperand(2).getImm(); + RegisterRef RT = MI->getOperand(3); + unsigned ST = MI->getOperand(4).getImm(); + assert(SS != ST); + + uint16_t W = getRegBitWidth(RD); + RegisterCell Res(W); + Res.insert(RegisterCell::ref(getCell(RS, Inputs)), mask(RD.Reg, SS)); + Res.insert(RegisterCell::ref(getCell(RT, Inputs)), mask(RD.Reg, ST)); + putCell(RD, Res, Outputs); + break; + } + + case TargetOpcode::COPY: { + // COPY can transfer a smaller register into a wider one. + // If that is the case, fill the remaining high bits with 0. + RegisterRef RD = MI->getOperand(0); + RegisterRef RS = MI->getOperand(1); + assert(RD.Sub == 0); + uint16_t WD = getRegBitWidth(RD); + uint16_t WS = getRegBitWidth(RS); + assert(WD >= WS); + RegisterCell Src = getCell(RS, Inputs); + RegisterCell Res(WD); + Res.insert(Src, BitMask(0, WS-1)); + Res.fill(WS, WD, BitValue::Zero); + putCell(RD, Res, Outputs); + break; + } + + default: + return false; + } + + return true; +} + + +// Main W-Z implementation. + +void BT::visitPHI(const MachineInstr *PI) { + int ThisN = PI->getParent()->getNumber(); + if (Trace) + dbgs() << "Visit FI(BB#" << ThisN << "): " << *PI; + + const MachineOperand &MD = PI->getOperand(0); + assert(MD.getSubReg() == 0 && "Unexpected sub-register in definition"); + RegisterRef DefRR(MD); + uint16_t DefBW = ME.getRegBitWidth(DefRR); + + RegisterCell DefC = ME.getCell(DefRR, Map); + if (DefC == RegisterCell::self(DefRR.Reg, DefBW)) // XXX slow + return; + + bool Changed = false; + + for (unsigned i = 1, n = PI->getNumOperands(); i < n; i += 2) { + const MachineBasicBlock *PB = PI->getOperand(i+1).getMBB(); + int PredN = PB->getNumber(); + if (Trace) + dbgs() << " edge BB#" << PredN << "->BB#" << ThisN; + if (!EdgeExec.count(CFGEdge(PredN, ThisN))) { + if (Trace) + dbgs() << " not executable\n"; + continue; + } + + RegisterRef RU = PI->getOperand(i); + RegisterCell ResC = ME.getCell(RU, Map); + if (Trace) + dbgs() << " input reg: " << PrintReg(RU.Reg, &ME.TRI, RU.Sub) + << " cell: " << ResC << "\n"; + Changed |= DefC.meet(ResC, DefRR.Reg); + } + + if (Changed) { + if (Trace) + dbgs() << "Output: " << PrintReg(DefRR.Reg, &ME.TRI, DefRR.Sub) + << " cell: " << DefC << "\n"; + ME.putCell(DefRR, DefC, Map); + visitUsesOf(DefRR.Reg); + } +} + + +void BT::visitNonBranch(const MachineInstr *MI) { + if (Trace) { + int ThisN = MI->getParent()->getNumber(); + dbgs() << "Visit MI(BB#" << ThisN << "): " << *MI; + } + if (MI->isDebugValue()) + return; + assert(!MI->isBranch() && "Unexpected branch instruction"); + + CellMapType ResMap; + bool Eval = ME.evaluate(MI, Map, ResMap); + + if (Trace && Eval) { + for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isUse()) + continue; + RegisterRef RU(MO); + dbgs() << 
" input reg: " << PrintReg(RU.Reg, &ME.TRI, RU.Sub) + << " cell: " << ME.getCell(RU, Map) << "\n"; + } + dbgs() << "Outputs:\n"; + for (CellMapType::iterator I = ResMap.begin(), E = ResMap.end(); + I != E; ++I) { + RegisterRef RD(I->first); + dbgs() << " " << PrintReg(I->first, &ME.TRI) << " cell: " + << ME.getCell(RD, ResMap) << "\n"; + } + } + + // Iterate over all definitions of the instruction, and update the + // cells accordingly. + for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { + const MachineOperand &MO = MI->getOperand(i); + // Visit register defs only. + if (!MO.isReg() || !MO.isDef()) + continue; + RegisterRef RD(MO); + assert(RD.Sub == 0 && "Unexpected sub-register in definition"); + if (!TargetRegisterInfo::isVirtualRegister(RD.Reg)) + continue; + + bool Changed = false; + if (!Eval || ResMap.count(RD.Reg) == 0) { + // Set to "ref" (aka "bottom"). + uint16_t DefBW = ME.getRegBitWidth(RD); + RegisterCell RefC = RegisterCell::self(RD.Reg, DefBW); + if (RefC != ME.getCell(RD, Map)) { + ME.putCell(RD, RefC, Map); + Changed = true; + } + } else { + RegisterCell DefC = ME.getCell(RD, Map); + RegisterCell ResC = ME.getCell(RD, ResMap); + // This is a non-phi instruction, so the values of the inputs come + // from the same registers each time this instruction is evaluated. + // During the propagation, the values of the inputs can become lowered + // in the sense of the lattice operation, which may cause different + // results to be calculated in subsequent evaluations. This should + // not cause the bottoming of the result in the map, since the new + // result is already reflecting the lowered inputs. + for (uint16_t i = 0, w = DefC.width(); i < w; ++i) { + BitValue &V = DefC[i]; + // Bits that are already "bottom" should not be updated. + if (V.Type == BitValue::Ref && V.RefI.Reg == RD.Reg) + continue; + // Same for those that are identical in DefC and ResC. + if (V == ResC[i]) + continue; + V = ResC[i]; + Changed = true; + } + if (Changed) + ME.putCell(RD, DefC, Map); + } + if (Changed) + visitUsesOf(RD.Reg); + } +} + + +void BT::visitBranchesFrom(const MachineInstr *BI) { + const MachineBasicBlock &B = *BI->getParent(); + MachineBasicBlock::const_iterator It = BI, End = B.end(); + BranchTargetList Targets, BTs; + bool FallsThrough = true, DefaultToAll = false; + int ThisN = B.getNumber(); + + do { + BTs.clear(); + const MachineInstr *MI = &*It; + if (Trace) + dbgs() << "Visit BR(BB#" << ThisN << "): " << *MI; + assert(MI->isBranch() && "Expecting branch instruction"); + InstrExec.insert(MI); + bool Eval = ME.evaluate(MI, Map, BTs, FallsThrough); + if (!Eval) { + // If the evaluation failed, we will add all targets. Keep going in + // the loop to mark all executable branches as such. + DefaultToAll = true; + FallsThrough = true; + if (Trace) + dbgs() << " failed to evaluate: will add all CFG successors\n"; + } else if (!DefaultToAll) { + // If evaluated successfully add the targets to the cumulative list. + if (Trace) { + dbgs() << " adding targets:"; + for (unsigned i = 0, n = BTs.size(); i < n; ++i) + dbgs() << " BB#" << BTs[i]->getNumber(); + if (FallsThrough) + dbgs() << "\n falls through\n"; + else + dbgs() << "\n does not fall through\n"; + } + Targets.insert(BTs.begin(), BTs.end()); + } + ++It; + } while (FallsThrough && It != End); + + typedef MachineBasicBlock::const_succ_iterator succ_iterator; + if (!DefaultToAll) { + // Need to add all CFG successors that lead to EH landing pads. 
+ // There won't be explicit branches to these blocks, but they must + // be processed. + for (succ_iterator I = B.succ_begin(), E = B.succ_end(); I != E; ++I) { + const MachineBasicBlock *SB = *I; + if (SB->isEHPad()) + Targets.insert(SB); + } + if (FallsThrough) { + MachineFunction::const_iterator BIt = B.getIterator(); + MachineFunction::const_iterator Next = std::next(BIt); + if (Next != MF.end()) + Targets.insert(&*Next); + } + } else { + for (succ_iterator I = B.succ_begin(), E = B.succ_end(); I != E; ++I) + Targets.insert(*I); + } + + for (unsigned i = 0, n = Targets.size(); i < n; ++i) { + int TargetN = Targets[i]->getNumber(); + FlowQ.push(CFGEdge(ThisN, TargetN)); + } +} + + +void BT::visitUsesOf(unsigned Reg) { + if (Trace) + dbgs() << "visiting uses of " << PrintReg(Reg, &ME.TRI) << "\n"; + + typedef MachineRegisterInfo::use_nodbg_iterator use_iterator; + use_iterator End = MRI.use_nodbg_end(); + for (use_iterator I = MRI.use_nodbg_begin(Reg); I != End; ++I) { + MachineInstr *UseI = I->getParent(); + if (!InstrExec.count(UseI)) + continue; + if (UseI->isPHI()) + visitPHI(UseI); + else if (!UseI->isBranch()) + visitNonBranch(UseI); + else + visitBranchesFrom(UseI); + } +} + + +BT::RegisterCell BT::get(RegisterRef RR) const { + return ME.getCell(RR, Map); +} + + +void BT::put(RegisterRef RR, const RegisterCell &RC) { + ME.putCell(RR, RC, Map); +} + + +// Replace all references to bits from OldRR with the corresponding bits +// in NewRR. +void BT::subst(RegisterRef OldRR, RegisterRef NewRR) { + assert(Map.count(OldRR.Reg) > 0 && "OldRR not present in map"); + BitMask OM = ME.mask(OldRR.Reg, OldRR.Sub); + BitMask NM = ME.mask(NewRR.Reg, NewRR.Sub); + uint16_t OMB = OM.first(), OME = OM.last(); + uint16_t NMB = NM.first(), NME = NM.last(); + (void)NME; + assert((OME-OMB == NME-NMB) && + "Substituting registers of different lengths"); + for (CellMapType::iterator I = Map.begin(), E = Map.end(); I != E; ++I) { + RegisterCell &RC = I->second; + for (uint16_t i = 0, w = RC.width(); i < w; ++i) { + BitValue &V = RC[i]; + if (V.Type != BitValue::Ref || V.RefI.Reg != OldRR.Reg) + continue; + if (V.RefI.Pos < OMB || V.RefI.Pos > OME) + continue; + V.RefI.Reg = NewRR.Reg; + V.RefI.Pos += NMB-OMB; + } + } +} + + +// Check if the block has been "executed" during propagation. (If not, the +// block is dead, but it may still appear to be reachable.) +bool BT::reached(const MachineBasicBlock *B) const { + int BN = B->getNumber(); + assert(BN >= 0); + for (EdgeSetType::iterator I = EdgeExec.begin(), E = EdgeExec.end(); + I != E; ++I) { + if (I->second == BN) + return true; + } + return false; +} + + +void BT::reset() { + EdgeExec.clear(); + InstrExec.clear(); + Map.clear(); +} + + +void BT::run() { + reset(); + assert(FlowQ.empty()); + + typedef GraphTraits<const MachineFunction*> MachineFlowGraphTraits; + const MachineBasicBlock *Entry = MachineFlowGraphTraits::getEntryNode(&MF); + + unsigned MaxBN = 0; + for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); + I != E; ++I) { + assert(I->getNumber() >= 0 && "Disconnected block"); + unsigned BN = I->getNumber(); + if (BN > MaxBN) + MaxBN = BN; + } + + // Keep track of visited blocks. + BitVector BlockScanned(MaxBN+1); + + int EntryN = Entry->getNumber(); + // Generate a fake edge to get something to start with. 
+ FlowQ.push(CFGEdge(-1, EntryN)); + + while (!FlowQ.empty()) { + CFGEdge Edge = FlowQ.front(); + FlowQ.pop(); + + if (EdgeExec.count(Edge)) + continue; + EdgeExec.insert(Edge); + + const MachineBasicBlock &B = *MF.getBlockNumbered(Edge.second); + MachineBasicBlock::const_iterator It = B.begin(), End = B.end(); + // Visit PHI nodes first. + while (It != End && It->isPHI()) { + const MachineInstr *PI = &*It++; + InstrExec.insert(PI); + visitPHI(PI); + } + + // If this block has already been visited through a flow graph edge, + // then the instructions have already been processed. Any updates to + // the cells would now only happen through visitUsesOf... + if (BlockScanned[Edge.second]) + continue; + BlockScanned[Edge.second] = true; + + // Visit non-branch instructions. + while (It != End && !It->isBranch()) { + const MachineInstr *MI = &*It++; + InstrExec.insert(MI); + visitNonBranch(MI); + } + // If block end has been reached, add the fall-through edge to the queue. + if (It == End) { + MachineFunction::const_iterator BIt = B.getIterator(); + MachineFunction::const_iterator Next = std::next(BIt); + if (Next != MF.end() && B.isSuccessor(&*Next)) { + int ThisN = B.getNumber(); + int NextN = Next->getNumber(); + FlowQ.push(CFGEdge(ThisN, NextN)); + } + } else { + // Handle the remaining sequence of branches. This function will update + // the work queue. + visitBranchesFrom(It); + } + } // while (!FlowQ->empty()) + + if (Trace) { + dbgs() << "Cells after propagation:\n"; + for (CellMapType::iterator I = Map.begin(), E = Map.end(); I != E; ++I) + dbgs() << PrintReg(I->first, &ME.TRI) << " -> " << I->second << "\n"; + } +} + diff --git a/contrib/llvm/lib/Target/Hexagon/BitTracker.h b/contrib/llvm/lib/Target/Hexagon/BitTracker.h new file mode 100644 index 0000000..959c831 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/BitTracker.h @@ -0,0 +1,435 @@ +//===--- BitTracker.h -----------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BITTRACKER_H
+#define BITTRACKER_H
+
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineFunction.h"
+
+#include <map>
+#include <queue>
+#include <set>
+
+namespace llvm {
+  class ConstantInt;
+  class MachineRegisterInfo;
+  class MachineBasicBlock;
+  class MachineInstr;
+  class MachineOperand;
+  class raw_ostream;
+
+struct BitTracker {
+  struct BitRef;
+  struct RegisterRef;
+  struct BitValue;
+  struct BitMask;
+  struct RegisterCell;
+  struct MachineEvaluator;
+
+  typedef SetVector<const MachineBasicBlock *> BranchTargetList;
+
+  typedef std::map<unsigned, RegisterCell> CellMapType;
+
+  BitTracker(const MachineEvaluator &E, MachineFunction &F);
+  ~BitTracker();
+
+  void run();
+  void trace(bool On = false) { Trace = On; }
+  bool has(unsigned Reg) const;
+  const RegisterCell &lookup(unsigned Reg) const;
+  RegisterCell get(RegisterRef RR) const;
+  void put(RegisterRef RR, const RegisterCell &RC);
+  void subst(RegisterRef OldRR, RegisterRef NewRR);
+  bool reached(const MachineBasicBlock *B) const;
+
+private:
+  void visitPHI(const MachineInstr *PI);
+  void visitNonBranch(const MachineInstr *MI);
+  void visitBranchesFrom(const MachineInstr *BI);
+  void visitUsesOf(unsigned Reg);
+  void reset();
+
+  typedef std::pair<int,int> CFGEdge;
+  typedef std::set<CFGEdge> EdgeSetType;
+  typedef std::set<const MachineInstr *> InstrSetType;
+  typedef std::queue<CFGEdge> EdgeQueueType;
+
+  EdgeSetType EdgeExec;    // Executable flow graph edges.
+  InstrSetType InstrExec;  // Executable instructions.
+  EdgeQueueType FlowQ;     // Work queue of CFG edges.
+  bool Trace;              // Enable tracing for debugging.
+
+  const MachineEvaluator &ME;
+  MachineFunction &MF;
+  MachineRegisterInfo &MRI;
+  CellMapType &Map;
+};
+
+
+// Abstraction of a reference to bit at position Pos from a register Reg.
+struct BitTracker::BitRef {
+  BitRef(unsigned R = 0, uint16_t P = 0) : Reg(R), Pos(P) {}
+  bool operator== (const BitRef &BR) const {
+    // If Reg is 0, disregard Pos.
+    return Reg == BR.Reg && (Reg == 0 || Pos == BR.Pos);
+  }
+  unsigned Reg;
+  uint16_t Pos;
+};
+
+
+// Abstraction of a register reference in MachineOperand. It contains the
+// register number and the subregister index.
+struct BitTracker::RegisterRef {
+  RegisterRef(unsigned R = 0, unsigned S = 0)
+    : Reg(R), Sub(S) {}
+  RegisterRef(const MachineOperand &MO)
+    : Reg(MO.getReg()), Sub(MO.getSubReg()) {}
+  unsigned Reg, Sub;
+};
+
+
+// Value that a single bit can take. This is outside of the context of
+// any register, it is more of an abstraction of the two-element set of
+// possible bit values. One extension here is the "Ref" type, which
+// indicates that this bit takes the same value as the bit described by
+// RefInfo.
+struct BitTracker::BitValue {
+  enum ValueType {
+    Top,    // Bit not yet defined.
+    Zero,   // Bit = 0.
+    One,    // Bit = 1.
+    Ref     // Bit value same as the one described in RefI.
+    // Conceptually, there is no explicit "bottom" value: the lattice's
+    // bottom will be expressed as a "ref to itself", which, in the context
+    // of registers, could be read as "this value of this bit is defined by
+    // this bit".
+    // The ordering is:
+    //   x <= Top,
+    //   Self <= x, where "Self" is "ref to itself".
+    // This makes the value lattice different for each virtual register
+    // (even for each bit in the same virtual register), since the "bottom"
+    // for one register will be a simple "ref" for another register.
+ // Since we do not store the "Self" bit and register number, the meet + // operation will need to take it as a parameter. + // + // In practice there is a special case for values that are not associa- + // ted with any specific virtual register. An example would be a value + // corresponding to a bit of a physical register, or an intermediate + // value obtained in some computation (such as instruction evaluation). + // Such cases are identical to the usual Ref type, but the register + // number is 0. In such case the Pos field of the reference is ignored. + // + // What is worthy of notice is that in value V (that is a "ref"), as long + // as the RefI.Reg is not 0, it may actually be the same register as the + // one in which V will be contained. If the RefI.Pos refers to the posi- + // tion of V, then V is assumed to be "bottom" (as a "ref to itself"), + // otherwise V is taken to be identical to the referenced bit of the + // same register. + // If RefI.Reg is 0, however, such a reference to the same register is + // not possible. Any value V that is a "ref", and whose RefI.Reg is 0 + // is treated as "bottom". + }; + ValueType Type; + BitRef RefI; + + BitValue(ValueType T = Top) : Type(T) {} + BitValue(bool B) : Type(B ? One : Zero) {} + BitValue(unsigned Reg, uint16_t Pos) : Type(Ref), RefI(Reg, Pos) {} + + bool operator== (const BitValue &V) const { + if (Type != V.Type) + return false; + if (Type == Ref && !(RefI == V.RefI)) + return false; + return true; + } + bool operator!= (const BitValue &V) const { + return !operator==(V); + } + bool is(unsigned T) const { + assert(T == 0 || T == 1); + return T == 0 ? Type == Zero + : (T == 1 ? Type == One : false); + } + + // The "meet" operation is the "." operation in a semilattice (L, ., T, B): + // (1) x.x = x + // (2) x.y = y.x + // (3) x.(y.z) = (x.y).z + // (4) x.T = x (i.e. T = "top") + // (5) x.B = B (i.e. B = "bottom") + // + // This "meet" function will update the value of the "*this" object with + // the newly calculated one, and return "true" if the value of *this has + // changed, and "false" otherwise. + // To prove that it satisfies the conditions (1)-(5), it is sufficient + // to show that a relation + // x <= y <=> x.y = x + // defines a partial order (i.e. that "meet" is same as "infimum"). + bool meet(const BitValue &V, const BitRef &Self) { + // First, check the cases where there is nothing to be done. + if (Type == Ref && RefI == Self) // Bottom.meet(V) = Bottom (i.e. This) + return false; + if (V.Type == Top) // This.meet(Top) = This + return false; + if (*this == V) // This.meet(This) = This + return false; + + // At this point, we know that the value of "this" will change. + // If it is Top, it will become the same as V, otherwise it will + // become "bottom" (i.e. Self). + if (Type == Top) { + Type = V.Type; + RefI = V.RefI; // This may be irrelevant, but copy anyway. + return true; + } + // Become "bottom". + Type = Ref; + RefI = Self; + return true; + } + + // Create a reference to the bit value V. + static BitValue ref(const BitValue &V); + // Create a "self". + static BitValue self(const BitRef &Self = BitRef()); + + bool num() const { + return Type == Zero || Type == One; + } + operator bool() const { + assert(Type == Zero || Type == One); + return Type == One; + } + + friend raw_ostream &operator<<(raw_ostream &OS, const BitValue &BV); +}; + + +// This operation must be idempotent, i.e. ref(ref(V)) == ref(V). 
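+// A short worked example (illustrative register numbers): if bit 3 of vreg7
+// is currently Top and an incoming value is a ref to (vreg5, 0), meet()
+// lowers it to that ref.  If a later round of propagation delivers a
+// conflicting value (say, the constant 1), meet() lowers the bit to
+// "bottom", i.e. a ref to (vreg7, 3) itself.  Since chains of refs are not
+// allowed, ref() below collapses a reference to a "ref" bit into a
+// reference to the bit it already points at, which is what the idempotency
+// requirement above expresses.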
+inline BitTracker::BitValue +BitTracker::BitValue::ref(const BitValue &V) { + if (V.Type != Ref) + return BitValue(V.Type); + if (V.RefI.Reg != 0) + return BitValue(V.RefI.Reg, V.RefI.Pos); + return self(); +} + + +inline BitTracker::BitValue +BitTracker::BitValue::self(const BitRef &Self) { + return BitValue(Self.Reg, Self.Pos); +} + + +// A sequence of bits starting from index B up to and including index E. +// If E < B, the mask represents two sections: [0..E] and [B..W) where +// W is the width of the register. +struct BitTracker::BitMask { + BitMask() : B(0), E(0) {} + BitMask(uint16_t b, uint16_t e) : B(b), E(e) {} + uint16_t first() const { return B; } + uint16_t last() const { return E; } +private: + uint16_t B, E; +}; + + +// Representation of a register: a list of BitValues. +struct BitTracker::RegisterCell { + RegisterCell(uint16_t Width = DefaultBitN) : Bits(Width) {} + + uint16_t width() const { + return Bits.size(); + } + const BitValue &operator[](uint16_t BitN) const { + assert(BitN < Bits.size()); + return Bits[BitN]; + } + BitValue &operator[](uint16_t BitN) { + assert(BitN < Bits.size()); + return Bits[BitN]; + } + + bool meet(const RegisterCell &RC, unsigned SelfR); + RegisterCell &insert(const RegisterCell &RC, const BitMask &M); + RegisterCell extract(const BitMask &M) const; // Returns a new cell. + RegisterCell &rol(uint16_t Sh); // Rotate left. + RegisterCell &fill(uint16_t B, uint16_t E, const BitValue &V); + RegisterCell &cat(const RegisterCell &RC); // Concatenate. + uint16_t cl(bool B) const; + uint16_t ct(bool B) const; + + bool operator== (const RegisterCell &RC) const; + bool operator!= (const RegisterCell &RC) const { + return !operator==(RC); + } + + // Generate a "ref" cell for the corresponding register. In the resulting + // cell each bit will be described as being the same as the corresponding + // bit in register Reg (i.e. the cell is "defined" by register Reg). + static RegisterCell self(unsigned Reg, uint16_t Width); + // Generate a "top" cell of given size. + static RegisterCell top(uint16_t Width); + // Generate a cell that is a "ref" to another cell. + static RegisterCell ref(const RegisterCell &C); + +private: + // The DefaultBitN is here only to avoid frequent reallocation of the + // memory in the vector. + static const unsigned DefaultBitN = 32; + typedef SmallVector<BitValue, DefaultBitN> BitValueList; + BitValueList Bits; + + friend raw_ostream &operator<<(raw_ostream &OS, const RegisterCell &RC); +}; + + +inline bool BitTracker::has(unsigned Reg) const { + return Map.find(Reg) != Map.end(); +} + + +inline const BitTracker::RegisterCell& +BitTracker::lookup(unsigned Reg) const { + CellMapType::const_iterator F = Map.find(Reg); + assert(F != Map.end()); + return F->second; +} + + +inline BitTracker::RegisterCell +BitTracker::RegisterCell::self(unsigned Reg, uint16_t Width) { + RegisterCell RC(Width); + for (uint16_t i = 0; i < Width; ++i) + RC.Bits[i] = BitValue::self(BitRef(Reg, i)); + return RC; +} + + +inline BitTracker::RegisterCell +BitTracker::RegisterCell::top(uint16_t Width) { + RegisterCell RC(Width); + for (uint16_t i = 0; i < Width; ++i) + RC.Bits[i] = BitValue(BitValue::Top); + return RC; +} + + +inline BitTracker::RegisterCell +BitTracker::RegisterCell::ref(const RegisterCell &C) { + uint16_t W = C.width(); + RegisterCell RC(W); + for (unsigned i = 0; i < W; ++i) + RC[i] = BitValue::ref(C[i]); + return RC; +} + +// A class to evaluate target's instructions and update the cell maps. 
+// This is used internally by the bit tracker. A target that wants to +// utilize this should implement the evaluation functions (noted below) +// in a subclass of this class. +struct BitTracker::MachineEvaluator { + MachineEvaluator(const TargetRegisterInfo &T, MachineRegisterInfo &M) + : TRI(T), MRI(M) {} + virtual ~MachineEvaluator() {} + + uint16_t getRegBitWidth(const RegisterRef &RR) const; + + RegisterCell getCell(const RegisterRef &RR, const CellMapType &M) const; + void putCell(const RegisterRef &RR, RegisterCell RC, CellMapType &M) const; + // A result of any operation should use refs to the source cells, not + // the cells directly. This function is a convenience wrapper to quickly + // generate a ref for a cell corresponding to a register reference. + RegisterCell getRef(const RegisterRef &RR, const CellMapType &M) const { + RegisterCell RC = getCell(RR, M); + return RegisterCell::ref(RC); + } + + // Helper functions. + // Check if a cell is an immediate value (i.e. all bits are either 0 or 1). + bool isInt(const RegisterCell &A) const; + // Convert cell to an immediate value. + uint64_t toInt(const RegisterCell &A) const; + + // Generate cell from an immediate value. + RegisterCell eIMM(int64_t V, uint16_t W) const; + RegisterCell eIMM(const ConstantInt *CI) const; + + // Arithmetic. + RegisterCell eADD(const RegisterCell &A1, const RegisterCell &A2) const; + RegisterCell eSUB(const RegisterCell &A1, const RegisterCell &A2) const; + RegisterCell eMLS(const RegisterCell &A1, const RegisterCell &A2) const; + RegisterCell eMLU(const RegisterCell &A1, const RegisterCell &A2) const; + + // Shifts. + RegisterCell eASL(const RegisterCell &A1, uint16_t Sh) const; + RegisterCell eLSR(const RegisterCell &A1, uint16_t Sh) const; + RegisterCell eASR(const RegisterCell &A1, uint16_t Sh) const; + + // Logical. + RegisterCell eAND(const RegisterCell &A1, const RegisterCell &A2) const; + RegisterCell eORL(const RegisterCell &A1, const RegisterCell &A2) const; + RegisterCell eXOR(const RegisterCell &A1, const RegisterCell &A2) const; + RegisterCell eNOT(const RegisterCell &A1) const; + + // Set bit, clear bit. + RegisterCell eSET(const RegisterCell &A1, uint16_t BitN) const; + RegisterCell eCLR(const RegisterCell &A1, uint16_t BitN) const; + + // Count leading/trailing bits (zeros/ones). + RegisterCell eCLB(const RegisterCell &A1, bool B, uint16_t W) const; + RegisterCell eCTB(const RegisterCell &A1, bool B, uint16_t W) const; + + // Sign/zero extension. + RegisterCell eSXT(const RegisterCell &A1, uint16_t FromN) const; + RegisterCell eZXT(const RegisterCell &A1, uint16_t FromN) const; + + // Extract/insert + // XTR R,b,e: extract bits from A1 starting at bit b, ending at e-1. + // INS R,S,b: take R and replace bits starting from b with S. + RegisterCell eXTR(const RegisterCell &A1, uint16_t B, uint16_t E) const; + RegisterCell eINS(const RegisterCell &A1, const RegisterCell &A2, + uint16_t AtN) const; + + // User-provided functions for individual targets: + + // Return a sub-register mask that indicates which bits in Reg belong + // to the subregister Sub. These bits are assumed to be contiguous in + // the super-register, and have the same ordering in the sub-register + // as in the super-register. It is valid to call this function with + // Sub == 0, in this case, the function should return a mask that spans + // the entire register Reg (which is what the default implementation + // does). 
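+  // As an illustration only (a hypothetical target, not part of this
+  // interface): a 64-bit register pair with 32-bit "lo"/"hi" subregisters
+  // could override it as
+  //   BitMask MyEval::mask(unsigned Reg, unsigned Sub) const {
+  //     if (Sub == LoSubReg)  return BitMask(0, 31);
+  //     if (Sub == HiSubReg)  return BitMask(32, 63);
+  //     return MachineEvaluator::mask(Reg, Sub);  // Sub == 0: whole register.
+  //   }
+  // where MyEval, LoSubReg and HiSubReg stand in for the target's own
+  // evaluator class and subregister indices.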
+ virtual BitMask mask(unsigned Reg, unsigned Sub) const; + // Indicate whether a given register class should be tracked. + virtual bool track(const TargetRegisterClass *RC) const { return true; } + // Evaluate a non-branching machine instruction, given the cell map with + // the input values. Place the results in the Outputs map. Return "true" + // if evaluation succeeded, "false" otherwise. + virtual bool evaluate(const MachineInstr *MI, const CellMapType &Inputs, + CellMapType &Outputs) const; + // Evaluate a branch, given the cell map with the input values. Fill out + // a list of all possible branch targets and indicate (through a flag) + // whether the branch could fall-through. Return "true" if this information + // has been successfully computed, "false" otherwise. + virtual bool evaluate(const MachineInstr *BI, const CellMapType &Inputs, + BranchTargetList &Targets, bool &FallsThru) const = 0; + + const TargetRegisterInfo &TRI; + MachineRegisterInfo &MRI; +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp new file mode 100644 index 0000000..4a9c341 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp @@ -0,0 +1,1605 @@ +//===-- HexagonDisassembler.cpp - Disassembler for Hexagon ISA ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "hexagon-disassembler" + +#include "Hexagon.h" +#include "MCTargetDesc/HexagonBaseInfo.h" +#include "MCTargetDesc/HexagonMCChecker.h" +#include "MCTargetDesc/HexagonMCTargetDesc.h" +#include "MCTargetDesc/HexagonMCInstrInfo.h" +#include "MCTargetDesc/HexagonInstPrinter.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCFixedLenDisassembler.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/LEB128.h" +#include "llvm/Support/MemoryObject.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/TargetRegistry.h" +#include <vector> + +using namespace llvm; +using namespace Hexagon; + +typedef MCDisassembler::DecodeStatus DecodeStatus; + +namespace { +/// \brief Hexagon disassembler for all Hexagon platforms. 
+class HexagonDisassembler : public MCDisassembler { +public: + std::unique_ptr<MCInstrInfo const> const MCII; + std::unique_ptr<MCInst *> CurrentBundle; + HexagonDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, + MCInstrInfo const *MCII) + : MCDisassembler(STI, Ctx), MCII(MCII), CurrentBundle(new MCInst *) {} + + DecodeStatus getSingleInstruction(MCInst &Instr, MCInst &MCB, + ArrayRef<uint8_t> Bytes, uint64_t Address, + raw_ostream &VStream, raw_ostream &CStream, + bool &Complete) const; + DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, + ArrayRef<uint8_t> Bytes, uint64_t Address, + raw_ostream &VStream, + raw_ostream &CStream) const override; + + void adjustExtendedInstructions(MCInst &MCI, MCInst const &MCB) const; + void addSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) const; +}; +} + +// Forward declare these because the auto-generated code will reference them. +// Definitions are further down. + +static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeIntRegsLow8RegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeVectorRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeDoubleRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeVecDblRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodePredRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeVecPredRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); + +static DecodeStatus decodeSpecial(MCInst &MI, uint32_t insn); +static DecodeStatus decodeImmext(MCInst &MI, uint32_t insn, + void const *Decoder); + +static unsigned GetSubinstOpcode(unsigned IClass, unsigned inst, unsigned &op, + raw_ostream &os); + +static unsigned getRegFromSubinstEncoding(unsigned encoded_reg); + +static DecodeStatus unsignedImmDecoder(MCInst &MI, unsigned tmp, + uint64_t Address, const void *Decoder); +static DecodeStatus s16ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, + const void *Decoder); +static DecodeStatus s12ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, + const void *Decoder); +static DecodeStatus s11_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, + const void *Decoder); +static DecodeStatus s11_1ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, + const void *Decoder); +static DecodeStatus s11_2ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, + const void *Decoder); +static DecodeStatus s11_3ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, + const void *Decoder); +static DecodeStatus s10ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, + const void *Decoder); +static DecodeStatus s8ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, + const void *Decoder); +static DecodeStatus s6_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, + const void *Decoder); 
+static DecodeStatus s4_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, + const void *Decoder); +static DecodeStatus s4_1ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, + const void *Decoder); +static DecodeStatus s4_2ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, + const void *Decoder); +static DecodeStatus s4_3ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, + const void *Decoder); +static DecodeStatus s4_6ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, + const void *Decoder); +static DecodeStatus s3_6ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, + const void *Decoder); +static DecodeStatus brtargetDecoder(MCInst &MI, unsigned tmp, uint64_t Address, + const void *Decoder); + +#include "HexagonGenDisassemblerTables.inc" + +static MCDisassembler *createHexagonDisassembler(const Target &T, + const MCSubtargetInfo &STI, + MCContext &Ctx) { + return new HexagonDisassembler(STI, Ctx, T.createMCInstrInfo()); +} + +extern "C" void LLVMInitializeHexagonDisassembler() { + TargetRegistry::RegisterMCDisassembler(TheHexagonTarget, + createHexagonDisassembler); +} + +DecodeStatus HexagonDisassembler::getInstruction(MCInst &MI, uint64_t &Size, + ArrayRef<uint8_t> Bytes, + uint64_t Address, + raw_ostream &os, + raw_ostream &cs) const { + DecodeStatus Result = DecodeStatus::Success; + bool Complete = false; + Size = 0; + + *CurrentBundle = &MI; + MI = HexagonMCInstrInfo::createBundle(); + while (Result == Success && Complete == false) { + if (Bytes.size() < HEXAGON_INSTR_SIZE) + return MCDisassembler::Fail; + MCInst *Inst = new (getContext()) MCInst; + Result = getSingleInstruction(*Inst, MI, Bytes, Address, os, cs, Complete); + MI.addOperand(MCOperand::createInst(Inst)); + Size += HEXAGON_INSTR_SIZE; + Bytes = Bytes.slice(HEXAGON_INSTR_SIZE); + } + if(Result == MCDisassembler::Fail) + return Result; + HexagonMCChecker Checker (*MCII, STI, MI, MI, *getContext().getRegisterInfo()); + if(!Checker.check()) + return MCDisassembler::Fail; + return MCDisassembler::Success; +} + +namespace { +HexagonDisassembler const &disassembler(void const *Decoder) { + return *static_cast<HexagonDisassembler const *>(Decoder); +} +MCContext &contextFromDecoder(void const *Decoder) { + return disassembler(Decoder).getContext(); +} +} + +DecodeStatus HexagonDisassembler::getSingleInstruction( + MCInst &MI, MCInst &MCB, ArrayRef<uint8_t> Bytes, uint64_t Address, + raw_ostream &os, raw_ostream &cs, bool &Complete) const { + assert(Bytes.size() >= HEXAGON_INSTR_SIZE); + + uint32_t Instruction = + (Bytes[3] << 24) | (Bytes[2] << 16) | (Bytes[1] << 8) | (Bytes[0] << 0); + + auto BundleSize = HexagonMCInstrInfo::bundleSize(MCB); + if ((Instruction & HexagonII::INST_PARSE_MASK) == + HexagonII::INST_PARSE_LOOP_END) { + if (BundleSize == 0) + HexagonMCInstrInfo::setInnerLoop(MCB); + else if (BundleSize == 1) + HexagonMCInstrInfo::setOuterLoop(MCB); + else + return DecodeStatus::Fail; + } + + DecodeStatus Result = DecodeStatus::Success; + if ((Instruction & HexagonII::INST_PARSE_MASK) == + HexagonII::INST_PARSE_DUPLEX) { + // Determine the instruction class of each instruction in the duplex. 
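getSingleInstruction above assembles each word from little-endian bytes and routes it by its parse field before reaching the duplex class table that follows. As background, a hedged standalone sketch of that routing: the parse field sits in bits 15:14, mirroring the HexagonII::INST_PARSE_* masks used above (the authoritative values live in HexagonBaseInfo.h), and the enum and helper here are illustrative, not part of the disassembler.

  #include <cstdint>

  enum class ParseKind { Duplex, NotEnd, LoopEnd, PacketEnd };

  // Assemble a packet word from little-endian bytes and classify its
  // parse bits (bits 15:14 of the word).
  static ParseKind classifyWord(const uint8_t Bytes[4], uint32_t &Word) {
    Word = (uint32_t(Bytes[3]) << 24) | (uint32_t(Bytes[2]) << 16) |
           (uint32_t(Bytes[1]) << 8)  |  uint32_t(Bytes[0]);
    switch ((Word >> 14) & 0x3) {
    case 0:  return ParseKind::Duplex;     // two 13-bit sub-instructions
    case 2:  return ParseKind::LoopEnd;    // flags an inner/outer hardware loop
    case 3:  return ParseKind::PacketEnd;  // last word of the packet
    default: return ParseKind::NotEnd;     // packet continues
    }
  }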
+ unsigned duplexIClass, IClassLow, IClassHigh; + + duplexIClass = ((Instruction >> 28) & 0xe) | ((Instruction >> 13) & 0x1); + switch (duplexIClass) { + default: + return MCDisassembler::Fail; + case 0: + IClassLow = HexagonII::HSIG_L1; + IClassHigh = HexagonII::HSIG_L1; + break; + case 1: + IClassLow = HexagonII::HSIG_L2; + IClassHigh = HexagonII::HSIG_L1; + break; + case 2: + IClassLow = HexagonII::HSIG_L2; + IClassHigh = HexagonII::HSIG_L2; + break; + case 3: + IClassLow = HexagonII::HSIG_A; + IClassHigh = HexagonII::HSIG_A; + break; + case 4: + IClassLow = HexagonII::HSIG_L1; + IClassHigh = HexagonII::HSIG_A; + break; + case 5: + IClassLow = HexagonII::HSIG_L2; + IClassHigh = HexagonII::HSIG_A; + break; + case 6: + IClassLow = HexagonII::HSIG_S1; + IClassHigh = HexagonII::HSIG_A; + break; + case 7: + IClassLow = HexagonII::HSIG_S2; + IClassHigh = HexagonII::HSIG_A; + break; + case 8: + IClassLow = HexagonII::HSIG_S1; + IClassHigh = HexagonII::HSIG_L1; + break; + case 9: + IClassLow = HexagonII::HSIG_S1; + IClassHigh = HexagonII::HSIG_L2; + break; + case 10: + IClassLow = HexagonII::HSIG_S1; + IClassHigh = HexagonII::HSIG_S1; + break; + case 11: + IClassLow = HexagonII::HSIG_S2; + IClassHigh = HexagonII::HSIG_S1; + break; + case 12: + IClassLow = HexagonII::HSIG_S2; + IClassHigh = HexagonII::HSIG_L1; + break; + case 13: + IClassLow = HexagonII::HSIG_S2; + IClassHigh = HexagonII::HSIG_L2; + break; + case 14: + IClassLow = HexagonII::HSIG_S2; + IClassHigh = HexagonII::HSIG_S2; + break; + } + + // Set the MCInst to be a duplex instruction. Which one doesn't matter. + MI.setOpcode(Hexagon::DuplexIClass0); + + // Decode each instruction in the duplex. + // Create an MCInst for each instruction. + unsigned instLow = Instruction & 0x1fff; + unsigned instHigh = (Instruction >> 16) & 0x1fff; + unsigned opLow; + if (GetSubinstOpcode(IClassLow, instLow, opLow, os) != + MCDisassembler::Success) + return MCDisassembler::Fail; + unsigned opHigh; + if (GetSubinstOpcode(IClassHigh, instHigh, opHigh, os) != + MCDisassembler::Success) + return MCDisassembler::Fail; + MCInst *MILow = new (getContext()) MCInst; + MILow->setOpcode(opLow); + MCInst *MIHigh = new (getContext()) MCInst; + MIHigh->setOpcode(opHigh); + addSubinstOperands(MILow, opLow, instLow); + addSubinstOperands(MIHigh, opHigh, instHigh); + // see ConvertToSubInst() in + // lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp + + // Add the duplex instruction MCInsts as operands to the passed in MCInst. + MCOperand OPLow = MCOperand::createInst(MILow); + MCOperand OPHigh = MCOperand::createInst(MIHigh); + MI.addOperand(OPLow); + MI.addOperand(OPHigh); + Complete = true; + } else { + if ((Instruction & HexagonII::INST_PARSE_MASK) == + HexagonII::INST_PARSE_PACKET_END) + Complete = true; + // Calling the auto-generated decoder function. + Result = + decodeInstruction(DecoderTable32, MI, Instruction, Address, this, STI); + + // If a, "standard" insn isn't found check special cases. + if (MCDisassembler::Success != Result || + MI.getOpcode() == Hexagon::A4_ext) { + Result = decodeImmext(MI, Instruction, this); + if (MCDisassembler::Success != Result) { + Result = decodeSpecial(MI, Instruction); + } + } else { + // If the instruction is a compound instruction, register values will + // follow the duplex model, so the register values in the MCInst are + // incorrect. If the instruction is a compound, loop through the + // operands and change registers appropriately. 
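The remapping loop that follows, like the duplex operand decoding later in this file, relies on the sub-instruction GPR encoding, which is only spelled out much further down in getRegFromSubinstEncoding. A compressed restatement of it, as a sketch:

  // Mirrors getRegFromSubinstEncoding() defined later in this file: the
  // 4-bit register field of a sub-instruction names r0-r7 directly and maps
  // encodings 8-15 onto r16-r23; anything else is invalid.
  static unsigned subinstGPR(unsigned Enc) {
    if (Enc < 8)  return Hexagon::R0 + Enc;        // 0..7  -> r0..r7
    if (Enc < 16) return Hexagon::R0 + Enc + 8;    // 8..15 -> r16..r23
    return Hexagon::NoRegister;                    // invalid field value
  }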
+ if (llvm::HexagonMCInstrInfo::getType(*MCII, MI) == + HexagonII::TypeCOMPOUND) { + for (MCInst::iterator i = MI.begin(), last = MI.end(); i < last; ++i) { + if (i->isReg()) { + unsigned reg = i->getReg() - Hexagon::R0; + i->setReg(getRegFromSubinstEncoding(reg)); + } + } + } + } + } + + if (HexagonMCInstrInfo::isNewValue(*MCII, MI)) { + unsigned OpIndex = HexagonMCInstrInfo::getNewValueOp(*MCII, MI); + MCOperand &MCO = MI.getOperand(OpIndex); + assert(MCO.isReg() && "New value consumers must be registers"); + unsigned Register = + getContext().getRegisterInfo()->getEncodingValue(MCO.getReg()); + if ((Register & 0x6) == 0) + // HexagonPRM 10.11 Bit 1-2 == 0 is reserved + return MCDisassembler::Fail; + unsigned Lookback = (Register & 0x6) >> 1; + unsigned Offset = 1; + bool Vector = HexagonMCInstrInfo::isVector(*MCII, MI); + auto Instructions = HexagonMCInstrInfo::bundleInstructions(**CurrentBundle); + auto i = Instructions.end() - 1; + for (auto n = Instructions.begin() - 1;; --i, ++Offset) { + if (i == n) + // Couldn't find producer + return MCDisassembler::Fail; + if (Vector && !HexagonMCInstrInfo::isVector(*MCII, *i->getInst())) + // Skip scalars when calculating distances for vectors + ++Lookback; + if (HexagonMCInstrInfo::isImmext(*i->getInst())) + ++Lookback; + if (Offset == Lookback) + break; + } + auto const &Inst = *i->getInst(); + bool SubregBit = (Register & 0x1) != 0; + if (SubregBit && HexagonMCInstrInfo::hasNewValue2(*MCII, Inst)) { + // If subreg bit is set we're selecting the second produced newvalue + unsigned Producer = + HexagonMCInstrInfo::getNewValueOperand2(*MCII, Inst).getReg(); + assert(Producer != Hexagon::NoRegister); + MCO.setReg(Producer); + } else if (HexagonMCInstrInfo::hasNewValue(*MCII, Inst)) { + unsigned Producer = + HexagonMCInstrInfo::getNewValueOperand(*MCII, Inst).getReg(); + if (Producer >= Hexagon::W0 && Producer <= Hexagon::W15) + Producer = ((Producer - Hexagon::W0) << 1) + SubregBit + Hexagon::V0; + else if (SubregBit) + // Subreg bit should not be set for non-doublevector newvalue producers + return MCDisassembler::Fail; + assert(Producer != Hexagon::NoRegister); + MCO.setReg(Producer); + } else + return MCDisassembler::Fail; + } + + adjustExtendedInstructions(MI, MCB); + MCInst const *Extender = + HexagonMCInstrInfo::extenderForIndex(MCB, + HexagonMCInstrInfo::bundleSize(MCB)); + if(Extender != nullptr) { + MCInst const & Inst = HexagonMCInstrInfo::isDuplex(*MCII, MI) ? + *MI.getOperand(1).getInst() : MI; + if (!HexagonMCInstrInfo::isExtendable(*MCII, Inst) && + !HexagonMCInstrInfo::isExtended(*MCII, Inst)) + return MCDisassembler::Fail; + } + return Result; +} + +void HexagonDisassembler::adjustExtendedInstructions(MCInst &MCI, + MCInst const &MCB) const { + if (!HexagonMCInstrInfo::hasExtenderForIndex( + MCB, HexagonMCInstrInfo::bundleSize(MCB))) { + unsigned opcode; + // This code is used by the disassembler to disambiguate between GP + // relative and absolute addressing instructions since they both have + // same encoding bits. However, an absolute addressing instruction must + // follow an immediate extender. Disassembler alwaus select absolute + // addressing instructions first and uses this code to change them into + // GP relative instruction in the absence of the corresponding immediate + // extender. 
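The switch that follows implements the rule just described: absolute and GP-relative forms share encoding bits, so only the presence of an immediate extender tells them apart. A compressed sketch of that decision, using two opcode pairs taken from the table below (not a drop-in replacement for the switch):

  // Sketch only: choose between an absolute opcode and its GP-relative twin
  // based on whether the slot is covered by an immediate extender.
  static unsigned disambiguateAbsVsGP(unsigned AbsOpcode, bool HasExtender) {
    if (HasExtender)
      return AbsOpcode;                             // memw(##imm32) style form
    switch (AbsOpcode) {                            // no extender: GP-relative
    case Hexagon::S2_storeriabs: return Hexagon::S2_storerigp;
    case Hexagon::L4_loadri_abs: return Hexagon::L2_loadrigp;
    default:                     return AbsOpcode;  // not an abs/GP pair
    }
  }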
+ switch (MCI.getOpcode()) { + case Hexagon::S2_storerbabs: + opcode = Hexagon::S2_storerbgp; + break; + case Hexagon::S2_storerhabs: + opcode = Hexagon::S2_storerhgp; + break; + case Hexagon::S2_storerfabs: + opcode = Hexagon::S2_storerfgp; + break; + case Hexagon::S2_storeriabs: + opcode = Hexagon::S2_storerigp; + break; + case Hexagon::S2_storerbnewabs: + opcode = Hexagon::S2_storerbnewgp; + break; + case Hexagon::S2_storerhnewabs: + opcode = Hexagon::S2_storerhnewgp; + break; + case Hexagon::S2_storerinewabs: + opcode = Hexagon::S2_storerinewgp; + break; + case Hexagon::S2_storerdabs: + opcode = Hexagon::S2_storerdgp; + break; + case Hexagon::L4_loadrb_abs: + opcode = Hexagon::L2_loadrbgp; + break; + case Hexagon::L4_loadrub_abs: + opcode = Hexagon::L2_loadrubgp; + break; + case Hexagon::L4_loadrh_abs: + opcode = Hexagon::L2_loadrhgp; + break; + case Hexagon::L4_loadruh_abs: + opcode = Hexagon::L2_loadruhgp; + break; + case Hexagon::L4_loadri_abs: + opcode = Hexagon::L2_loadrigp; + break; + case Hexagon::L4_loadrd_abs: + opcode = Hexagon::L2_loadrdgp; + break; + default: + opcode = MCI.getOpcode(); + } + MCI.setOpcode(opcode); + } +} + +namespace llvm { +extern const MCInstrDesc HexagonInsts[]; +} + +static DecodeStatus DecodeRegisterClass(MCInst &Inst, unsigned RegNo, + ArrayRef<MCPhysReg> Table) { + if (RegNo < Table.size()) { + Inst.addOperand(MCOperand::createReg(Table[RegNo])); + return MCDisassembler::Success; + } + + return MCDisassembler::Fail; +} + +static DecodeStatus DecodeIntRegsLow8RegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder) { + return DecodeIntRegsRegisterClass(Inst, RegNo, Address, Decoder); +} + +static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder) { + static const MCPhysReg IntRegDecoderTable[] = { + Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4, + Hexagon::R5, Hexagon::R6, Hexagon::R7, Hexagon::R8, Hexagon::R9, + Hexagon::R10, Hexagon::R11, Hexagon::R12, Hexagon::R13, Hexagon::R14, + Hexagon::R15, Hexagon::R16, Hexagon::R17, Hexagon::R18, Hexagon::R19, + Hexagon::R20, Hexagon::R21, Hexagon::R22, Hexagon::R23, Hexagon::R24, + Hexagon::R25, Hexagon::R26, Hexagon::R27, Hexagon::R28, Hexagon::R29, + Hexagon::R30, Hexagon::R31}; + + return DecodeRegisterClass(Inst, RegNo, IntRegDecoderTable); +} + +static DecodeStatus DecodeVectorRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, + const void *Decoder) { + static const MCPhysReg VecRegDecoderTable[] = { + Hexagon::V0, Hexagon::V1, Hexagon::V2, Hexagon::V3, Hexagon::V4, + Hexagon::V5, Hexagon::V6, Hexagon::V7, Hexagon::V8, Hexagon::V9, + Hexagon::V10, Hexagon::V11, Hexagon::V12, Hexagon::V13, Hexagon::V14, + Hexagon::V15, Hexagon::V16, Hexagon::V17, Hexagon::V18, Hexagon::V19, + Hexagon::V20, Hexagon::V21, Hexagon::V22, Hexagon::V23, Hexagon::V24, + Hexagon::V25, Hexagon::V26, Hexagon::V27, Hexagon::V28, Hexagon::V29, + Hexagon::V30, Hexagon::V31}; + + return DecodeRegisterClass(Inst, RegNo, VecRegDecoderTable); +} + +static DecodeStatus DecodeDoubleRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, + const void *Decoder) { + static const MCPhysReg DoubleRegDecoderTable[] = { + Hexagon::D0, Hexagon::D1, Hexagon::D2, Hexagon::D3, + Hexagon::D4, Hexagon::D5, Hexagon::D6, Hexagon::D7, + Hexagon::D8, Hexagon::D9, Hexagon::D10, Hexagon::D11, + Hexagon::D12, Hexagon::D13, Hexagon::D14, Hexagon::D15}; + + return DecodeRegisterClass(Inst, RegNo >> 1, 
DoubleRegDecoderTable); +} + +static DecodeStatus DecodeVecDblRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, + const void *Decoder) { + static const MCPhysReg VecDblRegDecoderTable[] = { + Hexagon::W0, Hexagon::W1, Hexagon::W2, Hexagon::W3, + Hexagon::W4, Hexagon::W5, Hexagon::W6, Hexagon::W7, + Hexagon::W8, Hexagon::W9, Hexagon::W10, Hexagon::W11, + Hexagon::W12, Hexagon::W13, Hexagon::W14, Hexagon::W15}; + + return (DecodeRegisterClass(Inst, RegNo >> 1, VecDblRegDecoderTable)); +} + +static DecodeStatus DecodePredRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, + const void *Decoder) { + static const MCPhysReg PredRegDecoderTable[] = {Hexagon::P0, Hexagon::P1, + Hexagon::P2, Hexagon::P3}; + + return DecodeRegisterClass(Inst, RegNo, PredRegDecoderTable); +} + +static DecodeStatus DecodeVecPredRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, + const void *Decoder) { + static const MCPhysReg VecPredRegDecoderTable[] = {Hexagon::Q0, Hexagon::Q1, + Hexagon::Q2, Hexagon::Q3}; + + return DecodeRegisterClass(Inst, RegNo, VecPredRegDecoderTable); +} + +static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, + const void *Decoder) { + static const MCPhysReg CtrlRegDecoderTable[] = { + Hexagon::SA0, Hexagon::LC0, Hexagon::SA1, Hexagon::LC1, + Hexagon::P3_0, Hexagon::C5, Hexagon::C6, Hexagon::C7, + Hexagon::USR, Hexagon::PC, Hexagon::UGP, Hexagon::GP, + Hexagon::CS0, Hexagon::CS1, Hexagon::UPCL, Hexagon::UPC + }; + + if (RegNo >= array_lengthof(CtrlRegDecoderTable)) + return MCDisassembler::Fail; + + if (CtrlRegDecoderTable[RegNo] == Hexagon::NoRegister) + return MCDisassembler::Fail; + + unsigned Register = CtrlRegDecoderTable[RegNo]; + Inst.addOperand(MCOperand::createReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, + const void *Decoder) { + static const MCPhysReg CtrlReg64DecoderTable[] = { + Hexagon::C1_0, Hexagon::NoRegister, + Hexagon::C3_2, Hexagon::NoRegister, + Hexagon::C7_6, Hexagon::NoRegister, + Hexagon::C9_8, Hexagon::NoRegister, + Hexagon::C11_10, Hexagon::NoRegister, + Hexagon::CS, Hexagon::NoRegister, + Hexagon::UPC, Hexagon::NoRegister + }; + + if (RegNo >= array_lengthof(CtrlReg64DecoderTable)) + return MCDisassembler::Fail; + + if (CtrlReg64DecoderTable[RegNo] == Hexagon::NoRegister) + return MCDisassembler::Fail; + + unsigned Register = CtrlReg64DecoderTable[RegNo]; + Inst.addOperand(MCOperand::createReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, + const void *Decoder) { + unsigned Register = 0; + switch (RegNo) { + case 0: + Register = Hexagon::M0; + break; + case 1: + Register = Hexagon::M1; + break; + default: + return MCDisassembler::Fail; + } + Inst.addOperand(MCOperand::createReg(Register)); + return MCDisassembler::Success; +} + +namespace { +uint32_t fullValue(MCInstrInfo const &MCII, + MCInst &MCB, + MCInst &MI, + int64_t Value) { + MCInst const *Extender = HexagonMCInstrInfo::extenderForIndex( + MCB, HexagonMCInstrInfo::bundleSize(MCB)); + if(!Extender || MI.size() != HexagonMCInstrInfo::getExtendableOp(MCII, MI)) + return Value; + unsigned Alignment = HexagonMCInstrInfo::getExtentAlignment(MCII, MI); + uint32_t Lower6 = static_cast<uint32_t>(Value >> Alignment) & 0x3f; + int64_t Bits; + bool Success = 
Extender->getOperand(0).getExpr()->evaluateAsAbsolute(Bits); + assert(Success);(void)Success; + uint32_t Upper26 = static_cast<uint32_t>(Bits); + uint32_t Operand = Upper26 | Lower6; + return Operand; +} +template <size_t T> +void signedDecoder(MCInst &MI, unsigned tmp, const void *Decoder) { + HexagonDisassembler const &Disassembler = disassembler(Decoder); + int64_t FullValue = fullValue(*Disassembler.MCII, + **Disassembler.CurrentBundle, + MI, SignExtend64<T>(tmp)); + int64_t Extended = SignExtend64<32>(FullValue); + HexagonMCInstrInfo::addConstant(MI, Extended, + Disassembler.getContext()); +} +} + +static DecodeStatus unsignedImmDecoder(MCInst &MI, unsigned tmp, + uint64_t /*Address*/, + const void *Decoder) { + HexagonDisassembler const &Disassembler = disassembler(Decoder); + int64_t FullValue = fullValue(*Disassembler.MCII, + **Disassembler.CurrentBundle, + MI, tmp); + assert(FullValue >= 0 && "Negative in unsigned decoder"); + HexagonMCInstrInfo::addConstant(MI, FullValue, Disassembler.getContext()); + return MCDisassembler::Success; +} + +static DecodeStatus s16ImmDecoder(MCInst &MI, unsigned tmp, + uint64_t /*Address*/, const void *Decoder) { + signedDecoder<16>(MI, tmp, Decoder); + return MCDisassembler::Success; +} + +static DecodeStatus s12ImmDecoder(MCInst &MI, unsigned tmp, + uint64_t /*Address*/, const void *Decoder) { + signedDecoder<12>(MI, tmp, Decoder); + return MCDisassembler::Success; +} + +static DecodeStatus s11_0ImmDecoder(MCInst &MI, unsigned tmp, + uint64_t /*Address*/, const void *Decoder) { + signedDecoder<11>(MI, tmp, Decoder); + return MCDisassembler::Success; +} + +static DecodeStatus s11_1ImmDecoder(MCInst &MI, unsigned tmp, + uint64_t /*Address*/, const void *Decoder) { + HexagonMCInstrInfo::addConstant(MI, SignExtend64<12>(tmp), contextFromDecoder(Decoder)); + return MCDisassembler::Success; +} + +static DecodeStatus s11_2ImmDecoder(MCInst &MI, unsigned tmp, + uint64_t /*Address*/, const void *Decoder) { + signedDecoder<13>(MI, tmp, Decoder); + return MCDisassembler::Success; +} + +static DecodeStatus s11_3ImmDecoder(MCInst &MI, unsigned tmp, + uint64_t /*Address*/, const void *Decoder) { + signedDecoder<14>(MI, tmp, Decoder); + return MCDisassembler::Success; +} + +static DecodeStatus s10ImmDecoder(MCInst &MI, unsigned tmp, + uint64_t /*Address*/, const void *Decoder) { + signedDecoder<10>(MI, tmp, Decoder); + return MCDisassembler::Success; +} + +static DecodeStatus s8ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/, + const void *Decoder) { + signedDecoder<8>(MI, tmp, Decoder); + return MCDisassembler::Success; +} + +static DecodeStatus s6_0ImmDecoder(MCInst &MI, unsigned tmp, + uint64_t /*Address*/, const void *Decoder) { + signedDecoder<6>(MI, tmp, Decoder); + return MCDisassembler::Success; +} + +static DecodeStatus s4_0ImmDecoder(MCInst &MI, unsigned tmp, + uint64_t /*Address*/, const void *Decoder) { + signedDecoder<4>(MI, tmp, Decoder); + return MCDisassembler::Success; +} + +static DecodeStatus s4_1ImmDecoder(MCInst &MI, unsigned tmp, + uint64_t /*Address*/, const void *Decoder) { + signedDecoder<5>(MI, tmp, Decoder); + return MCDisassembler::Success; +} + +static DecodeStatus s4_2ImmDecoder(MCInst &MI, unsigned tmp, + uint64_t /*Address*/, const void *Decoder) { + signedDecoder<6>(MI, tmp, Decoder); + return MCDisassembler::Success; +} + +static DecodeStatus s4_3ImmDecoder(MCInst &MI, unsigned tmp, + uint64_t /*Address*/, const void *Decoder) { + signedDecoder<7>(MI, tmp, Decoder); + return MCDisassembler::Success; +} + +static 
DecodeStatus s4_6ImmDecoder(MCInst &MI, unsigned tmp, + uint64_t /*Address*/, const void *Decoder) { + signedDecoder<10>(MI, tmp, Decoder); + return MCDisassembler::Success; +} + +static DecodeStatus s3_6ImmDecoder(MCInst &MI, unsigned tmp, + uint64_t /*Address*/, const void *Decoder) { + signedDecoder<19>(MI, tmp, Decoder); + return MCDisassembler::Success; +} + +// custom decoder for various jump/call immediates +static DecodeStatus brtargetDecoder(MCInst &MI, unsigned tmp, uint64_t Address, + const void *Decoder) { + HexagonDisassembler const &Disassembler = disassembler(Decoder); + unsigned Bits = HexagonMCInstrInfo::getExtentBits(*Disassembler.MCII, MI); + // r13_2 is not extendable, so if there are no extent bits, it's r13_2 + if (Bits == 0) + Bits = 15; + uint32_t FullValue = fullValue(*Disassembler.MCII, + **Disassembler.CurrentBundle, + MI, SignExtend64(tmp, Bits)); + int64_t Extended = SignExtend64<32>(FullValue) + Address; + if (!Disassembler.tryAddingSymbolicOperand(MI, Extended, Address, true, + 0, 4)) + HexagonMCInstrInfo::addConstant(MI, Extended, Disassembler.getContext()); + return MCDisassembler::Success; +} + +// Addressing mode dependent load store opcode map. +// - If an insn is preceded by an extender the address is absolute. +// - memw(##symbol) = r0 +// - If an insn is not preceded by an extender the address is GP relative. +// - memw(gp + #symbol) = r0 +// Please note that the instructions must be ordered in the descending order +// of their opcode. +// HexagonII::INST_ICLASS_ST +static const unsigned int StoreConditionalOpcodeData[][2] = { + {S4_pstorerdfnew_abs, 0xafc02084}, + {S4_pstorerdtnew_abs, 0xafc02080}, + {S4_pstorerdf_abs, 0xafc00084}, + {S4_pstorerdt_abs, 0xafc00080}, + {S4_pstorerinewfnew_abs, 0xafa03084}, + {S4_pstorerinewtnew_abs, 0xafa03080}, + {S4_pstorerhnewfnew_abs, 0xafa02884}, + {S4_pstorerhnewtnew_abs, 0xafa02880}, + {S4_pstorerbnewfnew_abs, 0xafa02084}, + {S4_pstorerbnewtnew_abs, 0xafa02080}, + {S4_pstorerinewf_abs, 0xafa01084}, + {S4_pstorerinewt_abs, 0xafa01080}, + {S4_pstorerhnewf_abs, 0xafa00884}, + {S4_pstorerhnewt_abs, 0xafa00880}, + {S4_pstorerbnewf_abs, 0xafa00084}, + {S4_pstorerbnewt_abs, 0xafa00080}, + {S4_pstorerifnew_abs, 0xaf802084}, + {S4_pstoreritnew_abs, 0xaf802080}, + {S4_pstorerif_abs, 0xaf800084}, + {S4_pstorerit_abs, 0xaf800080}, + {S4_pstorerhfnew_abs, 0xaf402084}, + {S4_pstorerhtnew_abs, 0xaf402080}, + {S4_pstorerhf_abs, 0xaf400084}, + {S4_pstorerht_abs, 0xaf400080}, + {S4_pstorerbfnew_abs, 0xaf002084}, + {S4_pstorerbtnew_abs, 0xaf002080}, + {S4_pstorerbf_abs, 0xaf000084}, + {S4_pstorerbt_abs, 0xaf000080}}; +// HexagonII::INST_ICLASS_LD + +// HexagonII::INST_ICLASS_LD_ST_2 +static unsigned int LoadStoreOpcodeData[][2] = {{L4_loadrd_abs, 0x49c00000}, + {L4_loadri_abs, 0x49800000}, + {L4_loadruh_abs, 0x49600000}, + {L4_loadrh_abs, 0x49400000}, + {L4_loadrub_abs, 0x49200000}, + {L4_loadrb_abs, 0x49000000}, + {S2_storerdabs, 0x48c00000}, + {S2_storerinewabs, 0x48a01000}, + {S2_storerhnewabs, 0x48a00800}, + {S2_storerbnewabs, 0x48a00000}, + {S2_storeriabs, 0x48800000}, + {S2_storerfabs, 0x48600000}, + {S2_storerhabs, 0x48400000}, + {S2_storerbabs, 0x48000000}}; +static const size_t NumCondS = array_lengthof(StoreConditionalOpcodeData); +static const size_t NumLS = array_lengthof(LoadStoreOpcodeData); + +static DecodeStatus decodeSpecial(MCInst &MI, uint32_t insn) { + + unsigned MachineOpcode = 0; + unsigned LLVMOpcode = 0; + + if ((insn & HexagonII::INST_ICLASS_MASK) == HexagonII::INST_ICLASS_ST) { + for (size_t i = 0; i < 
NumCondS; ++i) { + if ((insn & StoreConditionalOpcodeData[i][1]) == + StoreConditionalOpcodeData[i][1]) { + MachineOpcode = StoreConditionalOpcodeData[i][1]; + LLVMOpcode = StoreConditionalOpcodeData[i][0]; + break; + } + } + } + if ((insn & HexagonII::INST_ICLASS_MASK) == HexagonII::INST_ICLASS_LD_ST_2) { + for (size_t i = 0; i < NumLS; ++i) { + if ((insn & LoadStoreOpcodeData[i][1]) == LoadStoreOpcodeData[i][1]) { + MachineOpcode = LoadStoreOpcodeData[i][1]; + LLVMOpcode = LoadStoreOpcodeData[i][0]; + break; + } + } + } + + if (MachineOpcode) { + unsigned Value = 0; + unsigned shift = 0; + MI.setOpcode(LLVMOpcode); + // Remove the parse bits from the insn. + insn &= ~HexagonII::INST_PARSE_MASK; + + switch (LLVMOpcode) { + default: + return MCDisassembler::Fail; + break; + + case Hexagon::S4_pstorerdf_abs: + case Hexagon::S4_pstorerdt_abs: + case Hexagon::S4_pstorerdfnew_abs: + case Hexagon::S4_pstorerdtnew_abs: { + // op: Pv + Value = insn & UINT64_C(3); + DecodePredRegsRegisterClass(MI, Value, 0, 0); + // op: u6 + Value = (insn >> 12) & UINT64_C(48); + Value |= (insn >> 3) & UINT64_C(15); + MI.addOperand(MCOperand::createImm(Value)); + // op: Rtt + Value = (insn >> 8) & UINT64_C(31); + DecodeDoubleRegsRegisterClass(MI, Value, 0, 0); + break; + } + + case Hexagon::S4_pstorerbnewf_abs: + case Hexagon::S4_pstorerbnewt_abs: + case Hexagon::S4_pstorerbnewfnew_abs: + case Hexagon::S4_pstorerbnewtnew_abs: + case Hexagon::S4_pstorerhnewf_abs: + case Hexagon::S4_pstorerhnewt_abs: + case Hexagon::S4_pstorerhnewfnew_abs: + case Hexagon::S4_pstorerhnewtnew_abs: + case Hexagon::S4_pstorerinewf_abs: + case Hexagon::S4_pstorerinewt_abs: + case Hexagon::S4_pstorerinewfnew_abs: + case Hexagon::S4_pstorerinewtnew_abs: { + // op: Pv + Value = insn & UINT64_C(3); + DecodePredRegsRegisterClass(MI, Value, 0, 0); + // op: u6 + Value = (insn >> 12) & UINT64_C(48); + Value |= (insn >> 3) & UINT64_C(15); + MI.addOperand(MCOperand::createImm(Value)); + // op: Nt + Value = (insn >> 8) & UINT64_C(7); + DecodeIntRegsRegisterClass(MI, Value, 0, 0); + break; + } + + case Hexagon::S4_pstorerbf_abs: + case Hexagon::S4_pstorerbt_abs: + case Hexagon::S4_pstorerbfnew_abs: + case Hexagon::S4_pstorerbtnew_abs: + case Hexagon::S4_pstorerhf_abs: + case Hexagon::S4_pstorerht_abs: + case Hexagon::S4_pstorerhfnew_abs: + case Hexagon::S4_pstorerhtnew_abs: + case Hexagon::S4_pstorerif_abs: + case Hexagon::S4_pstorerit_abs: + case Hexagon::S4_pstorerifnew_abs: + case Hexagon::S4_pstoreritnew_abs: { + // op: Pv + Value = insn & UINT64_C(3); + DecodePredRegsRegisterClass(MI, Value, 0, 0); + // op: u6 + Value = (insn >> 12) & UINT64_C(48); + Value |= (insn >> 3) & UINT64_C(15); + MI.addOperand(MCOperand::createImm(Value)); + // op: Rt + Value = (insn >> 8) & UINT64_C(31); + DecodeIntRegsRegisterClass(MI, Value, 0, 0); + break; + } + + case Hexagon::L4_ploadrdf_abs: + case Hexagon::L4_ploadrdt_abs: + case Hexagon::L4_ploadrdfnew_abs: + case Hexagon::L4_ploadrdtnew_abs: { + // op: Rdd + Value = insn & UINT64_C(31); + DecodeDoubleRegsRegisterClass(MI, Value, 0, 0); + // op: Pt + Value = ((insn >> 9) & UINT64_C(3)); + DecodePredRegsRegisterClass(MI, Value, 0, 0); + // op: u6 + Value = ((insn >> 15) & UINT64_C(62)); + Value |= ((insn >> 8) & UINT64_C(1)); + MI.addOperand(MCOperand::createImm(Value)); + break; + } + + case Hexagon::L4_ploadrbf_abs: + case Hexagon::L4_ploadrbt_abs: + case Hexagon::L4_ploadrbfnew_abs: + case Hexagon::L4_ploadrbtnew_abs: + case Hexagon::L4_ploadrhf_abs: + case Hexagon::L4_ploadrht_abs: + case 
Hexagon::L4_ploadrhfnew_abs: + case Hexagon::L4_ploadrhtnew_abs: + case Hexagon::L4_ploadrubf_abs: + case Hexagon::L4_ploadrubt_abs: + case Hexagon::L4_ploadrubfnew_abs: + case Hexagon::L4_ploadrubtnew_abs: + case Hexagon::L4_ploadruhf_abs: + case Hexagon::L4_ploadruht_abs: + case Hexagon::L4_ploadruhfnew_abs: + case Hexagon::L4_ploadruhtnew_abs: + case Hexagon::L4_ploadrif_abs: + case Hexagon::L4_ploadrit_abs: + case Hexagon::L4_ploadrifnew_abs: + case Hexagon::L4_ploadritnew_abs: + // op: Rd + Value = insn & UINT64_C(31); + DecodeIntRegsRegisterClass(MI, Value, 0, 0); + // op: Pt + Value = (insn >> 9) & UINT64_C(3); + DecodePredRegsRegisterClass(MI, Value, 0, 0); + // op: u6 + Value = (insn >> 15) & UINT64_C(62); + Value |= (insn >> 8) & UINT64_C(1); + MI.addOperand(MCOperand::createImm(Value)); + break; + + // op: g16_2 + case (Hexagon::L4_loadri_abs): + ++shift; + // op: g16_1 + case Hexagon::L4_loadrh_abs: + case Hexagon::L4_loadruh_abs: + ++shift; + // op: g16_0 + case Hexagon::L4_loadrb_abs: + case Hexagon::L4_loadrub_abs: { + // op: Rd + Value |= insn & UINT64_C(31); + DecodeIntRegsRegisterClass(MI, Value, 0, 0); + Value = (insn >> 11) & UINT64_C(49152); + Value |= (insn >> 7) & UINT64_C(15872); + Value |= (insn >> 5) & UINT64_C(511); + MI.addOperand(MCOperand::createImm(Value << shift)); + break; + } + + case Hexagon::L4_loadrd_abs: { + Value = insn & UINT64_C(31); + DecodeDoubleRegsRegisterClass(MI, Value, 0, 0); + Value = (insn >> 11) & UINT64_C(49152); + Value |= (insn >> 7) & UINT64_C(15872); + Value |= (insn >> 5) & UINT64_C(511); + MI.addOperand(MCOperand::createImm(Value << 3)); + break; + } + + case Hexagon::S2_storerdabs: { + // op: g16_3 + Value = (insn >> 11) & UINT64_C(49152); + Value |= (insn >> 7) & UINT64_C(15872); + Value |= (insn >> 5) & UINT64_C(256); + Value |= insn & UINT64_C(255); + MI.addOperand(MCOperand::createImm(Value << 3)); + // op: Rtt + Value = (insn >> 8) & UINT64_C(31); + DecodeDoubleRegsRegisterClass(MI, Value, 0, 0); + break; + } + + // op: g16_2 + case Hexagon::S2_storerinewabs: + ++shift; + // op: g16_1 + case Hexagon::S2_storerhnewabs: + ++shift; + // op: g16_0 + case Hexagon::S2_storerbnewabs: { + Value = (insn >> 11) & UINT64_C(49152); + Value |= (insn >> 7) & UINT64_C(15872); + Value |= (insn >> 5) & UINT64_C(256); + Value |= insn & UINT64_C(255); + MI.addOperand(MCOperand::createImm(Value << shift)); + // op: Nt + Value = (insn >> 8) & UINT64_C(7); + DecodeIntRegsRegisterClass(MI, Value, 0, 0); + break; + } + + // op: g16_2 + case Hexagon::S2_storeriabs: + ++shift; + // op: g16_1 + case Hexagon::S2_storerhabs: + case Hexagon::S2_storerfabs: + ++shift; + // op: g16_0 + case Hexagon::S2_storerbabs: { + Value = (insn >> 11) & UINT64_C(49152); + Value |= (insn >> 7) & UINT64_C(15872); + Value |= (insn >> 5) & UINT64_C(256); + Value |= insn & UINT64_C(255); + MI.addOperand(MCOperand::createImm(Value << shift)); + // op: Rt + Value = (insn >> 8) & UINT64_C(31); + DecodeIntRegsRegisterClass(MI, Value, 0, 0); + break; + } + } + return MCDisassembler::Success; + } + return MCDisassembler::Fail; +} + +static DecodeStatus decodeImmext(MCInst &MI, uint32_t insn, + void const *Decoder) { + + // Instruction Class for a constant a extender: bits 31:28 = 0x0000 + if ((~insn & 0xf0000000) == 0xf0000000) { + unsigned Value; + // 27:16 High 12 bits of 26-bit extender. + Value = (insn & 0x0fff0000) << 4; + // 13:0 Low 14 bits of 26-bit extender. 
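  // Illustrative aside, not part of the original source: after the two
  // statements around this note run, Value holds the 26 extender bits in
  // positions [31:6] with [5:0] zero.  fullValue() above then ORs in the
  // low 6 bits taken from the extended instruction, so the final immediate
  // is (extender_26 << 6) | low6.  For example, an extender payload of
  // 0x12345 combined with a low field of 0x2A yields 0x48D16A.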
+ Value |= ((insn & 0x3fff) << 6); + MI.setOpcode(Hexagon::A4_ext); + HexagonMCInstrInfo::addConstant(MI, Value, contextFromDecoder(Decoder)); + return MCDisassembler::Success; + } + return MCDisassembler::Fail; +} + +// These values are from HexagonGenMCCodeEmitter.inc and HexagonIsetDx.td +enum subInstBinaryValues { + V4_SA1_addi_BITS = 0x0000, + V4_SA1_addi_MASK = 0x1800, + V4_SA1_addrx_BITS = 0x1800, + V4_SA1_addrx_MASK = 0x1f00, + V4_SA1_addsp_BITS = 0x0c00, + V4_SA1_addsp_MASK = 0x1c00, + V4_SA1_and1_BITS = 0x1200, + V4_SA1_and1_MASK = 0x1f00, + V4_SA1_clrf_BITS = 0x1a70, + V4_SA1_clrf_MASK = 0x1e70, + V4_SA1_clrfnew_BITS = 0x1a50, + V4_SA1_clrfnew_MASK = 0x1e70, + V4_SA1_clrt_BITS = 0x1a60, + V4_SA1_clrt_MASK = 0x1e70, + V4_SA1_clrtnew_BITS = 0x1a40, + V4_SA1_clrtnew_MASK = 0x1e70, + V4_SA1_cmpeqi_BITS = 0x1900, + V4_SA1_cmpeqi_MASK = 0x1f00, + V4_SA1_combine0i_BITS = 0x1c00, + V4_SA1_combine0i_MASK = 0x1d18, + V4_SA1_combine1i_BITS = 0x1c08, + V4_SA1_combine1i_MASK = 0x1d18, + V4_SA1_combine2i_BITS = 0x1c10, + V4_SA1_combine2i_MASK = 0x1d18, + V4_SA1_combine3i_BITS = 0x1c18, + V4_SA1_combine3i_MASK = 0x1d18, + V4_SA1_combinerz_BITS = 0x1d08, + V4_SA1_combinerz_MASK = 0x1d08, + V4_SA1_combinezr_BITS = 0x1d00, + V4_SA1_combinezr_MASK = 0x1d08, + V4_SA1_dec_BITS = 0x1300, + V4_SA1_dec_MASK = 0x1f00, + V4_SA1_inc_BITS = 0x1100, + V4_SA1_inc_MASK = 0x1f00, + V4_SA1_seti_BITS = 0x0800, + V4_SA1_seti_MASK = 0x1c00, + V4_SA1_setin1_BITS = 0x1a00, + V4_SA1_setin1_MASK = 0x1e40, + V4_SA1_sxtb_BITS = 0x1500, + V4_SA1_sxtb_MASK = 0x1f00, + V4_SA1_sxth_BITS = 0x1400, + V4_SA1_sxth_MASK = 0x1f00, + V4_SA1_tfr_BITS = 0x1000, + V4_SA1_tfr_MASK = 0x1f00, + V4_SA1_zxtb_BITS = 0x1700, + V4_SA1_zxtb_MASK = 0x1f00, + V4_SA1_zxth_BITS = 0x1600, + V4_SA1_zxth_MASK = 0x1f00, + V4_SL1_loadri_io_BITS = 0x0000, + V4_SL1_loadri_io_MASK = 0x1000, + V4_SL1_loadrub_io_BITS = 0x1000, + V4_SL1_loadrub_io_MASK = 0x1000, + V4_SL2_deallocframe_BITS = 0x1f00, + V4_SL2_deallocframe_MASK = 0x1fc0, + V4_SL2_jumpr31_BITS = 0x1fc0, + V4_SL2_jumpr31_MASK = 0x1fc4, + V4_SL2_jumpr31_f_BITS = 0x1fc5, + V4_SL2_jumpr31_f_MASK = 0x1fc7, + V4_SL2_jumpr31_fnew_BITS = 0x1fc7, + V4_SL2_jumpr31_fnew_MASK = 0x1fc7, + V4_SL2_jumpr31_t_BITS = 0x1fc4, + V4_SL2_jumpr31_t_MASK = 0x1fc7, + V4_SL2_jumpr31_tnew_BITS = 0x1fc6, + V4_SL2_jumpr31_tnew_MASK = 0x1fc7, + V4_SL2_loadrb_io_BITS = 0x1000, + V4_SL2_loadrb_io_MASK = 0x1800, + V4_SL2_loadrd_sp_BITS = 0x1e00, + V4_SL2_loadrd_sp_MASK = 0x1f00, + V4_SL2_loadrh_io_BITS = 0x0000, + V4_SL2_loadrh_io_MASK = 0x1800, + V4_SL2_loadri_sp_BITS = 0x1c00, + V4_SL2_loadri_sp_MASK = 0x1e00, + V4_SL2_loadruh_io_BITS = 0x0800, + V4_SL2_loadruh_io_MASK = 0x1800, + V4_SL2_return_BITS = 0x1f40, + V4_SL2_return_MASK = 0x1fc4, + V4_SL2_return_f_BITS = 0x1f45, + V4_SL2_return_f_MASK = 0x1fc7, + V4_SL2_return_fnew_BITS = 0x1f47, + V4_SL2_return_fnew_MASK = 0x1fc7, + V4_SL2_return_t_BITS = 0x1f44, + V4_SL2_return_t_MASK = 0x1fc7, + V4_SL2_return_tnew_BITS = 0x1f46, + V4_SL2_return_tnew_MASK = 0x1fc7, + V4_SS1_storeb_io_BITS = 0x1000, + V4_SS1_storeb_io_MASK = 0x1000, + V4_SS1_storew_io_BITS = 0x0000, + V4_SS1_storew_io_MASK = 0x1000, + V4_SS2_allocframe_BITS = 0x1c00, + V4_SS2_allocframe_MASK = 0x1e00, + V4_SS2_storebi0_BITS = 0x1200, + V4_SS2_storebi0_MASK = 0x1f00, + V4_SS2_storebi1_BITS = 0x1300, + V4_SS2_storebi1_MASK = 0x1f00, + V4_SS2_stored_sp_BITS = 0x0a00, + V4_SS2_stored_sp_MASK = 0x1e00, + V4_SS2_storeh_io_BITS = 0x0000, + V4_SS2_storeh_io_MASK = 0x1800, + V4_SS2_storew_sp_BITS = 0x0800, + 
V4_SS2_storew_sp_MASK = 0x1e00, + V4_SS2_storewi0_BITS = 0x1000, + V4_SS2_storewi0_MASK = 0x1f00, + V4_SS2_storewi1_BITS = 0x1100, + V4_SS2_storewi1_MASK = 0x1f00 +}; + +static unsigned GetSubinstOpcode(unsigned IClass, unsigned inst, unsigned &op, + raw_ostream &os) { + switch (IClass) { + case HexagonII::HSIG_L1: + if ((inst & V4_SL1_loadri_io_MASK) == V4_SL1_loadri_io_BITS) + op = Hexagon::V4_SL1_loadri_io; + else if ((inst & V4_SL1_loadrub_io_MASK) == V4_SL1_loadrub_io_BITS) + op = Hexagon::V4_SL1_loadrub_io; + else { + os << "<unknown subinstruction>"; + return MCDisassembler::Fail; + } + break; + case HexagonII::HSIG_L2: + if ((inst & V4_SL2_deallocframe_MASK) == V4_SL2_deallocframe_BITS) + op = Hexagon::V4_SL2_deallocframe; + else if ((inst & V4_SL2_jumpr31_MASK) == V4_SL2_jumpr31_BITS) + op = Hexagon::V4_SL2_jumpr31; + else if ((inst & V4_SL2_jumpr31_f_MASK) == V4_SL2_jumpr31_f_BITS) + op = Hexagon::V4_SL2_jumpr31_f; + else if ((inst & V4_SL2_jumpr31_fnew_MASK) == V4_SL2_jumpr31_fnew_BITS) + op = Hexagon::V4_SL2_jumpr31_fnew; + else if ((inst & V4_SL2_jumpr31_t_MASK) == V4_SL2_jumpr31_t_BITS) + op = Hexagon::V4_SL2_jumpr31_t; + else if ((inst & V4_SL2_jumpr31_tnew_MASK) == V4_SL2_jumpr31_tnew_BITS) + op = Hexagon::V4_SL2_jumpr31_tnew; + else if ((inst & V4_SL2_loadrb_io_MASK) == V4_SL2_loadrb_io_BITS) + op = Hexagon::V4_SL2_loadrb_io; + else if ((inst & V4_SL2_loadrd_sp_MASK) == V4_SL2_loadrd_sp_BITS) + op = Hexagon::V4_SL2_loadrd_sp; + else if ((inst & V4_SL2_loadrh_io_MASK) == V4_SL2_loadrh_io_BITS) + op = Hexagon::V4_SL2_loadrh_io; + else if ((inst & V4_SL2_loadri_sp_MASK) == V4_SL2_loadri_sp_BITS) + op = Hexagon::V4_SL2_loadri_sp; + else if ((inst & V4_SL2_loadruh_io_MASK) == V4_SL2_loadruh_io_BITS) + op = Hexagon::V4_SL2_loadruh_io; + else if ((inst & V4_SL2_return_MASK) == V4_SL2_return_BITS) + op = Hexagon::V4_SL2_return; + else if ((inst & V4_SL2_return_f_MASK) == V4_SL2_return_f_BITS) + op = Hexagon::V4_SL2_return_f; + else if ((inst & V4_SL2_return_fnew_MASK) == V4_SL2_return_fnew_BITS) + op = Hexagon::V4_SL2_return_fnew; + else if ((inst & V4_SL2_return_t_MASK) == V4_SL2_return_t_BITS) + op = Hexagon::V4_SL2_return_t; + else if ((inst & V4_SL2_return_tnew_MASK) == V4_SL2_return_tnew_BITS) + op = Hexagon::V4_SL2_return_tnew; + else { + os << "<unknown subinstruction>"; + return MCDisassembler::Fail; + } + break; + case HexagonII::HSIG_A: + if ((inst & V4_SA1_addi_MASK) == V4_SA1_addi_BITS) + op = Hexagon::V4_SA1_addi; + else if ((inst & V4_SA1_addrx_MASK) == V4_SA1_addrx_BITS) + op = Hexagon::V4_SA1_addrx; + else if ((inst & V4_SA1_addsp_MASK) == V4_SA1_addsp_BITS) + op = Hexagon::V4_SA1_addsp; + else if ((inst & V4_SA1_and1_MASK) == V4_SA1_and1_BITS) + op = Hexagon::V4_SA1_and1; + else if ((inst & V4_SA1_clrf_MASK) == V4_SA1_clrf_BITS) + op = Hexagon::V4_SA1_clrf; + else if ((inst & V4_SA1_clrfnew_MASK) == V4_SA1_clrfnew_BITS) + op = Hexagon::V4_SA1_clrfnew; + else if ((inst & V4_SA1_clrt_MASK) == V4_SA1_clrt_BITS) + op = Hexagon::V4_SA1_clrt; + else if ((inst & V4_SA1_clrtnew_MASK) == V4_SA1_clrtnew_BITS) + op = Hexagon::V4_SA1_clrtnew; + else if ((inst & V4_SA1_cmpeqi_MASK) == V4_SA1_cmpeqi_BITS) + op = Hexagon::V4_SA1_cmpeqi; + else if ((inst & V4_SA1_combine0i_MASK) == V4_SA1_combine0i_BITS) + op = Hexagon::V4_SA1_combine0i; + else if ((inst & V4_SA1_combine1i_MASK) == V4_SA1_combine1i_BITS) + op = Hexagon::V4_SA1_combine1i; + else if ((inst & V4_SA1_combine2i_MASK) == V4_SA1_combine2i_BITS) + op = Hexagon::V4_SA1_combine2i; + else if ((inst & V4_SA1_combine3i_MASK) 
== V4_SA1_combine3i_BITS) + op = Hexagon::V4_SA1_combine3i; + else if ((inst & V4_SA1_combinerz_MASK) == V4_SA1_combinerz_BITS) + op = Hexagon::V4_SA1_combinerz; + else if ((inst & V4_SA1_combinezr_MASK) == V4_SA1_combinezr_BITS) + op = Hexagon::V4_SA1_combinezr; + else if ((inst & V4_SA1_dec_MASK) == V4_SA1_dec_BITS) + op = Hexagon::V4_SA1_dec; + else if ((inst & V4_SA1_inc_MASK) == V4_SA1_inc_BITS) + op = Hexagon::V4_SA1_inc; + else if ((inst & V4_SA1_seti_MASK) == V4_SA1_seti_BITS) + op = Hexagon::V4_SA1_seti; + else if ((inst & V4_SA1_setin1_MASK) == V4_SA1_setin1_BITS) + op = Hexagon::V4_SA1_setin1; + else if ((inst & V4_SA1_sxtb_MASK) == V4_SA1_sxtb_BITS) + op = Hexagon::V4_SA1_sxtb; + else if ((inst & V4_SA1_sxth_MASK) == V4_SA1_sxth_BITS) + op = Hexagon::V4_SA1_sxth; + else if ((inst & V4_SA1_tfr_MASK) == V4_SA1_tfr_BITS) + op = Hexagon::V4_SA1_tfr; + else if ((inst & V4_SA1_zxtb_MASK) == V4_SA1_zxtb_BITS) + op = Hexagon::V4_SA1_zxtb; + else if ((inst & V4_SA1_zxth_MASK) == V4_SA1_zxth_BITS) + op = Hexagon::V4_SA1_zxth; + else { + os << "<unknown subinstruction>"; + return MCDisassembler::Fail; + } + break; + case HexagonII::HSIG_S1: + if ((inst & V4_SS1_storeb_io_MASK) == V4_SS1_storeb_io_BITS) + op = Hexagon::V4_SS1_storeb_io; + else if ((inst & V4_SS1_storew_io_MASK) == V4_SS1_storew_io_BITS) + op = Hexagon::V4_SS1_storew_io; + else { + os << "<unknown subinstruction>"; + return MCDisassembler::Fail; + } + break; + case HexagonII::HSIG_S2: + if ((inst & V4_SS2_allocframe_MASK) == V4_SS2_allocframe_BITS) + op = Hexagon::V4_SS2_allocframe; + else if ((inst & V4_SS2_storebi0_MASK) == V4_SS2_storebi0_BITS) + op = Hexagon::V4_SS2_storebi0; + else if ((inst & V4_SS2_storebi1_MASK) == V4_SS2_storebi1_BITS) + op = Hexagon::V4_SS2_storebi1; + else if ((inst & V4_SS2_stored_sp_MASK) == V4_SS2_stored_sp_BITS) + op = Hexagon::V4_SS2_stored_sp; + else if ((inst & V4_SS2_storeh_io_MASK) == V4_SS2_storeh_io_BITS) + op = Hexagon::V4_SS2_storeh_io; + else if ((inst & V4_SS2_storew_sp_MASK) == V4_SS2_storew_sp_BITS) + op = Hexagon::V4_SS2_storew_sp; + else if ((inst & V4_SS2_storewi0_MASK) == V4_SS2_storewi0_BITS) + op = Hexagon::V4_SS2_storewi0; + else if ((inst & V4_SS2_storewi1_MASK) == V4_SS2_storewi1_BITS) + op = Hexagon::V4_SS2_storewi1; + else { + os << "<unknown subinstruction>"; + return MCDisassembler::Fail; + } + break; + default: + os << "<unknown>"; + return MCDisassembler::Fail; + } + return MCDisassembler::Success; +} + +static unsigned getRegFromSubinstEncoding(unsigned encoded_reg) { + if (encoded_reg < 8) + return Hexagon::R0 + encoded_reg; + else if (encoded_reg < 16) + return Hexagon::R0 + encoded_reg + 8; + + // patently false value + return Hexagon::NoRegister; +} + +static unsigned getDRegFromSubinstEncoding(unsigned encoded_dreg) { + if (encoded_dreg < 4) + return Hexagon::D0 + encoded_dreg; + else if (encoded_dreg < 8) + return Hexagon::D0 + encoded_dreg + 4; + + // patently false value + return Hexagon::NoRegister; +} + +void HexagonDisassembler::addSubinstOperands(MCInst *MI, unsigned opcode, + unsigned inst) const { + int64_t operand; + MCOperand Op; + switch (opcode) { + case Hexagon::V4_SL2_deallocframe: + case Hexagon::V4_SL2_jumpr31: + case Hexagon::V4_SL2_jumpr31_f: + case Hexagon::V4_SL2_jumpr31_fnew: + case Hexagon::V4_SL2_jumpr31_t: + case Hexagon::V4_SL2_jumpr31_tnew: + case Hexagon::V4_SL2_return: + case Hexagon::V4_SL2_return_f: + case Hexagon::V4_SL2_return_fnew: + case Hexagon::V4_SL2_return_t: + case Hexagon::V4_SL2_return_tnew: + // no operands for these 
instructions + break; + case Hexagon::V4_SS2_allocframe: + // u 8-4{5_3} + operand = ((inst & 0x1f0) >> 4) << 3; + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); + break; + case Hexagon::V4_SL1_loadri_io: + // Rd 3-0, Rs 7-4, u 11-8{4_2} + operand = getRegFromSubinstEncoding(inst & 0xf); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = (inst & 0xf00) >> 6; + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); + break; + case Hexagon::V4_SL1_loadrub_io: + // Rd 3-0, Rs 7-4, u 11-8 + operand = getRegFromSubinstEncoding(inst & 0xf); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = (inst & 0xf00) >> 8; + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); + break; + case Hexagon::V4_SL2_loadrb_io: + // Rd 3-0, Rs 7-4, u 10-8 + operand = getRegFromSubinstEncoding(inst & 0xf); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = (inst & 0x700) >> 8; + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); + break; + case Hexagon::V4_SL2_loadrh_io: + case Hexagon::V4_SL2_loadruh_io: + // Rd 3-0, Rs 7-4, u 10-8{3_1} + operand = getRegFromSubinstEncoding(inst & 0xf); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = ((inst & 0x700) >> 8) << 1; + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); + break; + case Hexagon::V4_SL2_loadrd_sp: + // Rdd 2-0, u 7-3{5_3} + operand = getDRegFromSubinstEncoding(inst & 0x7); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = ((inst & 0x0f8) >> 3) << 3; + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); + break; + case Hexagon::V4_SL2_loadri_sp: + // Rd 3-0, u 8-4{5_2} + operand = getRegFromSubinstEncoding(inst & 0xf); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = ((inst & 0x1f0) >> 4) << 2; + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); + break; + case Hexagon::V4_SA1_addi: + // Rx 3-0 (x2), s7 10-4 + operand = getRegFromSubinstEncoding(inst & 0xf); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + MI->addOperand(Op); + operand = SignExtend64<7>((inst & 0x7f0) >> 4); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); + break; + case Hexagon::V4_SA1_addrx: + // Rx 3-0 (x2), Rs 7-4 + operand = getRegFromSubinstEncoding(inst & 0xf); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + MI->addOperand(Op); + operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + case Hexagon::V4_SA1_and1: + case Hexagon::V4_SA1_dec: + case Hexagon::V4_SA1_inc: + case Hexagon::V4_SA1_sxtb: + case Hexagon::V4_SA1_sxth: + case Hexagon::V4_SA1_tfr: + case Hexagon::V4_SA1_zxtb: + case Hexagon::V4_SA1_zxth: + // Rd 3-0, Rs 7-4 + operand = getRegFromSubinstEncoding(inst & 0xf); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + break; + case Hexagon::V4_SA1_addsp: + // Rd 3-0, u 9-4{6_2} + 
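  // Illustrative aside, not part of the original source: the shorthand in
  // these operand comments is "<field> <hi>-<lo>{<width>_<shift>}", meaning
  // the field lives in instruction bits hi..lo and the decoded operand is
  // that <width>-bit value shifted left by <shift>.  For the "u 9-4{6_2}"
  // case handled next this is ((inst & 0x3f0) >> 4) << 2, i.e. a 6-bit
  // unsigned count of words giving byte offsets 0..252.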
operand = getRegFromSubinstEncoding(inst & 0xf); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = ((inst & 0x3f0) >> 4) << 2; + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); + break; + case Hexagon::V4_SA1_seti: + // Rd 3-0, u 9-4 + operand = getRegFromSubinstEncoding(inst & 0xf); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = (inst & 0x3f0) >> 4; + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); + break; + case Hexagon::V4_SA1_clrf: + case Hexagon::V4_SA1_clrfnew: + case Hexagon::V4_SA1_clrt: + case Hexagon::V4_SA1_clrtnew: + case Hexagon::V4_SA1_setin1: + // Rd 3-0 + operand = getRegFromSubinstEncoding(inst & 0xf); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + break; + case Hexagon::V4_SA1_cmpeqi: + // Rs 7-4, u 1-0 + operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = inst & 0x3; + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); + break; + case Hexagon::V4_SA1_combine0i: + case Hexagon::V4_SA1_combine1i: + case Hexagon::V4_SA1_combine2i: + case Hexagon::V4_SA1_combine3i: + // Rdd 2-0, u 6-5 + operand = getDRegFromSubinstEncoding(inst & 0x7); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = (inst & 0x060) >> 5; + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); + break; + case Hexagon::V4_SA1_combinerz: + case Hexagon::V4_SA1_combinezr: + // Rdd 2-0, Rs 7-4 + operand = getDRegFromSubinstEncoding(inst & 0x7); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + break; + case Hexagon::V4_SS1_storeb_io: + // Rs 7-4, u 11-8, Rt 3-0 + operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = (inst & 0xf00) >> 8; + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); + operand = getRegFromSubinstEncoding(inst & 0xf); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + break; + case Hexagon::V4_SS1_storew_io: + // Rs 7-4, u 11-8{4_2}, Rt 3-0 + operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = ((inst & 0xf00) >> 8) << 2; + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); + operand = getRegFromSubinstEncoding(inst & 0xf); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + break; + case Hexagon::V4_SS2_storebi0: + case Hexagon::V4_SS2_storebi1: + // Rs 7-4, u 3-0 + operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = inst & 0xf; + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); + break; + case Hexagon::V4_SS2_storewi0: + case Hexagon::V4_SS2_storewi1: + // Rs 7-4, u 3-0{4_2} + operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = (inst & 0xf) << 2; + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); + break; + case Hexagon::V4_SS2_stored_sp: + // s 8-3{6_3}, Rtt 2-0 + operand = SignExtend64<9>(((inst & 0x1f8) >> 3) << 3); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); + operand = getDRegFromSubinstEncoding(inst & 0x7); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + break; + case Hexagon::V4_SS2_storeh_io: + // Rs 7-4, u 10-8{3_1}, Rt 3-0 + operand = 
getRegFromSubinstEncoding((inst & 0xf0) >> 4); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = ((inst & 0x700) >> 8) << 1; + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); + operand = getRegFromSubinstEncoding(inst & 0xf); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + break; + case Hexagon::V4_SS2_storew_sp: + // u 8-4{5_2}, Rd 3-0 + operand = ((inst & 0x1f0) >> 4) << 2; + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); + operand = getRegFromSubinstEncoding(inst & 0xf); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + break; + default: + // don't crash with an invalid subinstruction + // llvm_unreachable("Invalid subinstruction in duplex instruction"); + break; + } +} diff --git a/contrib/llvm/lib/Target/Hexagon/Hexagon.h b/contrib/llvm/lib/Target/Hexagon/Hexagon.h new file mode 100644 index 0000000..ed7d957 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/Hexagon.h @@ -0,0 +1,56 @@ +//=-- Hexagon.h - Top-level interface for Hexagon representation --*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the entry points for global functions defined in the LLVM +// Hexagon back-end. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGON_H +#define LLVM_LIB_TARGET_HEXAGON_HEXAGON_H + +#define Hexagon_POINTER_SIZE 4 + +#define Hexagon_PointerSize (Hexagon_POINTER_SIZE) +#define Hexagon_PointerSize_Bits (Hexagon_POINTER_SIZE * 8) +#define Hexagon_WordSize Hexagon_PointerSize +#define Hexagon_WordSize_Bits Hexagon_PointerSize_Bits + +// allocframe saves LR and FP on stack before allocating +// a new stack frame. This takes 8 bytes. +#define HEXAGON_LRFP_SIZE 8 + +// Normal instruction size (in bytes). +#define HEXAGON_INSTR_SIZE 4 + +// Maximum number of words and instructions in a packet. +#define HEXAGON_PACKET_SIZE 4 +#define HEXAGON_MAX_PACKET_SIZE (HEXAGON_PACKET_SIZE * HEXAGON_INSTR_SIZE) +// Minimum number of instructions in an end-loop packet. +#define HEXAGON_PACKET_INNER_SIZE 2 +#define HEXAGON_PACKET_OUTER_SIZE 3 +// Maximum number of instructions in a packet before shuffling, +// including a compound one or a duplex or an extender. +#define HEXAGON_PRESHUFFLE_PACKET_SIZE (HEXAGON_PACKET_SIZE + 3) + +// Name of the global offset table as defined by the Hexagon ABI +#define HEXAGON_GOT_SYM_NAME "_GLOBAL_OFFSET_TABLE_" + +#include "MCTargetDesc/HexagonMCTargetDesc.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + class HexagonTargetMachine; + + /// \brief Creates a Hexagon-specific Target Transformation Info pass. + ImmutablePass *createHexagonTargetTransformInfoPass(const HexagonTargetMachine *TM); +} // end namespace llvm; + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/Hexagon.td b/contrib/llvm/lib/Target/Hexagon/Hexagon.td new file mode 100644 index 0000000..1189cfd --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/Hexagon.td @@ -0,0 +1,263 @@ +//===-- Hexagon.td - Describe the Hexagon Target Machine --*- tablegen -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This is the top level entry point for the Hexagon target. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Target-independent interfaces which we are implementing +//===----------------------------------------------------------------------===// + +include "llvm/Target/Target.td" + +//===----------------------------------------------------------------------===// +// Hexagon Subtarget features. +//===----------------------------------------------------------------------===// + +// Hexagon Architectures +def ArchV4: SubtargetFeature<"v4", "HexagonArchVersion", "V4", "Hexagon V4">; +def ArchV5: SubtargetFeature<"v5", "HexagonArchVersion", "V5", "Hexagon V5">; +def ArchV55: SubtargetFeature<"v55", "HexagonArchVersion", "V55", "Hexagon V55">; +def ArchV60: SubtargetFeature<"v60", "HexagonArchVersion", "V60", "Hexagon V60">; + +// Hexagon ISA Extensions +def ExtensionHVX: SubtargetFeature<"hvx", "UseHVXOps", + "true", "Hexagon HVX instructions">; +def ExtensionHVXDbl: SubtargetFeature<"hvx-double", "UseHVXDblOps", + "true", "Hexagon HVX Double instructions">; + +//===----------------------------------------------------------------------===// +// Hexagon Instruction Predicate Definitions. +//===----------------------------------------------------------------------===// +def HasV5T : Predicate<"HST->hasV5TOps()">; +def NoV5T : Predicate<"!HST->hasV5TOps()">; +def HasV55T : Predicate<"HST->hasV55TOps()">, + AssemblerPredicate<"ArchV55">; +def HasV60T : Predicate<"HST->hasV60TOps()">, + AssemblerPredicate<"ArchV60">; +def UseMEMOP : Predicate<"HST->useMemOps()">; +def IEEERndNearV5T : Predicate<"HST->modeIEEERndNear()">; +def UseHVXDbl : Predicate<"HST->useHVXDblOps()">, + AssemblerPredicate<"ExtensionHVXDbl">; +def UseHVXSgl : Predicate<"HST->useHVXSglOps()">; + +def UseHVX : Predicate<"HST->useHVXSglOps() ||HST->useHVXDblOps()">, + AssemblerPredicate<"ExtensionHVX">; + +//===----------------------------------------------------------------------===// +// Classes used for relation maps. +//===----------------------------------------------------------------------===// + +class ImmRegShl; +// PredRel - Filter class used to relate non-predicated instructions with their +// predicated forms. +class PredRel; +// PredNewRel - Filter class used to relate predicated instructions with their +// predicate-new forms. +class PredNewRel: PredRel; +// ImmRegRel - Filter class used to relate instructions having reg-reg form +// with their reg-imm counterparts. +class ImmRegRel; +// NewValueRel - Filter class used to relate regular store instructions with +// their new-value store form. +class NewValueRel: PredNewRel; +// NewValueRel - Filter class used to relate load/store instructions having +// different addressing modes with each other. +class AddrModeRel: NewValueRel; +class IntrinsicsRel; + +//===----------------------------------------------------------------------===// +// Generate mapping table to relate non-predicate instructions with their +// predicated formats - true and false. +// + +def getPredOpcode : InstrMapping { + let FilterClass = "PredRel"; + // Instructions with the same BaseOpcode and isNVStore values form a row. + let RowFields = ["BaseOpcode", "isNVStore", "PNewValue", "isNT"]; + // Instructions with the same predicate sense form a column. 
+ let ColFields = ["PredSense"]; + // The key column is the unpredicated instructions. + let KeyCol = [""]; + // Value columns are PredSense=true and PredSense=false + let ValueCols = [["true"], ["false"]]; +} + +//===----------------------------------------------------------------------===// +// Generate mapping table to relate predicate-true instructions with their +// predicate-false forms +// +def getFalsePredOpcode : InstrMapping { + let FilterClass = "PredRel"; + let RowFields = ["BaseOpcode", "PNewValue", "isNVStore", "isBrTaken", "isNT"]; + let ColFields = ["PredSense"]; + let KeyCol = ["true"]; + let ValueCols = [["false"]]; +} + +//===----------------------------------------------------------------------===// +// Generate mapping table to relate predicate-false instructions with their +// predicate-true forms +// +def getTruePredOpcode : InstrMapping { + let FilterClass = "PredRel"; + let RowFields = ["BaseOpcode", "PNewValue", "isNVStore", "isBrTaken", "isNT"]; + let ColFields = ["PredSense"]; + let KeyCol = ["false"]; + let ValueCols = [["true"]]; +} + +//===----------------------------------------------------------------------===// +// Generate mapping table to relate predicated instructions with their .new +// format. +// +def getPredNewOpcode : InstrMapping { + let FilterClass = "PredNewRel"; + let RowFields = ["BaseOpcode", "PredSense", "isNVStore", "isBrTaken"]; + let ColFields = ["PNewValue"]; + let KeyCol = [""]; + let ValueCols = [["new"]]; +} + +//===----------------------------------------------------------------------===// +// Generate mapping table to relate .new predicated instructions with their old +// format. +// +def getPredOldOpcode : InstrMapping { + let FilterClass = "PredNewRel"; + let RowFields = ["BaseOpcode", "PredSense", "isNVStore"]; + let ColFields = ["PNewValue"]; + let KeyCol = ["new"]; + let ValueCols = [[""]]; +} + +//===----------------------------------------------------------------------===// +// Generate mapping table to relate store instructions with their new-value +// format. +// +def getNewValueOpcode : InstrMapping { + let FilterClass = "NewValueRel"; + let RowFields = ["BaseOpcode", "PredSense", "PNewValue", "addrMode", "isNT"]; + let ColFields = ["NValueST"]; + let KeyCol = ["false"]; + let ValueCols = [["true"]]; +} + +//===----------------------------------------------------------------------===// +// Generate mapping table to relate new-value store instructions with their old +// format. 
+// +def getNonNVStore : InstrMapping { + let FilterClass = "NewValueRel"; + let RowFields = ["BaseOpcode", "PredSense", "PNewValue", "addrMode", "isNT"]; + let ColFields = ["NValueST"]; + let KeyCol = ["true"]; + let ValueCols = [["false"]]; +} + +def getBaseWithImmOffset : InstrMapping { + let FilterClass = "AddrModeRel"; + let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore", + "isFloat"]; + let ColFields = ["addrMode"]; + let KeyCol = ["Absolute"]; + let ValueCols = [["BaseImmOffset"]]; +} + +def getBaseWithRegOffset : InstrMapping { + let FilterClass = "AddrModeRel"; + let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore"]; + let ColFields = ["addrMode"]; + let KeyCol = ["BaseImmOffset"]; + let ValueCols = [["BaseRegOffset"]]; +} + +def getRegForm : InstrMapping { + let FilterClass = "ImmRegRel"; + let RowFields = ["CextOpcode", "PredSense", "PNewValue"]; + let ColFields = ["InputType"]; + let KeyCol = ["imm"]; + let ValueCols = [["reg"]]; +} + +def getRegShlForm : InstrMapping { + let FilterClass = "ImmRegShl"; + let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore"]; + let ColFields = ["InputType"]; + let KeyCol = ["imm"]; + let ValueCols = [["reg"]]; +} + +def notTakenBranchPrediction : InstrMapping { + let FilterClass = "PredRel"; + let RowFields = ["BaseOpcode", "PNewValue", "PredSense", "isBranch", "isPredicated"]; + let ColFields = ["isBrTaken"]; + let KeyCol = ["true"]; + let ValueCols = [["false"]]; +} + +def takenBranchPrediction : InstrMapping { + let FilterClass = "PredRel"; + let RowFields = ["BaseOpcode", "PNewValue", "PredSense", "isBranch", "isPredicated"]; + let ColFields = ["isBrTaken"]; + let KeyCol = ["false"]; + let ValueCols = [["true"]]; +} + +def getRealHWInstr : InstrMapping { + let FilterClass = "IntrinsicsRel"; + let RowFields = ["BaseOpcode"]; + let ColFields = ["InstrType"]; + let KeyCol = ["Pseudo"]; + let ValueCols = [["Pseudo"], ["Real"]]; +} +//===----------------------------------------------------------------------===// +// Register File, Calling Conv, Instruction Descriptions +//===----------------------------------------------------------------------===// +include "HexagonSchedule.td" +include "HexagonRegisterInfo.td" +include "HexagonCallingConv.td" +include "HexagonInstrInfo.td" +include "HexagonIntrinsics.td" +include "HexagonIntrinsicsDerived.td" + +def HexagonInstrInfo : InstrInfo; + +//===----------------------------------------------------------------------===// +// Hexagon processors supported. 
+//===----------------------------------------------------------------------===// + +class Proc<string Name, SchedMachineModel Model, + list<SubtargetFeature> Features> + : ProcessorModel<Name, Model, Features>; + +def : Proc<"hexagonv4", HexagonModelV4, + [ArchV4]>; +def : Proc<"hexagonv5", HexagonModelV4, + [ArchV4, ArchV5]>; +def : Proc<"hexagonv55", HexagonModelV55, + [ArchV4, ArchV5, ArchV55]>; +def : Proc<"hexagonv60", HexagonModelV60, + [ArchV4, ArchV5, ArchV55, ArchV60, ExtensionHVX]>; + +//===----------------------------------------------------------------------===// +// Declare the target which we are implementing +//===----------------------------------------------------------------------===// + +def HexagonAsmParserVariant : AsmParserVariant { + int Variant = 0; + string TokenizingCharacters = "#()=:.<>!+*"; +} + +def Hexagon : Target { + // Pull in Instruction Info: + let InstructionSet = HexagonInstrInfo; + let AssemblyParserVariants = [HexagonAsmParserVariant]; +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp new file mode 100644 index 0000000..e213089 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp @@ -0,0 +1,598 @@ +//===-- HexagonAsmPrinter.cpp - Print machine instrs to Hexagon assembly --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to Hexagon assembly language. This printer is +// the output mechanism used by `llc'. 
+// +//===----------------------------------------------------------------------===// + +#include "Hexagon.h" +#include "HexagonAsmPrinter.h" +#include "HexagonMachineFunctionInfo.h" +#include "HexagonSubtarget.h" +#include "HexagonTargetMachine.h" +#include "MCTargetDesc/HexagonInstPrinter.h" +#include "MCTargetDesc/HexagonMCInstrInfo.h" +#include "MCTargetDesc/HexagonMCShuffler.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Mangler.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" + +using namespace llvm; + +namespace llvm { + void HexagonLowerToMC(const MCInstrInfo &MCII, const MachineInstr *MI, + MCInst &MCB, HexagonAsmPrinter &AP); +} + +#define DEBUG_TYPE "asm-printer" + +static cl::opt<bool> AlignCalls( + "hexagon-align-calls", cl::Hidden, cl::init(true), + cl::desc("Insert falign after call instruction for Hexagon target")); + +// Given a scalar register return its pair. +inline static unsigned getHexagonRegisterPair(unsigned Reg, + const MCRegisterInfo *RI) { + assert(Hexagon::IntRegsRegClass.contains(Reg)); + MCSuperRegIterator SR(Reg, RI, false); + unsigned Pair = *SR; + assert(Hexagon::DoubleRegsRegClass.contains(Pair)); + return Pair; +} + +HexagonAsmPrinter::HexagonAsmPrinter(TargetMachine &TM, + std::unique_ptr<MCStreamer> Streamer) + : AsmPrinter(TM, std::move(Streamer)), Subtarget(nullptr) {} + +void HexagonAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, + raw_ostream &O) { + const MachineOperand &MO = MI->getOperand(OpNo); + + switch (MO.getType()) { + default: llvm_unreachable ("<unknown operand type>"); + case MachineOperand::MO_Register: + O << HexagonInstPrinter::getRegisterName(MO.getReg()); + return; + case MachineOperand::MO_Immediate: + O << MO.getImm(); + return; + case MachineOperand::MO_MachineBasicBlock: + MO.getMBB()->getSymbol()->print(O, MAI); + return; + case MachineOperand::MO_ConstantPoolIndex: + GetCPISymbol(MO.getIndex())->print(O, MAI); + return; + case MachineOperand::MO_GlobalAddress: + // Computing the address of a global symbol, not calling it. + getSymbol(MO.getGlobal())->print(O, MAI); + printOffset(MO.getOffset(), O); + return; + } +} + +// +// isBlockOnlyReachableByFallthrough - We need to override this since the +// default AsmPrinter does not print labels for any basic block that +// is only reachable by a fall through. 
That works for all cases except +// for the case in which the basic block is reachable by a fall through but +// through an indirect from a jump table. In this case, the jump table +// will contain a label not defined by AsmPrinter. +// +bool HexagonAsmPrinter:: +isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { + if (MBB->hasAddressTaken()) + return false; + return AsmPrinter::isBlockOnlyReachableByFallthrough(MBB); +} + + +/// PrintAsmOperand - Print out an operand for an inline asm expression. +/// +bool HexagonAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode, + raw_ostream &OS) { + // Does this asm operand have a single letter operand modifier? + if (ExtraCode && ExtraCode[0]) { + if (ExtraCode[1] != 0) + return true; // Unknown modifier. + + switch (ExtraCode[0]) { + default: + // See if this is a generic print operand + return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, OS); + case 'c': // Don't print "$" before a global var name or constant. + // Hexagon never has a prefix. + printOperand(MI, OpNo, OS); + return false; + case 'L': // Write second word of DImode reference. + // Verify that this operand has two consecutive registers. + if (!MI->getOperand(OpNo).isReg() || + OpNo+1 == MI->getNumOperands() || + !MI->getOperand(OpNo+1).isReg()) + return true; + ++OpNo; // Return the high-part. + break; + case 'I': + // Write 'i' if an integer constant, otherwise nothing. Used to print + // addi vs add, etc. + if (MI->getOperand(OpNo).isImm()) + OS << "i"; + return false; + } + } + + printOperand(MI, OpNo, OS); + return false; +} + +bool HexagonAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, + unsigned OpNo, unsigned AsmVariant, + const char *ExtraCode, + raw_ostream &O) { + if (ExtraCode && ExtraCode[0]) + return true; // Unknown modifier. + + const MachineOperand &Base = MI->getOperand(OpNo); + const MachineOperand &Offset = MI->getOperand(OpNo+1); + + if (Base.isReg()) + printOperand(MI, OpNo, O); + else + llvm_unreachable("Unimplemented"); + + if (Offset.isImm()) { + if (Offset.getImm()) + O << " + #" << Offset.getImm(); + } + else + llvm_unreachable("Unimplemented"); + + return false; +} + +MCSymbol *smallData(AsmPrinter &AP, const MachineInstr &MI, + MCStreamer &OutStreamer, + const MCOperand &Imm, int AlignSize) { + MCSymbol *Sym; + int64_t Value; + if (Imm.getExpr()->evaluateAsAbsolute(Value)) { + StringRef sectionPrefix; + std::string ImmString; + StringRef Name; + if (AlignSize == 8) { + Name = ".CONST_0000000000000000"; + sectionPrefix = ".gnu.linkonce.l8"; + ImmString = utohexstr(Value); + } else { + Name = ".CONST_00000000"; + sectionPrefix = ".gnu.linkonce.l4"; + ImmString = utohexstr(static_cast<uint32_t>(Value)); + } + + std::string symbolName = // Yes, leading zeros are kept. 
+ Name.drop_back(ImmString.size()).str() + ImmString; + std::string sectionName = sectionPrefix.str() + symbolName; + + MCSectionELF *Section = OutStreamer.getContext().getELFSection( + sectionName, ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC); + OutStreamer.SwitchSection(Section); + + Sym = AP.OutContext.getOrCreateSymbol(Twine(symbolName)); + if (Sym->isUndefined()) { + OutStreamer.EmitLabel(Sym); + OutStreamer.EmitSymbolAttribute(Sym, MCSA_Global); + OutStreamer.EmitIntValue(Value, AlignSize); + OutStreamer.EmitCodeAlignment(AlignSize); + } + } else { + assert(Imm.isExpr() && "Expected expression and found none"); + const MachineOperand &MO = MI.getOperand(1); + assert(MO.isGlobal() || MO.isCPI() || MO.isJTI()); + MCSymbol *MOSymbol = nullptr; + if (MO.isGlobal()) + MOSymbol = AP.getSymbol(MO.getGlobal()); + else if (MO.isCPI()) + MOSymbol = AP.GetCPISymbol(MO.getIndex()); + else if (MO.isJTI()) + MOSymbol = AP.GetJTISymbol(MO.getIndex()); + else + llvm_unreachable("Unknown operand type!"); + + StringRef SymbolName = MOSymbol->getName(); + std::string LitaName = ".CONST_" + SymbolName.str(); + + MCSectionELF *Section = OutStreamer.getContext().getELFSection( + ".lita", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC); + + OutStreamer.SwitchSection(Section); + Sym = AP.OutContext.getOrCreateSymbol(Twine(LitaName)); + if (Sym->isUndefined()) { + OutStreamer.EmitLabel(Sym); + OutStreamer.EmitSymbolAttribute(Sym, MCSA_Local); + OutStreamer.EmitValue(Imm.getExpr(), AlignSize); + OutStreamer.EmitCodeAlignment(AlignSize); + } + } + return Sym; +} + +void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst, + const MachineInstr &MI) { + MCInst &MappedInst = static_cast <MCInst &>(Inst); + const MCRegisterInfo *RI = OutStreamer->getContext().getRegisterInfo(); + + switch (Inst.getOpcode()) { + default: return; + + // "$dst = CONST64(#$src1)", + case Hexagon::CONST64_Float_Real: + case Hexagon::CONST64_Int_Real: + if (!OutStreamer->hasRawTextSupport()) { + const MCOperand &Imm = MappedInst.getOperand(1); + MCSectionSubPair Current = OutStreamer->getCurrentSection(); + + MCSymbol *Sym = smallData(*this, MI, *OutStreamer, Imm, 8); + + OutStreamer->SwitchSection(Current.first, Current.second); + MCInst TmpInst; + MCOperand &Reg = MappedInst.getOperand(0); + TmpInst.setOpcode(Hexagon::L2_loadrdgp); + TmpInst.addOperand(Reg); + TmpInst.addOperand(MCOperand::createExpr( + MCSymbolRefExpr::create(Sym, OutContext))); + MappedInst = TmpInst; + + } + break; + case Hexagon::CONST32: + case Hexagon::CONST32_Float_Real: + case Hexagon::CONST32_Int_Real: + case Hexagon::FCONST32_nsdata: + if (!OutStreamer->hasRawTextSupport()) { + MCOperand &Imm = MappedInst.getOperand(1); + MCSectionSubPair Current = OutStreamer->getCurrentSection(); + MCSymbol *Sym = smallData(*this, MI, *OutStreamer, Imm, 4); + OutStreamer->SwitchSection(Current.first, Current.second); + MCInst TmpInst; + MCOperand &Reg = MappedInst.getOperand(0); + TmpInst.setOpcode(Hexagon::L2_loadrigp); + TmpInst.addOperand(Reg); + TmpInst.addOperand(MCOperand::createExpr( + MCSymbolRefExpr::create(Sym, OutContext))); + MappedInst = TmpInst; + } + break; + + // C2_pxfer_map maps to C2_or instruction. Though, it's possible to use + // C2_or during instruction selection itself but it results + // into suboptimal code. 
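+  // In effect "$Pd = $Ps" is emitted here as "$Pd = or($Ps, $Ps)": the
+  // incoming MCInst already carries Pd and Ps, so only Ps is appended again
+  // before the opcode is rewritten (a sketch of the mapping, inferred from
+  // the code below).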
+ case Hexagon::C2_pxfer_map: { + MCOperand &Ps = Inst.getOperand(1); + MappedInst.setOpcode(Hexagon::C2_or); + MappedInst.addOperand(Ps); + return; + } + + // Vector reduce complex multiply by scalar, Rt & 1 map to :hi else :lo + // The insn is mapped from the 4 operand to the 3 operand raw form taking + // 3 register pairs. + case Hexagon::M2_vrcmpys_acc_s1: { + MCOperand &Rt = Inst.getOperand(3); + assert (Rt.isReg() && "Expected register and none was found"); + unsigned Reg = RI->getEncodingValue(Rt.getReg()); + if (Reg & 1) + MappedInst.setOpcode(Hexagon::M2_vrcmpys_acc_s1_h); + else + MappedInst.setOpcode(Hexagon::M2_vrcmpys_acc_s1_l); + Rt.setReg(getHexagonRegisterPair(Rt.getReg(), RI)); + return; + } + case Hexagon::M2_vrcmpys_s1: { + MCOperand &Rt = Inst.getOperand(2); + assert (Rt.isReg() && "Expected register and none was found"); + unsigned Reg = RI->getEncodingValue(Rt.getReg()); + if (Reg & 1) + MappedInst.setOpcode(Hexagon::M2_vrcmpys_s1_h); + else + MappedInst.setOpcode(Hexagon::M2_vrcmpys_s1_l); + Rt.setReg(getHexagonRegisterPair(Rt.getReg(), RI)); + return; + } + + case Hexagon::M2_vrcmpys_s1rp: { + MCOperand &Rt = Inst.getOperand(2); + assert (Rt.isReg() && "Expected register and none was found"); + unsigned Reg = RI->getEncodingValue(Rt.getReg()); + if (Reg & 1) + MappedInst.setOpcode(Hexagon::M2_vrcmpys_s1rp_h); + else + MappedInst.setOpcode(Hexagon::M2_vrcmpys_s1rp_l); + Rt.setReg(getHexagonRegisterPair(Rt.getReg(), RI)); + return; + } + + case Hexagon::A4_boundscheck: { + MCOperand &Rs = Inst.getOperand(1); + assert (Rs.isReg() && "Expected register and none was found"); + unsigned Reg = RI->getEncodingValue(Rs.getReg()); + if (Reg & 1) // Odd mapped to raw:hi, regpair is rodd:odd-1, like r3:2 + MappedInst.setOpcode(Hexagon::A4_boundscheck_hi); + else // raw:lo + MappedInst.setOpcode(Hexagon::A4_boundscheck_lo); + Rs.setReg(getHexagonRegisterPair(Rs.getReg(), RI)); + return; + } + case Hexagon::S5_asrhub_rnd_sat_goodsyntax: { + MCOperand &MO = MappedInst.getOperand(2); + int64_t Imm; + MCExpr const *Expr = MO.getExpr(); + bool Success = Expr->evaluateAsAbsolute(Imm); + assert (Success && "Expected immediate and none was found");(void)Success; + MCInst TmpInst; + if (Imm == 0) { + TmpInst.setOpcode(Hexagon::S2_vsathub); + TmpInst.addOperand(MappedInst.getOperand(0)); + TmpInst.addOperand(MappedInst.getOperand(1)); + MappedInst = TmpInst; + return; + } + TmpInst.setOpcode(Hexagon::S5_asrhub_rnd_sat); + TmpInst.addOperand(MappedInst.getOperand(0)); + TmpInst.addOperand(MappedInst.getOperand(1)); + const MCExpr *One = MCConstantExpr::create(1, OutContext); + const MCExpr *Sub = MCBinaryExpr::createSub(Expr, One, OutContext); + TmpInst.addOperand(MCOperand::createExpr(Sub)); + MappedInst = TmpInst; + return; + } + case Hexagon::S5_vasrhrnd_goodsyntax: + case Hexagon::S2_asr_i_p_rnd_goodsyntax: { + MCOperand &MO2 = MappedInst.getOperand(2); + MCExpr const *Expr = MO2.getExpr(); + int64_t Imm; + bool Success = Expr->evaluateAsAbsolute(Imm); + assert (Success && "Expected immediate and none was found");(void)Success; + MCInst TmpInst; + if (Imm == 0) { + TmpInst.setOpcode(Hexagon::A2_combinew); + TmpInst.addOperand(MappedInst.getOperand(0)); + MCOperand &MO1 = MappedInst.getOperand(1); + unsigned High = RI->getSubReg(MO1.getReg(), Hexagon::subreg_hireg); + unsigned Low = RI->getSubReg(MO1.getReg(), Hexagon::subreg_loreg); + // Add a new operand for the second register in the pair. 
+ TmpInst.addOperand(MCOperand::createReg(High)); + TmpInst.addOperand(MCOperand::createReg(Low)); + MappedInst = TmpInst; + return; + } + + if (Inst.getOpcode() == Hexagon::S2_asr_i_p_rnd_goodsyntax) + TmpInst.setOpcode(Hexagon::S2_asr_i_p_rnd); + else + TmpInst.setOpcode(Hexagon::S5_vasrhrnd); + TmpInst.addOperand(MappedInst.getOperand(0)); + TmpInst.addOperand(MappedInst.getOperand(1)); + const MCExpr *One = MCConstantExpr::create(1, OutContext); + const MCExpr *Sub = MCBinaryExpr::createSub(Expr, One, OutContext); + TmpInst.addOperand(MCOperand::createExpr(Sub)); + MappedInst = TmpInst; + return; + } + // if ("#u5==0") Assembler mapped to: "Rd=Rs"; else Rd=asr(Rs,#u5-1):rnd + case Hexagon::S2_asr_i_r_rnd_goodsyntax: { + MCOperand &MO = Inst.getOperand(2); + MCExpr const *Expr = MO.getExpr(); + int64_t Imm; + bool Success = Expr->evaluateAsAbsolute(Imm); + assert (Success && "Expected immediate and none was found");(void)Success; + MCInst TmpInst; + if (Imm == 0) { + TmpInst.setOpcode(Hexagon::A2_tfr); + TmpInst.addOperand(MappedInst.getOperand(0)); + TmpInst.addOperand(MappedInst.getOperand(1)); + MappedInst = TmpInst; + return; + } + TmpInst.setOpcode(Hexagon::S2_asr_i_r_rnd); + TmpInst.addOperand(MappedInst.getOperand(0)); + TmpInst.addOperand(MappedInst.getOperand(1)); + const MCExpr *One = MCConstantExpr::create(1, OutContext); + const MCExpr *Sub = MCBinaryExpr::createSub(Expr, One, OutContext); + TmpInst.addOperand(MCOperand::createExpr(Sub)); + MappedInst = TmpInst; + return; + } + case Hexagon::TFRI_f: + MappedInst.setOpcode(Hexagon::A2_tfrsi); + return; + case Hexagon::TFRI_cPt_f: + MappedInst.setOpcode(Hexagon::C2_cmoveit); + return; + case Hexagon::TFRI_cNotPt_f: + MappedInst.setOpcode(Hexagon::C2_cmoveif); + return; + case Hexagon::MUX_ri_f: + MappedInst.setOpcode(Hexagon::C2_muxri); + return; + case Hexagon::MUX_ir_f: + MappedInst.setOpcode(Hexagon::C2_muxir); + return; + + // Translate a "$Rdd = #imm" to "$Rdd = combine(#[-1,0], #imm)" + case Hexagon::A2_tfrpi: { + MCInst TmpInst; + MCOperand &Rdd = MappedInst.getOperand(0); + MCOperand &MO = MappedInst.getOperand(1); + + TmpInst.setOpcode(Hexagon::A2_combineii); + TmpInst.addOperand(Rdd); + int64_t Imm; + bool Success = MO.getExpr()->evaluateAsAbsolute(Imm); + if (Success && Imm < 0) { + const MCExpr *MOne = MCConstantExpr::create(-1, OutContext); + TmpInst.addOperand(MCOperand::createExpr(MOne)); + } else { + const MCExpr *Zero = MCConstantExpr::create(0, OutContext); + TmpInst.addOperand(MCOperand::createExpr(Zero)); + } + TmpInst.addOperand(MO); + MappedInst = TmpInst; + return; + } + // Translate a "$Rdd = $Rss" to "$Rdd = combine($Rs, $Rt)" + case Hexagon::A2_tfrp: { + MCOperand &MO = MappedInst.getOperand(1); + unsigned High = RI->getSubReg(MO.getReg(), Hexagon::subreg_hireg); + unsigned Low = RI->getSubReg(MO.getReg(), Hexagon::subreg_loreg); + MO.setReg(High); + // Add a new operand for the second register in the pair. + MappedInst.addOperand(MCOperand::createReg(Low)); + MappedInst.setOpcode(Hexagon::A2_combinew); + return; + } + + case Hexagon::A2_tfrpt: + case Hexagon::A2_tfrpf: { + MCOperand &MO = MappedInst.getOperand(2); + unsigned High = RI->getSubReg(MO.getReg(), Hexagon::subreg_hireg); + unsigned Low = RI->getSubReg(MO.getReg(), Hexagon::subreg_loreg); + MO.setReg(High); + // Add a new operand for the second register in the pair. + MappedInst.addOperand(MCOperand::createReg(Low)); + MappedInst.setOpcode((Inst.getOpcode() == Hexagon::A2_tfrpt) + ? 
Hexagon::C2_ccombinewt + : Hexagon::C2_ccombinewf); + return; + } + case Hexagon::A2_tfrptnew: + case Hexagon::A2_tfrpfnew: { + MCOperand &MO = MappedInst.getOperand(2); + unsigned High = RI->getSubReg(MO.getReg(), Hexagon::subreg_hireg); + unsigned Low = RI->getSubReg(MO.getReg(), Hexagon::subreg_loreg); + MO.setReg(High); + // Add a new operand for the second register in the pair. + MappedInst.addOperand(MCOperand::createReg(Low)); + MappedInst.setOpcode((Inst.getOpcode() == Hexagon::A2_tfrptnew) + ? Hexagon::C2_ccombinewnewt + : Hexagon::C2_ccombinewnewf); + return; + } + + case Hexagon::M2_mpysmi: { + MCOperand &Imm = MappedInst.getOperand(2); + MCExpr const *Expr = Imm.getExpr(); + int64_t Value; + bool Success = Expr->evaluateAsAbsolute(Value); + assert(Success);(void)Success; + if (Value < 0 && Value > -256) { + MappedInst.setOpcode(Hexagon::M2_mpysin); + Imm.setExpr(MCUnaryExpr::createMinus(Expr, OutContext)); + } + else + MappedInst.setOpcode(Hexagon::M2_mpysip); + return; + } + + case Hexagon::A2_addsp: { + MCOperand &Rt = Inst.getOperand(1); + assert (Rt.isReg() && "Expected register and none was found"); + unsigned Reg = RI->getEncodingValue(Rt.getReg()); + if (Reg & 1) + MappedInst.setOpcode(Hexagon::A2_addsph); + else + MappedInst.setOpcode(Hexagon::A2_addspl); + Rt.setReg(getHexagonRegisterPair(Rt.getReg(), RI)); + return; + } + case Hexagon::HEXAGON_V6_vd0_pseudo: + case Hexagon::HEXAGON_V6_vd0_pseudo_128B: { + MCInst TmpInst; + assert (Inst.getOperand(0).isReg() && + "Expected register and none was found"); + + TmpInst.setOpcode(Hexagon::V6_vxor); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(0)); + MappedInst = TmpInst; + return; + } + + } +} + + +/// printMachineInstruction -- Print out a single Hexagon MI in Darwin syntax to +/// the current output stream. +/// +void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) { + MCInst MCB = HexagonMCInstrInfo::createBundle(); + const MCInstrInfo &MCII = *Subtarget->getInstrInfo(); + + if (MI->isBundle()) { + const MachineBasicBlock* MBB = MI->getParent(); + MachineBasicBlock::const_instr_iterator MII = MI->getIterator(); + unsigned IgnoreCount = 0; + + for (++MII; MII != MBB->instr_end() && MII->isInsideBundle(); ++MII) + if (MII->getOpcode() == TargetOpcode::DBG_VALUE || + MII->getOpcode() == TargetOpcode::IMPLICIT_DEF) + ++IgnoreCount; + else + HexagonLowerToMC(MCII, &*MII, MCB, *this); + } + else + HexagonLowerToMC(MCII, MI, MCB, *this); + + bool Ok = HexagonMCInstrInfo::canonicalizePacket( + MCII, *Subtarget, OutStreamer->getContext(), MCB, nullptr); + assert(Ok); + (void)Ok; + if(HexagonMCInstrInfo::bundleSize(MCB) == 0) + return; + OutStreamer->EmitInstruction(MCB, getSubtargetInfo()); +} + +extern "C" void LLVMInitializeHexagonAsmPrinter() { + RegisterAsmPrinter<HexagonAsmPrinter> X(TheHexagonTarget); +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h new file mode 100644 index 0000000..a78d97e --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h @@ -0,0 +1,62 @@ +//===-- HexagonAsmPrinter.h - Print machine code to an Hexagon .s file ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Hexagon Assembly printer class. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONASMPRINTER_H +#define LLVM_LIB_TARGET_HEXAGON_HEXAGONASMPRINTER_H + +#include "Hexagon.h" +#include "HexagonTargetMachine.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { + class HexagonAsmPrinter : public AsmPrinter { + const HexagonSubtarget *Subtarget; + + public: + explicit HexagonAsmPrinter(TargetMachine &TM, + std::unique_ptr<MCStreamer> Streamer); + + bool runOnMachineFunction(MachineFunction &Fn) override { + Subtarget = &Fn.getSubtarget<HexagonSubtarget>(); + return AsmPrinter::runOnMachineFunction(Fn); + } + + const char *getPassName() const override { + return "Hexagon Assembly Printer"; + } + + bool isBlockOnlyReachableByFallthrough( + const MachineBasicBlock *MBB) const override; + + void EmitInstruction(const MachineInstr *MI) override; + + void HexagonProcessInstruction(MCInst &Inst, + const MachineInstr &MBB); + + + void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O); + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &OS) override; + bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &OS) override; + + static const char *getRegisterName(unsigned RegNo); + }; + +} // end of llvm namespace + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp new file mode 100644 index 0000000..77907b0 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp @@ -0,0 +1,2778 @@ +//===--- HexagonBitSimplify.cpp -------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "hexbit" + +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "HexagonTargetMachine.h" +#include "HexagonBitTracker.h" + +using namespace llvm; + +namespace llvm { + void initializeHexagonBitSimplifyPass(PassRegistry& Registry); + FunctionPass *createHexagonBitSimplify(); +} + +namespace { + // Set of virtual registers, based on BitVector. 
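+  // Virtual register numbers are mapped to bit positions via virtReg2Index,
+  // so insert/remove/has are plain bit operations. find_first/find_next
+  // return 0 when the set is exhausted; 0 is never a virtual register
+  // number, so it is safe as an end-of-iteration sentinel.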
+ struct RegisterSet : private BitVector { + RegisterSet() : BitVector() {} + explicit RegisterSet(unsigned s, bool t = false) : BitVector(s, t) {} + RegisterSet(const RegisterSet &RS) : BitVector(RS) {} + + using BitVector::clear; + using BitVector::count; + + unsigned find_first() const { + int First = BitVector::find_first(); + if (First < 0) + return 0; + return x2v(First); + } + + unsigned find_next(unsigned Prev) const { + int Next = BitVector::find_next(v2x(Prev)); + if (Next < 0) + return 0; + return x2v(Next); + } + + RegisterSet &insert(unsigned R) { + unsigned Idx = v2x(R); + ensure(Idx); + return static_cast<RegisterSet&>(BitVector::set(Idx)); + } + RegisterSet &remove(unsigned R) { + unsigned Idx = v2x(R); + if (Idx >= size()) + return *this; + return static_cast<RegisterSet&>(BitVector::reset(Idx)); + } + + RegisterSet &insert(const RegisterSet &Rs) { + return static_cast<RegisterSet&>(BitVector::operator|=(Rs)); + } + RegisterSet &remove(const RegisterSet &Rs) { + return static_cast<RegisterSet&>(BitVector::reset(Rs)); + } + + reference operator[](unsigned R) { + unsigned Idx = v2x(R); + ensure(Idx); + return BitVector::operator[](Idx); + } + bool operator[](unsigned R) const { + unsigned Idx = v2x(R); + assert(Idx < size()); + return BitVector::operator[](Idx); + } + bool has(unsigned R) const { + unsigned Idx = v2x(R); + if (Idx >= size()) + return false; + return BitVector::test(Idx); + } + + bool empty() const { + return !BitVector::any(); + } + bool includes(const RegisterSet &Rs) const { + // A.BitVector::test(B) <=> A-B != {} + return !Rs.BitVector::test(*this); + } + bool intersects(const RegisterSet &Rs) const { + return BitVector::anyCommon(Rs); + } + + private: + void ensure(unsigned Idx) { + if (size() <= Idx) + resize(std::max(Idx+1, 32U)); + } + static inline unsigned v2x(unsigned v) { + return TargetRegisterInfo::virtReg2Index(v); + } + static inline unsigned x2v(unsigned x) { + return TargetRegisterInfo::index2VirtReg(x); + } + }; + + + struct PrintRegSet { + PrintRegSet(const RegisterSet &S, const TargetRegisterInfo *RI) + : RS(S), TRI(RI) {} + friend raw_ostream &operator<< (raw_ostream &OS, + const PrintRegSet &P); + private: + const RegisterSet &RS; + const TargetRegisterInfo *TRI; + }; + + raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) + LLVM_ATTRIBUTE_UNUSED; + raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) { + OS << '{'; + for (unsigned R = P.RS.find_first(); R; R = P.RS.find_next(R)) + OS << ' ' << PrintReg(R, P.TRI); + OS << " }"; + return OS; + } +} + + +namespace { + class Transformation; + + class HexagonBitSimplify : public MachineFunctionPass { + public: + static char ID; + HexagonBitSimplify() : MachineFunctionPass(ID), MDT(0) { + initializeHexagonBitSimplifyPass(*PassRegistry::getPassRegistry()); + } + virtual const char *getPassName() const { + return "Hexagon bit simplification"; + } + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + virtual bool runOnMachineFunction(MachineFunction &MF); + + static void getInstrDefs(const MachineInstr &MI, RegisterSet &Defs); + static void getInstrUses(const MachineInstr &MI, RegisterSet &Uses); + static bool isEqual(const BitTracker::RegisterCell &RC1, uint16_t B1, + const BitTracker::RegisterCell &RC2, uint16_t B2, uint16_t W); + static bool isConst(const BitTracker::RegisterCell &RC, uint16_t B, + uint16_t W); + static bool 
isZero(const BitTracker::RegisterCell &RC, uint16_t B, + uint16_t W); + static bool getConst(const BitTracker::RegisterCell &RC, uint16_t B, + uint16_t W, uint64_t &U); + static bool replaceReg(unsigned OldR, unsigned NewR, + MachineRegisterInfo &MRI); + static bool getSubregMask(const BitTracker::RegisterRef &RR, + unsigned &Begin, unsigned &Width, MachineRegisterInfo &MRI); + static bool replaceRegWithSub(unsigned OldR, unsigned NewR, + unsigned NewSR, MachineRegisterInfo &MRI); + static bool replaceSubWithSub(unsigned OldR, unsigned OldSR, + unsigned NewR, unsigned NewSR, MachineRegisterInfo &MRI); + static bool parseRegSequence(const MachineInstr &I, + BitTracker::RegisterRef &SL, BitTracker::RegisterRef &SH); + + static bool getUsedBitsInStore(unsigned Opc, BitVector &Bits, + uint16_t Begin); + static bool getUsedBits(unsigned Opc, unsigned OpN, BitVector &Bits, + uint16_t Begin, const HexagonInstrInfo &HII); + + static const TargetRegisterClass *getFinalVRegClass( + const BitTracker::RegisterRef &RR, MachineRegisterInfo &MRI); + static bool isTransparentCopy(const BitTracker::RegisterRef &RD, + const BitTracker::RegisterRef &RS, MachineRegisterInfo &MRI); + + private: + MachineDominatorTree *MDT; + + bool visitBlock(MachineBasicBlock &B, Transformation &T, RegisterSet &AVs); + }; + + char HexagonBitSimplify::ID = 0; + typedef HexagonBitSimplify HBS; + + + // The purpose of this class is to provide a common facility to traverse + // the function top-down or bottom-up via the dominator tree, and keep + // track of the available registers. + class Transformation { + public: + bool TopDown; + Transformation(bool TD) : TopDown(TD) {} + virtual bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) = 0; + virtual ~Transformation() {} + }; +} + +INITIALIZE_PASS_BEGIN(HexagonBitSimplify, "hexbit", + "Hexagon bit simplification", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_END(HexagonBitSimplify, "hexbit", + "Hexagon bit simplification", false, false) + + +bool HexagonBitSimplify::visitBlock(MachineBasicBlock &B, Transformation &T, + RegisterSet &AVs) { + MachineDomTreeNode *N = MDT->getNode(&B); + typedef GraphTraits<MachineDomTreeNode*> GTN; + bool Changed = false; + + if (T.TopDown) + Changed = T.processBlock(B, AVs); + + RegisterSet Defs; + for (auto &I : B) + getInstrDefs(I, Defs); + RegisterSet NewAVs = AVs; + NewAVs.insert(Defs); + + for (auto I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I) { + MachineBasicBlock *SB = (*I)->getBlock(); + Changed |= visitBlock(*SB, T, NewAVs); + } + if (!T.TopDown) + Changed |= T.processBlock(B, AVs); + + return Changed; +} + +// +// Utility functions: +// +void HexagonBitSimplify::getInstrDefs(const MachineInstr &MI, + RegisterSet &Defs) { + for (auto &Op : MI.operands()) { + if (!Op.isReg() || !Op.isDef()) + continue; + unsigned R = Op.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(R)) + continue; + Defs.insert(R); + } +} + +void HexagonBitSimplify::getInstrUses(const MachineInstr &MI, + RegisterSet &Uses) { + for (auto &Op : MI.operands()) { + if (!Op.isReg() || !Op.isUse()) + continue; + unsigned R = Op.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(R)) + continue; + Uses.insert(R); + } +} + +// Check if all the bits in range [B, E) in both cells are equal. 
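+// "Equal" here means provably equal: W bits of RC1 starting at B1 are
+// compared with W bits of RC2 starting at B2, and any bit whose value is
+// unknown (a "bottom" value, i.e. a self-referencing bit with Reg == 0)
+// makes the whole comparison fail.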
+bool HexagonBitSimplify::isEqual(const BitTracker::RegisterCell &RC1, + uint16_t B1, const BitTracker::RegisterCell &RC2, uint16_t B2, + uint16_t W) { + for (uint16_t i = 0; i < W; ++i) { + // If RC1[i] is "bottom", it cannot be proven equal to RC2[i]. + if (RC1[B1+i].Type == BitTracker::BitValue::Ref && RC1[B1+i].RefI.Reg == 0) + return false; + // Same for RC2[i]. + if (RC2[B2+i].Type == BitTracker::BitValue::Ref && RC2[B2+i].RefI.Reg == 0) + return false; + if (RC1[B1+i] != RC2[B2+i]) + return false; + } + return true; +} + + +bool HexagonBitSimplify::isConst(const BitTracker::RegisterCell &RC, + uint16_t B, uint16_t W) { + assert(B < RC.width() && B+W <= RC.width()); + for (uint16_t i = B; i < B+W; ++i) + if (!RC[i].num()) + return false; + return true; +} + + +bool HexagonBitSimplify::isZero(const BitTracker::RegisterCell &RC, + uint16_t B, uint16_t W) { + assert(B < RC.width() && B+W <= RC.width()); + for (uint16_t i = B; i < B+W; ++i) + if (!RC[i].is(0)) + return false; + return true; +} + + +bool HexagonBitSimplify::getConst(const BitTracker::RegisterCell &RC, + uint16_t B, uint16_t W, uint64_t &U) { + assert(B < RC.width() && B+W <= RC.width()); + int64_t T = 0; + for (uint16_t i = B+W; i > B; --i) { + const BitTracker::BitValue &BV = RC[i-1]; + T <<= 1; + if (BV.is(1)) + T |= 1; + else if (!BV.is(0)) + return false; + } + U = T; + return true; +} + + +bool HexagonBitSimplify::replaceReg(unsigned OldR, unsigned NewR, + MachineRegisterInfo &MRI) { + if (!TargetRegisterInfo::isVirtualRegister(OldR) || + !TargetRegisterInfo::isVirtualRegister(NewR)) + return false; + auto Begin = MRI.use_begin(OldR), End = MRI.use_end(); + decltype(End) NextI; + for (auto I = Begin; I != End; I = NextI) { + NextI = std::next(I); + I->setReg(NewR); + } + return Begin != End; +} + + +bool HexagonBitSimplify::replaceRegWithSub(unsigned OldR, unsigned NewR, + unsigned NewSR, MachineRegisterInfo &MRI) { + if (!TargetRegisterInfo::isVirtualRegister(OldR) || + !TargetRegisterInfo::isVirtualRegister(NewR)) + return false; + auto Begin = MRI.use_begin(OldR), End = MRI.use_end(); + decltype(End) NextI; + for (auto I = Begin; I != End; I = NextI) { + NextI = std::next(I); + I->setReg(NewR); + I->setSubReg(NewSR); + } + return Begin != End; +} + + +bool HexagonBitSimplify::replaceSubWithSub(unsigned OldR, unsigned OldSR, + unsigned NewR, unsigned NewSR, MachineRegisterInfo &MRI) { + if (!TargetRegisterInfo::isVirtualRegister(OldR) || + !TargetRegisterInfo::isVirtualRegister(NewR)) + return false; + auto Begin = MRI.use_begin(OldR), End = MRI.use_end(); + decltype(End) NextI; + for (auto I = Begin; I != End; I = NextI) { + NextI = std::next(I); + if (I->getSubReg() != OldSR) + continue; + I->setReg(NewR); + I->setSubReg(NewSR); + } + return Begin != End; +} + + +// For a register ref (pair Reg:Sub), set Begin to the position of the LSB +// of Sub in Reg, and set Width to the size of Sub in bits. Return true, +// if this succeeded, otherwise return false. +bool HexagonBitSimplify::getSubregMask(const BitTracker::RegisterRef &RR, + unsigned &Begin, unsigned &Width, MachineRegisterInfo &MRI) { + const TargetRegisterClass *RC = MRI.getRegClass(RR.Reg); + if (RC == &Hexagon::IntRegsRegClass) { + assert(RR.Sub == 0); + Begin = 0; + Width = 32; + return true; + } + if (RC == &Hexagon::DoubleRegsRegClass) { + if (RR.Sub == 0) { + Begin = 0; + Width = 64; + return true; + } + assert(RR.Sub == Hexagon::subreg_loreg || RR.Sub == Hexagon::subreg_hireg); + Width = 32; + Begin = (RR.Sub == Hexagon::subreg_loreg ? 
0 : 32); + return true; + } + return false; +} + + +// For a REG_SEQUENCE, set SL to the low subregister and SH to the high +// subregister. +bool HexagonBitSimplify::parseRegSequence(const MachineInstr &I, + BitTracker::RegisterRef &SL, BitTracker::RegisterRef &SH) { + assert(I.getOpcode() == TargetOpcode::REG_SEQUENCE); + unsigned Sub1 = I.getOperand(2).getImm(), Sub2 = I.getOperand(4).getImm(); + assert(Sub1 != Sub2); + if (Sub1 == Hexagon::subreg_loreg && Sub2 == Hexagon::subreg_hireg) { + SL = I.getOperand(1); + SH = I.getOperand(3); + return true; + } + if (Sub1 == Hexagon::subreg_hireg && Sub2 == Hexagon::subreg_loreg) { + SH = I.getOperand(1); + SL = I.getOperand(3); + return true; + } + return false; +} + + +// All stores (except 64-bit stores) take a 32-bit register as the source +// of the value to be stored. If the instruction stores into a location +// that is shorter than 32 bits, some bits of the source register are not +// used. For each store instruction, calculate the set of used bits in +// the source register, and set appropriate bits in Bits. Return true if +// the bits are calculated, false otherwise. +bool HexagonBitSimplify::getUsedBitsInStore(unsigned Opc, BitVector &Bits, + uint16_t Begin) { + using namespace Hexagon; + + switch (Opc) { + // Store byte + case S2_storerb_io: // memb(Rs32+#s11:0)=Rt32 + case S2_storerbnew_io: // memb(Rs32+#s11:0)=Nt8.new + case S2_pstorerbt_io: // if (Pv4) memb(Rs32+#u6:0)=Rt32 + case S2_pstorerbf_io: // if (!Pv4) memb(Rs32+#u6:0)=Rt32 + case S4_pstorerbtnew_io: // if (Pv4.new) memb(Rs32+#u6:0)=Rt32 + case S4_pstorerbfnew_io: // if (!Pv4.new) memb(Rs32+#u6:0)=Rt32 + case S2_pstorerbnewt_io: // if (Pv4) memb(Rs32+#u6:0)=Nt8.new + case S2_pstorerbnewf_io: // if (!Pv4) memb(Rs32+#u6:0)=Nt8.new + case S4_pstorerbnewtnew_io: // if (Pv4.new) memb(Rs32+#u6:0)=Nt8.new + case S4_pstorerbnewfnew_io: // if (!Pv4.new) memb(Rs32+#u6:0)=Nt8.new + case S2_storerb_pi: // memb(Rx32++#s4:0)=Rt32 + case S2_storerbnew_pi: // memb(Rx32++#s4:0)=Nt8.new + case S2_pstorerbt_pi: // if (Pv4) memb(Rx32++#s4:0)=Rt32 + case S2_pstorerbf_pi: // if (!Pv4) memb(Rx32++#s4:0)=Rt32 + case S2_pstorerbtnew_pi: // if (Pv4.new) memb(Rx32++#s4:0)=Rt32 + case S2_pstorerbfnew_pi: // if (!Pv4.new) memb(Rx32++#s4:0)=Rt32 + case S2_pstorerbnewt_pi: // if (Pv4) memb(Rx32++#s4:0)=Nt8.new + case S2_pstorerbnewf_pi: // if (!Pv4) memb(Rx32++#s4:0)=Nt8.new + case S2_pstorerbnewtnew_pi: // if (Pv4.new) memb(Rx32++#s4:0)=Nt8.new + case S2_pstorerbnewfnew_pi: // if (!Pv4.new) memb(Rx32++#s4:0)=Nt8.new + case S4_storerb_ap: // memb(Re32=#U6)=Rt32 + case S4_storerbnew_ap: // memb(Re32=#U6)=Nt8.new + case S2_storerb_pr: // memb(Rx32++Mu2)=Rt32 + case S2_storerbnew_pr: // memb(Rx32++Mu2)=Nt8.new + case S4_storerb_ur: // memb(Ru32<<#u2+#U6)=Rt32 + case S4_storerbnew_ur: // memb(Ru32<<#u2+#U6)=Nt8.new + case S2_storerb_pbr: // memb(Rx32++Mu2:brev)=Rt32 + case S2_storerbnew_pbr: // memb(Rx32++Mu2:brev)=Nt8.new + case S2_storerb_pci: // memb(Rx32++#s4:0:circ(Mu2))=Rt32 + case S2_storerbnew_pci: // memb(Rx32++#s4:0:circ(Mu2))=Nt8.new + case S2_storerb_pcr: // memb(Rx32++I:circ(Mu2))=Rt32 + case S2_storerbnew_pcr: // memb(Rx32++I:circ(Mu2))=Nt8.new + case S4_storerb_rr: // memb(Rs32+Ru32<<#u2)=Rt32 + case S4_storerbnew_rr: // memb(Rs32+Ru32<<#u2)=Nt8.new + case S4_pstorerbt_rr: // if (Pv4) memb(Rs32+Ru32<<#u2)=Rt32 + case S4_pstorerbf_rr: // if (!Pv4) memb(Rs32+Ru32<<#u2)=Rt32 + case S4_pstorerbtnew_rr: // if (Pv4.new) memb(Rs32+Ru32<<#u2)=Rt32 + case S4_pstorerbfnew_rr: // if (!Pv4.new) 
memb(Rs32+Ru32<<#u2)=Rt32 + case S4_pstorerbnewt_rr: // if (Pv4) memb(Rs32+Ru32<<#u2)=Nt8.new + case S4_pstorerbnewf_rr: // if (!Pv4) memb(Rs32+Ru32<<#u2)=Nt8.new + case S4_pstorerbnewtnew_rr: // if (Pv4.new) memb(Rs32+Ru32<<#u2)=Nt8.new + case S4_pstorerbnewfnew_rr: // if (!Pv4.new) memb(Rs32+Ru32<<#u2)=Nt8.new + case S2_storerbgp: // memb(gp+#u16:0)=Rt32 + case S2_storerbnewgp: // memb(gp+#u16:0)=Nt8.new + case S4_pstorerbt_abs: // if (Pv4) memb(#u6)=Rt32 + case S4_pstorerbf_abs: // if (!Pv4) memb(#u6)=Rt32 + case S4_pstorerbtnew_abs: // if (Pv4.new) memb(#u6)=Rt32 + case S4_pstorerbfnew_abs: // if (!Pv4.new) memb(#u6)=Rt32 + case S4_pstorerbnewt_abs: // if (Pv4) memb(#u6)=Nt8.new + case S4_pstorerbnewf_abs: // if (!Pv4) memb(#u6)=Nt8.new + case S4_pstorerbnewtnew_abs: // if (Pv4.new) memb(#u6)=Nt8.new + case S4_pstorerbnewfnew_abs: // if (!Pv4.new) memb(#u6)=Nt8.new + Bits.set(Begin, Begin+8); + return true; + + // Store low half + case S2_storerh_io: // memh(Rs32+#s11:1)=Rt32 + case S2_storerhnew_io: // memh(Rs32+#s11:1)=Nt8.new + case S2_pstorerht_io: // if (Pv4) memh(Rs32+#u6:1)=Rt32 + case S2_pstorerhf_io: // if (!Pv4) memh(Rs32+#u6:1)=Rt32 + case S4_pstorerhtnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Rt32 + case S4_pstorerhfnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Rt32 + case S2_pstorerhnewt_io: // if (Pv4) memh(Rs32+#u6:1)=Nt8.new + case S2_pstorerhnewf_io: // if (!Pv4) memh(Rs32+#u6:1)=Nt8.new + case S4_pstorerhnewtnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Nt8.new + case S4_pstorerhnewfnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Nt8.new + case S2_storerh_pi: // memh(Rx32++#s4:1)=Rt32 + case S2_storerhnew_pi: // memh(Rx32++#s4:1)=Nt8.new + case S2_pstorerht_pi: // if (Pv4) memh(Rx32++#s4:1)=Rt32 + case S2_pstorerhf_pi: // if (!Pv4) memh(Rx32++#s4:1)=Rt32 + case S2_pstorerhtnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Rt32 + case S2_pstorerhfnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Rt32 + case S2_pstorerhnewt_pi: // if (Pv4) memh(Rx32++#s4:1)=Nt8.new + case S2_pstorerhnewf_pi: // if (!Pv4) memh(Rx32++#s4:1)=Nt8.new + case S2_pstorerhnewtnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Nt8.new + case S2_pstorerhnewfnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Nt8.new + case S4_storerh_ap: // memh(Re32=#U6)=Rt32 + case S4_storerhnew_ap: // memh(Re32=#U6)=Nt8.new + case S2_storerh_pr: // memh(Rx32++Mu2)=Rt32 + case S2_storerhnew_pr: // memh(Rx32++Mu2)=Nt8.new + case S4_storerh_ur: // memh(Ru32<<#u2+#U6)=Rt32 + case S4_storerhnew_ur: // memh(Ru32<<#u2+#U6)=Nt8.new + case S2_storerh_pbr: // memh(Rx32++Mu2:brev)=Rt32 + case S2_storerhnew_pbr: // memh(Rx32++Mu2:brev)=Nt8.new + case S2_storerh_pci: // memh(Rx32++#s4:1:circ(Mu2))=Rt32 + case S2_storerhnew_pci: // memh(Rx32++#s4:1:circ(Mu2))=Nt8.new + case S2_storerh_pcr: // memh(Rx32++I:circ(Mu2))=Rt32 + case S2_storerhnew_pcr: // memh(Rx32++I:circ(Mu2))=Nt8.new + case S4_storerh_rr: // memh(Rs32+Ru32<<#u2)=Rt32 + case S4_pstorerht_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Rt32 + case S4_pstorerhf_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Rt32 + case S4_pstorerhtnew_rr: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Rt32 + case S4_pstorerhfnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Rt32 + case S4_storerhnew_rr: // memh(Rs32+Ru32<<#u2)=Nt8.new + case S4_pstorerhnewt_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Nt8.new + case S4_pstorerhnewf_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Nt8.new + case S4_pstorerhnewtnew_rr: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Nt8.new + case S4_pstorerhnewfnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Nt8.new + case S2_storerhgp: // memh(gp+#u16:1)=Rt32 
+ case S2_storerhnewgp: // memh(gp+#u16:1)=Nt8.new + case S4_pstorerht_abs: // if (Pv4) memh(#u6)=Rt32 + case S4_pstorerhf_abs: // if (!Pv4) memh(#u6)=Rt32 + case S4_pstorerhtnew_abs: // if (Pv4.new) memh(#u6)=Rt32 + case S4_pstorerhfnew_abs: // if (!Pv4.new) memh(#u6)=Rt32 + case S4_pstorerhnewt_abs: // if (Pv4) memh(#u6)=Nt8.new + case S4_pstorerhnewf_abs: // if (!Pv4) memh(#u6)=Nt8.new + case S4_pstorerhnewtnew_abs: // if (Pv4.new) memh(#u6)=Nt8.new + case S4_pstorerhnewfnew_abs: // if (!Pv4.new) memh(#u6)=Nt8.new + Bits.set(Begin, Begin+16); + return true; + + // Store high half + case S2_storerf_io: // memh(Rs32+#s11:1)=Rt.H32 + case S2_pstorerft_io: // if (Pv4) memh(Rs32+#u6:1)=Rt.H32 + case S2_pstorerff_io: // if (!Pv4) memh(Rs32+#u6:1)=Rt.H32 + case S4_pstorerftnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Rt.H32 + case S4_pstorerffnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Rt.H32 + case S2_storerf_pi: // memh(Rx32++#s4:1)=Rt.H32 + case S2_pstorerft_pi: // if (Pv4) memh(Rx32++#s4:1)=Rt.H32 + case S2_pstorerff_pi: // if (!Pv4) memh(Rx32++#s4:1)=Rt.H32 + case S2_pstorerftnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Rt.H32 + case S2_pstorerffnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Rt.H32 + case S4_storerf_ap: // memh(Re32=#U6)=Rt.H32 + case S2_storerf_pr: // memh(Rx32++Mu2)=Rt.H32 + case S4_storerf_ur: // memh(Ru32<<#u2+#U6)=Rt.H32 + case S2_storerf_pbr: // memh(Rx32++Mu2:brev)=Rt.H32 + case S2_storerf_pci: // memh(Rx32++#s4:1:circ(Mu2))=Rt.H32 + case S2_storerf_pcr: // memh(Rx32++I:circ(Mu2))=Rt.H32 + case S4_storerf_rr: // memh(Rs32+Ru32<<#u2)=Rt.H32 + case S4_pstorerft_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Rt.H32 + case S4_pstorerff_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Rt.H32 + case S4_pstorerftnew_rr: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Rt.H32 + case S4_pstorerffnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Rt.H32 + case S2_storerfgp: // memh(gp+#u16:1)=Rt.H32 + case S4_pstorerft_abs: // if (Pv4) memh(#u6)=Rt.H32 + case S4_pstorerff_abs: // if (!Pv4) memh(#u6)=Rt.H32 + case S4_pstorerftnew_abs: // if (Pv4.new) memh(#u6)=Rt.H32 + case S4_pstorerffnew_abs: // if (!Pv4.new) memh(#u6)=Rt.H32 + Bits.set(Begin+16, Begin+32); + return true; + } + + return false; +} + + +// For an instruction with opcode Opc, calculate the set of bits that it +// uses in a register in operand OpN. This only calculates the set of used +// bits for cases where it does not depend on any operands (as is the case +// in shifts, for example). For concrete instructions from a program, the +// operand may be a subregister of a larger register, while Bits would +// correspond to the larger register in its entirety. Because of that, +// the parameter Begin can be used to indicate which bit of Bits should be +// considered the LSB of of the operand. +bool HexagonBitSimplify::getUsedBits(unsigned Opc, unsigned OpN, + BitVector &Bits, uint16_t Begin, const HexagonInstrInfo &HII) { + using namespace Hexagon; + + const MCInstrDesc &D = HII.get(Opc); + if (D.mayStore()) { + if (OpN == D.getNumOperands()-1) + return getUsedBitsInStore(Opc, Bits, Begin); + return false; + } + + switch (Opc) { + // One register source. Used bits: R1[0-7]. + case A2_sxtb: + case A2_zxtb: + case A4_cmpbeqi: + case A4_cmpbgti: + case A4_cmpbgtui: + if (OpN == 1) { + Bits.set(Begin, Begin+8); + return true; + } + break; + + // One register source. Used bits: R1[0-15]. 
+ case A2_aslh: + case A2_sxth: + case A2_zxth: + case A4_cmpheqi: + case A4_cmphgti: + case A4_cmphgtui: + if (OpN == 1) { + Bits.set(Begin, Begin+16); + return true; + } + break; + + // One register source. Used bits: R1[16-31]. + case A2_asrh: + if (OpN == 1) { + Bits.set(Begin+16, Begin+32); + return true; + } + break; + + // Two register sources. Used bits: R1[0-7], R2[0-7]. + case A4_cmpbeq: + case A4_cmpbgt: + case A4_cmpbgtu: + if (OpN == 1) { + Bits.set(Begin, Begin+8); + return true; + } + break; + + // Two register sources. Used bits: R1[0-15], R2[0-15]. + case A4_cmpheq: + case A4_cmphgt: + case A4_cmphgtu: + case A2_addh_h16_ll: + case A2_addh_h16_sat_ll: + case A2_addh_l16_ll: + case A2_addh_l16_sat_ll: + case A2_combine_ll: + case A2_subh_h16_ll: + case A2_subh_h16_sat_ll: + case A2_subh_l16_ll: + case A2_subh_l16_sat_ll: + case M2_mpy_acc_ll_s0: + case M2_mpy_acc_ll_s1: + case M2_mpy_acc_sat_ll_s0: + case M2_mpy_acc_sat_ll_s1: + case M2_mpy_ll_s0: + case M2_mpy_ll_s1: + case M2_mpy_nac_ll_s0: + case M2_mpy_nac_ll_s1: + case M2_mpy_nac_sat_ll_s0: + case M2_mpy_nac_sat_ll_s1: + case M2_mpy_rnd_ll_s0: + case M2_mpy_rnd_ll_s1: + case M2_mpy_sat_ll_s0: + case M2_mpy_sat_ll_s1: + case M2_mpy_sat_rnd_ll_s0: + case M2_mpy_sat_rnd_ll_s1: + case M2_mpyd_acc_ll_s0: + case M2_mpyd_acc_ll_s1: + case M2_mpyd_ll_s0: + case M2_mpyd_ll_s1: + case M2_mpyd_nac_ll_s0: + case M2_mpyd_nac_ll_s1: + case M2_mpyd_rnd_ll_s0: + case M2_mpyd_rnd_ll_s1: + case M2_mpyu_acc_ll_s0: + case M2_mpyu_acc_ll_s1: + case M2_mpyu_ll_s0: + case M2_mpyu_ll_s1: + case M2_mpyu_nac_ll_s0: + case M2_mpyu_nac_ll_s1: + case M2_mpyud_acc_ll_s0: + case M2_mpyud_acc_ll_s1: + case M2_mpyud_ll_s0: + case M2_mpyud_ll_s1: + case M2_mpyud_nac_ll_s0: + case M2_mpyud_nac_ll_s1: + if (OpN == 1 || OpN == 2) { + Bits.set(Begin, Begin+16); + return true; + } + break; + + // Two register sources. Used bits: R1[0-15], R2[16-31]. + case A2_addh_h16_lh: + case A2_addh_h16_sat_lh: + case A2_combine_lh: + case A2_subh_h16_lh: + case A2_subh_h16_sat_lh: + case M2_mpy_acc_lh_s0: + case M2_mpy_acc_lh_s1: + case M2_mpy_acc_sat_lh_s0: + case M2_mpy_acc_sat_lh_s1: + case M2_mpy_lh_s0: + case M2_mpy_lh_s1: + case M2_mpy_nac_lh_s0: + case M2_mpy_nac_lh_s1: + case M2_mpy_nac_sat_lh_s0: + case M2_mpy_nac_sat_lh_s1: + case M2_mpy_rnd_lh_s0: + case M2_mpy_rnd_lh_s1: + case M2_mpy_sat_lh_s0: + case M2_mpy_sat_lh_s1: + case M2_mpy_sat_rnd_lh_s0: + case M2_mpy_sat_rnd_lh_s1: + case M2_mpyd_acc_lh_s0: + case M2_mpyd_acc_lh_s1: + case M2_mpyd_lh_s0: + case M2_mpyd_lh_s1: + case M2_mpyd_nac_lh_s0: + case M2_mpyd_nac_lh_s1: + case M2_mpyd_rnd_lh_s0: + case M2_mpyd_rnd_lh_s1: + case M2_mpyu_acc_lh_s0: + case M2_mpyu_acc_lh_s1: + case M2_mpyu_lh_s0: + case M2_mpyu_lh_s1: + case M2_mpyu_nac_lh_s0: + case M2_mpyu_nac_lh_s1: + case M2_mpyud_acc_lh_s0: + case M2_mpyud_acc_lh_s1: + case M2_mpyud_lh_s0: + case M2_mpyud_lh_s1: + case M2_mpyud_nac_lh_s0: + case M2_mpyud_nac_lh_s1: + // These four are actually LH. + case A2_addh_l16_hl: + case A2_addh_l16_sat_hl: + case A2_subh_l16_hl: + case A2_subh_l16_sat_hl: + if (OpN == 1) { + Bits.set(Begin, Begin+16); + return true; + } + if (OpN == 2) { + Bits.set(Begin+16, Begin+32); + return true; + } + break; + + // Two register sources, used bits: R1[16-31], R2[0-15]. 
+ case A2_addh_h16_hl: + case A2_addh_h16_sat_hl: + case A2_combine_hl: + case A2_subh_h16_hl: + case A2_subh_h16_sat_hl: + case M2_mpy_acc_hl_s0: + case M2_mpy_acc_hl_s1: + case M2_mpy_acc_sat_hl_s0: + case M2_mpy_acc_sat_hl_s1: + case M2_mpy_hl_s0: + case M2_mpy_hl_s1: + case M2_mpy_nac_hl_s0: + case M2_mpy_nac_hl_s1: + case M2_mpy_nac_sat_hl_s0: + case M2_mpy_nac_sat_hl_s1: + case M2_mpy_rnd_hl_s0: + case M2_mpy_rnd_hl_s1: + case M2_mpy_sat_hl_s0: + case M2_mpy_sat_hl_s1: + case M2_mpy_sat_rnd_hl_s0: + case M2_mpy_sat_rnd_hl_s1: + case M2_mpyd_acc_hl_s0: + case M2_mpyd_acc_hl_s1: + case M2_mpyd_hl_s0: + case M2_mpyd_hl_s1: + case M2_mpyd_nac_hl_s0: + case M2_mpyd_nac_hl_s1: + case M2_mpyd_rnd_hl_s0: + case M2_mpyd_rnd_hl_s1: + case M2_mpyu_acc_hl_s0: + case M2_mpyu_acc_hl_s1: + case M2_mpyu_hl_s0: + case M2_mpyu_hl_s1: + case M2_mpyu_nac_hl_s0: + case M2_mpyu_nac_hl_s1: + case M2_mpyud_acc_hl_s0: + case M2_mpyud_acc_hl_s1: + case M2_mpyud_hl_s0: + case M2_mpyud_hl_s1: + case M2_mpyud_nac_hl_s0: + case M2_mpyud_nac_hl_s1: + if (OpN == 1) { + Bits.set(Begin+16, Begin+32); + return true; + } + if (OpN == 2) { + Bits.set(Begin, Begin+16); + return true; + } + break; + + // Two register sources, used bits: R1[16-31], R2[16-31]. + case A2_addh_h16_hh: + case A2_addh_h16_sat_hh: + case A2_combine_hh: + case A2_subh_h16_hh: + case A2_subh_h16_sat_hh: + case M2_mpy_acc_hh_s0: + case M2_mpy_acc_hh_s1: + case M2_mpy_acc_sat_hh_s0: + case M2_mpy_acc_sat_hh_s1: + case M2_mpy_hh_s0: + case M2_mpy_hh_s1: + case M2_mpy_nac_hh_s0: + case M2_mpy_nac_hh_s1: + case M2_mpy_nac_sat_hh_s0: + case M2_mpy_nac_sat_hh_s1: + case M2_mpy_rnd_hh_s0: + case M2_mpy_rnd_hh_s1: + case M2_mpy_sat_hh_s0: + case M2_mpy_sat_hh_s1: + case M2_mpy_sat_rnd_hh_s0: + case M2_mpy_sat_rnd_hh_s1: + case M2_mpyd_acc_hh_s0: + case M2_mpyd_acc_hh_s1: + case M2_mpyd_hh_s0: + case M2_mpyd_hh_s1: + case M2_mpyd_nac_hh_s0: + case M2_mpyd_nac_hh_s1: + case M2_mpyd_rnd_hh_s0: + case M2_mpyd_rnd_hh_s1: + case M2_mpyu_acc_hh_s0: + case M2_mpyu_acc_hh_s1: + case M2_mpyu_hh_s0: + case M2_mpyu_hh_s1: + case M2_mpyu_nac_hh_s0: + case M2_mpyu_nac_hh_s1: + case M2_mpyud_acc_hh_s0: + case M2_mpyud_acc_hh_s1: + case M2_mpyud_hh_s0: + case M2_mpyud_hh_s1: + case M2_mpyud_nac_hh_s0: + case M2_mpyud_nac_hh_s1: + if (OpN == 1 || OpN == 2) { + Bits.set(Begin+16, Begin+32); + return true; + } + break; + } + + return false; +} + + +// Calculate the register class that matches Reg:Sub. For example, if +// vreg1 is a double register, then vreg1:subreg_hireg would match "int" +// register class. +const TargetRegisterClass *HexagonBitSimplify::getFinalVRegClass( + const BitTracker::RegisterRef &RR, MachineRegisterInfo &MRI) { + if (!TargetRegisterInfo::isVirtualRegister(RR.Reg)) + return nullptr; + auto *RC = MRI.getRegClass(RR.Reg); + if (RR.Sub == 0) + return RC; + + auto VerifySR = [] (unsigned Sub) -> void { + assert(Sub == Hexagon::subreg_hireg || Sub == Hexagon::subreg_loreg); + }; + + switch (RC->getID()) { + case Hexagon::DoubleRegsRegClassID: + VerifySR(RR.Sub); + return &Hexagon::IntRegsRegClass; + } + return nullptr; +} + + +// Check if RD could be replaced with RS at any possible use of RD. +// For example a predicate register cannot be replaced with a integer +// register, but a 64-bit register with a subregister can be replaced +// with a 32-bit register. 
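+// A short sketch with made-up virtual registers: if vreg1 is a DoubleRegs
+// register, then vreg1:subreg_loreg and a plain IntRegs register vreg0 both
+// map to IntRegs in getFinalVRegClass, so the copy is transparent; replacing
+// an IntRegs value with a PredRegs value is rejected because the final
+// classes differ.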
+bool HexagonBitSimplify::isTransparentCopy(const BitTracker::RegisterRef &RD, + const BitTracker::RegisterRef &RS, MachineRegisterInfo &MRI) { + if (!TargetRegisterInfo::isVirtualRegister(RD.Reg) || + !TargetRegisterInfo::isVirtualRegister(RS.Reg)) + return false; + // Return false if one (or both) classes are nullptr. + auto *DRC = getFinalVRegClass(RD, MRI); + if (!DRC) + return false; + + return DRC == getFinalVRegClass(RS, MRI); +} + + +// +// Dead code elimination +// +namespace { + class DeadCodeElimination { + public: + DeadCodeElimination(MachineFunction &mf, MachineDominatorTree &mdt) + : MF(mf), HII(*MF.getSubtarget<HexagonSubtarget>().getInstrInfo()), + MDT(mdt), MRI(mf.getRegInfo()) {} + + bool run() { + return runOnNode(MDT.getRootNode()); + } + + private: + bool isDead(unsigned R) const; + bool runOnNode(MachineDomTreeNode *N); + + MachineFunction &MF; + const HexagonInstrInfo &HII; + MachineDominatorTree &MDT; + MachineRegisterInfo &MRI; + }; +} + + +bool DeadCodeElimination::isDead(unsigned R) const { + for (auto I = MRI.use_begin(R), E = MRI.use_end(); I != E; ++I) { + MachineInstr *UseI = I->getParent(); + if (UseI->isDebugValue()) + continue; + if (UseI->isPHI()) { + assert(!UseI->getOperand(0).getSubReg()); + unsigned DR = UseI->getOperand(0).getReg(); + if (DR == R) + continue; + } + return false; + } + return true; +} + + +bool DeadCodeElimination::runOnNode(MachineDomTreeNode *N) { + bool Changed = false; + typedef GraphTraits<MachineDomTreeNode*> GTN; + for (auto I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I) + Changed |= runOnNode(*I); + + MachineBasicBlock *B = N->getBlock(); + std::vector<MachineInstr*> Instrs; + for (auto I = B->rbegin(), E = B->rend(); I != E; ++I) + Instrs.push_back(&*I); + + for (auto MI : Instrs) { + unsigned Opc = MI->getOpcode(); + // Do not touch lifetime markers. This is why the target-independent DCE + // cannot be used. + if (Opc == TargetOpcode::LIFETIME_START || + Opc == TargetOpcode::LIFETIME_END) + continue; + bool Store = false; + if (MI->isInlineAsm()) + continue; + // Delete PHIs if possible. + if (!MI->isPHI() && !MI->isSafeToMove(nullptr, Store)) + continue; + + bool AllDead = true; + SmallVector<unsigned,2> Regs; + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isDef()) + continue; + unsigned R = Op.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(R) || !isDead(R)) { + AllDead = false; + break; + } + Regs.push_back(R); + } + if (!AllDead) + continue; + + B->erase(MI); + for (unsigned i = 0, n = Regs.size(); i != n; ++i) + MRI.markUsesInDebugValueAsUndef(Regs[i]); + Changed = true; + } + + return Changed; +} + + +// +// Eliminate redundant instructions +// +// This transformation will identify instructions where the output register +// is the same as one of its input registers. This only works on instructions +// that define a single register (unlike post-increment loads, for example). +// The equality check is actually more detailed: the code calculates which +// bits of the output are used, and only compares these bits with the input +// registers. +// If the output matches an input, the instruction is replaced with COPY. +// The copies will be removed by another transformation. 
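+// A made-up example of the kind of instruction this catches: if the bit
+// tracker proves that vreg2 is zero in
+//   vreg1 = A2_or vreg0, vreg2
+// then every used bit of vreg1 equals the corresponding bit of vreg0, and
+// the instruction can be replaced with a COPY of vreg0.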
+namespace { + class RedundantInstrElimination : public Transformation { + public: + RedundantInstrElimination(BitTracker &bt, const HexagonInstrInfo &hii, + MachineRegisterInfo &mri) + : Transformation(true), HII(hii), MRI(mri), BT(bt) {} + bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override; + private: + bool isLossyShiftLeft(const MachineInstr &MI, unsigned OpN, + unsigned &LostB, unsigned &LostE); + bool isLossyShiftRight(const MachineInstr &MI, unsigned OpN, + unsigned &LostB, unsigned &LostE); + bool computeUsedBits(unsigned Reg, BitVector &Bits); + bool computeUsedBits(const MachineInstr &MI, unsigned OpN, BitVector &Bits, + uint16_t Begin); + bool usedBitsEqual(BitTracker::RegisterRef RD, BitTracker::RegisterRef RS); + + const HexagonInstrInfo &HII; + MachineRegisterInfo &MRI; + BitTracker &BT; + }; +} + + +// Check if the instruction is a lossy shift left, where the input being +// shifted is the operand OpN of MI. If true, [LostB, LostE) is the range +// of bit indices that are lost. +bool RedundantInstrElimination::isLossyShiftLeft(const MachineInstr &MI, + unsigned OpN, unsigned &LostB, unsigned &LostE) { + using namespace Hexagon; + unsigned Opc = MI.getOpcode(); + unsigned ImN, RegN, Width; + switch (Opc) { + case S2_asl_i_p: + ImN = 2; + RegN = 1; + Width = 64; + break; + case S2_asl_i_p_acc: + case S2_asl_i_p_and: + case S2_asl_i_p_nac: + case S2_asl_i_p_or: + case S2_asl_i_p_xacc: + ImN = 3; + RegN = 2; + Width = 64; + break; + case S2_asl_i_r: + ImN = 2; + RegN = 1; + Width = 32; + break; + case S2_addasl_rrri: + case S4_andi_asl_ri: + case S4_ori_asl_ri: + case S4_addi_asl_ri: + case S4_subi_asl_ri: + case S2_asl_i_r_acc: + case S2_asl_i_r_and: + case S2_asl_i_r_nac: + case S2_asl_i_r_or: + case S2_asl_i_r_sat: + case S2_asl_i_r_xacc: + ImN = 3; + RegN = 2; + Width = 32; + break; + default: + return false; + } + + if (RegN != OpN) + return false; + + assert(MI.getOperand(ImN).isImm()); + unsigned S = MI.getOperand(ImN).getImm(); + if (S == 0) + return false; + LostB = Width-S; + LostE = Width; + return true; +} + + +// Check if the instruction is a lossy shift right, where the input being +// shifted is the operand OpN of MI. If true, [LostB, LostE) is the range +// of bit indices that are lost. +bool RedundantInstrElimination::isLossyShiftRight(const MachineInstr &MI, + unsigned OpN, unsigned &LostB, unsigned &LostE) { + using namespace Hexagon; + unsigned Opc = MI.getOpcode(); + unsigned ImN, RegN; + switch (Opc) { + case S2_asr_i_p: + case S2_lsr_i_p: + ImN = 2; + RegN = 1; + break; + case S2_asr_i_p_acc: + case S2_asr_i_p_and: + case S2_asr_i_p_nac: + case S2_asr_i_p_or: + case S2_lsr_i_p_acc: + case S2_lsr_i_p_and: + case S2_lsr_i_p_nac: + case S2_lsr_i_p_or: + case S2_lsr_i_p_xacc: + ImN = 3; + RegN = 2; + break; + case S2_asr_i_r: + case S2_lsr_i_r: + ImN = 2; + RegN = 1; + break; + case S4_andi_lsr_ri: + case S4_ori_lsr_ri: + case S4_addi_lsr_ri: + case S4_subi_lsr_ri: + case S2_asr_i_r_acc: + case S2_asr_i_r_and: + case S2_asr_i_r_nac: + case S2_asr_i_r_or: + case S2_lsr_i_r_acc: + case S2_lsr_i_r_and: + case S2_lsr_i_r_nac: + case S2_lsr_i_r_or: + case S2_lsr_i_r_xacc: + ImN = 3; + RegN = 2; + break; + + default: + return false; + } + + if (RegN != OpN) + return false; + + assert(MI.getOperand(ImN).isImm()); + unsigned S = MI.getOperand(ImN).getImm(); + LostB = 0; + LostE = S; + return true; +} + + +// Calculate the bit vector that corresponds to the used bits of register Reg. 
+// The vector Bits has the same size as Reg, in bits. If the cal-
+// culation fails (i.e. the used bits are unknown), it returns false. Other-
+// wise, it returns true and sets the corresponding bits in Bits.
+bool RedundantInstrElimination::computeUsedBits(unsigned Reg, BitVector &Bits) {
+  BitVector Used(Bits.size());
+  RegisterSet Visited;
+  std::vector<unsigned> Pending;
+  Pending.push_back(Reg);
+
+  for (unsigned i = 0; i < Pending.size(); ++i) {
+    unsigned R = Pending[i];
+    if (Visited.has(R))
+      continue;
+    Visited.insert(R);
+    for (auto I = MRI.use_begin(R), E = MRI.use_end(); I != E; ++I) {
+      BitTracker::RegisterRef UR = *I;
+      unsigned B, W;
+      if (!HBS::getSubregMask(UR, B, W, MRI))
+        return false;
+      MachineInstr &UseI = *I->getParent();
+      if (UseI.isPHI() || UseI.isCopy()) {
+        unsigned DefR = UseI.getOperand(0).getReg();
+        if (!TargetRegisterInfo::isVirtualRegister(DefR))
+          return false;
+        Pending.push_back(DefR);
+      } else {
+        if (!computeUsedBits(UseI, I.getOperandNo(), Used, B))
+          return false;
+      }
+    }
+  }
+  Bits |= Used;
+  return true;
+}
+
+
+// Calculate the bits used by instruction MI in a register in operand OpN.
+// Return true/false if the calculation succeeds/fails. If it succeeds, set
+// used bits in Bits. This function does not reset any bits in Bits, so
+// subsequent calls over different instructions will result in the union
+// of the used bits in all these instructions.
+// The register in question may be used with a sub-register, whereas Bits
+// holds the bits for the entire register. To keep track of that, the
+// argument Begin indicates where in Bits is the least-significant bit
+// of the register used in operand OpN. For example, in instruction:
+//   vreg1 = S2_lsr_i_r vreg2:subreg_hireg, 10
+// operand 1 is a 32-bit register, which happens to be a subregister
+// of the 64-bit register vreg2, and that subregister starts at position 32.
+// In this case Begin=32, since Bits[32] would be the least-significant bit
+// of vreg2:subreg_hireg.
+bool RedundantInstrElimination::computeUsedBits(const MachineInstr &MI,
+      unsigned OpN, BitVector &Bits, uint16_t Begin) {
+  unsigned Opc = MI.getOpcode();
+  BitVector T(Bits.size());
+  bool GotBits = HBS::getUsedBits(Opc, OpN, T, Begin, HII);
+  // Even if we don't have bits yet, we could still provide some information
+  // if the instruction is a lossy shift: the lost bits will be marked as
+  // not used.
+  unsigned LB, LE;
+  if (isLossyShiftLeft(MI, OpN, LB, LE) || isLossyShiftRight(MI, OpN, LB, LE)) {
+    assert(MI.getOperand(OpN).isReg());
+    BitTracker::RegisterRef RR = MI.getOperand(OpN);
+    const TargetRegisterClass *RC = HBS::getFinalVRegClass(RR, MRI);
+    uint16_t Width = RC->getSize()*8;
+
+    if (!GotBits)
+      T.set(Begin, Begin+Width);
+    assert(LB <= LE && LB < Width && LE <= Width);
+    T.reset(Begin+LB, Begin+LE);
+    GotBits = true;
+  }
+  if (GotBits)
+    Bits |= T;
+  return GotBits;
+}
+
+
+// Calculates the used bits in RD ("defined register"), and checks if these
+// bits in RS ("used register") and RD are identical.
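+// For instance (hypothetical), if only bits [0..15] of RD are ever read by
+// its users, it is enough that RD and RS agree on those 16 bits; the upper
+// bits are allowed to differ.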
+bool RedundantInstrElimination::usedBitsEqual(BitTracker::RegisterRef RD, + BitTracker::RegisterRef RS) { + const BitTracker::RegisterCell &DC = BT.lookup(RD.Reg); + const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg); + + unsigned DB, DW; + if (!HBS::getSubregMask(RD, DB, DW, MRI)) + return false; + unsigned SB, SW; + if (!HBS::getSubregMask(RS, SB, SW, MRI)) + return false; + if (SW != DW) + return false; + + BitVector Used(DC.width()); + if (!computeUsedBits(RD.Reg, Used)) + return false; + + for (unsigned i = 0; i != DW; ++i) + if (Used[i+DB] && DC[DB+i] != SC[SB+i]) + return false; + return true; +} + + +bool RedundantInstrElimination::processBlock(MachineBasicBlock &B, + const RegisterSet&) { + bool Changed = false; + + for (auto I = B.begin(), E = B.end(), NextI = I; I != E; ++I) { + NextI = std::next(I); + MachineInstr *MI = &*I; + + if (MI->getOpcode() == TargetOpcode::COPY) + continue; + if (MI->hasUnmodeledSideEffects() || MI->isInlineAsm()) + continue; + unsigned NumD = MI->getDesc().getNumDefs(); + if (NumD != 1) + continue; + + BitTracker::RegisterRef RD = MI->getOperand(0); + if (!BT.has(RD.Reg)) + continue; + const BitTracker::RegisterCell &DC = BT.lookup(RD.Reg); + + // Find a source operand that is equal to the result. + for (auto &Op : MI->uses()) { + if (!Op.isReg()) + continue; + BitTracker::RegisterRef RS = Op; + if (!BT.has(RS.Reg)) + continue; + if (!HBS::isTransparentCopy(RD, RS, MRI)) + continue; + + unsigned BN, BW; + if (!HBS::getSubregMask(RS, BN, BW, MRI)) + continue; + + const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg); + if (!usedBitsEqual(RD, RS) && !HBS::isEqual(DC, 0, SC, BN, BW)) + continue; + + // If found, replace the instruction with a COPY. + DebugLoc DL = MI->getDebugLoc(); + const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI); + unsigned NewR = MRI.createVirtualRegister(FRC); + BuildMI(B, I, DL, HII.get(TargetOpcode::COPY), NewR) + .addReg(RS.Reg, 0, RS.Sub); + HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI); + BT.put(BitTracker::RegisterRef(NewR), SC); + Changed = true; + break; + } + } + + return Changed; +} + + +// +// Const generation +// +// Recognize instructions that produce constant values known at compile-time. +// Replace them with register definitions that load these constants directly. 
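+// A made-up illustration: if the bit tracker proves that
+//   vreg3 = A2_and vreg1, vreg2
+// always produces the value 1, a transfer-immediate
+//   vreg4 = A2_tfrsi #1
+// is inserted and the uses of vreg3 are redirected to vreg4; the original
+// definition is then left for dead-code elimination.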
+namespace { + class ConstGeneration : public Transformation { + public: + ConstGeneration(BitTracker &bt, const HexagonInstrInfo &hii, + MachineRegisterInfo &mri) + : Transformation(true), HII(hii), MRI(mri), BT(bt) {} + bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override; + private: + bool isTfrConst(const MachineInstr *MI) const; + bool isConst(unsigned R, int64_t &V) const; + unsigned genTfrConst(const TargetRegisterClass *RC, int64_t C, + MachineBasicBlock &B, MachineBasicBlock::iterator At, DebugLoc &DL); + + const HexagonInstrInfo &HII; + MachineRegisterInfo &MRI; + BitTracker &BT; + }; +} + +bool ConstGeneration::isConst(unsigned R, int64_t &C) const { + if (!BT.has(R)) + return false; + const BitTracker::RegisterCell &RC = BT.lookup(R); + int64_t T = 0; + for (unsigned i = RC.width(); i > 0; --i) { + const BitTracker::BitValue &V = RC[i-1]; + T <<= 1; + if (V.is(1)) + T |= 1; + else if (!V.is(0)) + return false; + } + C = T; + return true; +} + + +bool ConstGeneration::isTfrConst(const MachineInstr *MI) const { + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case Hexagon::A2_combineii: + case Hexagon::A4_combineii: + case Hexagon::A2_tfrsi: + case Hexagon::A2_tfrpi: + case Hexagon::TFR_PdTrue: + case Hexagon::TFR_PdFalse: + case Hexagon::CONST32_Int_Real: + case Hexagon::CONST64_Int_Real: + return true; + } + return false; +} + + +// Generate a transfer-immediate instruction that is appropriate for the +// register class and the actual value being transferred. +unsigned ConstGeneration::genTfrConst(const TargetRegisterClass *RC, int64_t C, + MachineBasicBlock &B, MachineBasicBlock::iterator At, DebugLoc &DL) { + unsigned Reg = MRI.createVirtualRegister(RC); + if (RC == &Hexagon::IntRegsRegClass) { + BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrsi), Reg) + .addImm(int32_t(C)); + return Reg; + } + + if (RC == &Hexagon::DoubleRegsRegClass) { + if (isInt<8>(C)) { + BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrpi), Reg) + .addImm(C); + return Reg; + } + + unsigned Lo = Lo_32(C), Hi = Hi_32(C); + if (isInt<8>(Lo) || isInt<8>(Hi)) { + unsigned Opc = isInt<8>(Lo) ? Hexagon::A2_combineii + : Hexagon::A4_combineii; + BuildMI(B, At, DL, HII.get(Opc), Reg) + .addImm(int32_t(Hi)) + .addImm(int32_t(Lo)); + return Reg; + } + + BuildMI(B, At, DL, HII.get(Hexagon::CONST64_Int_Real), Reg) + .addImm(C); + return Reg; + } + + if (RC == &Hexagon::PredRegsRegClass) { + unsigned Opc; + if (C == 0) + Opc = Hexagon::TFR_PdFalse; + else if ((C & 0xFF) == 0xFF) + Opc = Hexagon::TFR_PdTrue; + else + return 0; + BuildMI(B, At, DL, HII.get(Opc), Reg); + return Reg; + } + + return 0; +} + + +bool ConstGeneration::processBlock(MachineBasicBlock &B, const RegisterSet&) { + bool Changed = false; + RegisterSet Defs; + + for (auto I = B.begin(), E = B.end(); I != E; ++I) { + if (isTfrConst(I)) + continue; + Defs.clear(); + HBS::getInstrDefs(*I, Defs); + if (Defs.count() != 1) + continue; + unsigned DR = Defs.find_first(); + if (!TargetRegisterInfo::isVirtualRegister(DR)) + continue; + int64_t C; + if (isConst(DR, C)) { + DebugLoc DL = I->getDebugLoc(); + auto At = I->isPHI() ? B.getFirstNonPHI() : I; + unsigned ImmReg = genTfrConst(MRI.getRegClass(DR), C, B, At, DL); + if (ImmReg) { + HBS::replaceReg(DR, ImmReg, MRI); + BT.put(ImmReg, BT.lookup(DR)); + Changed = true; + } + } + } + return Changed; +} + + +// +// Copy generation +// +// Identify pairs of available registers which hold identical values. 
+// In such cases, only one of them needs to be calculated, the other one +// will be defined as a copy of the first. +// +// Copy propagation +// +// Eliminate register copies RD = RS, by replacing the uses of RD with +// with uses of RS. +namespace { + class CopyGeneration : public Transformation { + public: + CopyGeneration(BitTracker &bt, const HexagonInstrInfo &hii, + MachineRegisterInfo &mri) + : Transformation(true), HII(hii), MRI(mri), BT(bt) {} + bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override; + private: + bool findMatch(const BitTracker::RegisterRef &Inp, + BitTracker::RegisterRef &Out, const RegisterSet &AVs); + + const HexagonInstrInfo &HII; + MachineRegisterInfo &MRI; + BitTracker &BT; + }; + + class CopyPropagation : public Transformation { + public: + CopyPropagation(const HexagonRegisterInfo &hri, MachineRegisterInfo &mri) + : Transformation(false), MRI(mri) {} + bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override; + static bool isCopyReg(unsigned Opc); + private: + bool propagateRegCopy(MachineInstr &MI); + + MachineRegisterInfo &MRI; + }; + +} + + +/// Check if there is a register in AVs that is identical to Inp. If so, +/// set Out to the found register. The output may be a pair Reg:Sub. +bool CopyGeneration::findMatch(const BitTracker::RegisterRef &Inp, + BitTracker::RegisterRef &Out, const RegisterSet &AVs) { + if (!BT.has(Inp.Reg)) + return false; + const BitTracker::RegisterCell &InpRC = BT.lookup(Inp.Reg); + unsigned B, W; + if (!HBS::getSubregMask(Inp, B, W, MRI)) + return false; + + for (unsigned R = AVs.find_first(); R; R = AVs.find_next(R)) { + if (!BT.has(R) || !HBS::isTransparentCopy(R, Inp, MRI)) + continue; + const BitTracker::RegisterCell &RC = BT.lookup(R); + unsigned RW = RC.width(); + if (W == RW) { + if (MRI.getRegClass(Inp.Reg) != MRI.getRegClass(R)) + continue; + if (!HBS::isEqual(InpRC, B, RC, 0, W)) + continue; + Out.Reg = R; + Out.Sub = 0; + return true; + } + // Check if there is a super-register, whose part (with a subregister) + // is equal to the input. + // Only do double registers for now. + if (W*2 != RW) + continue; + if (MRI.getRegClass(R) != &Hexagon::DoubleRegsRegClass) + continue; + + if (HBS::isEqual(InpRC, B, RC, 0, W)) + Out.Sub = Hexagon::subreg_loreg; + else if (HBS::isEqual(InpRC, B, RC, W, W)) + Out.Sub = Hexagon::subreg_hireg; + else + continue; + Out.Reg = R; + return true; + } + return false; +} + + +bool CopyGeneration::processBlock(MachineBasicBlock &B, + const RegisterSet &AVs) { + RegisterSet AVB(AVs); + bool Changed = false; + RegisterSet Defs; + + for (auto I = B.begin(), E = B.end(), NextI = I; I != E; + ++I, AVB.insert(Defs)) { + NextI = std::next(I); + Defs.clear(); + HBS::getInstrDefs(*I, Defs); + + unsigned Opc = I->getOpcode(); + if (CopyPropagation::isCopyReg(Opc)) + continue; + + for (unsigned R = Defs.find_first(); R; R = Defs.find_next(R)) { + BitTracker::RegisterRef MR; + if (!findMatch(R, MR, AVB)) + continue; + DebugLoc DL = I->getDebugLoc(); + auto *FRC = HBS::getFinalVRegClass(MR, MRI); + unsigned NewR = MRI.createVirtualRegister(FRC); + auto At = I->isPHI() ? 
B.getFirstNonPHI() : I; + BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR) + .addReg(MR.Reg, 0, MR.Sub); + BT.put(BitTracker::RegisterRef(NewR), BT.get(MR)); + } + } + + return Changed; +} + + +bool CopyPropagation::isCopyReg(unsigned Opc) { + switch (Opc) { + case TargetOpcode::COPY: + case TargetOpcode::REG_SEQUENCE: + case Hexagon::A2_tfr: + case Hexagon::A2_tfrp: + case Hexagon::A2_combinew: + case Hexagon::A4_combineir: + case Hexagon::A4_combineri: + return true; + default: + break; + } + return false; +} + + +bool CopyPropagation::propagateRegCopy(MachineInstr &MI) { + bool Changed = false; + unsigned Opc = MI.getOpcode(); + BitTracker::RegisterRef RD = MI.getOperand(0); + assert(MI.getOperand(0).getSubReg() == 0); + + switch (Opc) { + case TargetOpcode::COPY: + case Hexagon::A2_tfr: + case Hexagon::A2_tfrp: { + BitTracker::RegisterRef RS = MI.getOperand(1); + if (!HBS::isTransparentCopy(RD, RS, MRI)) + break; + if (RS.Sub != 0) + Changed = HBS::replaceRegWithSub(RD.Reg, RS.Reg, RS.Sub, MRI); + else + Changed = HBS::replaceReg(RD.Reg, RS.Reg, MRI); + break; + } + case TargetOpcode::REG_SEQUENCE: { + BitTracker::RegisterRef SL, SH; + if (HBS::parseRegSequence(MI, SL, SH)) { + Changed = HBS::replaceSubWithSub(RD.Reg, Hexagon::subreg_loreg, + SL.Reg, SL.Sub, MRI); + Changed |= HBS::replaceSubWithSub(RD.Reg, Hexagon::subreg_hireg, + SH.Reg, SH.Sub, MRI); + } + break; + } + case Hexagon::A2_combinew: { + BitTracker::RegisterRef RH = MI.getOperand(1), RL = MI.getOperand(2); + Changed = HBS::replaceSubWithSub(RD.Reg, Hexagon::subreg_loreg, + RL.Reg, RL.Sub, MRI); + Changed |= HBS::replaceSubWithSub(RD.Reg, Hexagon::subreg_hireg, + RH.Reg, RH.Sub, MRI); + break; + } + case Hexagon::A4_combineir: + case Hexagon::A4_combineri: { + unsigned SrcX = (Opc == Hexagon::A4_combineir) ? 2 : 1; + unsigned Sub = (Opc == Hexagon::A4_combineir) ? Hexagon::subreg_loreg + : Hexagon::subreg_hireg; + BitTracker::RegisterRef RS = MI.getOperand(SrcX); + Changed = HBS::replaceSubWithSub(RD.Reg, Sub, RS.Reg, RS.Sub, MRI); + break; + } + } + return Changed; +} + + +bool CopyPropagation::processBlock(MachineBasicBlock &B, const RegisterSet&) { + std::vector<MachineInstr*> Instrs; + for (auto I = B.rbegin(), E = B.rend(); I != E; ++I) + Instrs.push_back(&*I); + + bool Changed = false; + for (auto I : Instrs) { + unsigned Opc = I->getOpcode(); + if (!CopyPropagation::isCopyReg(Opc)) + continue; + Changed |= propagateRegCopy(*I); + } + + return Changed; +} + + +// +// Bit simplification +// +// Recognize patterns that can be simplified and replace them with the +// simpler forms. +// This is by no means complete +namespace { + class BitSimplification : public Transformation { + public: + BitSimplification(BitTracker &bt, const HexagonInstrInfo &hii, + MachineRegisterInfo &mri) + : Transformation(true), HII(hii), MRI(mri), BT(bt) {} + bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override; + private: + struct RegHalf : public BitTracker::RegisterRef { + bool Low; // Low/High halfword. 
+ }; + + bool matchHalf(unsigned SelfR, const BitTracker::RegisterCell &RC, + unsigned B, RegHalf &RH); + + bool matchPackhl(unsigned SelfR, const BitTracker::RegisterCell &RC, + BitTracker::RegisterRef &Rs, BitTracker::RegisterRef &Rt); + unsigned getCombineOpcode(bool HLow, bool LLow); + + bool genStoreUpperHalf(MachineInstr *MI); + bool genStoreImmediate(MachineInstr *MI); + bool genPackhl(MachineInstr *MI, BitTracker::RegisterRef RD, + const BitTracker::RegisterCell &RC); + bool genExtractHalf(MachineInstr *MI, BitTracker::RegisterRef RD, + const BitTracker::RegisterCell &RC); + bool genCombineHalf(MachineInstr *MI, BitTracker::RegisterRef RD, + const BitTracker::RegisterCell &RC); + bool genExtractLow(MachineInstr *MI, BitTracker::RegisterRef RD, + const BitTracker::RegisterCell &RC); + bool simplifyTstbit(MachineInstr *MI, BitTracker::RegisterRef RD, + const BitTracker::RegisterCell &RC); + + const HexagonInstrInfo &HII; + MachineRegisterInfo &MRI; + BitTracker &BT; + }; +} + + +// Check if the bits [B..B+16) in register cell RC form a valid halfword, +// i.e. [0..16), [16..32), etc. of some register. If so, return true and +// set the information about the found register in RH. +bool BitSimplification::matchHalf(unsigned SelfR, + const BitTracker::RegisterCell &RC, unsigned B, RegHalf &RH) { + // XXX This could be searching in the set of available registers, in case + // the match is not exact. + + // Match 16-bit chunks, where the RC[B..B+15] references exactly one + // register and all the bits B..B+15 match between RC and the register. + // This is meant to match "v1[0-15]", where v1 = { [0]:0 [1-15]:v1... }, + // and RC = { [0]:0 [1-15]:v1[1-15]... }. + bool Low = false; + unsigned I = B; + while (I < B+16 && RC[I].num()) + I++; + if (I == B+16) + return false; + + unsigned Reg = RC[I].RefI.Reg; + unsigned P = RC[I].RefI.Pos; // The RefI.Pos will be advanced by I-B. + if (P < I-B) + return false; + unsigned Pos = P - (I-B); + + if (Reg == 0 || Reg == SelfR) // Don't match "self". + return false; + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + return false; + if (!BT.has(Reg)) + return false; + + const BitTracker::RegisterCell &SC = BT.lookup(Reg); + if (Pos+16 > SC.width()) + return false; + + for (unsigned i = 0; i < 16; ++i) { + const BitTracker::BitValue &RV = RC[i+B]; + if (RV.Type == BitTracker::BitValue::Ref) { + if (RV.RefI.Reg != Reg) + return false; + if (RV.RefI.Pos != i+Pos) + return false; + continue; + } + if (RC[i+B] != SC[i+Pos]) + return false; + } + + unsigned Sub = 0; + switch (Pos) { + case 0: + Sub = Hexagon::subreg_loreg; + Low = true; + break; + case 16: + Sub = Hexagon::subreg_loreg; + Low = false; + break; + case 32: + Sub = Hexagon::subreg_hireg; + Low = true; + break; + case 48: + Sub = Hexagon::subreg_hireg; + Low = false; + break; + default: + return false; + } + + RH.Reg = Reg; + RH.Sub = Sub; + RH.Low = Low; + // If the subregister is not valid with the register, set it to 0. + if (!HBS::getFinalVRegClass(RH, MRI)) + RH.Sub = 0; + + return true; +} + + +// Check if RC matches the pattern of a S2_packhl. If so, return true and +// set the inputs Rs and Rt. 
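+// For reference (as implied by the matching code below), S2_packhl Rdd, Rs,
+// Rt produces, from most to least significant halfword, Rs.H : Rt.H : Rs.L :
+// Rt.L, so the matching expects one register's halves at bit positions 48
+// and 16 and the other's at positions 32 and 0.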
+bool BitSimplification::matchPackhl(unsigned SelfR, + const BitTracker::RegisterCell &RC, BitTracker::RegisterRef &Rs, + BitTracker::RegisterRef &Rt) { + RegHalf L1, H1, L2, H2; + + if (!matchHalf(SelfR, RC, 0, L2) || !matchHalf(SelfR, RC, 16, L1)) + return false; + if (!matchHalf(SelfR, RC, 32, H2) || !matchHalf(SelfR, RC, 48, H1)) + return false; + + // Rs = H1.L1, Rt = H2.L2 + if (H1.Reg != L1.Reg || H1.Sub != L1.Sub || H1.Low || !L1.Low) + return false; + if (H2.Reg != L2.Reg || H2.Sub != L2.Sub || H2.Low || !L2.Low) + return false; + + Rs = H1; + Rt = H2; + return true; +} + + +unsigned BitSimplification::getCombineOpcode(bool HLow, bool LLow) { + return HLow ? LLow ? Hexagon::A2_combine_ll + : Hexagon::A2_combine_lh + : LLow ? Hexagon::A2_combine_hl + : Hexagon::A2_combine_hh; +} + + +// If MI stores the upper halfword of a register (potentially obtained via +// shifts or extracts), replace it with a storerf instruction. This could +// cause the "extraction" code to become dead. +bool BitSimplification::genStoreUpperHalf(MachineInstr *MI) { + unsigned Opc = MI->getOpcode(); + if (Opc != Hexagon::S2_storerh_io) + return false; + + MachineOperand &ValOp = MI->getOperand(2); + BitTracker::RegisterRef RS = ValOp; + if (!BT.has(RS.Reg)) + return false; + const BitTracker::RegisterCell &RC = BT.lookup(RS.Reg); + RegHalf H; + if (!matchHalf(0, RC, 0, H)) + return false; + if (H.Low) + return false; + MI->setDesc(HII.get(Hexagon::S2_storerf_io)); + ValOp.setReg(H.Reg); + ValOp.setSubReg(H.Sub); + return true; +} + + +// If MI stores a value known at compile-time, and the value is within a range +// that avoids using constant-extenders, replace it with a store-immediate. +bool BitSimplification::genStoreImmediate(MachineInstr *MI) { + unsigned Opc = MI->getOpcode(); + unsigned Align = 0; + switch (Opc) { + case Hexagon::S2_storeri_io: + Align++; + case Hexagon::S2_storerh_io: + Align++; + case Hexagon::S2_storerb_io: + break; + default: + return false; + } + + // Avoid stores to frame-indices (due to an unknown offset). + if (!MI->getOperand(0).isReg()) + return false; + MachineOperand &OffOp = MI->getOperand(1); + if (!OffOp.isImm()) + return false; + + int64_t Off = OffOp.getImm(); + // Offset is u6:a. Sadly, there is no isShiftedUInt(n,x). + if (!isUIntN(6+Align, Off) || (Off & ((1<<Align)-1))) + return false; + // Source register: + BitTracker::RegisterRef RS = MI->getOperand(2); + if (!BT.has(RS.Reg)) + return false; + const BitTracker::RegisterCell &RC = BT.lookup(RS.Reg); + uint64_t U; + if (!HBS::getConst(RC, 0, RC.width(), U)) + return false; + + // Only consider 8-bit values to avoid constant-extenders. + int V; + switch (Opc) { + case Hexagon::S2_storerb_io: + V = int8_t(U); + break; + case Hexagon::S2_storerh_io: + V = int16_t(U); + break; + case Hexagon::S2_storeri_io: + V = int32_t(U); + break; + } + if (!isInt<8>(V)) + return false; + + MI->RemoveOperand(2); + switch (Opc) { + case Hexagon::S2_storerb_io: + MI->setDesc(HII.get(Hexagon::S4_storeirb_io)); + break; + case Hexagon::S2_storerh_io: + MI->setDesc(HII.get(Hexagon::S4_storeirh_io)); + break; + case Hexagon::S2_storeri_io: + MI->setDesc(HII.get(Hexagon::S4_storeiri_io)); + break; + } + MI->addOperand(MachineOperand::CreateImm(V)); + return true; +} + + +// If MI is equivalent o S2_packhl, generate the S2_packhl. MI could be the +// last instruction in a sequence that results in something equivalent to +// the pack-halfwords. The intent is to cause the entire sequence to become +// dead. 
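+// A made-up instance of such a sequence:
+//   vreg3 = A2_combine_ll vreg1, vreg2
+//   vreg4 = A2_combine_hh vreg1, vreg2
+//   vreg5 = A2_combinew vreg4, vreg3
+// The cell computed for vreg5 is vreg1.H : vreg2.H : vreg1.L : vreg2.L, so
+// the last instruction can be replaced with S2_packhl of vreg1 and vreg2,
+// leaving the two combines with no users.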
+bool BitSimplification::genPackhl(MachineInstr *MI,
+      BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
+  unsigned Opc = MI->getOpcode();
+  if (Opc == Hexagon::S2_packhl)
+    return false;
+  BitTracker::RegisterRef Rs, Rt;
+  if (!matchPackhl(RD.Reg, RC, Rs, Rt))
+    return false;
+
+  MachineBasicBlock &B = *MI->getParent();
+  unsigned NewR = MRI.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
+  DebugLoc DL = MI->getDebugLoc();
+  BuildMI(B, MI, DL, HII.get(Hexagon::S2_packhl), NewR)
+      .addReg(Rs.Reg, 0, Rs.Sub)
+      .addReg(Rt.Reg, 0, Rt.Sub);
+  HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
+  BT.put(BitTracker::RegisterRef(NewR), RC);
+  return true;
+}
+
+
+// If MI produces a halfword of the input in the low half of the output,
+// replace it with zero-extend or extractu.
+bool BitSimplification::genExtractHalf(MachineInstr *MI,
+      BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
+  RegHalf L;
+  // Check for halfword in low 16 bits, zeros elsewhere.
+  if (!matchHalf(RD.Reg, RC, 0, L) || !HBS::isZero(RC, 16, 16))
+    return false;
+
+  unsigned Opc = MI->getOpcode();
+  MachineBasicBlock &B = *MI->getParent();
+  DebugLoc DL = MI->getDebugLoc();
+
+  // Prefer zxth, since zxth can go in any slot, while extractu only in
+  // slots 2 and 3.
+  unsigned NewR = 0;
+  if (L.Low && Opc != Hexagon::A2_zxth) {
+    NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
+    BuildMI(B, MI, DL, HII.get(Hexagon::A2_zxth), NewR)
+        .addReg(L.Reg, 0, L.Sub);
+  } else if (!L.Low && Opc != Hexagon::S2_extractu) {
+    NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
+    BuildMI(B, MI, DL, HII.get(Hexagon::S2_extractu), NewR)
+        .addReg(L.Reg, 0, L.Sub)
+        .addImm(16)
+        .addImm(16);
+  }
+  if (NewR == 0)
+    return false;
+  HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
+  BT.put(BitTracker::RegisterRef(NewR), RC);
+  return true;
+}
+
+
+// If MI is equivalent to a combine(.L/.H, .L/.H), replace it with the
+// combine.
+bool BitSimplification::genCombineHalf(MachineInstr *MI,
+      BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
+  RegHalf L, H;
+  // Check for combine h/l
+  if (!matchHalf(RD.Reg, RC, 0, L) || !matchHalf(RD.Reg, RC, 16, H))
+    return false;
+  // Do nothing if this is just a reg copy.
+  if (L.Reg == H.Reg && L.Sub == H.Sub && !H.Low && L.Low)
+    return false;
+
+  unsigned Opc = MI->getOpcode();
+  unsigned COpc = getCombineOpcode(H.Low, L.Low);
+  if (COpc == Opc)
+    return false;
+
+  MachineBasicBlock &B = *MI->getParent();
+  DebugLoc DL = MI->getDebugLoc();
+  unsigned NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
+  BuildMI(B, MI, DL, HII.get(COpc), NewR)
+      .addReg(H.Reg, 0, H.Sub)
+      .addReg(L.Reg, 0, L.Sub);
+  HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
+  BT.put(BitTracker::RegisterRef(NewR), RC);
+  return true;
+}
+
+
+// If MI resets high bits of a register and keeps the lower ones, replace it
+// with zero-extend byte/half, and-immediate, or extractu, as appropriate.
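+// E.g. (made-up registers) a 32-bit result whose bits [8..31] are known to
+// be zero and whose bits [0..7] equal the low byte of vreg7 can be produced
+// by A2_zxtb of vreg7; widths of 16 use A2_zxth, widths below 10 use
+// A2_andir with the matching mask, and anything else uses S2_extractu.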
+bool BitSimplification::genExtractLow(MachineInstr *MI, + BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) { + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case Hexagon::A2_zxtb: + case Hexagon::A2_zxth: + case Hexagon::S2_extractu: + return false; + } + if (Opc == Hexagon::A2_andir && MI->getOperand(2).isImm()) { + int32_t Imm = MI->getOperand(2).getImm(); + if (isInt<10>(Imm)) + return false; + } + + if (MI->hasUnmodeledSideEffects() || MI->isInlineAsm()) + return false; + unsigned W = RC.width(); + while (W > 0 && RC[W-1].is(0)) + W--; + if (W == 0 || W == RC.width()) + return false; + unsigned NewOpc = (W == 8) ? Hexagon::A2_zxtb + : (W == 16) ? Hexagon::A2_zxth + : (W < 10) ? Hexagon::A2_andir + : Hexagon::S2_extractu; + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + + for (auto &Op : MI->uses()) { + if (!Op.isReg()) + continue; + BitTracker::RegisterRef RS = Op; + if (!BT.has(RS.Reg)) + continue; + const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg); + unsigned BN, BW; + if (!HBS::getSubregMask(RS, BN, BW, MRI)) + continue; + if (BW < W || !HBS::isEqual(RC, 0, SC, BN, W)) + continue; + + unsigned NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); + auto MIB = BuildMI(B, MI, DL, HII.get(NewOpc), NewR) + .addReg(RS.Reg, 0, RS.Sub); + if (NewOpc == Hexagon::A2_andir) + MIB.addImm((1 << W) - 1); + else if (NewOpc == Hexagon::S2_extractu) + MIB.addImm(W).addImm(0); + HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI); + BT.put(BitTracker::RegisterRef(NewR), RC); + return true; + } + return false; +} + + +// Check for tstbit simplification opportunity, where the bit being checked +// can be tracked back to another register. For example: +// vreg2 = S2_lsr_i_r vreg1, 5 +// vreg3 = S2_tstbit_i vreg2, 0 +// => +// vreg3 = S2_tstbit_i vreg1, 5 +bool BitSimplification::simplifyTstbit(MachineInstr *MI, + BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) { + unsigned Opc = MI->getOpcode(); + if (Opc != Hexagon::S2_tstbit_i) + return false; + + unsigned BN = MI->getOperand(2).getImm(); + BitTracker::RegisterRef RS = MI->getOperand(1); + unsigned F, W; + DebugLoc DL = MI->getDebugLoc(); + if (!BT.has(RS.Reg) || !HBS::getSubregMask(RS, F, W, MRI)) + return false; + MachineBasicBlock &B = *MI->getParent(); + + const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg); + const BitTracker::BitValue &V = SC[F+BN]; + if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg != RS.Reg) { + const TargetRegisterClass *TC = MRI.getRegClass(V.RefI.Reg); + // Need to map V.RefI.Reg to a 32-bit register, i.e. if it is + // a double register, need to use a subregister and adjust bit + // number. + unsigned P = UINT_MAX; + BitTracker::RegisterRef RR(V.RefI.Reg, 0); + if (TC == &Hexagon::DoubleRegsRegClass) { + P = V.RefI.Pos; + RR.Sub = Hexagon::subreg_loreg; + if (P >= 32) { + P -= 32; + RR.Sub = Hexagon::subreg_hireg; + } + } else if (TC == &Hexagon::IntRegsRegClass) { + P = V.RefI.Pos; + } + if (P != UINT_MAX) { + unsigned NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass); + BuildMI(B, MI, DL, HII.get(Hexagon::S2_tstbit_i), NewR) + .addReg(RR.Reg, 0, RR.Sub) + .addImm(P); + HBS::replaceReg(RD.Reg, NewR, MRI); + BT.put(NewR, RC); + return true; + } + } else if (V.is(0) || V.is(1)) { + unsigned NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass); + unsigned NewOpc = V.is(0) ? 
Hexagon::TFR_PdFalse : Hexagon::TFR_PdTrue; + BuildMI(B, MI, DL, HII.get(NewOpc), NewR); + HBS::replaceReg(RD.Reg, NewR, MRI); + return true; + } + + return false; +} + + +bool BitSimplification::processBlock(MachineBasicBlock &B, + const RegisterSet &AVs) { + bool Changed = false; + RegisterSet AVB = AVs; + RegisterSet Defs; + + for (auto I = B.begin(), E = B.end(); I != E; ++I, AVB.insert(Defs)) { + MachineInstr *MI = &*I; + Defs.clear(); + HBS::getInstrDefs(*MI, Defs); + + unsigned Opc = MI->getOpcode(); + if (Opc == TargetOpcode::COPY || Opc == TargetOpcode::REG_SEQUENCE) + continue; + + if (MI->mayStore()) { + bool T = genStoreUpperHalf(MI); + T = T || genStoreImmediate(MI); + Changed |= T; + continue; + } + + if (Defs.count() != 1) + continue; + const MachineOperand &Op0 = MI->getOperand(0); + if (!Op0.isReg() || !Op0.isDef()) + continue; + BitTracker::RegisterRef RD = Op0; + if (!BT.has(RD.Reg)) + continue; + const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI); + const BitTracker::RegisterCell &RC = BT.lookup(RD.Reg); + + if (FRC->getID() == Hexagon::DoubleRegsRegClassID) { + bool T = genPackhl(MI, RD, RC); + Changed |= T; + continue; + } + + if (FRC->getID() == Hexagon::IntRegsRegClassID) { + bool T = genExtractHalf(MI, RD, RC); + T = T || genCombineHalf(MI, RD, RC); + T = T || genExtractLow(MI, RD, RC); + Changed |= T; + continue; + } + + if (FRC->getID() == Hexagon::PredRegsRegClassID) { + bool T = simplifyTstbit(MI, RD, RC); + Changed |= T; + continue; + } + } + return Changed; +} + + +bool HexagonBitSimplify::runOnMachineFunction(MachineFunction &MF) { + auto &HST = MF.getSubtarget<HexagonSubtarget>(); + auto &HRI = *HST.getRegisterInfo(); + auto &HII = *HST.getInstrInfo(); + + MDT = &getAnalysis<MachineDominatorTree>(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + bool Changed; + + Changed = DeadCodeElimination(MF, *MDT).run(); + + const HexagonEvaluator HE(HRI, MRI, HII, MF); + BitTracker BT(HE, MF); + DEBUG(BT.trace(true)); + BT.run(); + + MachineBasicBlock &Entry = MF.front(); + + RegisterSet AIG; // Available registers for IG. + ConstGeneration ImmG(BT, HII, MRI); + Changed |= visitBlock(Entry, ImmG, AIG); + + RegisterSet ARE; // Available registers for RIE. + RedundantInstrElimination RIE(BT, HII, MRI); + Changed |= visitBlock(Entry, RIE, ARE); + + RegisterSet ACG; // Available registers for CG. + CopyGeneration CopyG(BT, HII, MRI); + Changed |= visitBlock(Entry, CopyG, ACG); + + RegisterSet ACP; // Available registers for CP. + CopyPropagation CopyP(HRI, MRI); + Changed |= visitBlock(Entry, CopyP, ACP); + + Changed = DeadCodeElimination(MF, *MDT).run() || Changed; + + BT.run(); + RegisterSet ABS; // Available registers for BS. + BitSimplification BitS(BT, HII, MRI); + Changed |= visitBlock(Entry, BitS, ABS); + + Changed = DeadCodeElimination(MF, *MDT).run() || Changed; + + if (Changed) { + for (auto &B : MF) + for (auto &I : B) + I.clearKillInfo(); + DeadCodeElimination(MF, *MDT).run(); + } + return Changed; +} + + +// Recognize loops where the code at the end of the loop matches the code +// before the entry of the loop, and the matching code is such that is can +// be simplified. This pass relies on the bit simplification above and only +// prepares code in a way that can be handled by the bit simplifcation. 
+// +// This is the motivating testcase (and explanation): +// +// { +// loop0(.LBB0_2, r1) // %for.body.preheader +// r5:4 = memd(r0++#8) +// } +// { +// r3 = lsr(r4, #16) +// r7:6 = combine(r5, r5) +// } +// { +// r3 = insert(r5, #16, #16) +// r7:6 = vlsrw(r7:6, #16) +// } +// .LBB0_2: +// { +// memh(r2+#4) = r5 +// memh(r2+#6) = r6 # R6 is really R5.H +// } +// { +// r2 = add(r2, #8) +// memh(r2+#0) = r4 +// memh(r2+#2) = r3 # R3 is really R4.H +// } +// { +// r5:4 = memd(r0++#8) +// } +// { # "Shuffling" code that sets up R3 and R6 +// r3 = lsr(r4, #16) # so that their halves can be stored in the +// r7:6 = combine(r5, r5) # next iteration. This could be folded into +// } # the stores if the code was at the beginning +// { # of the loop iteration. Since the same code +// r3 = insert(r5, #16, #16) # precedes the loop, it can actually be moved +// r7:6 = vlsrw(r7:6, #16) # there. +// }:endloop0 +// +// +// The outcome: +// +// { +// loop0(.LBB0_2, r1) +// r5:4 = memd(r0++#8) +// } +// .LBB0_2: +// { +// memh(r2+#4) = r5 +// memh(r2+#6) = r5.h +// } +// { +// r2 = add(r2, #8) +// memh(r2+#0) = r4 +// memh(r2+#2) = r4.h +// } +// { +// r5:4 = memd(r0++#8) +// }:endloop0 + +namespace llvm { + FunctionPass *createHexagonLoopRescheduling(); + void initializeHexagonLoopReschedulingPass(PassRegistry&); +} + +namespace { + class HexagonLoopRescheduling : public MachineFunctionPass { + public: + static char ID; + HexagonLoopRescheduling() : MachineFunctionPass(ID), + HII(0), HRI(0), MRI(0), BTP(0) { + initializeHexagonLoopReschedulingPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + private: + const HexagonInstrInfo *HII; + const HexagonRegisterInfo *HRI; + MachineRegisterInfo *MRI; + BitTracker *BTP; + + struct LoopCand { + LoopCand(MachineBasicBlock *lb, MachineBasicBlock *pb, + MachineBasicBlock *eb) : LB(lb), PB(pb), EB(eb) {} + MachineBasicBlock *LB, *PB, *EB; + }; + typedef std::vector<MachineInstr*> InstrList; + struct InstrGroup { + BitTracker::RegisterRef Inp, Out; + InstrList Ins; + }; + struct PhiInfo { + PhiInfo(MachineInstr &P, MachineBasicBlock &B); + unsigned DefR; + BitTracker::RegisterRef LR, PR; + MachineBasicBlock *LB, *PB; + }; + + static unsigned getDefReg(const MachineInstr *MI); + bool isConst(unsigned Reg) const; + bool isBitShuffle(const MachineInstr *MI, unsigned DefR) const; + bool isStoreInput(const MachineInstr *MI, unsigned DefR) const; + bool isShuffleOf(unsigned OutR, unsigned InpR) const; + bool isSameShuffle(unsigned OutR1, unsigned InpR1, unsigned OutR2, + unsigned &InpR2) const; + void moveGroup(InstrGroup &G, MachineBasicBlock &LB, MachineBasicBlock &PB, + MachineBasicBlock::iterator At, unsigned OldPhiR, unsigned NewPredR); + bool processLoop(LoopCand &C); + }; +} + +char HexagonLoopRescheduling::ID = 0; + +INITIALIZE_PASS(HexagonLoopRescheduling, "hexagon-loop-resched", + "Hexagon Loop Rescheduling", false, false) + + +HexagonLoopRescheduling::PhiInfo::PhiInfo(MachineInstr &P, + MachineBasicBlock &B) { + DefR = HexagonLoopRescheduling::getDefReg(&P); + LB = &B; + PB = nullptr; + for (unsigned i = 1, n = P.getNumOperands(); i < n; i += 2) { + const MachineOperand &OpB = P.getOperand(i+1); + if (OpB.getMBB() == &B) { + LR = P.getOperand(i); + continue; + } + PB = OpB.getMBB(); + PR = P.getOperand(i); + } +} + + +unsigned HexagonLoopRescheduling::getDefReg(const MachineInstr *MI) { + RegisterSet Defs; + HBS::getInstrDefs(*MI, Defs); + if (Defs.count() != 1) + return 0; + return Defs.find_first(); +} + + 
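+// Check whether every bit of Reg has a known constant value in the bit
+// tracker, e.g. (hypothetically) the result of an A2_tfrsi with an immediate
+// operand; registers with any unknown or symbolic bits are not considered
+// constant.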
+bool HexagonLoopRescheduling::isConst(unsigned Reg) const { + if (!BTP->has(Reg)) + return false; + const BitTracker::RegisterCell &RC = BTP->lookup(Reg); + for (unsigned i = 0, w = RC.width(); i < w; ++i) { + const BitTracker::BitValue &V = RC[i]; + if (!V.is(0) && !V.is(1)) + return false; + } + return true; +} + + +bool HexagonLoopRescheduling::isBitShuffle(const MachineInstr *MI, + unsigned DefR) const { + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case TargetOpcode::COPY: + case Hexagon::S2_lsr_i_r: + case Hexagon::S2_asr_i_r: + case Hexagon::S2_asl_i_r: + case Hexagon::S2_lsr_i_p: + case Hexagon::S2_asr_i_p: + case Hexagon::S2_asl_i_p: + case Hexagon::S2_insert: + case Hexagon::A2_or: + case Hexagon::A2_orp: + case Hexagon::A2_and: + case Hexagon::A2_andp: + case Hexagon::A2_combinew: + case Hexagon::A4_combineri: + case Hexagon::A4_combineir: + case Hexagon::A2_combineii: + case Hexagon::A4_combineii: + case Hexagon::A2_combine_ll: + case Hexagon::A2_combine_lh: + case Hexagon::A2_combine_hl: + case Hexagon::A2_combine_hh: + return true; + } + return false; +} + + +bool HexagonLoopRescheduling::isStoreInput(const MachineInstr *MI, + unsigned InpR) const { + for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { + const MachineOperand &Op = MI->getOperand(i); + if (!Op.isReg()) + continue; + if (Op.getReg() == InpR) + return i == n-1; + } + return false; +} + + +bool HexagonLoopRescheduling::isShuffleOf(unsigned OutR, unsigned InpR) const { + if (!BTP->has(OutR) || !BTP->has(InpR)) + return false; + const BitTracker::RegisterCell &OutC = BTP->lookup(OutR); + for (unsigned i = 0, w = OutC.width(); i < w; ++i) { + const BitTracker::BitValue &V = OutC[i]; + if (V.Type != BitTracker::BitValue::Ref) + continue; + if (V.RefI.Reg != InpR) + return false; + } + return true; +} + + +bool HexagonLoopRescheduling::isSameShuffle(unsigned OutR1, unsigned InpR1, + unsigned OutR2, unsigned &InpR2) const { + if (!BTP->has(OutR1) || !BTP->has(InpR1) || !BTP->has(OutR2)) + return false; + const BitTracker::RegisterCell &OutC1 = BTP->lookup(OutR1); + const BitTracker::RegisterCell &OutC2 = BTP->lookup(OutR2); + unsigned W = OutC1.width(); + unsigned MatchR = 0; + if (W != OutC2.width()) + return false; + for (unsigned i = 0; i < W; ++i) { + const BitTracker::BitValue &V1 = OutC1[i], &V2 = OutC2[i]; + if (V1.Type != V2.Type || V1.Type == BitTracker::BitValue::One) + return false; + if (V1.Type != BitTracker::BitValue::Ref) + continue; + if (V1.RefI.Pos != V2.RefI.Pos) + return false; + if (V1.RefI.Reg != InpR1) + return false; + if (V2.RefI.Reg == 0 || V2.RefI.Reg == OutR2) + return false; + if (!MatchR) + MatchR = V2.RefI.Reg; + else if (V2.RefI.Reg != MatchR) + return false; + } + InpR2 = MatchR; + return true; +} + + +void HexagonLoopRescheduling::moveGroup(InstrGroup &G, MachineBasicBlock &LB, + MachineBasicBlock &PB, MachineBasicBlock::iterator At, unsigned OldPhiR, + unsigned NewPredR) { + DenseMap<unsigned,unsigned> RegMap; + + const TargetRegisterClass *PhiRC = MRI->getRegClass(NewPredR); + unsigned PhiR = MRI->createVirtualRegister(PhiRC); + BuildMI(LB, At, At->getDebugLoc(), HII->get(TargetOpcode::PHI), PhiR) + .addReg(NewPredR) + .addMBB(&PB) + .addReg(G.Inp.Reg) + .addMBB(&LB); + RegMap.insert(std::make_pair(G.Inp.Reg, PhiR)); + + for (unsigned i = G.Ins.size(); i > 0; --i) { + const MachineInstr *SI = G.Ins[i-1]; + unsigned DR = getDefReg(SI); + const TargetRegisterClass *RC = MRI->getRegClass(DR); + unsigned NewDR = MRI->createVirtualRegister(RC); + DebugLoc DL = 
SI->getDebugLoc(); + + auto MIB = BuildMI(LB, At, DL, HII->get(SI->getOpcode()), NewDR); + for (unsigned j = 0, m = SI->getNumOperands(); j < m; ++j) { + const MachineOperand &Op = SI->getOperand(j); + if (!Op.isReg()) { + MIB.addOperand(Op); + continue; + } + if (!Op.isUse()) + continue; + unsigned UseR = RegMap[Op.getReg()]; + MIB.addReg(UseR, 0, Op.getSubReg()); + } + RegMap.insert(std::make_pair(DR, NewDR)); + } + + HBS::replaceReg(OldPhiR, RegMap[G.Out.Reg], *MRI); +} + + +bool HexagonLoopRescheduling::processLoop(LoopCand &C) { + DEBUG(dbgs() << "Processing loop in BB#" << C.LB->getNumber() << "\n"); + std::vector<PhiInfo> Phis; + for (auto &I : *C.LB) { + if (!I.isPHI()) + break; + unsigned PR = getDefReg(&I); + if (isConst(PR)) + continue; + bool BadUse = false, GoodUse = false; + for (auto UI = MRI->use_begin(PR), UE = MRI->use_end(); UI != UE; ++UI) { + MachineInstr *UseI = UI->getParent(); + if (UseI->getParent() != C.LB) { + BadUse = true; + break; + } + if (isBitShuffle(UseI, PR) || isStoreInput(UseI, PR)) + GoodUse = true; + } + if (BadUse || !GoodUse) + continue; + + Phis.push_back(PhiInfo(I, *C.LB)); + } + + DEBUG({ + dbgs() << "Phis: {"; + for (auto &I : Phis) { + dbgs() << ' ' << PrintReg(I.DefR, HRI) << "=phi(" + << PrintReg(I.PR.Reg, HRI, I.PR.Sub) << ":b" << I.PB->getNumber() + << ',' << PrintReg(I.LR.Reg, HRI, I.LR.Sub) << ":b" + << I.LB->getNumber() << ')'; + } + dbgs() << " }\n"; + }); + + if (Phis.empty()) + return false; + + bool Changed = false; + InstrList ShufIns; + + // Go backwards in the block: for each bit shuffling instruction, check + // if that instruction could potentially be moved to the front of the loop: + // the output of the loop cannot be used in a non-shuffling instruction + // in this loop. + for (auto I = C.LB->rbegin(), E = C.LB->rend(); I != E; ++I) { + if (I->isTerminator()) + continue; + if (I->isPHI()) + break; + + RegisterSet Defs; + HBS::getInstrDefs(*I, Defs); + if (Defs.count() != 1) + continue; + unsigned DefR = Defs.find_first(); + if (!TargetRegisterInfo::isVirtualRegister(DefR)) + continue; + if (!isBitShuffle(&*I, DefR)) + continue; + + bool BadUse = false; + for (auto UI = MRI->use_begin(DefR), UE = MRI->use_end(); UI != UE; ++UI) { + MachineInstr *UseI = UI->getParent(); + if (UseI->getParent() == C.LB) { + if (UseI->isPHI()) { + // If the use is in a phi node in this loop, then it should be + // the value corresponding to the back edge. + unsigned Idx = UI.getOperandNo(); + if (UseI->getOperand(Idx+1).getMBB() != C.LB) + BadUse = true; + } else { + auto F = std::find(ShufIns.begin(), ShufIns.end(), UseI); + if (F == ShufIns.end()) + BadUse = true; + } + } else { + // There is a use outside of the loop, but there is no epilog block + // suitable for a copy-out. + if (C.EB == nullptr) + BadUse = true; + } + if (BadUse) + break; + } + + if (BadUse) + continue; + ShufIns.push_back(&*I); + } + + // Partition the list of shuffling instructions into instruction groups, + // where each group has to be moved as a whole (i.e. a group is a chain of + // dependent instructions). A group produces a single live output register, + // which is meant to be the input of the loop phi node (although this is + // not checked here yet). It also uses a single register as its input, + // which is some value produced in the loop body. After moving the group + // to the beginning of the loop, that input register would need to be + // the loop-carried register (through a phi node) instead of the (currently + // loop-carried) output register. 
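+  // A made-up example of a group: the pair
+  //   vreg6 = S2_lsr_i_r vreg4, #16
+  //   vreg7 = A2_combine_ll vreg6, vreg4
+  // forms one group whose single input is vreg4 and whose output is vreg7
+  // (assuming vreg7 is the value flowing into the loop phi).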
+ typedef std::vector<InstrGroup> InstrGroupList; + InstrGroupList Groups; + + for (unsigned i = 0, n = ShufIns.size(); i < n; ++i) { + MachineInstr *SI = ShufIns[i]; + if (SI == nullptr) + continue; + + InstrGroup G; + G.Ins.push_back(SI); + G.Out.Reg = getDefReg(SI); + RegisterSet Inputs; + HBS::getInstrUses(*SI, Inputs); + + for (unsigned j = i+1; j < n; ++j) { + MachineInstr *MI = ShufIns[j]; + if (MI == nullptr) + continue; + RegisterSet Defs; + HBS::getInstrDefs(*MI, Defs); + // If this instruction does not define any pending inputs, skip it. + if (!Defs.intersects(Inputs)) + continue; + // Otherwise, add it to the current group and remove the inputs that + // are defined by MI. + G.Ins.push_back(MI); + Inputs.remove(Defs); + // Then add all registers used by MI. + HBS::getInstrUses(*MI, Inputs); + ShufIns[j] = nullptr; + } + + // Only add a group if it requires at most one register. + if (Inputs.count() > 1) + continue; + auto LoopInpEq = [G] (const PhiInfo &P) -> bool { + return G.Out.Reg == P.LR.Reg; + }; + if (std::find_if(Phis.begin(), Phis.end(), LoopInpEq) == Phis.end()) + continue; + + G.Inp.Reg = Inputs.find_first(); + Groups.push_back(G); + } + + DEBUG({ + for (unsigned i = 0, n = Groups.size(); i < n; ++i) { + InstrGroup &G = Groups[i]; + dbgs() << "Group[" << i << "] inp: " + << PrintReg(G.Inp.Reg, HRI, G.Inp.Sub) + << " out: " << PrintReg(G.Out.Reg, HRI, G.Out.Sub) << "\n"; + for (unsigned j = 0, m = G.Ins.size(); j < m; ++j) + dbgs() << " " << *G.Ins[j]; + } + }); + + for (unsigned i = 0, n = Groups.size(); i < n; ++i) { + InstrGroup &G = Groups[i]; + if (!isShuffleOf(G.Out.Reg, G.Inp.Reg)) + continue; + auto LoopInpEq = [G] (const PhiInfo &P) -> bool { + return G.Out.Reg == P.LR.Reg; + }; + auto F = std::find_if(Phis.begin(), Phis.end(), LoopInpEq); + if (F == Phis.end()) + continue; + unsigned PredR = 0; + if (!isSameShuffle(G.Out.Reg, G.Inp.Reg, F->PR.Reg, PredR)) { + const MachineInstr *DefPredR = MRI->getVRegDef(F->PR.Reg); + unsigned Opc = DefPredR->getOpcode(); + if (Opc != Hexagon::A2_tfrsi && Opc != Hexagon::A2_tfrpi) + continue; + if (!DefPredR->getOperand(1).isImm()) + continue; + if (DefPredR->getOperand(1).getImm() != 0) + continue; + const TargetRegisterClass *RC = MRI->getRegClass(G.Inp.Reg); + if (RC != MRI->getRegClass(F->PR.Reg)) { + PredR = MRI->createVirtualRegister(RC); + unsigned TfrI = (RC == &Hexagon::IntRegsRegClass) ? Hexagon::A2_tfrsi + : Hexagon::A2_tfrpi; + auto T = C.PB->getFirstTerminator(); + DebugLoc DL = (T != C.PB->end()) ? 
T->getDebugLoc() : DebugLoc(); + BuildMI(*C.PB, T, DL, HII->get(TfrI), PredR) + .addImm(0); + } else { + PredR = F->PR.Reg; + } + } + assert(MRI->getRegClass(PredR) == MRI->getRegClass(G.Inp.Reg)); + moveGroup(G, *F->LB, *F->PB, F->LB->getFirstNonPHI(), F->DefR, PredR); + Changed = true; + } + + return Changed; +} + + +bool HexagonLoopRescheduling::runOnMachineFunction(MachineFunction &MF) { + auto &HST = MF.getSubtarget<HexagonSubtarget>(); + HII = HST.getInstrInfo(); + HRI = HST.getRegisterInfo(); + MRI = &MF.getRegInfo(); + const HexagonEvaluator HE(*HRI, *MRI, *HII, MF); + BitTracker BT(HE, MF); + DEBUG(BT.trace(true)); + BT.run(); + BTP = &BT; + + std::vector<LoopCand> Cand; + + for (auto &B : MF) { + if (B.pred_size() != 2 || B.succ_size() != 2) + continue; + MachineBasicBlock *PB = nullptr; + bool IsLoop = false; + for (auto PI = B.pred_begin(), PE = B.pred_end(); PI != PE; ++PI) { + if (*PI != &B) + PB = *PI; + else + IsLoop = true; + } + if (!IsLoop) + continue; + + MachineBasicBlock *EB = nullptr; + for (auto SI = B.succ_begin(), SE = B.succ_end(); SI != SE; ++SI) { + if (*SI == &B) + continue; + // Set EP to the epilog block, if it has only 1 predecessor (i.e. the + // edge from B to EP is non-critical. + if ((*SI)->pred_size() == 1) + EB = *SI; + break; + } + + Cand.push_back(LoopCand(&B, PB, EB)); + } + + bool Changed = false; + for (auto &C : Cand) + Changed |= processLoop(C); + + return Changed; +} + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +FunctionPass *llvm::createHexagonLoopRescheduling() { + return new HexagonLoopRescheduling(); +} + +FunctionPass *llvm::createHexagonBitSimplify() { + return new HexagonBitSimplify(); +} + diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp new file mode 100644 index 0000000..d5848dc --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp @@ -0,0 +1,1175 @@ +//===--- HexagonBitTracker.cpp --------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#include "Hexagon.h" +#include "HexagonInstrInfo.h" +#include "HexagonRegisterInfo.h" +#include "HexagonTargetMachine.h" +#include "HexagonBitTracker.h" + +using namespace llvm; + +typedef BitTracker BT; + +HexagonEvaluator::HexagonEvaluator(const HexagonRegisterInfo &tri, + MachineRegisterInfo &mri, + const HexagonInstrInfo &tii, + MachineFunction &mf) + : MachineEvaluator(tri, mri), MF(mf), MFI(*mf.getFrameInfo()), TII(tii) { + // Populate the VRX map (VR to extension-type). + // Go over all the formal parameters of the function. If a given parameter + // P is sign- or zero-extended, locate the virtual register holding that + // parameter and create an entry in the VRX map indicating the type of ex- + // tension (and the source type). + // This is a bit complicated to do accurately, since the memory layout in- + // formation is necessary to precisely determine whether an aggregate para- + // meter will be passed in a register or in memory. 
What is given in MRI + // is the association between the physical register that is live-in (i.e. + // holds an argument), and the virtual register that this value will be + // copied into. This, by itself, is not sufficient to map back the virtual + // register to a formal parameter from Function (since consecutive live-ins + // from MRI may not correspond to consecutive formal parameters from Func- + // tion). To avoid the complications with in-memory arguments, only consi- + // der the initial sequence of formal parameters that are known to be + // passed via registers. + unsigned AttrIdx = 0; + unsigned InVirtReg, InPhysReg = 0; + const Function &F = *MF.getFunction(); + typedef Function::const_arg_iterator arg_iterator; + for (arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) { + AttrIdx++; + const Argument &Arg = *I; + Type *ATy = Arg.getType(); + unsigned Width = 0; + if (ATy->isIntegerTy()) + Width = ATy->getIntegerBitWidth(); + else if (ATy->isPointerTy()) + Width = 32; + // If pointer size is not set through target data, it will default to + // Module::AnyPointerSize. + if (Width == 0 || Width > 64) + break; + InPhysReg = getNextPhysReg(InPhysReg, Width); + if (!InPhysReg) + break; + InVirtReg = getVirtRegFor(InPhysReg); + if (!InVirtReg) + continue; + AttributeSet Attrs = F.getAttributes(); + if (Attrs.hasAttribute(AttrIdx, Attribute::SExt)) + VRX.insert(std::make_pair(InVirtReg, ExtType(ExtType::SExt, Width))); + else if (Attrs.hasAttribute(AttrIdx, Attribute::ZExt)) + VRX.insert(std::make_pair(InVirtReg, ExtType(ExtType::ZExt, Width))); + } +} + + +BT::BitMask HexagonEvaluator::mask(unsigned Reg, unsigned Sub) const { + if (Sub == 0) + return MachineEvaluator::mask(Reg, 0); + using namespace Hexagon; + const TargetRegisterClass *RC = MRI.getRegClass(Reg); + unsigned ID = RC->getID(); + uint16_t RW = getRegBitWidth(RegisterRef(Reg, Sub)); + switch (ID) { + case DoubleRegsRegClassID: + case VecDblRegsRegClassID: + case VecDblRegs128BRegClassID: + return (Sub == subreg_loreg) ? BT::BitMask(0, RW-1) + : BT::BitMask(RW, 2*RW-1); + default: + break; + } +#ifndef NDEBUG + dbgs() << PrintReg(Reg, &TRI, Sub) << '\n'; +#endif + llvm_unreachable("Unexpected register/subregister"); +} + +namespace { +class RegisterRefs { + std::vector<BT::RegisterRef> Vector; + +public: + RegisterRefs(const MachineInstr *MI) : Vector(MI->getNumOperands()) { + for (unsigned i = 0, n = Vector.size(); i < n; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg()) + Vector[i] = BT::RegisterRef(MO); + // For indices that don't correspond to registers, the entry will + // remain constructed via the default constructor. + } + } + + size_t size() const { return Vector.size(); } + const BT::RegisterRef &operator[](unsigned n) const { + // The main purpose of this operator is to assert with bad argument. + assert(n < Vector.size()); + return Vector[n]; + } +}; +} + +bool HexagonEvaluator::evaluate(const MachineInstr *MI, + const CellMapType &Inputs, CellMapType &Outputs) const { + unsigned NumDefs = 0; + + // Sanity verification: there should not be any defs with subregisters. + for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + NumDefs++; + assert(MO.getSubReg() == 0); + } + + if (NumDefs == 0) + return false; + + if (MI->mayLoad()) + return evaluateLoad(MI, Inputs, Outputs); + + // Check COPY instructions that copy formal parameters into virtual + // registers. 
Such parameters can be sign- or zero-extended at the + // call site, and we should take advantage of this knowledge. The MRI + // keeps a list of pairs of live-in physical and virtual registers, + // which provides information about which virtual registers will hold + // the argument values. The function will still contain instructions + // defining those virtual registers, and in practice those are COPY + // instructions from a physical to a virtual register. In such cases, + // applying the argument extension to the virtual register can be seen + // as simply mirroring the extension that had already been applied to + // the physical register at the call site. If the defining instruction + // was not a COPY, it would not be clear how to mirror that extension + // on the callee's side. For that reason, only check COPY instructions + // for potential extensions. + if (MI->isCopy()) { + if (evaluateFormalCopy(MI, Inputs, Outputs)) + return true; + } + + // Beyond this point, if any operand is a global, skip that instruction. + // The reason is that certain instructions that can take an immediate + // operand can also have a global symbol in that operand. To avoid + // checking what kind of operand a given instruction has individually + // for each instruction, do it here. Global symbols as operands gene- + // rally do not provide any useful information. + for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isGlobal() || MO.isBlockAddress() || MO.isSymbol() || MO.isJTI() || + MO.isCPI()) + return false; + } + + RegisterRefs Reg(MI); + unsigned Opc = MI->getOpcode(); + using namespace Hexagon; + #define op(i) MI->getOperand(i) + #define rc(i) RegisterCell::ref(getCell(Reg[i],Inputs)) + #define im(i) MI->getOperand(i).getImm() + + // If the instruction has no register operands, skip it. + if (Reg.size() == 0) + return false; + + // Record result for register in operand 0. + auto rr0 = [this,Reg] (const BT::RegisterCell &Val, CellMapType &Outputs) + -> bool { + putCell(Reg[0], Val, Outputs); + return true; + }; + // Get the cell corresponding to the N-th operand. + auto cop = [this,&Reg,&MI,&Inputs] (unsigned N, uint16_t W) + -> BT::RegisterCell { + const MachineOperand &Op = MI->getOperand(N); + if (Op.isImm()) + return eIMM(Op.getImm(), W); + if (!Op.isReg()) + return RegisterCell::self(0, W); + assert(getRegBitWidth(Reg[N]) == W && "Register width mismatch"); + return rc(N); + }; + // Extract RW low bits of the cell. + auto lo = [this] (const BT::RegisterCell &RC, uint16_t RW) + -> BT::RegisterCell { + assert(RW <= RC.width()); + return eXTR(RC, 0, RW); + }; + // Extract RW high bits of the cell. + auto hi = [this] (const BT::RegisterCell &RC, uint16_t RW) + -> BT::RegisterCell { + uint16_t W = RC.width(); + assert(RW <= W); + return eXTR(RC, W-RW, W); + }; + // Extract N-th halfword (counting from the least significant position). + auto half = [this] (const BT::RegisterCell &RC, unsigned N) + -> BT::RegisterCell { + assert(N*16+16 <= RC.width()); + return eXTR(RC, N*16, N*16+16); + }; + // Shuffle bits (pick even/odd from cells and merge into result). 
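+  // e.g. (reading the lambda below): with BW = 8 and Odd = false, as used for
+  // S2_shuffeb, the result is built from the even-numbered bytes of the two
+  // inputs, starting at the least significant byte:
+  //   res = { Rt.b0, Rs.b0, Rt.b2, Rs.b2, ... }
+  // Odd = true picks the odd-numbered bytes instead.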
+ auto shuffle = [this] (const BT::RegisterCell &Rs, const BT::RegisterCell &Rt, + uint16_t BW, bool Odd) -> BT::RegisterCell { + uint16_t I = Odd, Ws = Rs.width(); + assert(Ws == Rt.width()); + RegisterCell RC = eXTR(Rt, I*BW, I*BW+BW).cat(eXTR(Rs, I*BW, I*BW+BW)); + I += 2; + while (I*BW < Ws) { + RC.cat(eXTR(Rt, I*BW, I*BW+BW)).cat(eXTR(Rs, I*BW, I*BW+BW)); + I += 2; + } + return RC; + }; + + // The bitwidth of the 0th operand. In most (if not all) of the + // instructions below, the 0th operand is the defined register. + // Pre-compute the bitwidth here, because it is needed in many cases + // cases below. + uint16_t W0 = (Reg[0].Reg != 0) ? getRegBitWidth(Reg[0]) : 0; + + switch (Opc) { + // Transfer immediate: + + case A2_tfrsi: + case A2_tfrpi: + case CONST32: + case CONST32_Float_Real: + case CONST32_Int_Real: + case CONST64_Float_Real: + case CONST64_Int_Real: + return rr0(eIMM(im(1), W0), Outputs); + case TFR_PdFalse: + return rr0(RegisterCell(W0).fill(0, W0, BT::BitValue::Zero), Outputs); + case TFR_PdTrue: + return rr0(RegisterCell(W0).fill(0, W0, BT::BitValue::One), Outputs); + case TFR_FI: { + int FI = op(1).getIndex(); + int Off = op(2).getImm(); + unsigned A = MFI.getObjectAlignment(FI) + std::abs(Off); + unsigned L = Log2_32(A); + RegisterCell RC = RegisterCell::self(Reg[0].Reg, W0); + RC.fill(0, L, BT::BitValue::Zero); + return rr0(RC, Outputs); + } + + // Transfer register: + + case A2_tfr: + case A2_tfrp: + case C2_pxfer_map: + return rr0(rc(1), Outputs); + case C2_tfrpr: { + uint16_t RW = W0; + uint16_t PW = 8; // XXX Pred size: getRegBitWidth(Reg[1]); + assert(PW <= RW); + RegisterCell PC = eXTR(rc(1), 0, PW); + RegisterCell RC = RegisterCell(RW).insert(PC, BT::BitMask(0, PW-1)); + RC.fill(PW, RW, BT::BitValue::Zero); + return rr0(RC, Outputs); + } + case C2_tfrrp: { + RegisterCell RC = RegisterCell::self(Reg[0].Reg, W0); + W0 = 8; // XXX Pred size + return rr0(eINS(RC, eXTR(rc(1), 0, W0), 0), Outputs); + } + + // Arithmetic: + + case A2_abs: + case A2_absp: + // TODO + break; + + case A2_addsp: { + uint16_t W1 = getRegBitWidth(Reg[1]); + assert(W0 == 64 && W1 == 32); + RegisterCell CW = RegisterCell(W0).insert(rc(1), BT::BitMask(0, W1-1)); + RegisterCell RC = eADD(eSXT(CW, W1), rc(2)); + return rr0(RC, Outputs); + } + case A2_add: + case A2_addp: + return rr0(eADD(rc(1), rc(2)), Outputs); + case A2_addi: + return rr0(eADD(rc(1), eIMM(im(2), W0)), Outputs); + case S4_addi_asl_ri: { + RegisterCell RC = eADD(eIMM(im(1), W0), eASL(rc(2), im(3))); + return rr0(RC, Outputs); + } + case S4_addi_lsr_ri: { + RegisterCell RC = eADD(eIMM(im(1), W0), eLSR(rc(2), im(3))); + return rr0(RC, Outputs); + } + case S4_addaddi: { + RegisterCell RC = eADD(rc(1), eADD(rc(2), eIMM(im(3), W0))); + return rr0(RC, Outputs); + } + case M4_mpyri_addi: { + RegisterCell M = eMLS(rc(2), eIMM(im(3), W0)); + RegisterCell RC = eADD(eIMM(im(1), W0), lo(M, W0)); + return rr0(RC, Outputs); + } + case M4_mpyrr_addi: { + RegisterCell M = eMLS(rc(2), rc(3)); + RegisterCell RC = eADD(eIMM(im(1), W0), lo(M, W0)); + return rr0(RC, Outputs); + } + case M4_mpyri_addr_u2: { + RegisterCell M = eMLS(eIMM(im(2), W0), rc(3)); + RegisterCell RC = eADD(rc(1), lo(M, W0)); + return rr0(RC, Outputs); + } + case M4_mpyri_addr: { + RegisterCell M = eMLS(rc(2), eIMM(im(3), W0)); + RegisterCell RC = eADD(rc(1), lo(M, W0)); + return rr0(RC, Outputs); + } + case M4_mpyrr_addr: { + RegisterCell M = eMLS(rc(2), rc(3)); + RegisterCell RC = eADD(rc(1), lo(M, W0)); + return rr0(RC, Outputs); + } + case S4_subaddi: { + RegisterCell 
RC = eADD(rc(1), eSUB(eIMM(im(2), W0), rc(3))); + return rr0(RC, Outputs); + } + case M2_accii: { + RegisterCell RC = eADD(rc(1), eADD(rc(2), eIMM(im(3), W0))); + return rr0(RC, Outputs); + } + case M2_acci: { + RegisterCell RC = eADD(rc(1), eADD(rc(2), rc(3))); + return rr0(RC, Outputs); + } + case M2_subacc: { + RegisterCell RC = eADD(rc(1), eSUB(rc(2), rc(3))); + return rr0(RC, Outputs); + } + case S2_addasl_rrri: { + RegisterCell RC = eADD(rc(1), eASL(rc(2), im(3))); + return rr0(RC, Outputs); + } + case C4_addipc: { + RegisterCell RPC = RegisterCell::self(Reg[0].Reg, W0); + RPC.fill(0, 2, BT::BitValue::Zero); + return rr0(eADD(RPC, eIMM(im(2), W0)), Outputs); + } + case A2_sub: + case A2_subp: + return rr0(eSUB(rc(1), rc(2)), Outputs); + case A2_subri: + return rr0(eSUB(eIMM(im(1), W0), rc(2)), Outputs); + case S4_subi_asl_ri: { + RegisterCell RC = eSUB(eIMM(im(1), W0), eASL(rc(2), im(3))); + return rr0(RC, Outputs); + } + case S4_subi_lsr_ri: { + RegisterCell RC = eSUB(eIMM(im(1), W0), eLSR(rc(2), im(3))); + return rr0(RC, Outputs); + } + case M2_naccii: { + RegisterCell RC = eSUB(rc(1), eADD(rc(2), eIMM(im(3), W0))); + return rr0(RC, Outputs); + } + case M2_nacci: { + RegisterCell RC = eSUB(rc(1), eADD(rc(2), rc(3))); + return rr0(RC, Outputs); + } + // 32-bit negation is done by "Rd = A2_subri 0, Rs" + case A2_negp: + return rr0(eSUB(eIMM(0, W0), rc(1)), Outputs); + + case M2_mpy_up: { + RegisterCell M = eMLS(rc(1), rc(2)); + return rr0(hi(M, W0), Outputs); + } + case M2_dpmpyss_s0: + return rr0(eMLS(rc(1), rc(2)), Outputs); + case M2_dpmpyss_acc_s0: + return rr0(eADD(rc(1), eMLS(rc(2), rc(3))), Outputs); + case M2_dpmpyss_nac_s0: + return rr0(eSUB(rc(1), eMLS(rc(2), rc(3))), Outputs); + case M2_mpyi: { + RegisterCell M = eMLS(rc(1), rc(2)); + return rr0(lo(M, W0), Outputs); + } + case M2_macsip: { + RegisterCell M = eMLS(rc(2), eIMM(im(3), W0)); + RegisterCell RC = eADD(rc(1), lo(M, W0)); + return rr0(RC, Outputs); + } + case M2_macsin: { + RegisterCell M = eMLS(rc(2), eIMM(im(3), W0)); + RegisterCell RC = eSUB(rc(1), lo(M, W0)); + return rr0(RC, Outputs); + } + case M2_maci: { + RegisterCell M = eMLS(rc(2), rc(3)); + RegisterCell RC = eADD(rc(1), lo(M, W0)); + return rr0(RC, Outputs); + } + case M2_mpysmi: { + RegisterCell M = eMLS(rc(1), eIMM(im(2), W0)); + return rr0(lo(M, 32), Outputs); + } + case M2_mpysin: { + RegisterCell M = eMLS(rc(1), eIMM(-im(2), W0)); + return rr0(lo(M, 32), Outputs); + } + case M2_mpysip: { + RegisterCell M = eMLS(rc(1), eIMM(im(2), W0)); + return rr0(lo(M, 32), Outputs); + } + case M2_mpyu_up: { + RegisterCell M = eMLU(rc(1), rc(2)); + return rr0(hi(M, W0), Outputs); + } + case M2_dpmpyuu_s0: + return rr0(eMLU(rc(1), rc(2)), Outputs); + case M2_dpmpyuu_acc_s0: + return rr0(eADD(rc(1), eMLU(rc(2), rc(3))), Outputs); + case M2_dpmpyuu_nac_s0: + return rr0(eSUB(rc(1), eMLU(rc(2), rc(3))), Outputs); + //case M2_mpysu_up: + + // Logical/bitwise: + + case A2_andir: + return rr0(eAND(rc(1), eIMM(im(2), W0)), Outputs); + case A2_and: + case A2_andp: + return rr0(eAND(rc(1), rc(2)), Outputs); + case A4_andn: + case A4_andnp: + return rr0(eAND(rc(1), eNOT(rc(2))), Outputs); + case S4_andi_asl_ri: { + RegisterCell RC = eAND(eIMM(im(1), W0), eASL(rc(2), im(3))); + return rr0(RC, Outputs); + } + case S4_andi_lsr_ri: { + RegisterCell RC = eAND(eIMM(im(1), W0), eLSR(rc(2), im(3))); + return rr0(RC, Outputs); + } + case M4_and_and: + return rr0(eAND(rc(1), eAND(rc(2), rc(3))), Outputs); + case M4_and_andn: + return rr0(eAND(rc(1), eAND(rc(2), eNOT(rc(3)))), 
Outputs); + case M4_and_or: + return rr0(eAND(rc(1), eORL(rc(2), rc(3))), Outputs); + case M4_and_xor: + return rr0(eAND(rc(1), eXOR(rc(2), rc(3))), Outputs); + case A2_orir: + return rr0(eORL(rc(1), eIMM(im(2), W0)), Outputs); + case A2_or: + case A2_orp: + return rr0(eORL(rc(1), rc(2)), Outputs); + case A4_orn: + case A4_ornp: + return rr0(eORL(rc(1), eNOT(rc(2))), Outputs); + case S4_ori_asl_ri: { + RegisterCell RC = eORL(eIMM(im(1), W0), eASL(rc(2), im(3))); + return rr0(RC, Outputs); + } + case S4_ori_lsr_ri: { + RegisterCell RC = eORL(eIMM(im(1), W0), eLSR(rc(2), im(3))); + return rr0(RC, Outputs); + } + case M4_or_and: + return rr0(eORL(rc(1), eAND(rc(2), rc(3))), Outputs); + case M4_or_andn: + return rr0(eORL(rc(1), eAND(rc(2), eNOT(rc(3)))), Outputs); + case S4_or_andi: + case S4_or_andix: { + RegisterCell RC = eORL(rc(1), eAND(rc(2), eIMM(im(3), W0))); + return rr0(RC, Outputs); + } + case S4_or_ori: { + RegisterCell RC = eORL(rc(1), eORL(rc(2), eIMM(im(3), W0))); + return rr0(RC, Outputs); + } + case M4_or_or: + return rr0(eORL(rc(1), eORL(rc(2), rc(3))), Outputs); + case M4_or_xor: + return rr0(eORL(rc(1), eXOR(rc(2), rc(3))), Outputs); + case A2_xor: + case A2_xorp: + return rr0(eXOR(rc(1), rc(2)), Outputs); + case M4_xor_and: + return rr0(eXOR(rc(1), eAND(rc(2), rc(3))), Outputs); + case M4_xor_andn: + return rr0(eXOR(rc(1), eAND(rc(2), eNOT(rc(3)))), Outputs); + case M4_xor_or: + return rr0(eXOR(rc(1), eORL(rc(2), rc(3))), Outputs); + case M4_xor_xacc: + return rr0(eXOR(rc(1), eXOR(rc(2), rc(3))), Outputs); + case A2_not: + case A2_notp: + return rr0(eNOT(rc(1)), Outputs); + + case S2_asl_i_r: + case S2_asl_i_p: + return rr0(eASL(rc(1), im(2)), Outputs); + case A2_aslh: + return rr0(eASL(rc(1), 16), Outputs); + case S2_asl_i_r_acc: + case S2_asl_i_p_acc: + return rr0(eADD(rc(1), eASL(rc(2), im(3))), Outputs); + case S2_asl_i_r_nac: + case S2_asl_i_p_nac: + return rr0(eSUB(rc(1), eASL(rc(2), im(3))), Outputs); + case S2_asl_i_r_and: + case S2_asl_i_p_and: + return rr0(eAND(rc(1), eASL(rc(2), im(3))), Outputs); + case S2_asl_i_r_or: + case S2_asl_i_p_or: + return rr0(eORL(rc(1), eASL(rc(2), im(3))), Outputs); + case S2_asl_i_r_xacc: + case S2_asl_i_p_xacc: + return rr0(eXOR(rc(1), eASL(rc(2), im(3))), Outputs); + case S2_asl_i_vh: + case S2_asl_i_vw: + // TODO + break; + + case S2_asr_i_r: + case S2_asr_i_p: + return rr0(eASR(rc(1), im(2)), Outputs); + case A2_asrh: + return rr0(eASR(rc(1), 16), Outputs); + case S2_asr_i_r_acc: + case S2_asr_i_p_acc: + return rr0(eADD(rc(1), eASR(rc(2), im(3))), Outputs); + case S2_asr_i_r_nac: + case S2_asr_i_p_nac: + return rr0(eSUB(rc(1), eASR(rc(2), im(3))), Outputs); + case S2_asr_i_r_and: + case S2_asr_i_p_and: + return rr0(eAND(rc(1), eASR(rc(2), im(3))), Outputs); + case S2_asr_i_r_or: + case S2_asr_i_p_or: + return rr0(eORL(rc(1), eASR(rc(2), im(3))), Outputs); + case S2_asr_i_r_rnd: { + // The input is first sign-extended to 64 bits, then the output + // is truncated back to 32 bits. 
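+    // In other words, Rd = trunc32(((sext64(Rs) >> #u5) + 1) >> 1), with
+    // arithmetic shifts throughout, which is what the cells below compute.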
+ assert(W0 == 32); + RegisterCell XC = eSXT(rc(1).cat(eIMM(0, W0)), W0); + RegisterCell RC = eASR(eADD(eASR(XC, im(2)), eIMM(1, 2*W0)), 1); + return rr0(eXTR(RC, 0, W0), Outputs); + } + case S2_asr_i_r_rnd_goodsyntax: { + int64_t S = im(2); + if (S == 0) + return rr0(rc(1), Outputs); + // Result: S2_asr_i_r_rnd Rs, u5-1 + RegisterCell XC = eSXT(rc(1).cat(eIMM(0, W0)), W0); + RegisterCell RC = eLSR(eADD(eASR(XC, S-1), eIMM(1, 2*W0)), 1); + return rr0(eXTR(RC, 0, W0), Outputs); + } + case S2_asr_r_vh: + case S2_asr_i_vw: + case S2_asr_i_svw_trun: + // TODO + break; + + case S2_lsr_i_r: + case S2_lsr_i_p: + return rr0(eLSR(rc(1), im(2)), Outputs); + case S2_lsr_i_r_acc: + case S2_lsr_i_p_acc: + return rr0(eADD(rc(1), eLSR(rc(2), im(3))), Outputs); + case S2_lsr_i_r_nac: + case S2_lsr_i_p_nac: + return rr0(eSUB(rc(1), eLSR(rc(2), im(3))), Outputs); + case S2_lsr_i_r_and: + case S2_lsr_i_p_and: + return rr0(eAND(rc(1), eLSR(rc(2), im(3))), Outputs); + case S2_lsr_i_r_or: + case S2_lsr_i_p_or: + return rr0(eORL(rc(1), eLSR(rc(2), im(3))), Outputs); + case S2_lsr_i_r_xacc: + case S2_lsr_i_p_xacc: + return rr0(eXOR(rc(1), eLSR(rc(2), im(3))), Outputs); + + case S2_clrbit_i: { + RegisterCell RC = rc(1); + RC[im(2)] = BT::BitValue::Zero; + return rr0(RC, Outputs); + } + case S2_setbit_i: { + RegisterCell RC = rc(1); + RC[im(2)] = BT::BitValue::One; + return rr0(RC, Outputs); + } + case S2_togglebit_i: { + RegisterCell RC = rc(1); + uint16_t BX = im(2); + RC[BX] = RC[BX].is(0) ? BT::BitValue::One + : RC[BX].is(1) ? BT::BitValue::Zero + : BT::BitValue::self(); + return rr0(RC, Outputs); + } + + case A4_bitspliti: { + uint16_t W1 = getRegBitWidth(Reg[1]); + uint16_t BX = im(2); + // Res.uw[1] = Rs[bx+1:], Res.uw[0] = Rs[0:bx] + const BT::BitValue Zero = BT::BitValue::Zero; + RegisterCell RZ = RegisterCell(W0).fill(BX, W1, Zero) + .fill(W1+(W1-BX), W0, Zero); + RegisterCell BF1 = eXTR(rc(1), 0, BX), BF2 = eXTR(rc(1), BX, W1); + RegisterCell RC = eINS(eINS(RZ, BF1, 0), BF2, W1); + return rr0(RC, Outputs); + } + case S4_extract: + case S4_extractp: + case S2_extractu: + case S2_extractup: { + uint16_t Wd = im(2), Of = im(3); + assert(Wd <= W0); + if (Wd == 0) + return rr0(eIMM(0, W0), Outputs); + // If the width extends beyond the register size, pad the register + // with 0 bits. + RegisterCell Pad = (Wd+Of > W0) ? rc(1).cat(eIMM(0, Wd+Of-W0)) : rc(1); + RegisterCell Ext = eXTR(Pad, Of, Wd+Of); + // Ext is short, need to extend it with 0s or sign bit. + RegisterCell RC = RegisterCell(W0).insert(Ext, BT::BitMask(0, Wd-1)); + if (Opc == S2_extractu || Opc == S2_extractup) + return rr0(eZXT(RC, Wd), Outputs); + return rr0(eSXT(RC, Wd), Outputs); + } + case S2_insert: + case S2_insertp: { + uint16_t Wd = im(3), Of = im(4); + assert(Wd < W0 && Of < W0); + // If Wd+Of exceeds W0, the inserted bits are truncated. 
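+    // For example, with W0 = 32, Wd = 8 and Of = 28, Wd is clamped to 4 and
+    // only the low 4 bits of the inserted operand reach the destination.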
+ if (Wd+Of > W0) + Wd = W0-Of; + if (Wd == 0) + return rr0(rc(1), Outputs); + return rr0(eINS(rc(1), eXTR(rc(2), 0, Wd), Of), Outputs); + } + + // Bit permutations: + + case A2_combineii: + case A4_combineii: + case A4_combineir: + case A4_combineri: + case A2_combinew: + assert(W0 % 2 == 0); + return rr0(cop(2, W0/2).cat(cop(1, W0/2)), Outputs); + case A2_combine_ll: + case A2_combine_lh: + case A2_combine_hl: + case A2_combine_hh: { + assert(W0 == 32); + assert(getRegBitWidth(Reg[1]) == 32 && getRegBitWidth(Reg[2]) == 32); + // Low half in the output is 0 for _ll and _hl, 1 otherwise: + unsigned LoH = !(Opc == A2_combine_ll || Opc == A2_combine_hl); + // High half in the output is 0 for _ll and _lh, 1 otherwise: + unsigned HiH = !(Opc == A2_combine_ll || Opc == A2_combine_lh); + RegisterCell R1 = rc(1); + RegisterCell R2 = rc(2); + RegisterCell RC = half(R2, LoH).cat(half(R1, HiH)); + return rr0(RC, Outputs); + } + case S2_packhl: { + assert(W0 == 64); + assert(getRegBitWidth(Reg[1]) == 32 && getRegBitWidth(Reg[2]) == 32); + RegisterCell R1 = rc(1); + RegisterCell R2 = rc(2); + RegisterCell RC = half(R2, 0).cat(half(R1, 0)).cat(half(R2, 1)) + .cat(half(R1, 1)); + return rr0(RC, Outputs); + } + case S2_shuffeb: { + RegisterCell RC = shuffle(rc(1), rc(2), 8, false); + return rr0(RC, Outputs); + } + case S2_shuffeh: { + RegisterCell RC = shuffle(rc(1), rc(2), 16, false); + return rr0(RC, Outputs); + } + case S2_shuffob: { + RegisterCell RC = shuffle(rc(1), rc(2), 8, true); + return rr0(RC, Outputs); + } + case S2_shuffoh: { + RegisterCell RC = shuffle(rc(1), rc(2), 16, true); + return rr0(RC, Outputs); + } + case C2_mask: { + uint16_t WR = W0; + uint16_t WP = 8; // XXX Pred size: getRegBitWidth(Reg[1]); + assert(WR == 64 && WP == 8); + RegisterCell R1 = rc(1); + RegisterCell RC(WR); + for (uint16_t i = 0; i < WP; ++i) { + const BT::BitValue &V = R1[i]; + BT::BitValue F = (V.is(0) || V.is(1)) ? V : BT::BitValue::self(); + RC.fill(i*8, i*8+8, F); + } + return rr0(RC, Outputs); + } + + // Mux: + + case C2_muxii: + case C2_muxir: + case C2_muxri: + case C2_mux: { + BT::BitValue PC0 = rc(1)[0]; + RegisterCell R2 = cop(2, W0); + RegisterCell R3 = cop(3, W0); + if (PC0.is(0) || PC0.is(1)) + return rr0(RegisterCell::ref(PC0 ? R2 : R3), Outputs); + R2.meet(R3, Reg[0].Reg); + return rr0(R2, Outputs); + } + case C2_vmux: + // TODO + break; + + // Sign- and zero-extension: + + case A2_sxtb: + return rr0(eSXT(rc(1), 8), Outputs); + case A2_sxth: + return rr0(eSXT(rc(1), 16), Outputs); + case A2_sxtw: { + uint16_t W1 = getRegBitWidth(Reg[1]); + assert(W0 == 64 && W1 == 32); + RegisterCell RC = eSXT(rc(1).cat(eIMM(0, W1)), W1); + return rr0(RC, Outputs); + } + case A2_zxtb: + return rr0(eZXT(rc(1), 8), Outputs); + case A2_zxth: + return rr0(eZXT(rc(1), 16), Outputs); + + // Bit count: + + case S2_cl0: + case S2_cl0p: + // Always produce a 32-bit result. 
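+    // (cl0 counts leading zero bits; the count goes into a 32-bit register
+    // even for the 64-bit "p" forms, hence the fixed width of 32 below.)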
+ return rr0(eCLB(rc(1), 0/*bit*/, 32), Outputs); + case S2_cl1: + case S2_cl1p: + return rr0(eCLB(rc(1), 1/*bit*/, 32), Outputs); + case S2_clb: + case S2_clbp: { + uint16_t W1 = getRegBitWidth(Reg[1]); + RegisterCell R1 = rc(1); + BT::BitValue TV = R1[W1-1]; + if (TV.is(0) || TV.is(1)) + return rr0(eCLB(R1, TV, 32), Outputs); + break; + } + case S2_ct0: + case S2_ct0p: + return rr0(eCTB(rc(1), 0/*bit*/, 32), Outputs); + case S2_ct1: + case S2_ct1p: + return rr0(eCTB(rc(1), 1/*bit*/, 32), Outputs); + case S5_popcountp: + // TODO + break; + + case C2_all8: { + RegisterCell P1 = rc(1); + bool Has0 = false, All1 = true; + for (uint16_t i = 0; i < 8/*XXX*/; ++i) { + if (!P1[i].is(1)) + All1 = false; + if (!P1[i].is(0)) + continue; + Has0 = true; + break; + } + if (!Has0 && !All1) + break; + RegisterCell RC(W0); + RC.fill(0, W0, (All1 ? BT::BitValue::One : BT::BitValue::Zero)); + return rr0(RC, Outputs); + } + case C2_any8: { + RegisterCell P1 = rc(1); + bool Has1 = false, All0 = true; + for (uint16_t i = 0; i < 8/*XXX*/; ++i) { + if (!P1[i].is(0)) + All0 = false; + if (!P1[i].is(1)) + continue; + Has1 = true; + break; + } + if (!Has1 && !All0) + break; + RegisterCell RC(W0); + RC.fill(0, W0, (Has1 ? BT::BitValue::One : BT::BitValue::Zero)); + return rr0(RC, Outputs); + } + case C2_and: + return rr0(eAND(rc(1), rc(2)), Outputs); + case C2_andn: + return rr0(eAND(rc(1), eNOT(rc(2))), Outputs); + case C2_not: + return rr0(eNOT(rc(1)), Outputs); + case C2_or: + return rr0(eORL(rc(1), rc(2)), Outputs); + case C2_orn: + return rr0(eORL(rc(1), eNOT(rc(2))), Outputs); + case C2_xor: + return rr0(eXOR(rc(1), rc(2)), Outputs); + case C4_and_and: + return rr0(eAND(rc(1), eAND(rc(2), rc(3))), Outputs); + case C4_and_andn: + return rr0(eAND(rc(1), eAND(rc(2), eNOT(rc(3)))), Outputs); + case C4_and_or: + return rr0(eAND(rc(1), eORL(rc(2), rc(3))), Outputs); + case C4_and_orn: + return rr0(eAND(rc(1), eORL(rc(2), eNOT(rc(3)))), Outputs); + case C4_or_and: + return rr0(eORL(rc(1), eAND(rc(2), rc(3))), Outputs); + case C4_or_andn: + return rr0(eORL(rc(1), eAND(rc(2), eNOT(rc(3)))), Outputs); + case C4_or_or: + return rr0(eORL(rc(1), eORL(rc(2), rc(3))), Outputs); + case C4_or_orn: + return rr0(eORL(rc(1), eORL(rc(2), eNOT(rc(3)))), Outputs); + case C2_bitsclr: + case C2_bitsclri: + case C2_bitsset: + case C4_nbitsclr: + case C4_nbitsclri: + case C4_nbitsset: + // TODO + break; + case S2_tstbit_i: + case S4_ntstbit_i: { + BT::BitValue V = rc(1)[im(2)]; + if (V.is(0) || V.is(1)) { + // If instruction is S2_tstbit_i, test for 1, otherwise test for 0. + bool TV = (Opc == S2_tstbit_i); + BT::BitValue F = V.is(TV) ? BT::BitValue::One : BT::BitValue::Zero; + return rr0(RegisterCell(W0).fill(0, W0, F), Outputs); + } + break; + } + + default: + return MachineEvaluator::evaluate(MI, Inputs, Outputs); + } + #undef im + #undef rc + #undef op + return false; +} + + +bool HexagonEvaluator::evaluate(const MachineInstr *BI, + const CellMapType &Inputs, BranchTargetList &Targets, + bool &FallsThru) const { + // We need to evaluate one branch at a time. TII::AnalyzeBranch checks + // all the branches in a basic block at once, so we cannot use it. + unsigned Opc = BI->getOpcode(); + bool SimpleBranch = false; + bool Negated = false; + switch (Opc) { + case Hexagon::J2_jumpf: + case Hexagon::J2_jumpfnew: + case Hexagon::J2_jumpfnewpt: + Negated = true; + case Hexagon::J2_jumpt: + case Hexagon::J2_jumptnew: + case Hexagon::J2_jumptnewpt: + // Simple branch: if([!]Pn) jump ... + // i.e. Op0 = predicate, Op1 = branch target. 
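+    // For example, "if (!p0) jump target" arrives here as J2_jumpf with
+    // Negated = true, Op0 = p0 and Op1 = the target basic block.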
+ SimpleBranch = true; + break; + case Hexagon::J2_jump: + Targets.insert(BI->getOperand(0).getMBB()); + FallsThru = false; + return true; + default: + // If the branch is of unknown type, assume that all successors are + // executable. + return false; + } + + if (!SimpleBranch) + return false; + + // BI is a conditional branch if we got here. + RegisterRef PR = BI->getOperand(0); + RegisterCell PC = getCell(PR, Inputs); + const BT::BitValue &Test = PC[0]; + + // If the condition is neither true nor false, then it's unknown. + if (!Test.is(0) && !Test.is(1)) + return false; + + // "Test.is(!Negated)" means "branch condition is true". + if (!Test.is(!Negated)) { + // Condition known to be false. + FallsThru = true; + return true; + } + + Targets.insert(BI->getOperand(1).getMBB()); + FallsThru = false; + return true; +} + + +bool HexagonEvaluator::evaluateLoad(const MachineInstr *MI, + const CellMapType &Inputs, CellMapType &Outputs) const { + if (TII.isPredicated(MI)) + return false; + assert(MI->mayLoad() && "A load that mayn't?"); + unsigned Opc = MI->getOpcode(); + + uint16_t BitNum; + bool SignEx; + using namespace Hexagon; + + switch (Opc) { + default: + return false; + +#if 0 + // memb_fifo + case L2_loadalignb_pbr: + case L2_loadalignb_pcr: + case L2_loadalignb_pi: + // memh_fifo + case L2_loadalignh_pbr: + case L2_loadalignh_pcr: + case L2_loadalignh_pi: + // membh + case L2_loadbsw2_pbr: + case L2_loadbsw2_pci: + case L2_loadbsw2_pcr: + case L2_loadbsw2_pi: + case L2_loadbsw4_pbr: + case L2_loadbsw4_pci: + case L2_loadbsw4_pcr: + case L2_loadbsw4_pi: + // memubh + case L2_loadbzw2_pbr: + case L2_loadbzw2_pci: + case L2_loadbzw2_pcr: + case L2_loadbzw2_pi: + case L2_loadbzw4_pbr: + case L2_loadbzw4_pci: + case L2_loadbzw4_pcr: + case L2_loadbzw4_pi: +#endif + + case L2_loadrbgp: + case L2_loadrb_io: + case L2_loadrb_pbr: + case L2_loadrb_pci: + case L2_loadrb_pcr: + case L2_loadrb_pi: + case L4_loadrb_abs: + case L4_loadrb_ap: + case L4_loadrb_rr: + case L4_loadrb_ur: + BitNum = 8; + SignEx = true; + break; + + case L2_loadrubgp: + case L2_loadrub_io: + case L2_loadrub_pbr: + case L2_loadrub_pci: + case L2_loadrub_pcr: + case L2_loadrub_pi: + case L4_loadrub_abs: + case L4_loadrub_ap: + case L4_loadrub_rr: + case L4_loadrub_ur: + BitNum = 8; + SignEx = false; + break; + + case L2_loadrhgp: + case L2_loadrh_io: + case L2_loadrh_pbr: + case L2_loadrh_pci: + case L2_loadrh_pcr: + case L2_loadrh_pi: + case L4_loadrh_abs: + case L4_loadrh_ap: + case L4_loadrh_rr: + case L4_loadrh_ur: + BitNum = 16; + SignEx = true; + break; + + case L2_loadruhgp: + case L2_loadruh_io: + case L2_loadruh_pbr: + case L2_loadruh_pci: + case L2_loadruh_pcr: + case L2_loadruh_pi: + case L4_loadruh_rr: + case L4_loadruh_abs: + case L4_loadruh_ap: + case L4_loadruh_ur: + BitNum = 16; + SignEx = false; + break; + + case L2_loadrigp: + case L2_loadri_io: + case L2_loadri_pbr: + case L2_loadri_pci: + case L2_loadri_pcr: + case L2_loadri_pi: + case L2_loadw_locked: + case L4_loadri_abs: + case L4_loadri_ap: + case L4_loadri_rr: + case L4_loadri_ur: + case LDriw_pred: + BitNum = 32; + SignEx = true; + break; + + case L2_loadrdgp: + case L2_loadrd_io: + case L2_loadrd_pbr: + case L2_loadrd_pci: + case L2_loadrd_pcr: + case L2_loadrd_pi: + case L4_loadd_locked: + case L4_loadrd_abs: + case L4_loadrd_ap: + case L4_loadrd_rr: + case L4_loadrd_ur: + BitNum = 64; + SignEx = true; + break; + } + + const MachineOperand &MD = MI->getOperand(0); + assert(MD.isReg() && MD.isDef()); + RegisterRef RD = MD; + + uint16_t W = 
getRegBitWidth(RD); + assert(W >= BitNum && BitNum > 0); + RegisterCell Res(W); + + for (uint16_t i = 0; i < BitNum; ++i) + Res[i] = BT::BitValue::self(BT::BitRef(RD.Reg, i)); + + if (SignEx) { + const BT::BitValue &Sign = Res[BitNum-1]; + for (uint16_t i = BitNum; i < W; ++i) + Res[i] = BT::BitValue::ref(Sign); + } else { + for (uint16_t i = BitNum; i < W; ++i) + Res[i] = BT::BitValue::Zero; + } + + putCell(RD, Res, Outputs); + return true; +} + + +bool HexagonEvaluator::evaluateFormalCopy(const MachineInstr *MI, + const CellMapType &Inputs, CellMapType &Outputs) const { + // If MI defines a formal parameter, but is not a copy (loads are handled + // in evaluateLoad), then it's not clear what to do. + assert(MI->isCopy()); + + RegisterRef RD = MI->getOperand(0); + RegisterRef RS = MI->getOperand(1); + assert(RD.Sub == 0); + if (!TargetRegisterInfo::isPhysicalRegister(RS.Reg)) + return false; + RegExtMap::const_iterator F = VRX.find(RD.Reg); + if (F == VRX.end()) + return false; + + uint16_t EW = F->second.Width; + // Store RD's cell into the map. This will associate the cell with a virtual + // register, and make zero-/sign-extends possible (otherwise we would be ex- + // tending "self" bit values, which will have no effect, since "self" values + // cannot be references to anything). + putCell(RD, getCell(RS, Inputs), Outputs); + + RegisterCell Res; + // Read RD's cell from the outputs instead of RS's cell from the inputs: + if (F->second.Type == ExtType::SExt) + Res = eSXT(getCell(RD, Outputs), EW); + else if (F->second.Type == ExtType::ZExt) + Res = eZXT(getCell(RD, Outputs), EW); + + putCell(RD, Res, Outputs); + return true; +} + + +unsigned HexagonEvaluator::getNextPhysReg(unsigned PReg, unsigned Width) const { + using namespace Hexagon; + bool Is64 = DoubleRegsRegClass.contains(PReg); + assert(PReg == 0 || Is64 || IntRegsRegClass.contains(PReg)); + + static const unsigned Phys32[] = { R0, R1, R2, R3, R4, R5 }; + static const unsigned Phys64[] = { D0, D1, D2 }; + const unsigned Num32 = sizeof(Phys32)/sizeof(unsigned); + const unsigned Num64 = sizeof(Phys64)/sizeof(unsigned); + + // Return the first parameter register of the required width. + if (PReg == 0) + return (Width <= 32) ? Phys32[0] : Phys64[0]; + + // Set Idx32, Idx64 in such a way that Idx+1 would give the index of the + // next register. + unsigned Idx32 = 0, Idx64 = 0; + if (!Is64) { + while (Idx32 < Num32) { + if (Phys32[Idx32] == PReg) + break; + Idx32++; + } + Idx64 = Idx32/2; + } else { + while (Idx64 < Num64) { + if (Phys64[Idx64] == PReg) + break; + Idx64++; + } + Idx32 = Idx64*2+1; + } + + if (Width <= 32) + return (Idx32+1 < Num32) ? Phys32[Idx32+1] : 0; + return (Idx64+1 < Num64) ? Phys64[Idx64+1] : 0; +} + + +unsigned HexagonEvaluator::getVirtRegFor(unsigned PReg) const { + typedef MachineRegisterInfo::livein_iterator iterator; + for (iterator I = MRI.livein_begin(), E = MRI.livein_end(); I != E; ++I) { + if (I->first == PReg) + return I->second; + } + return 0; +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.h b/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.h new file mode 100644 index 0000000..897af2d --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.h @@ -0,0 +1,64 @@ +//===--- HexagonBitTracker.h ----------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef HEXAGONBITTRACKER_H +#define HEXAGONBITTRACKER_H + +#include "BitTracker.h" +#include "llvm/ADT/DenseMap.h" + +namespace llvm { + class HexagonInstrInfo; + class HexagonRegisterInfo; + +struct HexagonEvaluator : public BitTracker::MachineEvaluator { + typedef BitTracker::CellMapType CellMapType; + typedef BitTracker::RegisterRef RegisterRef; + typedef BitTracker::RegisterCell RegisterCell; + typedef BitTracker::BranchTargetList BranchTargetList; + + HexagonEvaluator(const HexagonRegisterInfo &tri, MachineRegisterInfo &mri, + const HexagonInstrInfo &tii, MachineFunction &mf); + + bool evaluate(const MachineInstr *MI, const CellMapType &Inputs, + CellMapType &Outputs) const override; + bool evaluate(const MachineInstr *BI, const CellMapType &Inputs, + BranchTargetList &Targets, bool &FallsThru) const override; + + BitTracker::BitMask mask(unsigned Reg, unsigned Sub) const override; + + MachineFunction &MF; + MachineFrameInfo &MFI; + const HexagonInstrInfo &TII; + +private: + bool evaluateLoad(const MachineInstr *MI, const CellMapType &Inputs, + CellMapType &Outputs) const; + bool evaluateFormalCopy(const MachineInstr *MI, const CellMapType &Inputs, + CellMapType &Outputs) const; + + unsigned getNextPhysReg(unsigned PReg, unsigned Width) const; + unsigned getVirtRegFor(unsigned PReg) const; + + // Type of formal parameter extension. + struct ExtType { + enum { SExt, ZExt }; + char Type; + uint16_t Width; + ExtType() : Type(0), Width(0) {} + ExtType(char t, uint16_t w) : Type(t), Width(w) {} + }; + // Map VR -> extension type. + typedef DenseMap<unsigned, ExtType> RegExtMap; + RegExtMap VRX; +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp new file mode 100644 index 0000000..efafdd0 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp @@ -0,0 +1,247 @@ +//===-- HexagonCFGOptimizer.cpp - CFG optimizations -----------------------===// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "Hexagon.h" +#include "HexagonMachineFunctionInfo.h" +#include "HexagonSubtarget.h" +#include "HexagonTargetMachine.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "hexagon_cfg" + +namespace llvm { + FunctionPass *createHexagonCFGOptimizer(); + void initializeHexagonCFGOptimizerPass(PassRegistry&); +} + + +namespace { + +class HexagonCFGOptimizer : public MachineFunctionPass { + +private: + void InvertAndChangeJumpTarget(MachineInstr*, MachineBasicBlock*); + + public: + static char ID; + HexagonCFGOptimizer() : MachineFunctionPass(ID) { + initializeHexagonCFGOptimizerPass(*PassRegistry::getPassRegistry()); + } + + const char *getPassName() const override { + return "Hexagon CFG Optimizer"; + } + bool runOnMachineFunction(MachineFunction &Fn) override; +}; + + +char HexagonCFGOptimizer::ID = 0; + +static bool IsConditionalBranch(int Opc) { + return (Opc == Hexagon::J2_jumpt) || (Opc == Hexagon::J2_jumpf) + || (Opc == Hexagon::J2_jumptnewpt) || (Opc == Hexagon::J2_jumpfnewpt); +} + + +static bool IsUnconditionalJump(int Opc) { + return (Opc == Hexagon::J2_jump); +} + + +void +HexagonCFGOptimizer::InvertAndChangeJumpTarget(MachineInstr* MI, + MachineBasicBlock* NewTarget) { + const TargetInstrInfo *TII = + MI->getParent()->getParent()->getSubtarget().getInstrInfo(); + int NewOpcode = 0; + switch(MI->getOpcode()) { + case Hexagon::J2_jumpt: + NewOpcode = Hexagon::J2_jumpf; + break; + + case Hexagon::J2_jumpf: + NewOpcode = Hexagon::J2_jumpt; + break; + + case Hexagon::J2_jumptnewpt: + NewOpcode = Hexagon::J2_jumpfnewpt; + break; + + case Hexagon::J2_jumpfnewpt: + NewOpcode = Hexagon::J2_jumptnewpt; + break; + + default: + llvm_unreachable("Cannot handle this case"); + } + + MI->setDesc(TII->get(NewOpcode)); + MI->getOperand(1).setMBB(NewTarget); +} + + +bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) { + // Loop over all of the basic blocks. + for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end(); + MBBb != MBBe; ++MBBb) { + MachineBasicBlock *MBB = &*MBBb; + + // Traverse the basic block. + MachineBasicBlock::iterator MII = MBB->getFirstTerminator(); + if (MII != MBB->end()) { + MachineInstr *MI = MII; + int Opc = MI->getOpcode(); + if (IsConditionalBranch(Opc)) { + + // + // (Case 1) Transform the code if the following condition occurs: + // BB1: if (p0) jump BB3 + // ...falls-through to BB2 ... + // BB2: jump BB4 + // ...next block in layout is BB3... + // BB3: ... + // + // Transform this to: + // BB1: if (!p0) jump BB4 + // Remove BB2 + // BB3: ... + // + // (Case 2) A variation occurs when BB3 contains a JMP to BB4: + // BB1: if (p0) jump BB3 + // ...falls-through to BB2 ... + // BB2: jump BB4 + // ...other basic blocks ... + // BB4: + // ...not a fall-thru + // BB3: ... + // jump BB4 + // + // Transform this to: + // BB1: if (!p0) jump BB4 + // Remove BB2 + // BB3: ... + // BB4: ... 
+ // + unsigned NumSuccs = MBB->succ_size(); + MachineBasicBlock::succ_iterator SI = MBB->succ_begin(); + MachineBasicBlock* FirstSucc = *SI; + MachineBasicBlock* SecondSucc = *(++SI); + MachineBasicBlock* LayoutSucc = nullptr; + MachineBasicBlock* JumpAroundTarget = nullptr; + + if (MBB->isLayoutSuccessor(FirstSucc)) { + LayoutSucc = FirstSucc; + JumpAroundTarget = SecondSucc; + } else if (MBB->isLayoutSuccessor(SecondSucc)) { + LayoutSucc = SecondSucc; + JumpAroundTarget = FirstSucc; + } else { + // Odd case...cannot handle. + } + + // The target of the unconditional branch must be JumpAroundTarget. + // TODO: If not, we should not invert the unconditional branch. + MachineBasicBlock* CondBranchTarget = nullptr; + if ((MI->getOpcode() == Hexagon::J2_jumpt) || + (MI->getOpcode() == Hexagon::J2_jumpf)) { + CondBranchTarget = MI->getOperand(1).getMBB(); + } + + if (!LayoutSucc || (CondBranchTarget != JumpAroundTarget)) { + continue; + } + + if ((NumSuccs == 2) && LayoutSucc && (LayoutSucc->pred_size() == 1)) { + + // Ensure that BB2 has one instruction -- an unconditional jump. + if ((LayoutSucc->size() == 1) && + IsUnconditionalJump(LayoutSucc->front().getOpcode())) { + MachineBasicBlock* UncondTarget = + LayoutSucc->front().getOperand(0).getMBB(); + // Check if the layout successor of BB2 is BB3. + bool case1 = LayoutSucc->isLayoutSuccessor(JumpAroundTarget); + bool case2 = JumpAroundTarget->isSuccessor(UncondTarget) && + JumpAroundTarget->size() >= 1 && + IsUnconditionalJump(JumpAroundTarget->back().getOpcode()) && + JumpAroundTarget->pred_size() == 1 && + JumpAroundTarget->succ_size() == 1; + + if (case1 || case2) { + InvertAndChangeJumpTarget(MI, UncondTarget); + MBB->replaceSuccessor(JumpAroundTarget, UncondTarget); + + // Remove the unconditional branch in LayoutSucc. + LayoutSucc->erase(LayoutSucc->begin()); + LayoutSucc->replaceSuccessor(UncondTarget, JumpAroundTarget); + + // This code performs the conversion for case 2, which moves + // the block to the fall-thru case (BB3 in the code above). + if (case2 && !case1) { + JumpAroundTarget->moveAfter(LayoutSucc); + // only move a block if it doesn't have a fall-thru. otherwise + // the CFG will be incorrect. + if (!UncondTarget->canFallThrough()) { + UncondTarget->moveAfter(JumpAroundTarget); + } + } + + // + // Correct live-in information. Is used by post-RA scheduler + // The live-in to LayoutSucc is now all values live-in to + // JumpAroundTarget. 
+ // + std::vector<MachineBasicBlock::RegisterMaskPair> OrigLiveIn( + LayoutSucc->livein_begin(), LayoutSucc->livein_end()); + std::vector<MachineBasicBlock::RegisterMaskPair> NewLiveIn( + JumpAroundTarget->livein_begin(), + JumpAroundTarget->livein_end()); + for (const auto &OrigLI : OrigLiveIn) + LayoutSucc->removeLiveIn(OrigLI.PhysReg); + for (const auto &NewLI : NewLiveIn) + LayoutSucc->addLiveIn(NewLI); + } + } + } + } + } + } + return true; +} +} + + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +static void initializePassOnce(PassRegistry &Registry) { + PassInfo *PI = new PassInfo("Hexagon CFG Optimizer", "hexagon-cfg", + &HexagonCFGOptimizer::ID, nullptr, false, false); + Registry.registerPass(*PI, true); +} + +void llvm::initializeHexagonCFGOptimizerPass(PassRegistry &Registry) { + CALL_ONCE_INITIALIZATION(initializePassOnce) +} + +FunctionPass *llvm::createHexagonCFGOptimizer() { + return new HexagonCFGOptimizer(); +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonCallingConv.td b/contrib/llvm/lib/Target/Hexagon/HexagonCallingConv.td new file mode 100644 index 0000000..e61b2a7 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonCallingConv.td @@ -0,0 +1,35 @@ +//===- HexagonCallingConv.td - Calling Conventions Hexagon -*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This describes the calling conventions for the Hexagon architectures. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Return Value Calling Conventions +//===----------------------------------------------------------------------===// + +// Hexagon 32-bit C return-value convention. +def RetCC_Hexagon32 : CallingConv<[ + CCIfType<[i32, f32], CCAssignToReg<[R0, R1, R2, R3, R4, R5]>>, + CCIfType<[i64, f64], CCAssignToReg<[D0, D1, D2]>>, + + // Alternatively, they are assigned to the stack in 4-byte aligned units. + CCAssignToStack<4, 4> +]>; + +// Hexagon 32-bit C Calling convention. +def CC_Hexagon32 : CallingConv<[ + // All arguments get passed in integer registers if there is space. + CCIfType<[f32, i32, i16, i8], CCAssignToReg<[R0, R1, R2, R3, R4, R5]>>, + CCIfType<[f64, i64], CCAssignToReg<[D0, D1, D2]>>, + + // Alternatively, they are assigned to the stack in 4-byte aligned units. + CCAssignToStack<4, 4> +]>; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp new file mode 100644 index 0000000..931db66 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp @@ -0,0 +1,1310 @@ +//===--- HexagonCommonGEP.cpp ---------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "commgep" + +#include "llvm/Pass.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/PostDominators.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Verifier.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" + +#include <map> +#include <set> +#include <vector> + +#include "HexagonTargetMachine.h" + +using namespace llvm; + +static cl::opt<bool> OptSpeculate("commgep-speculate", cl::init(true), + cl::Hidden, cl::ZeroOrMore); + +static cl::opt<bool> OptEnableInv("commgep-inv", cl::init(true), cl::Hidden, + cl::ZeroOrMore); + +static cl::opt<bool> OptEnableConst("commgep-const", cl::init(true), + cl::Hidden, cl::ZeroOrMore); + +namespace llvm { + void initializeHexagonCommonGEPPass(PassRegistry&); +} + +namespace { + struct GepNode; + typedef std::set<GepNode*> NodeSet; + typedef std::map<GepNode*,Value*> NodeToValueMap; + typedef std::vector<GepNode*> NodeVect; + typedef std::map<GepNode*,NodeVect> NodeChildrenMap; + typedef std::set<Use*> UseSet; + typedef std::map<GepNode*,UseSet> NodeToUsesMap; + + // Numbering map for gep nodes. Used to keep track of ordering for + // gep nodes. + struct NodeOrdering { + NodeOrdering() : LastNum(0) {} + + void insert(const GepNode *N) { Map.insert(std::make_pair(N, ++LastNum)); } + void clear() { Map.clear(); } + + bool operator()(const GepNode *N1, const GepNode *N2) const { + auto F1 = Map.find(N1), F2 = Map.find(N2); + assert(F1 != Map.end() && F2 != Map.end()); + return F1->second < F2->second; + } + + private: + std::map<const GepNode *, unsigned> Map; + unsigned LastNum; + }; + + class HexagonCommonGEP : public FunctionPass { + public: + static char ID; + HexagonCommonGEP() : FunctionPass(ID) { + initializeHexagonCommonGEPPass(*PassRegistry::getPassRegistry()); + } + virtual bool runOnFunction(Function &F); + virtual const char *getPassName() const { + return "Hexagon Common GEP"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addPreserved<DominatorTreeWrapperPass>(); + AU.addRequired<PostDominatorTree>(); + AU.addPreserved<PostDominatorTree>(); + AU.addRequired<LoopInfoWrapperPass>(); + AU.addPreserved<LoopInfoWrapperPass>(); + FunctionPass::getAnalysisUsage(AU); + } + + private: + typedef std::map<Value*,GepNode*> ValueToNodeMap; + typedef std::vector<Value*> ValueVect; + typedef std::map<GepNode*,ValueVect> NodeToValuesMap; + + void getBlockTraversalOrder(BasicBlock *Root, ValueVect &Order); + bool isHandledGepForm(GetElementPtrInst *GepI); + void processGepInst(GetElementPtrInst *GepI, ValueToNodeMap &NM); + void collect(); + void common(); + + BasicBlock *recalculatePlacement(GepNode *Node, NodeChildrenMap &NCM, + NodeToValueMap &Loc); + BasicBlock *recalculatePlacementRec(GepNode *Node, NodeChildrenMap &NCM, + NodeToValueMap &Loc); + bool isInvariantIn(Value *Val, Loop *L); + bool isInvariantIn(GepNode *Node, Loop *L); + bool isInMainPath(BasicBlock *B, Loop *L); + BasicBlock *adjustForInvariance(GepNode *Node, NodeChildrenMap &NCM, + NodeToValueMap &Loc); + 
void separateChainForNode(GepNode *Node, Use *U, NodeToValueMap &Loc); + void separateConstantChains(GepNode *Node, NodeChildrenMap &NCM, + NodeToValueMap &Loc); + void computeNodePlacement(NodeToValueMap &Loc); + + Value *fabricateGEP(NodeVect &NA, BasicBlock::iterator At, + BasicBlock *LocB); + void getAllUsersForNode(GepNode *Node, ValueVect &Values, + NodeChildrenMap &NCM); + void materialize(NodeToValueMap &Loc); + + void removeDeadCode(); + + NodeVect Nodes; + NodeToUsesMap Uses; + NodeOrdering NodeOrder; // Node ordering, for deterministic behavior. + SpecificBumpPtrAllocator<GepNode> *Mem; + LLVMContext *Ctx; + LoopInfo *LI; + DominatorTree *DT; + PostDominatorTree *PDT; + Function *Fn; + }; +} + + +char HexagonCommonGEP::ID = 0; +INITIALIZE_PASS_BEGIN(HexagonCommonGEP, "hcommgep", "Hexagon Common GEP", + false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(PostDominatorTree) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_END(HexagonCommonGEP, "hcommgep", "Hexagon Common GEP", + false, false) + +namespace { + struct GepNode { + enum { + None = 0, + Root = 0x01, + Internal = 0x02, + Used = 0x04 + }; + + uint32_t Flags; + union { + GepNode *Parent; + Value *BaseVal; + }; + Value *Idx; + Type *PTy; // Type of the pointer operand. + + GepNode() : Flags(0), Parent(0), Idx(0), PTy(0) {} + GepNode(const GepNode *N) : Flags(N->Flags), Idx(N->Idx), PTy(N->PTy) { + if (Flags & Root) + BaseVal = N->BaseVal; + else + Parent = N->Parent; + } + friend raw_ostream &operator<< (raw_ostream &OS, const GepNode &GN); + }; + + + Type *next_type(Type *Ty, Value *Idx) { + // Advance the type. + if (!Ty->isStructTy()) { + Type *NexTy = cast<SequentialType>(Ty)->getElementType(); + return NexTy; + } + // Otherwise it is a struct type. 
+ ConstantInt *CI = dyn_cast<ConstantInt>(Idx); + assert(CI && "Struct type with non-constant index"); + int64_t i = CI->getValue().getSExtValue(); + Type *NextTy = cast<StructType>(Ty)->getElementType(i); + return NextTy; + } + + + raw_ostream &operator<< (raw_ostream &OS, const GepNode &GN) { + OS << "{ {"; + bool Comma = false; + if (GN.Flags & GepNode::Root) { + OS << "root"; + Comma = true; + } + if (GN.Flags & GepNode::Internal) { + if (Comma) + OS << ','; + OS << "internal"; + Comma = true; + } + if (GN.Flags & GepNode::Used) { + if (Comma) + OS << ','; + OS << "used"; + Comma = true; + } + OS << "} "; + if (GN.Flags & GepNode::Root) + OS << "BaseVal:" << GN.BaseVal->getName() << '(' << GN.BaseVal << ')'; + else + OS << "Parent:" << GN.Parent; + + OS << " Idx:"; + if (ConstantInt *CI = dyn_cast<ConstantInt>(GN.Idx)) + OS << CI->getValue().getSExtValue(); + else if (GN.Idx->hasName()) + OS << GN.Idx->getName(); + else + OS << "<anon> =" << *GN.Idx; + + OS << " PTy:"; + if (GN.PTy->isStructTy()) { + StructType *STy = cast<StructType>(GN.PTy); + if (!STy->isLiteral()) + OS << GN.PTy->getStructName(); + else + OS << "<anon-struct>:" << *STy; + } + else + OS << *GN.PTy; + OS << " }"; + return OS; + } + + + template <typename NodeContainer> + void dump_node_container(raw_ostream &OS, const NodeContainer &S) { + typedef typename NodeContainer::const_iterator const_iterator; + for (const_iterator I = S.begin(), E = S.end(); I != E; ++I) + OS << *I << ' ' << **I << '\n'; + } + + raw_ostream &operator<< (raw_ostream &OS, + const NodeVect &S) LLVM_ATTRIBUTE_UNUSED; + raw_ostream &operator<< (raw_ostream &OS, const NodeVect &S) { + dump_node_container(OS, S); + return OS; + } + + + raw_ostream &operator<< (raw_ostream &OS, + const NodeToUsesMap &M) LLVM_ATTRIBUTE_UNUSED; + raw_ostream &operator<< (raw_ostream &OS, const NodeToUsesMap &M){ + typedef NodeToUsesMap::const_iterator const_iterator; + for (const_iterator I = M.begin(), E = M.end(); I != E; ++I) { + const UseSet &Us = I->second; + OS << I->first << " -> #" << Us.size() << '{'; + for (UseSet::const_iterator J = Us.begin(), F = Us.end(); J != F; ++J) { + User *R = (*J)->getUser(); + if (R->hasName()) + OS << ' ' << R->getName(); + else + OS << " <?>(" << *R << ')'; + } + OS << " }\n"; + } + return OS; + } + + + struct in_set { + in_set(const NodeSet &S) : NS(S) {} + bool operator() (GepNode *N) const { + return NS.find(N) != NS.end(); + } + private: + const NodeSet &NS; + }; +} + + +inline void *operator new(size_t, SpecificBumpPtrAllocator<GepNode> &A) { + return A.Allocate(); +} + + +void HexagonCommonGEP::getBlockTraversalOrder(BasicBlock *Root, + ValueVect &Order) { + // Compute block ordering for a typical DT-based traversal of the flow + // graph: "before visiting a block, all of its dominators must have been + // visited". + + Order.push_back(Root); + DomTreeNode *DTN = DT->getNode(Root); + typedef GraphTraits<DomTreeNode*> GTN; + typedef GTN::ChildIteratorType Iter; + for (Iter I = GTN::child_begin(DTN), E = GTN::child_end(DTN); I != E; ++I) + getBlockTraversalOrder((*I)->getBlock(), Order); +} + + +bool HexagonCommonGEP::isHandledGepForm(GetElementPtrInst *GepI) { + // No vector GEPs. + if (!GepI->getType()->isPointerTy()) + return false; + // No GEPs without any indices. (Is this possible?) 
+ if (GepI->idx_begin() == GepI->idx_end()) + return false; + return true; +} + + +void HexagonCommonGEP::processGepInst(GetElementPtrInst *GepI, + ValueToNodeMap &NM) { + DEBUG(dbgs() << "Visiting GEP: " << *GepI << '\n'); + GepNode *N = new (*Mem) GepNode; + Value *PtrOp = GepI->getPointerOperand(); + ValueToNodeMap::iterator F = NM.find(PtrOp); + if (F == NM.end()) { + N->BaseVal = PtrOp; + N->Flags |= GepNode::Root; + } else { + // If PtrOp was a GEP instruction, it must have already been processed. + // The ValueToNodeMap entry for it is the last gep node in the generated + // chain. Link to it here. + N->Parent = F->second; + } + N->PTy = PtrOp->getType(); + N->Idx = *GepI->idx_begin(); + + // Collect the list of users of this GEP instruction. Will add it to the + // last node created for it. + UseSet Us; + for (Value::user_iterator UI = GepI->user_begin(), UE = GepI->user_end(); + UI != UE; ++UI) { + // Check if this gep is used by anything other than other geps that + // we will process. + if (isa<GetElementPtrInst>(*UI)) { + GetElementPtrInst *UserG = cast<GetElementPtrInst>(*UI); + if (isHandledGepForm(UserG)) + continue; + } + Us.insert(&UI.getUse()); + } + Nodes.push_back(N); + NodeOrder.insert(N); + + // Skip the first index operand, since we only handle 0. This dereferences + // the pointer operand. + GepNode *PN = N; + Type *PtrTy = cast<PointerType>(PtrOp->getType())->getElementType(); + for (User::op_iterator OI = GepI->idx_begin()+1, OE = GepI->idx_end(); + OI != OE; ++OI) { + Value *Op = *OI; + GepNode *Nx = new (*Mem) GepNode; + Nx->Parent = PN; // Link Nx to the previous node. + Nx->Flags |= GepNode::Internal; + Nx->PTy = PtrTy; + Nx->Idx = Op; + Nodes.push_back(Nx); + NodeOrder.insert(Nx); + PN = Nx; + + PtrTy = next_type(PtrTy, Op); + } + + // After last node has been created, update the use information. + if (!Us.empty()) { + PN->Flags |= GepNode::Used; + Uses[PN].insert(Us.begin(), Us.end()); + } + + // Link the last node with the originating GEP instruction. This is to + // help with linking chained GEP instructions. + NM.insert(std::make_pair(GepI, PN)); +} + + +void HexagonCommonGEP::collect() { + // Establish depth-first traversal order of the dominator tree. + ValueVect BO; + getBlockTraversalOrder(&Fn->front(), BO); + + // The creation of gep nodes requires DT-traversal. When processing a GEP + // instruction that uses another GEP instruction as the base pointer, the + // gep node for the base pointer should already exist. 
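+  // For example (hypothetical IR), in
+  //   %a = getelementptr %struct.S, %struct.S* %p, i32 0, i32 1
+  //   %b = getelementptr i16, i16* %a, i32 %i
+  // the definition of %a dominates its use in %b, so the node chain for %a is
+  // already in NM when %b is visited.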
+ ValueToNodeMap NM; + for (ValueVect::iterator I = BO.begin(), E = BO.end(); I != E; ++I) { + BasicBlock *B = cast<BasicBlock>(*I); + for (BasicBlock::iterator J = B->begin(), F = B->end(); J != F; ++J) { + if (!isa<GetElementPtrInst>(J)) + continue; + GetElementPtrInst *GepI = cast<GetElementPtrInst>(J); + if (isHandledGepForm(GepI)) + processGepInst(GepI, NM); + } + } + + DEBUG(dbgs() << "Gep nodes after initial collection:\n" << Nodes); +} + + +namespace { + void invert_find_roots(const NodeVect &Nodes, NodeChildrenMap &NCM, + NodeVect &Roots) { + typedef NodeVect::const_iterator const_iterator; + for (const_iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) { + GepNode *N = *I; + if (N->Flags & GepNode::Root) { + Roots.push_back(N); + continue; + } + GepNode *PN = N->Parent; + NCM[PN].push_back(N); + } + } + + void nodes_for_root(GepNode *Root, NodeChildrenMap &NCM, NodeSet &Nodes) { + NodeVect Work; + Work.push_back(Root); + Nodes.insert(Root); + + while (!Work.empty()) { + NodeVect::iterator First = Work.begin(); + GepNode *N = *First; + Work.erase(First); + NodeChildrenMap::iterator CF = NCM.find(N); + if (CF != NCM.end()) { + Work.insert(Work.end(), CF->second.begin(), CF->second.end()); + Nodes.insert(CF->second.begin(), CF->second.end()); + } + } + } +} + + +namespace { + typedef std::set<NodeSet> NodeSymRel; + typedef std::pair<GepNode*,GepNode*> NodePair; + typedef std::set<NodePair> NodePairSet; + + const NodeSet *node_class(GepNode *N, NodeSymRel &Rel) { + for (NodeSymRel::iterator I = Rel.begin(), E = Rel.end(); I != E; ++I) + if (I->count(N)) + return &*I; + return 0; + } + + // Create an ordered pair of GepNode pointers. The pair will be used in + // determining equality. The only purpose of the ordering is to eliminate + // duplication due to the commutativity of equality/non-equality. + NodePair node_pair(GepNode *N1, GepNode *N2) { + uintptr_t P1 = uintptr_t(N1), P2 = uintptr_t(N2); + if (P1 <= P2) + return std::make_pair(N1, N2); + return std::make_pair(N2, N1); + } + + unsigned node_hash(GepNode *N) { + // Include everything except flags and parent. + FoldingSetNodeID ID; + ID.AddPointer(N->Idx); + ID.AddPointer(N->PTy); + return ID.ComputeHash(); + } + + bool node_eq(GepNode *N1, GepNode *N2, NodePairSet &Eq, NodePairSet &Ne) { + // Don't cache the result for nodes with different hashes. The hash + // comparison is fast enough. + if (node_hash(N1) != node_hash(N2)) + return false; + + NodePair NP = node_pair(N1, N2); + NodePairSet::iterator FEq = Eq.find(NP); + if (FEq != Eq.end()) + return true; + NodePairSet::iterator FNe = Ne.find(NP); + if (FNe != Ne.end()) + return false; + // Not previously compared. + bool Root1 = N1->Flags & GepNode::Root; + bool Root2 = N2->Flags & GepNode::Root; + NodePair P = node_pair(N1, N2); + // If the Root flag has different values, the nodes are different. + // If both nodes are root nodes, but their base pointers differ, + // they are different. + if (Root1 != Root2 || (Root1 && N1->BaseVal != N2->BaseVal)) { + Ne.insert(P); + return false; + } + // Here the root flags are identical, and for root nodes the + // base pointers are equal, so the root nodes are equal. + // For non-root nodes, compare their parent nodes. + if (Root1 || node_eq(N1->Parent, N2->Parent, Eq, Ne)) { + Eq.insert(P); + return true; + } + return false; + } +} + + +void HexagonCommonGEP::common() { + // The essence of this commoning is finding gep nodes that are equal. + // To do this we need to compare all pairs of nodes. 
To save time, + // first, partition the set of all nodes into sets of potentially equal + // nodes, and then compare pairs from within each partition. + typedef std::map<unsigned,NodeSet> NodeSetMap; + NodeSetMap MaybeEq; + + for (NodeVect::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) { + GepNode *N = *I; + unsigned H = node_hash(N); + MaybeEq[H].insert(N); + } + + // Compute the equivalence relation for the gep nodes. Use two caches, + // one for equality and the other for non-equality. + NodeSymRel EqRel; // Equality relation (as set of equivalence classes). + NodePairSet Eq, Ne; // Caches. + for (NodeSetMap::iterator I = MaybeEq.begin(), E = MaybeEq.end(); + I != E; ++I) { + NodeSet &S = I->second; + for (NodeSet::iterator NI = S.begin(), NE = S.end(); NI != NE; ++NI) { + GepNode *N = *NI; + // If node already has a class, then the class must have been created + // in a prior iteration of this loop. Since equality is transitive, + // nothing more will be added to that class, so skip it. + if (node_class(N, EqRel)) + continue; + + // Create a new class candidate now. + NodeSet C; + for (NodeSet::iterator NJ = std::next(NI); NJ != NE; ++NJ) + if (node_eq(N, *NJ, Eq, Ne)) + C.insert(*NJ); + // If Tmp is empty, N would be the only element in it. Don't bother + // creating a class for it then. + if (!C.empty()) { + C.insert(N); // Finalize the set before adding it to the relation. + std::pair<NodeSymRel::iterator, bool> Ins = EqRel.insert(C); + (void)Ins; + assert(Ins.second && "Cannot add a class"); + } + } + } + + DEBUG({ + dbgs() << "Gep node equality:\n"; + for (NodePairSet::iterator I = Eq.begin(), E = Eq.end(); I != E; ++I) + dbgs() << "{ " << I->first << ", " << I->second << " }\n"; + + dbgs() << "Gep equivalence classes:\n"; + for (NodeSymRel::iterator I = EqRel.begin(), E = EqRel.end(); I != E; ++I) { + dbgs() << '{'; + const NodeSet &S = *I; + for (NodeSet::const_iterator J = S.begin(), F = S.end(); J != F; ++J) { + if (J != S.begin()) + dbgs() << ','; + dbgs() << ' ' << *J; + } + dbgs() << " }\n"; + } + }); + + + // Create a projection from a NodeSet to the minimal element in it. + typedef std::map<const NodeSet*,GepNode*> ProjMap; + ProjMap PM; + for (NodeSymRel::iterator I = EqRel.begin(), E = EqRel.end(); I != E; ++I) { + const NodeSet &S = *I; + GepNode *Min = *std::min_element(S.begin(), S.end(), NodeOrder); + std::pair<ProjMap::iterator,bool> Ins = PM.insert(std::make_pair(&S, Min)); + (void)Ins; + assert(Ins.second && "Cannot add minimal element"); + + // Update the min element's flags, and user list. + uint32_t Flags = 0; + UseSet &MinUs = Uses[Min]; + for (NodeSet::iterator J = S.begin(), F = S.end(); J != F; ++J) { + GepNode *N = *J; + uint32_t NF = N->Flags; + // If N is used, append all original values of N to the list of + // original values of Min. + if (NF & GepNode::Used) + MinUs.insert(Uses[N].begin(), Uses[N].end()); + Flags |= NF; + } + if (MinUs.empty()) + Uses.erase(Min); + + // The collected flags should include all the flags from the min element. + assert((Min->Flags & Flags) == Min->Flags); + Min->Flags = Flags; + } + + // Commoning: for each non-root gep node, replace "Parent" with the + // selected (minimum) node from the corresponding equivalence class. + // If a given parent does not have an equivalence class, leave it + // unchanged (it means that it's the only element in its class). 
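+ // Illustrative example (not part of the original comments): two identical
+ // GEPs "getelementptr %p, i32 0, i32 1, i32 %i" produce two parallel node
+ // chains with equal hashes and equal indices. The loop below re-parents
+ // the children of the non-minimal nodes onto the minimal node of each
+ // class, and the cleanup that follows erases the now-unused duplicates.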
+ for (NodeVect::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) { + GepNode *N = *I; + if (N->Flags & GepNode::Root) + continue; + const NodeSet *PC = node_class(N->Parent, EqRel); + if (!PC) + continue; + ProjMap::iterator F = PM.find(PC); + if (F == PM.end()) + continue; + // Found a replacement, use it. + GepNode *Rep = F->second; + N->Parent = Rep; + } + + DEBUG(dbgs() << "Gep nodes after commoning:\n" << Nodes); + + // Finally, erase the nodes that are no longer used. + NodeSet Erase; + for (NodeVect::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) { + GepNode *N = *I; + const NodeSet *PC = node_class(N, EqRel); + if (!PC) + continue; + ProjMap::iterator F = PM.find(PC); + if (F == PM.end()) + continue; + if (N == F->second) + continue; + // Node for removal. + Erase.insert(*I); + } + NodeVect::iterator NewE = std::remove_if(Nodes.begin(), Nodes.end(), + in_set(Erase)); + Nodes.resize(std::distance(Nodes.begin(), NewE)); + + DEBUG(dbgs() << "Gep nodes after post-commoning cleanup:\n" << Nodes); +} + + +namespace { + template <typename T> + BasicBlock *nearest_common_dominator(DominatorTree *DT, T &Blocks) { + DEBUG({ + dbgs() << "NCD of {"; + for (typename T::iterator I = Blocks.begin(), E = Blocks.end(); + I != E; ++I) { + if (!*I) + continue; + BasicBlock *B = cast<BasicBlock>(*I); + dbgs() << ' ' << B->getName(); + } + dbgs() << " }\n"; + }); + + // Allow null basic blocks in Blocks. In such cases, return 0. + typename T::iterator I = Blocks.begin(), E = Blocks.end(); + if (I == E || !*I) + return 0; + BasicBlock *Dom = cast<BasicBlock>(*I); + while (++I != E) { + BasicBlock *B = cast_or_null<BasicBlock>(*I); + Dom = B ? DT->findNearestCommonDominator(Dom, B) : 0; + if (!Dom) + return 0; + } + DEBUG(dbgs() << "computed:" << Dom->getName() << '\n'); + return Dom; + } + + template <typename T> + BasicBlock *nearest_common_dominatee(DominatorTree *DT, T &Blocks) { + // If two blocks, A and B, dominate a block C, then A dominates B, + // or B dominates A. + typename T::iterator I = Blocks.begin(), E = Blocks.end(); + // Find the first non-null block. + while (I != E && !*I) + ++I; + if (I == E) + return DT->getRoot(); + BasicBlock *DomB = cast<BasicBlock>(*I); + while (++I != E) { + if (!*I) + continue; + BasicBlock *B = cast<BasicBlock>(*I); + if (DT->dominates(B, DomB)) + continue; + if (!DT->dominates(DomB, B)) + return 0; + DomB = B; + } + return DomB; + } + + // Find the first use in B of any value from Values. If no such use, + // return B->end(). + template <typename T> + BasicBlock::iterator first_use_of_in_block(T &Values, BasicBlock *B) { + BasicBlock::iterator FirstUse = B->end(), BEnd = B->end(); + typedef typename T::iterator iterator; + for (iterator I = Values.begin(), E = Values.end(); I != E; ++I) { + Value *V = *I; + // If V is used in a PHI node, the use belongs to the incoming block, + // not the block with the PHI node. In the incoming block, the use + // would be considered as being at the end of it, so it cannot + // influence the position of the first use (which is assumed to be + // at the end to start with). 
+ if (isa<PHINode>(V)) + continue; + if (!isa<Instruction>(V)) + continue; + Instruction *In = cast<Instruction>(V); + if (In->getParent() != B) + continue; + BasicBlock::iterator It = In->getIterator(); + if (std::distance(FirstUse, BEnd) < std::distance(It, BEnd)) + FirstUse = It; + } + return FirstUse; + } + + bool is_empty(const BasicBlock *B) { + return B->empty() || (&*B->begin() == B->getTerminator()); + } +} + + +BasicBlock *HexagonCommonGEP::recalculatePlacement(GepNode *Node, + NodeChildrenMap &NCM, NodeToValueMap &Loc) { + DEBUG(dbgs() << "Loc for node:" << Node << '\n'); + // Recalculate the placement for Node, assuming that the locations of + // its children in Loc are valid. + // Return 0 if there is no valid placement for Node (for example, it + // uses an index value that is not available at the location required + // to dominate all children, etc.). + + // Find the nearest common dominator for: + // - all users, if the node is used, and + // - all children. + ValueVect Bs; + if (Node->Flags & GepNode::Used) { + // Append all blocks with uses of the original values to the + // block vector Bs. + NodeToUsesMap::iterator UF = Uses.find(Node); + assert(UF != Uses.end() && "Used node with no use information"); + UseSet &Us = UF->second; + for (UseSet::iterator I = Us.begin(), E = Us.end(); I != E; ++I) { + Use *U = *I; + User *R = U->getUser(); + if (!isa<Instruction>(R)) + continue; + BasicBlock *PB = isa<PHINode>(R) + ? cast<PHINode>(R)->getIncomingBlock(*U) + : cast<Instruction>(R)->getParent(); + Bs.push_back(PB); + } + } + // Append the location of each child. + NodeChildrenMap::iterator CF = NCM.find(Node); + if (CF != NCM.end()) { + NodeVect &Cs = CF->second; + for (NodeVect::iterator I = Cs.begin(), E = Cs.end(); I != E; ++I) { + GepNode *CN = *I; + NodeToValueMap::iterator LF = Loc.find(CN); + // If the child is only used in GEP instructions (i.e. is not used in + // non-GEP instructions), the nearest dominator computed for it may + // have been null. In such case it won't have a location available. + if (LF == Loc.end()) + continue; + Bs.push_back(LF->second); + } + } + + BasicBlock *DomB = nearest_common_dominator(DT, Bs); + if (!DomB) + return 0; + // Check if the index used by Node dominates the computed dominator. + Instruction *IdxI = dyn_cast<Instruction>(Node->Idx); + if (IdxI && !DT->dominates(IdxI->getParent(), DomB)) + return 0; + + // Avoid putting nodes into empty blocks. + while (is_empty(DomB)) { + DomTreeNode *N = (*DT)[DomB]->getIDom(); + if (!N) + break; + DomB = N->getBlock(); + } + + // Otherwise, DomB is fine. Update the location map. + Loc[Node] = DomB; + return DomB; +} + + +BasicBlock *HexagonCommonGEP::recalculatePlacementRec(GepNode *Node, + NodeChildrenMap &NCM, NodeToValueMap &Loc) { + DEBUG(dbgs() << "LocRec begin for node:" << Node << '\n'); + // Recalculate the placement of Node, after recursively recalculating the + // placements of all its children. 
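+ // Illustrative sketch (not in the original comments): if this node's
+ // original GEP is used in blocks B1 and B2 and its only child ends up
+ // placed in B3, recalculatePlacement() will put this node at the nearest
+ // common dominator of { B1, B2, B3 }, then walk up past empty blocks.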
+ NodeChildrenMap::iterator CF = NCM.find(Node); + if (CF != NCM.end()) { + NodeVect &Cs = CF->second; + for (NodeVect::iterator I = Cs.begin(), E = Cs.end(); I != E; ++I) + recalculatePlacementRec(*I, NCM, Loc); + } + BasicBlock *LB = recalculatePlacement(Node, NCM, Loc); + DEBUG(dbgs() << "LocRec end for node:" << Node << '\n'); + return LB; +} + + +bool HexagonCommonGEP::isInvariantIn(Value *Val, Loop *L) { + if (isa<Constant>(Val) || isa<Argument>(Val)) + return true; + Instruction *In = dyn_cast<Instruction>(Val); + if (!In) + return false; + BasicBlock *HdrB = L->getHeader(), *DefB = In->getParent(); + return DT->properlyDominates(DefB, HdrB); +} + + +bool HexagonCommonGEP::isInvariantIn(GepNode *Node, Loop *L) { + if (Node->Flags & GepNode::Root) + if (!isInvariantIn(Node->BaseVal, L)) + return false; + return isInvariantIn(Node->Idx, L); +} + + +bool HexagonCommonGEP::isInMainPath(BasicBlock *B, Loop *L) { + BasicBlock *HB = L->getHeader(); + BasicBlock *LB = L->getLoopLatch(); + // B must post-dominate the loop header or dominate the loop latch. + if (PDT->dominates(B, HB)) + return true; + if (LB && DT->dominates(B, LB)) + return true; + return false; +} + + +namespace { + BasicBlock *preheader(DominatorTree *DT, Loop *L) { + if (BasicBlock *PH = L->getLoopPreheader()) + return PH; + if (!OptSpeculate) + return 0; + DomTreeNode *DN = DT->getNode(L->getHeader()); + if (!DN) + return 0; + return DN->getIDom()->getBlock(); + } +} + + +BasicBlock *HexagonCommonGEP::adjustForInvariance(GepNode *Node, + NodeChildrenMap &NCM, NodeToValueMap &Loc) { + // Find the "topmost" location for Node: it must be dominated by both, + // its parent (or the BaseVal, if it's a root node), and by the index + // value. + ValueVect Bs; + if (Node->Flags & GepNode::Root) { + if (Instruction *PIn = dyn_cast<Instruction>(Node->BaseVal)) + Bs.push_back(PIn->getParent()); + } else { + Bs.push_back(Loc[Node->Parent]); + } + if (Instruction *IIn = dyn_cast<Instruction>(Node->Idx)) + Bs.push_back(IIn->getParent()); + BasicBlock *TopB = nearest_common_dominatee(DT, Bs); + + // Traverse the loop nest upwards until we find a loop in which Node + // is no longer invariant, or until we get to the upper limit of Node's + // placement. The traversal will also stop when a suitable "preheader" + // cannot be found for a given loop. The "preheader" may actually be + // a regular block outside of the loop (i.e. not guarded), in which case + // the Node will be speculated. + // For nodes that are not in the main path of the containing loop (i.e. + // are not executed in each iteration), do not move them out of the loop. + BasicBlock *LocB = cast_or_null<BasicBlock>(Loc[Node]); + if (LocB) { + Loop *Lp = LI->getLoopFor(LocB); + while (Lp) { + if (!isInvariantIn(Node, Lp) || !isInMainPath(LocB, Lp)) + break; + BasicBlock *NewLoc = preheader(DT, Lp); + if (!NewLoc || !DT->dominates(TopB, NewLoc)) + break; + Lp = Lp->getParentLoop(); + LocB = NewLoc; + } + } + Loc[Node] = LocB; + + // Recursively compute the locations of all children nodes. 
+ NodeChildrenMap::iterator CF = NCM.find(Node); + if (CF != NCM.end()) { + NodeVect &Cs = CF->second; + for (NodeVect::iterator I = Cs.begin(), E = Cs.end(); I != E; ++I) + adjustForInvariance(*I, NCM, Loc); + } + return LocB; +} + + +namespace { + struct LocationAsBlock { + LocationAsBlock(const NodeToValueMap &L) : Map(L) {} + const NodeToValueMap ⤅ + }; + + raw_ostream &operator<< (raw_ostream &OS, + const LocationAsBlock &Loc) LLVM_ATTRIBUTE_UNUSED ; + raw_ostream &operator<< (raw_ostream &OS, const LocationAsBlock &Loc) { + for (NodeToValueMap::const_iterator I = Loc.Map.begin(), E = Loc.Map.end(); + I != E; ++I) { + OS << I->first << " -> "; + BasicBlock *B = cast<BasicBlock>(I->second); + OS << B->getName() << '(' << B << ')'; + OS << '\n'; + } + return OS; + } + + inline bool is_constant(GepNode *N) { + return isa<ConstantInt>(N->Idx); + } +} + + +void HexagonCommonGEP::separateChainForNode(GepNode *Node, Use *U, + NodeToValueMap &Loc) { + User *R = U->getUser(); + DEBUG(dbgs() << "Separating chain for node (" << Node << ") user: " + << *R << '\n'); + BasicBlock *PB = cast<Instruction>(R)->getParent(); + + GepNode *N = Node; + GepNode *C = 0, *NewNode = 0; + while (is_constant(N) && !(N->Flags & GepNode::Root)) { + // XXX if (single-use) dont-replicate; + GepNode *NewN = new (*Mem) GepNode(N); + Nodes.push_back(NewN); + Loc[NewN] = PB; + + if (N == Node) + NewNode = NewN; + NewN->Flags &= ~GepNode::Used; + if (C) + C->Parent = NewN; + C = NewN; + N = N->Parent; + } + if (!NewNode) + return; + + // Move over all uses that share the same user as U from Node to NewNode. + NodeToUsesMap::iterator UF = Uses.find(Node); + assert(UF != Uses.end()); + UseSet &Us = UF->second; + UseSet NewUs; + for (UseSet::iterator I = Us.begin(); I != Us.end(); ) { + User *S = (*I)->getUser(); + UseSet::iterator Nx = std::next(I); + if (S == R) { + NewUs.insert(*I); + Us.erase(I); + } + I = Nx; + } + if (Us.empty()) { + Node->Flags &= ~GepNode::Used; + Uses.erase(UF); + } + + // Should at least have U in NewUs. + NewNode->Flags |= GepNode::Used; + DEBUG(dbgs() << "new node: " << NewNode << " " << *NewNode << '\n'); + assert(!NewUs.empty()); + Uses[NewNode] = NewUs; +} + + +void HexagonCommonGEP::separateConstantChains(GepNode *Node, + NodeChildrenMap &NCM, NodeToValueMap &Loc) { + // First approximation: extract all chains. + NodeSet Ns; + nodes_for_root(Node, NCM, Ns); + + DEBUG(dbgs() << "Separating constant chains for node: " << Node << '\n'); + // Collect all used nodes together with the uses from loads and stores, + // where the GEP node could be folded into the load/store instruction. + NodeToUsesMap FNs; // Foldable nodes. + for (NodeSet::iterator I = Ns.begin(), E = Ns.end(); I != E; ++I) { + GepNode *N = *I; + if (!(N->Flags & GepNode::Used)) + continue; + NodeToUsesMap::iterator UF = Uses.find(N); + assert(UF != Uses.end()); + UseSet &Us = UF->second; + // Loads/stores that use the node N. + UseSet LSs; + for (UseSet::iterator J = Us.begin(), F = Us.end(); J != F; ++J) { + Use *U = *J; + User *R = U->getUser(); + // We're interested in uses that provide the address. It can happen + // that the value may also be provided via GEP, but we won't handle + // those cases here for now. 
+ if (LoadInst *Ld = dyn_cast<LoadInst>(R)) { + unsigned PtrX = LoadInst::getPointerOperandIndex(); + if (&Ld->getOperandUse(PtrX) == U) + LSs.insert(U); + } else if (StoreInst *St = dyn_cast<StoreInst>(R)) { + unsigned PtrX = StoreInst::getPointerOperandIndex(); + if (&St->getOperandUse(PtrX) == U) + LSs.insert(U); + } + } + // Even if the total use count is 1, separating the chain may still be + // beneficial, since the constant chain may be longer than the GEP alone + // would be (e.g. if the parent node has a constant index and also has + // other children). + if (!LSs.empty()) + FNs.insert(std::make_pair(N, LSs)); + } + + DEBUG(dbgs() << "Nodes with foldable users:\n" << FNs); + + for (NodeToUsesMap::iterator I = FNs.begin(), E = FNs.end(); I != E; ++I) { + GepNode *N = I->first; + UseSet &Us = I->second; + for (UseSet::iterator J = Us.begin(), F = Us.end(); J != F; ++J) + separateChainForNode(N, *J, Loc); + } +} + + +void HexagonCommonGEP::computeNodePlacement(NodeToValueMap &Loc) { + // Compute the inverse of the Node.Parent links. Also, collect the set + // of root nodes. + NodeChildrenMap NCM; + NodeVect Roots; + invert_find_roots(Nodes, NCM, Roots); + + // Compute the initial placement determined by the users' locations, and + // the locations of the child nodes. + for (NodeVect::iterator I = Roots.begin(), E = Roots.end(); I != E; ++I) + recalculatePlacementRec(*I, NCM, Loc); + + DEBUG(dbgs() << "Initial node placement:\n" << LocationAsBlock(Loc)); + + if (OptEnableInv) { + for (NodeVect::iterator I = Roots.begin(), E = Roots.end(); I != E; ++I) + adjustForInvariance(*I, NCM, Loc); + + DEBUG(dbgs() << "Node placement after adjustment for invariance:\n" + << LocationAsBlock(Loc)); + } + if (OptEnableConst) { + for (NodeVect::iterator I = Roots.begin(), E = Roots.end(); I != E; ++I) + separateConstantChains(*I, NCM, Loc); + } + DEBUG(dbgs() << "Node use information:\n" << Uses); + + // At the moment, there is no further refinement of the initial placement. + // Such a refinement could include splitting the nodes if they are placed + // too far from some of its users. + + DEBUG(dbgs() << "Final node placement:\n" << LocationAsBlock(Loc)); +} + + +Value *HexagonCommonGEP::fabricateGEP(NodeVect &NA, BasicBlock::iterator At, + BasicBlock *LocB) { + DEBUG(dbgs() << "Fabricating GEP in " << LocB->getName() + << " for nodes:\n" << NA); + unsigned Num = NA.size(); + GepNode *RN = NA[0]; + assert((RN->Flags & GepNode::Root) && "Creating GEP for non-root"); + + Value *NewInst = 0; + Value *Input = RN->BaseVal; + Value **IdxList = new Value*[Num+1]; + unsigned nax = 0; + do { + unsigned IdxC = 0; + // If the type of the input of the first node is not a pointer, + // we need to add an artificial i32 0 to the indices (because the + // actual input in the IR will be a pointer). + if (!NA[nax]->PTy->isPointerTy()) { + Type *Int32Ty = Type::getInt32Ty(*Ctx); + IdxList[IdxC++] = ConstantInt::get(Int32Ty, 0); + } + + // Keep adding indices from NA until we have to stop and generate + // an "intermediate" GEP. + while (++nax <= Num) { + GepNode *N = NA[nax-1]; + IdxList[IdxC++] = N->Idx; + if (nax < Num) { + // We have to stop, if the expected type of the output of this node + // is not the same as the input type of the next node. 
+ Type *NextTy = next_type(N->PTy, N->Idx); + if (NextTy != NA[nax]->PTy) + break; + } + } + ArrayRef<Value*> A(IdxList, IdxC); + Type *InpTy = Input->getType(); + Type *ElTy = cast<PointerType>(InpTy->getScalarType())->getElementType(); + NewInst = GetElementPtrInst::Create(ElTy, Input, A, "cgep", &*At); + DEBUG(dbgs() << "new GEP: " << *NewInst << '\n'); + Input = NewInst; + } while (nax <= Num); + + delete[] IdxList; + return NewInst; +} + + +void HexagonCommonGEP::getAllUsersForNode(GepNode *Node, ValueVect &Values, + NodeChildrenMap &NCM) { + NodeVect Work; + Work.push_back(Node); + + while (!Work.empty()) { + NodeVect::iterator First = Work.begin(); + GepNode *N = *First; + Work.erase(First); + if (N->Flags & GepNode::Used) { + NodeToUsesMap::iterator UF = Uses.find(N); + assert(UF != Uses.end() && "No use information for used node"); + UseSet &Us = UF->second; + for (UseSet::iterator I = Us.begin(), E = Us.end(); I != E; ++I) + Values.push_back((*I)->getUser()); + } + NodeChildrenMap::iterator CF = NCM.find(N); + if (CF != NCM.end()) { + NodeVect &Cs = CF->second; + Work.insert(Work.end(), Cs.begin(), Cs.end()); + } + } +} + + +void HexagonCommonGEP::materialize(NodeToValueMap &Loc) { + DEBUG(dbgs() << "Nodes before materialization:\n" << Nodes << '\n'); + NodeChildrenMap NCM; + NodeVect Roots; + // Compute the inversion again, since computing placement could alter + // "parent" relation between nodes. + invert_find_roots(Nodes, NCM, Roots); + + while (!Roots.empty()) { + NodeVect::iterator First = Roots.begin(); + GepNode *Root = *First, *Last = *First; + Roots.erase(First); + + NodeVect NA; // Nodes to assemble. + // Append to NA all child nodes up to (and including) the first child + // that: + // (1) has more than 1 child, or + // (2) is used, or + // (3) has a child located in a different block. + bool LastUsed = false; + unsigned LastCN = 0; + // The location may be null if the computation failed (it can legitimately + // happen for nodes created from dead GEPs). + Value *LocV = Loc[Last]; + if (!LocV) + continue; + BasicBlock *LastB = cast<BasicBlock>(LocV); + do { + NA.push_back(Last); + LastUsed = (Last->Flags & GepNode::Used); + if (LastUsed) + break; + NodeChildrenMap::iterator CF = NCM.find(Last); + LastCN = (CF != NCM.end()) ? CF->second.size() : 0; + if (LastCN != 1) + break; + GepNode *Child = CF->second.front(); + BasicBlock *ChildB = cast_or_null<BasicBlock>(Loc[Child]); + if (ChildB != 0 && LastB != ChildB) + break; + Last = Child; + } while (true); + + BasicBlock::iterator InsertAt = LastB->getTerminator()->getIterator(); + if (LastUsed || LastCN > 0) { + ValueVect Urs; + getAllUsersForNode(Root, Urs, NCM); + BasicBlock::iterator FirstUse = first_use_of_in_block(Urs, LastB); + if (FirstUse != LastB->end()) + InsertAt = FirstUse; + } + + // Generate a new instruction for NA. + Value *NewInst = fabricateGEP(NA, InsertAt, LastB); + + // Convert all the children of Last node into roots, and append them + // to the Roots list. + if (LastCN > 0) { + NodeVect &Cs = NCM[Last]; + for (NodeVect::iterator I = Cs.begin(), E = Cs.end(); I != E; ++I) { + GepNode *CN = *I; + CN->Flags &= ~GepNode::Internal; + CN->Flags |= GepNode::Root; + CN->BaseVal = NewInst; + Roots.push_back(CN); + } + } + + // Lastly, if the Last node was used, replace all uses with the new GEP. + // The uses reference the original GEP values. 
+ if (LastUsed) { + NodeToUsesMap::iterator UF = Uses.find(Last); + assert(UF != Uses.end() && "No use information found"); + UseSet &Us = UF->second; + for (UseSet::iterator I = Us.begin(), E = Us.end(); I != E; ++I) { + Use *U = *I; + U->set(NewInst); + } + } + } +} + + +void HexagonCommonGEP::removeDeadCode() { + ValueVect BO; + BO.push_back(&Fn->front()); + + for (unsigned i = 0; i < BO.size(); ++i) { + BasicBlock *B = cast<BasicBlock>(BO[i]); + DomTreeNode *N = DT->getNode(B); + typedef GraphTraits<DomTreeNode*> GTN; + typedef GTN::ChildIteratorType Iter; + for (Iter I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I) + BO.push_back((*I)->getBlock()); + } + + for (unsigned i = BO.size(); i > 0; --i) { + BasicBlock *B = cast<BasicBlock>(BO[i-1]); + BasicBlock::InstListType &IL = B->getInstList(); + typedef BasicBlock::InstListType::reverse_iterator reverse_iterator; + ValueVect Ins; + for (reverse_iterator I = IL.rbegin(), E = IL.rend(); I != E; ++I) + Ins.push_back(&*I); + for (ValueVect::iterator I = Ins.begin(), E = Ins.end(); I != E; ++I) { + Instruction *In = cast<Instruction>(*I); + if (isInstructionTriviallyDead(In)) + In->eraseFromParent(); + } + } +} + + +bool HexagonCommonGEP::runOnFunction(Function &F) { + // For now bail out on C++ exception handling. + for (Function::iterator A = F.begin(), Z = F.end(); A != Z; ++A) + for (BasicBlock::iterator I = A->begin(), E = A->end(); I != E; ++I) + if (isa<InvokeInst>(I) || isa<LandingPadInst>(I)) + return false; + + Fn = &F; + DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + PDT = &getAnalysis<PostDominatorTree>(); + LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + Ctx = &F.getContext(); + + Nodes.clear(); + Uses.clear(); + NodeOrder.clear(); + + SpecificBumpPtrAllocator<GepNode> Allocator; + Mem = &Allocator; + + collect(); + common(); + + NodeToValueMap Loc; + computeNodePlacement(Loc); + materialize(Loc); + removeDeadCode(); + +#ifdef XDEBUG + // Run this only when expensive checks are enabled. + verifyFunction(F); +#endif + return true; +} + + +namespace llvm { + FunctionPass *createHexagonCommonGEP() { + return new HexagonCommonGEP(); + } +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp new file mode 100644 index 0000000..9fd863f --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp @@ -0,0 +1,754 @@ +//===------- HexagonCopyToCombine.cpp - Hexagon Copy-To-Combine Pass ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This pass replaces transfer instructions by combine instructions. +// We walk along a basic block and look for two combinable instructions and try +// to move them together. If we can move them next to each other we do so and +// replace them with a combine instruction. 
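+// For example (an illustrative sketch with made-up register numbers, not
+// taken from the original comments):
+//   %R2 = A2_tfrsi #0
+//   %R3 = A2_tfr %R5
+// can, when nothing in between reads or clobbers the registers involved, be
+// replaced with
+//   %D1 = A4_combineri %R5, #0
+// where D1 is the double register formed by the even/odd pair R3:R2.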
+//===----------------------------------------------------------------------===// +#include "llvm/PassSupport.h" +#include "Hexagon.h" +#include "HexagonInstrInfo.h" +#include "HexagonMachineFunctionInfo.h" +#include "HexagonRegisterInfo.h" +#include "HexagonSubtarget.h" +#include "HexagonTargetMachine.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegisterInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "hexagon-copy-combine" + +static +cl::opt<bool> IsCombinesDisabled("disable-merge-into-combines", + cl::Hidden, cl::ZeroOrMore, + cl::init(false), + cl::desc("Disable merging into combines")); +static +cl::opt<unsigned> +MaxNumOfInstsBetweenNewValueStoreAndTFR("max-num-inst-between-tfr-and-nv-store", + cl::Hidden, cl::init(4), + cl::desc("Maximum distance between a tfr feeding a store we " + "consider the store still to be newifiable")); + +namespace llvm { + FunctionPass *createHexagonCopyToCombine(); + void initializeHexagonCopyToCombinePass(PassRegistry&); +} + + +namespace { + +class HexagonCopyToCombine : public MachineFunctionPass { + const HexagonInstrInfo *TII; + const TargetRegisterInfo *TRI; + bool ShouldCombineAggressively; + + DenseSet<MachineInstr *> PotentiallyNewifiableTFR; +public: + static char ID; + + HexagonCopyToCombine() : MachineFunctionPass(ID) { + initializeHexagonCopyToCombinePass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + MachineFunctionPass::getAnalysisUsage(AU); + } + + const char *getPassName() const override { + return "Hexagon Copy-To-Combine Pass"; + } + + bool runOnMachineFunction(MachineFunction &Fn) override; + +private: + MachineInstr *findPairable(MachineInstr *I1, bool &DoInsertAtI1); + + void findPotentialNewifiableTFRs(MachineBasicBlock &); + + void combine(MachineInstr *I1, MachineInstr *I2, + MachineBasicBlock::iterator &MI, bool DoInsertAtI1); + + bool isSafeToMoveTogether(MachineInstr *I1, MachineInstr *I2, + unsigned I1DestReg, unsigned I2DestReg, + bool &DoInsertAtI1); + + void emitCombineRR(MachineBasicBlock::iterator &Before, unsigned DestReg, + MachineOperand &HiOperand, MachineOperand &LoOperand); + + void emitCombineRI(MachineBasicBlock::iterator &Before, unsigned DestReg, + MachineOperand &HiOperand, MachineOperand &LoOperand); + + void emitCombineIR(MachineBasicBlock::iterator &Before, unsigned DestReg, + MachineOperand &HiOperand, MachineOperand &LoOperand); + + void emitCombineII(MachineBasicBlock::iterator &Before, unsigned DestReg, + MachineOperand &HiOperand, MachineOperand &LoOperand); +}; + +} // End anonymous namespace. + +char HexagonCopyToCombine::ID = 0; + +INITIALIZE_PASS(HexagonCopyToCombine, "hexagon-copy-combine", + "Hexagon Copy-To-Combine Pass", false, false) + +static bool isCombinableInstType(MachineInstr *MI, + const HexagonInstrInfo *TII, + bool ShouldCombineAggressively) { + switch(MI->getOpcode()) { + case Hexagon::A2_tfr: { + // A COPY instruction can be combined if its arguments are IntRegs (32bit). 
+ const MachineOperand &Op0 = MI->getOperand(0);
+ const MachineOperand &Op1 = MI->getOperand(1);
+ assert(Op0.isReg() && Op1.isReg());
+
+ unsigned DestReg = Op0.getReg();
+ unsigned SrcReg = Op1.getReg();
+ return Hexagon::IntRegsRegClass.contains(DestReg) &&
+ Hexagon::IntRegsRegClass.contains(SrcReg);
+ }
+
+ case Hexagon::A2_tfrsi: {
+ // A transfer-immediate can be combined if its argument is a signed 8-bit
+ // value.
+ const MachineOperand &Op0 = MI->getOperand(0);
+ const MachineOperand &Op1 = MI->getOperand(1);
+ assert(Op0.isReg());
+
+ unsigned DestReg = Op0.getReg();
+ // Ensure that TargetFlags are MO_NO_FLAG for a global. This is a
+ // workaround for an ABI bug that prevents GOT relocations on combine
+ // instructions.
+ if (!Op1.isImm() && Op1.getTargetFlags() != HexagonII::MO_NO_FLAG)
+ return false;
+
+ // Only combine constant extended A2_tfrsi if we are in aggressive mode.
+ bool NotExt = Op1.isImm() && isInt<8>(Op1.getImm());
+ return Hexagon::IntRegsRegClass.contains(DestReg) &&
+ (ShouldCombineAggressively || NotExt);
+ }
+
+ default:
+ break;
+ }
+
+ return false;
+}
+
+template <unsigned N>
+static bool isGreaterThanNBitTFRI(const MachineInstr *I) {
+ if (I->getOpcode() == Hexagon::TFRI64_V4 ||
+ I->getOpcode() == Hexagon::A2_tfrsi) {
+ const MachineOperand &Op = I->getOperand(1);
+ return !Op.isImm() || !isInt<N>(Op.getImm());
+ }
+ return false;
+}
+
+/// areCombinableOperations - Returns true if the two instructions can be merged
+/// into a combine (ignoring register constraints).
+static bool areCombinableOperations(const TargetRegisterInfo *TRI,
+ MachineInstr *HighRegInst,
+ MachineInstr *LowRegInst) {
+ unsigned HiOpc = HighRegInst->getOpcode();
+ unsigned LoOpc = LowRegInst->getOpcode();
+ (void)HiOpc; // Fix compiler warning
+ (void)LoOpc; // Fix compiler warning
+ assert((HiOpc == Hexagon::A2_tfr || HiOpc == Hexagon::A2_tfrsi) &&
+ (LoOpc == Hexagon::A2_tfr || LoOpc == Hexagon::A2_tfrsi) &&
+ "Assume individual instructions are of a combinable type");
+
+ // There is no combine of two constant extended values.
+ if (isGreaterThanNBitTFRI<8>(HighRegInst) &&
+ isGreaterThanNBitTFRI<6>(LowRegInst))
+ return false;
+
+ return true;
+}
+
+static bool isEvenReg(unsigned Reg) {
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ Hexagon::IntRegsRegClass.contains(Reg));
+ return (Reg - Hexagon::R0) % 2 == 0;
+}
+
+static void removeKillInfo(MachineInstr *MI, unsigned RegNotKilled) {
+ for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
+ MachineOperand &Op = MI->getOperand(I);
+ if (!Op.isReg() || Op.getReg() != RegNotKilled || !Op.isKill())
+ continue;
+ Op.setIsKill(false);
+ }
+}
+
+/// isUnsafeToMoveAcross - Returns true if it is unsafe to move a copy
+/// instruction from \p UseReg to \p DestReg over the instruction \p I.
+static bool isUnsafeToMoveAcross(MachineInstr *I, unsigned UseReg,
+ unsigned DestReg,
+ const TargetRegisterInfo *TRI) {
+ return (UseReg && (I->modifiesRegister(UseReg, TRI))) ||
+ I->modifiesRegister(DestReg, TRI) ||
+ I->readsRegister(DestReg, TRI) ||
+ I->hasUnmodeledSideEffects() ||
+ I->isInlineAsm() || I->isDebugValue();
+}
+
+static unsigned UseReg(const MachineOperand& MO) {
+ return MO.isReg() ? MO.getReg() : 0;
+}
+
+/// isSafeToMoveTogether - Returns true if it is safe to move I1 next to I2 such
+/// that the two instructions can be paired in a combine.
+bool HexagonCopyToCombine::isSafeToMoveTogether(MachineInstr *I1,
+ MachineInstr *I2,
+ unsigned I1DestReg,
+ unsigned I2DestReg,
+ bool &DoInsertAtI1) {
+ unsigned I2UseReg = UseReg(I2->getOperand(1));
+
+ // It is not safe to move I1 and I2 into one combine if I2 has a true
+ // dependence on I1.
+ if (I2UseReg && I1->modifiesRegister(I2UseReg, TRI))
+ return false;
+
+ bool isSafe = true;
+
+ // First try to move I2 towards I1.
+ {
+ // A reverse_iterator instantiated like below starts before I2, and I1
+ // respectively.
+ // Look at instructions I in between I2 and (excluding) I1.
+ MachineBasicBlock::reverse_iterator I(I2),
+ End = --(MachineBasicBlock::reverse_iterator(I1));
+ // At O3 we got better results (dhrystone!) by being more conservative.
+ if (!ShouldCombineAggressively)
+ End = MachineBasicBlock::reverse_iterator(I1);
+ // If I2 kills its operand and we move I2 over an instruction that also
+ // uses I2's use reg we need to modify that (first) instruction to now kill
+ // this reg.
+ unsigned KilledOperand = 0;
+ if (I2->killsRegister(I2UseReg))
+ KilledOperand = I2UseReg;
+ MachineInstr *KillingInstr = nullptr;
+
+ for (; I != End; ++I) {
+ // If the intervening instruction I:
+ // * modifies I2's use reg
+ // * modifies I2's def reg
+ // * reads I2's def reg
+ // * or has unmodelled side effects
+ // we can't move I2 across it.
+ if (isUnsafeToMoveAcross(&*I, I2UseReg, I2DestReg, TRI)) {
+ isSafe = false;
+ break;
+ }
+
+ // Update first use of the killed operand.
+ if (!KillingInstr && KilledOperand &&
+ I->readsRegister(KilledOperand, TRI))
+ KillingInstr = &*I;
+ }
+ if (isSafe) {
+ // Update the intermediate instruction with the kill flag.
+ if (KillingInstr) {
+ bool Added = KillingInstr->addRegisterKilled(KilledOperand, TRI, true);
+ (void)Added; // suppress compiler warning
+ assert(Added && "Must successfully update kill flag");
+ removeKillInfo(I2, KilledOperand);
+ }
+ DoInsertAtI1 = true;
+ return true;
+ }
+ }
+
+ // Try to move I1 towards I2.
+ {
+ // Look at instructions I in between I1 and (excluding) I2.
+ MachineBasicBlock::iterator I(I1), End(I2);
+ // At O3 we got better results (dhrystone) by being more conservative here.
+ if (!ShouldCombineAggressively)
+ End = std::next(MachineBasicBlock::iterator(I2));
+ unsigned I1UseReg = UseReg(I1->getOperand(1));
+ // Track killed operands. If we move across an instruction that kills our
+ // operand, we need to update the kill information on the moved I1. It kills
+ // the operand now.
+ MachineInstr *KillingInstr = nullptr;
+ unsigned KilledOperand = 0;
+
+ while(++I != End) {
+ // If the intervening instruction I:
+ // * modifies I1's use reg
+ // * modifies I1's def reg
+ // * reads I1's def reg
+ // * or has unmodelled side effects
+ // We introduce this special case because llvm has no api to remove a
+ // kill flag for a register (a removeRegisterKilled() analogous to
+ // addRegisterKilled) that handles aliased registers correctly.
+ // * or has a killed aliased register use of I1's use reg
+ // %D4<def> = TFRI64 16
+ // %R6<def> = TFR %R9
+ // %R8<def> = KILL %R8, %D4<imp-use,kill>
+ // If we want to move R6 = across the KILL instruction we would have
+ // to remove the %D4<imp-use,kill> operand. For now, we are
+ // conservative and disallow the move.
+ // we can't move I1 across it.
+ if (isUnsafeToMoveAcross(I, I1UseReg, I1DestReg, TRI) ||
+ // Check for an aliased register kill. Bail out if we see one.
+ (!I->killsRegister(I1UseReg) && I->killsRegister(I1UseReg, TRI)))
+ return false;
+
+ // Check for an exact kill (registers match).
+ if (I1UseReg && I->killsRegister(I1UseReg)) {
+ assert(!KillingInstr && "Should only see one killing instruction");
+ KilledOperand = I1UseReg;
+ KillingInstr = &*I;
+ }
+ }
+ if (KillingInstr) {
+ removeKillInfo(KillingInstr, KilledOperand);
+ // Update I1 to set the kill flag. This flag will later be picked up by
+ // the new COMBINE instruction.
+ bool Added = I1->addRegisterKilled(KilledOperand, TRI);
+ (void)Added; // suppress compiler warning
+ assert(Added && "Must successfully update kill flag");
+ }
+ DoInsertAtI1 = false;
+ }
+
+ return true;
+}
+
+/// findPotentialNewifiableTFRs - Finds transfers that feed stores that could be
+/// newified. (A use of a 64-bit register define cannot be newified)
+void
+HexagonCopyToCombine::findPotentialNewifiableTFRs(MachineBasicBlock &BB) {
+ DenseMap<unsigned, MachineInstr *> LastDef;
+ for (MachineBasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) {
+ MachineInstr *MI = I;
+ // Mark TFRs that feed a potential new value store as such.
+ if(TII->mayBeNewStore(MI)) {
+ // Look for uses of TFR instructions.
+ for (unsigned OpdIdx = 0, OpdE = MI->getNumOperands(); OpdIdx != OpdE;
+ ++OpdIdx) {
+ MachineOperand &Op = MI->getOperand(OpdIdx);
+
+ // Skip over anything except register uses.
+ if (!Op.isReg() || !Op.isUse() || !Op.getReg())
+ continue;
+
+ // Look for the defining instruction.
+ unsigned Reg = Op.getReg();
+ MachineInstr *DefInst = LastDef[Reg];
+ if (!DefInst)
+ continue;
+ if (!isCombinableInstType(DefInst, TII, ShouldCombineAggressively))
+ continue;
+
+ // Only close newifiable stores should influence the decision.
+ MachineBasicBlock::iterator It(DefInst);
+ unsigned NumInstsToDef = 0;
+ while (&*It++ != MI)
+ ++NumInstsToDef;
+
+ if (NumInstsToDef > MaxNumOfInstsBetweenNewValueStoreAndTFR)
+ continue;
+
+ PotentiallyNewifiableTFR.insert(DefInst);
+ }
+ // Skip to next instruction.
+ continue;
+ }
+
+ // Put instructions that last defined integer or double registers into the
+ // map.
+ for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
+ MachineOperand &Op = MI->getOperand(I);
+ if (!Op.isReg() || !Op.isDef() || !Op.getReg())
+ continue;
+ unsigned Reg = Op.getReg();
+ if (Hexagon::DoubleRegsRegClass.contains(Reg)) {
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ LastDef[*SubRegs] = MI;
+ }
+ } else if (Hexagon::IntRegsRegClass.contains(Reg))
+ LastDef[Reg] = MI;
+ }
+ }
+}
+
+bool HexagonCopyToCombine::runOnMachineFunction(MachineFunction &MF) {
+
+ if (IsCombinesDisabled) return false;
+
+ bool HasChanged = false;
+
+ // Get target info.
+ TRI = MF.getSubtarget().getRegisterInfo();
+ TII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
+
+ // Combine aggressively (for code size)
+ ShouldCombineAggressively =
+ MF.getTarget().getOptLevel() <= CodeGenOpt::Default;
+
+ // Traverse basic blocks.
+ for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE;
+ ++BI) {
+ PotentiallyNewifiableTFR.clear();
+ findPotentialNewifiableTFRs(*BI);
+
+ // Traverse instructions in basic block.
+ for(MachineBasicBlock::iterator MI = BI->begin(), End = BI->end();
+ MI != End;) {
+ MachineInstr *I1 = MI++;
+ // Don't combine a TFR whose user could be newified (instructions that
+ // define double registers cannot be newified - Programmer's Ref Manual
+ // 5.4.2 New-value stores).
+ if (ShouldCombineAggressively && PotentiallyNewifiableTFR.count(I1))
+ continue;
+
+ // Ignore instructions that are not combinable.
+ if (!isCombinableInstType(I1, TII, ShouldCombineAggressively))
+ continue;
+
+ // Find a second instruction that can be merged into a combine
+ // instruction.
+ bool DoInsertAtI1 = false;
+ MachineInstr *I2 = findPairable(I1, DoInsertAtI1);
+ if (I2) {
+ HasChanged = true;
+ combine(I1, I2, MI, DoInsertAtI1);
+ }
+ }
+ }
+
+ return HasChanged;
+}
+
+/// findPairable - Returns an instruction that can be merged with \p I1 into a
+/// COMBINE instruction or 0 if no such instruction can be found. Returns true
+/// in \p DoInsertAtI1 if the combine must be inserted at instruction \p I1 and
+/// false if the combine must be inserted at the returned instruction.
+MachineInstr *HexagonCopyToCombine::findPairable(MachineInstr *I1,
+ bool &DoInsertAtI1) {
+ MachineBasicBlock::iterator I2 = std::next(MachineBasicBlock::iterator(I1));
+ unsigned I1DestReg = I1->getOperand(0).getReg();
+
+ for (MachineBasicBlock::iterator End = I1->getParent()->end(); I2 != End;
+ ++I2) {
+ // Bail out early if we see a second definition of I1DestReg.
+ if (I2->modifiesRegister(I1DestReg, TRI))
+ break;
+
+ // Ignore non-combinable instructions.
+ if (!isCombinableInstType(I2, TII, ShouldCombineAggressively))
+ continue;
+
+ // Don't combine a TFR whose user could be newified.
+ if (ShouldCombineAggressively && PotentiallyNewifiableTFR.count(I2))
+ continue;
+
+ unsigned I2DestReg = I2->getOperand(0).getReg();
+
+ // Check that registers are adjacent and that the first destination register
+ // is even.
+ bool IsI1LowReg = (I2DestReg - I1DestReg) == 1;
+ bool IsI2LowReg = (I1DestReg - I2DestReg) == 1;
+ unsigned FirstRegIndex = IsI1LowReg ? I1DestReg : I2DestReg;
+ if ((!IsI1LowReg && !IsI2LowReg) || !isEvenReg(FirstRegIndex))
+ continue;
+
+ // Check that the two instructions are combinable. V4 allows more
+ // instructions to be merged into a combine.
+ // The order matters because in a TFRI we might be able to encode an int8
+ // as the hi reg operand but only a uint6 as the low reg operand.
+ if ((IsI2LowReg && !areCombinableOperations(TRI, I1, I2)) ||
+ (IsI1LowReg && !areCombinableOperations(TRI, I2, I1)))
+ break;
+
+ if (isSafeToMoveTogether(I1, I2, I1DestReg, I2DestReg,
+ DoInsertAtI1))
+ return I2;
+
+ // Not safe. Stop searching.
+ break;
+ }
+ return nullptr;
+}
+
+void HexagonCopyToCombine::combine(MachineInstr *I1, MachineInstr *I2,
+ MachineBasicBlock::iterator &MI,
+ bool DoInsertAtI1) {
+ // We are going to delete I2. If MI points to I2 advance it to the next
+ // instruction.
+ if ((MachineInstr *)MI == I2) ++MI;
+
+ // Figure out whether I1 or I2 goes into the lowreg part.
+ unsigned I1DestReg = I1->getOperand(0).getReg();
+ unsigned I2DestReg = I2->getOperand(0).getReg();
+ bool IsI1Loreg = (I2DestReg - I1DestReg) == 1;
+ unsigned LoRegDef = IsI1Loreg ? I1DestReg : I2DestReg;
+
+ // Get the double word register.
+ unsigned DoubleRegDest =
+ TRI->getMatchingSuperReg(LoRegDef, Hexagon::subreg_loreg,
+ &Hexagon::DoubleRegsRegClass);
+ assert(DoubleRegDest != 0 && "Expect a valid register");
+
+
+ // Setup source operands.
+ MachineOperand &LoOperand = IsI1Loreg ? I1->getOperand(1) :
+ I2->getOperand(1);
+ MachineOperand &HiOperand = IsI1Loreg ? I2->getOperand(1) :
+ I1->getOperand(1);
+
+ // Figure out which source is a register and which a constant.
+ bool IsHiReg = HiOperand.isReg();
+ bool IsLoReg = LoOperand.isReg();
+
+ MachineBasicBlock::iterator InsertPt(DoInsertAtI1 ?
I1 : I2); + // Emit combine. + if (IsHiReg && IsLoReg) + emitCombineRR(InsertPt, DoubleRegDest, HiOperand, LoOperand); + else if (IsHiReg) + emitCombineRI(InsertPt, DoubleRegDest, HiOperand, LoOperand); + else if (IsLoReg) + emitCombineIR(InsertPt, DoubleRegDest, HiOperand, LoOperand); + else + emitCombineII(InsertPt, DoubleRegDest, HiOperand, LoOperand); + + I1->eraseFromParent(); + I2->eraseFromParent(); +} + +void HexagonCopyToCombine::emitCombineII(MachineBasicBlock::iterator &InsertPt, + unsigned DoubleDestReg, + MachineOperand &HiOperand, + MachineOperand &LoOperand) { + DebugLoc DL = InsertPt->getDebugLoc(); + MachineBasicBlock *BB = InsertPt->getParent(); + + // Handle globals. + if (HiOperand.isGlobal()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combineii), DoubleDestReg) + .addGlobalAddress(HiOperand.getGlobal(), HiOperand.getOffset(), + HiOperand.getTargetFlags()) + .addImm(LoOperand.getImm()); + return; + } + if (LoOperand.isGlobal()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineii), DoubleDestReg) + .addImm(HiOperand.getImm()) + .addGlobalAddress(LoOperand.getGlobal(), LoOperand.getOffset(), + LoOperand.getTargetFlags()); + return; + } + + // Handle block addresses. + if (HiOperand.isBlockAddress()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combineii), DoubleDestReg) + .addBlockAddress(HiOperand.getBlockAddress(), HiOperand.getOffset(), + HiOperand.getTargetFlags()) + .addImm(LoOperand.getImm()); + return; + } + if (LoOperand.isBlockAddress()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineii), DoubleDestReg) + .addImm(HiOperand.getImm()) + .addBlockAddress(LoOperand.getBlockAddress(), LoOperand.getOffset(), + LoOperand.getTargetFlags()); + return; + } + + // Handle jump tables. + if (HiOperand.isJTI()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combineii), DoubleDestReg) + .addJumpTableIndex(HiOperand.getIndex(), HiOperand.getTargetFlags()) + .addImm(LoOperand.getImm()); + return; + } + if (LoOperand.isJTI()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineii), DoubleDestReg) + .addImm(HiOperand.getImm()) + .addJumpTableIndex(LoOperand.getIndex(), LoOperand.getTargetFlags()); + return; + } + + // Handle constant pools. + if (HiOperand.isCPI()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combineii), DoubleDestReg) + .addConstantPoolIndex(HiOperand.getIndex(), HiOperand.getOffset(), + HiOperand.getTargetFlags()) + .addImm(LoOperand.getImm()); + return; + } + if (LoOperand.isCPI()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineii), DoubleDestReg) + .addImm(HiOperand.getImm()) + .addConstantPoolIndex(LoOperand.getIndex(), LoOperand.getOffset(), + LoOperand.getTargetFlags()); + return; + } + + // First preference should be given to Hexagon::A2_combineii instruction + // as it can include U6 (in Hexagon::A4_combineii) as well. + // In this instruction, HiOperand is const extended, if required. + if (isInt<8>(LoOperand.getImm())) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combineii), DoubleDestReg) + .addImm(HiOperand.getImm()) + .addImm(LoOperand.getImm()); + return; + } + + // In this instruction, LoOperand is const extended, if required. + if (isInt<8>(HiOperand.getImm())) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineii), DoubleDestReg) + .addImm(HiOperand.getImm()) + .addImm(LoOperand.getImm()); + return; + } + + // Insert new combine instruction. 
+ // DoubleRegDest = combine #HiImm, #LoImm + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combineii), DoubleDestReg) + .addImm(HiOperand.getImm()) + .addImm(LoOperand.getImm()); +} + +void HexagonCopyToCombine::emitCombineIR(MachineBasicBlock::iterator &InsertPt, + unsigned DoubleDestReg, + MachineOperand &HiOperand, + MachineOperand &LoOperand) { + unsigned LoReg = LoOperand.getReg(); + unsigned LoRegKillFlag = getKillRegState(LoOperand.isKill()); + + DebugLoc DL = InsertPt->getDebugLoc(); + MachineBasicBlock *BB = InsertPt->getParent(); + + // Handle globals. + if (HiOperand.isGlobal()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineir), DoubleDestReg) + .addGlobalAddress(HiOperand.getGlobal(), HiOperand.getOffset(), + HiOperand.getTargetFlags()) + .addReg(LoReg, LoRegKillFlag); + return; + } + // Handle block addresses. + if (HiOperand.isBlockAddress()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineir), DoubleDestReg) + .addBlockAddress(HiOperand.getBlockAddress(), HiOperand.getOffset(), + HiOperand.getTargetFlags()) + .addReg(LoReg, LoRegKillFlag); + return; + } + // Handle jump tables. + if (HiOperand.isJTI()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineir), DoubleDestReg) + .addJumpTableIndex(HiOperand.getIndex(), HiOperand.getTargetFlags()) + .addReg(LoReg, LoRegKillFlag); + return; + } + // Handle constant pools. + if (HiOperand.isCPI()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineir), DoubleDestReg) + .addConstantPoolIndex(HiOperand.getIndex(), HiOperand.getOffset(), + HiOperand.getTargetFlags()) + .addReg(LoReg, LoRegKillFlag); + return; + } + // Insert new combine instruction. + // DoubleRegDest = combine #HiImm, LoReg + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineir), DoubleDestReg) + .addImm(HiOperand.getImm()) + .addReg(LoReg, LoRegKillFlag); +} + +void HexagonCopyToCombine::emitCombineRI(MachineBasicBlock::iterator &InsertPt, + unsigned DoubleDestReg, + MachineOperand &HiOperand, + MachineOperand &LoOperand) { + unsigned HiRegKillFlag = getKillRegState(HiOperand.isKill()); + unsigned HiReg = HiOperand.getReg(); + + DebugLoc DL = InsertPt->getDebugLoc(); + MachineBasicBlock *BB = InsertPt->getParent(); + + // Handle global. + if (LoOperand.isGlobal()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineri), DoubleDestReg) + .addReg(HiReg, HiRegKillFlag) + .addGlobalAddress(LoOperand.getGlobal(), LoOperand.getOffset(), + LoOperand.getTargetFlags()); + return; + } + // Handle block addresses. + if (LoOperand.isBlockAddress()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineri), DoubleDestReg) + .addReg(HiReg, HiRegKillFlag) + .addBlockAddress(LoOperand.getBlockAddress(), LoOperand.getOffset(), + LoOperand.getTargetFlags()); + return; + } + // Handle jump tables. + if (LoOperand.isJTI()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineri), DoubleDestReg) + .addReg(HiOperand.getReg(), HiRegKillFlag) + .addJumpTableIndex(LoOperand.getIndex(), LoOperand.getTargetFlags()); + return; + } + // Handle constant pools. + if (LoOperand.isCPI()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineri), DoubleDestReg) + .addReg(HiOperand.getReg(), HiRegKillFlag) + .addConstantPoolIndex(LoOperand.getIndex(), LoOperand.getOffset(), + LoOperand.getTargetFlags()); + return; + } + + // Insert new combine instruction. 
+ // DoubleRegDest = combine HiReg, #LoImm + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineri), DoubleDestReg) + .addReg(HiReg, HiRegKillFlag) + .addImm(LoOperand.getImm()); +} + +void HexagonCopyToCombine::emitCombineRR(MachineBasicBlock::iterator &InsertPt, + unsigned DoubleDestReg, + MachineOperand &HiOperand, + MachineOperand &LoOperand) { + unsigned LoRegKillFlag = getKillRegState(LoOperand.isKill()); + unsigned HiRegKillFlag = getKillRegState(HiOperand.isKill()); + unsigned LoReg = LoOperand.getReg(); + unsigned HiReg = HiOperand.getReg(); + + DebugLoc DL = InsertPt->getDebugLoc(); + MachineBasicBlock *BB = InsertPt->getParent(); + + // Insert new combine instruction. + // DoubleRegDest = combine HiReg, LoReg + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combinew), DoubleDestReg) + .addReg(HiReg, HiRegKillFlag) + .addReg(LoReg, LoRegKillFlag); +} + +FunctionPass *llvm::createHexagonCopyToCombine() { + return new HexagonCopyToCombine(); +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp new file mode 100644 index 0000000..ee0c318 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp @@ -0,0 +1,1063 @@ +//===--- HexagonEarlyIfConv.cpp -------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements a Hexagon-specific if-conversion pass that runs on the +// SSA form. +// In SSA it is not straightforward to represent instructions that condi- +// tionally define registers, since a conditionally-defined register may +// only be used under the same condition on which the definition was based. +// To avoid complications of this nature, this patch will only generate +// predicated stores, and speculate other instructions from the "if-conver- +// ted" block. +// The code will recognize CFG patterns where a block with a conditional +// branch "splits" into a "true block" and a "false block". Either of these +// could be omitted (in case of a triangle, for example). +// If after conversion of the side block(s) the CFG allows it, the resul- +// ting blocks may be merged. If the "join" block contained PHI nodes, they +// will be replaced with MUX (or MUX-like) instructions to maintain the +// semantics of the PHI. 
+// +// Example: +// +// %vreg40<def> = L2_loadrub_io %vreg39<kill>, 1 +// %vreg41<def> = S2_tstbit_i %vreg40<kill>, 0 +// J2_jumpt %vreg41<kill>, <BB#5>, %PC<imp-def,dead> +// J2_jump <BB#4>, %PC<imp-def,dead> +// Successors according to CFG: BB#4(62) BB#5(62) +// +// BB#4: derived from LLVM BB %if.then +// Predecessors according to CFG: BB#3 +// %vreg11<def> = A2_addp %vreg6, %vreg10 +// S2_storerd_io %vreg32, 16, %vreg11 +// Successors according to CFG: BB#5 +// +// BB#5: derived from LLVM BB %if.end +// Predecessors according to CFG: BB#3 BB#4 +// %vreg12<def> = PHI %vreg6, <BB#3>, %vreg11, <BB#4> +// %vreg13<def> = A2_addp %vreg7, %vreg12 +// %vreg42<def> = C2_cmpeqi %vreg9, 10 +// J2_jumpf %vreg42<kill>, <BB#3>, %PC<imp-def,dead> +// J2_jump <BB#6>, %PC<imp-def,dead> +// Successors according to CFG: BB#6(4) BB#3(124) +// +// would become: +// +// %vreg40<def> = L2_loadrub_io %vreg39<kill>, 1 +// %vreg41<def> = S2_tstbit_i %vreg40<kill>, 0 +// spec-> %vreg11<def> = A2_addp %vreg6, %vreg10 +// pred-> S2_pstorerdf_io %vreg41, %vreg32, 16, %vreg11 +// %vreg46<def> = MUX64_rr %vreg41, %vreg6, %vreg11 +// %vreg13<def> = A2_addp %vreg7, %vreg46 +// %vreg42<def> = C2_cmpeqi %vreg9, 10 +// J2_jumpf %vreg42<kill>, <BB#3>, %PC<imp-def,dead> +// J2_jump <BB#6>, %PC<imp-def,dead> +// Successors according to CFG: BB#6 BB#3 + +#define DEBUG_TYPE "hexagon-eif" + +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "HexagonTargetMachine.h" + +#include <functional> +#include <set> +#include <vector> + +using namespace llvm; + +namespace llvm { + FunctionPass *createHexagonEarlyIfConversion(); + void initializeHexagonEarlyIfConversionPass(PassRegistry& Registry); +} + +namespace { + cl::opt<bool> EnableHexagonBP("enable-hexagon-br-prob", cl::Hidden, + cl::init(false), cl::desc("Enable branch probability info")); + cl::opt<unsigned> SizeLimit("eif-limit", cl::init(6), cl::Hidden, + cl::desc("Size limit in Hexagon early if-conversion")); + + struct PrintMB { + PrintMB(const MachineBasicBlock *B) : MB(B) {} + const MachineBasicBlock *MB; + }; + raw_ostream &operator<< (raw_ostream &OS, const PrintMB &P) { + if (!P.MB) + return OS << "<none>"; + return OS << '#' << P.MB->getNumber(); + } + + struct FlowPattern { + FlowPattern() : SplitB(0), TrueB(0), FalseB(0), JoinB(0), PredR(0) {} + FlowPattern(MachineBasicBlock *B, unsigned PR, MachineBasicBlock *TB, + MachineBasicBlock *FB, MachineBasicBlock *JB) + : SplitB(B), TrueB(TB), FalseB(FB), JoinB(JB), PredR(PR) {} + + MachineBasicBlock *SplitB; + MachineBasicBlock *TrueB, *FalseB, *JoinB; + unsigned PredR; + }; + struct PrintFP { + PrintFP(const FlowPattern &P, const TargetRegisterInfo &T) + : FP(P), TRI(T) {} + const FlowPattern &FP; + const TargetRegisterInfo &TRI; + friend raw_ostream &operator<< (raw_ostream &OS, const PrintFP &P); + }; + raw_ostream &operator<<(raw_ostream &OS, + const PrintFP &P) LLVM_ATTRIBUTE_UNUSED; + raw_ostream &operator<<(raw_ostream &OS, const PrintFP &P) { + OS << "{ SplitB:" << PrintMB(P.FP.SplitB) + << 
", PredR:" << PrintReg(P.FP.PredR, &P.TRI) + << ", TrueB:" << PrintMB(P.FP.TrueB) << ", FalseB:" + << PrintMB(P.FP.FalseB) + << ", JoinB:" << PrintMB(P.FP.JoinB) << " }"; + return OS; + } + + class HexagonEarlyIfConversion : public MachineFunctionPass { + public: + static char ID; + HexagonEarlyIfConversion() : MachineFunctionPass(ID), + TII(0), TRI(0), MFN(0), MRI(0), MDT(0), MLI(0) { + initializeHexagonEarlyIfConversionPass(*PassRegistry::getPassRegistry()); + } + const char *getPassName() const override { + return "Hexagon early if conversion"; + } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<MachineBranchProbabilityInfo>(); + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + AU.addRequired<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + bool runOnMachineFunction(MachineFunction &MF) override; + + private: + typedef DenseSet<MachineBasicBlock*> BlockSetType; + + bool isPreheader(const MachineBasicBlock *B) const; + bool matchFlowPattern(MachineBasicBlock *B, MachineLoop *L, + FlowPattern &FP); + bool visitBlock(MachineBasicBlock *B, MachineLoop *L); + bool visitLoop(MachineLoop *L); + + bool hasEHLabel(const MachineBasicBlock *B) const; + bool hasUncondBranch(const MachineBasicBlock *B) const; + bool isValidCandidate(const MachineBasicBlock *B) const; + bool usesUndefVReg(const MachineInstr *MI) const; + bool isValid(const FlowPattern &FP) const; + unsigned countPredicateDefs(const MachineBasicBlock *B) const; + unsigned computePhiCost(MachineBasicBlock *B) const; + bool isProfitable(const FlowPattern &FP) const; + bool isPredicableStore(const MachineInstr *MI) const; + bool isSafeToSpeculate(const MachineInstr *MI) const; + + unsigned getCondStoreOpcode(unsigned Opc, bool IfTrue) const; + void predicateInstr(MachineBasicBlock *ToB, MachineBasicBlock::iterator At, + MachineInstr *MI, unsigned PredR, bool IfTrue); + void predicateBlockNB(MachineBasicBlock *ToB, + MachineBasicBlock::iterator At, MachineBasicBlock *FromB, + unsigned PredR, bool IfTrue); + + void updatePhiNodes(MachineBasicBlock *WhereB, const FlowPattern &FP); + void convert(const FlowPattern &FP); + + void removeBlock(MachineBasicBlock *B); + void eliminatePhis(MachineBasicBlock *B); + void replacePhiEdges(MachineBasicBlock *OldB, MachineBasicBlock *NewB); + void mergeBlocks(MachineBasicBlock *PredB, MachineBasicBlock *SuccB); + void simplifyFlowGraph(const FlowPattern &FP); + + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + MachineFunction *MFN; + MachineRegisterInfo *MRI; + MachineDominatorTree *MDT; + MachineLoopInfo *MLI; + BlockSetType Deleted; + const MachineBranchProbabilityInfo *MBPI; + }; + + char HexagonEarlyIfConversion::ID = 0; +} + +INITIALIZE_PASS(HexagonEarlyIfConversion, "hexagon-eif", + "Hexagon early if conversion", false, false) + +bool HexagonEarlyIfConversion::isPreheader(const MachineBasicBlock *B) const { + if (B->succ_size() != 1) + return false; + MachineBasicBlock *SB = *B->succ_begin(); + MachineLoop *L = MLI->getLoopFor(SB); + return L && SB == L->getHeader(); +} + + +bool HexagonEarlyIfConversion::matchFlowPattern(MachineBasicBlock *B, + MachineLoop *L, FlowPattern &FP) { + DEBUG(dbgs() << "Checking flow pattern at BB#" << B->getNumber() << "\n"); + + // Interested only in conditional branches, no .new, no new-value, etc. + // Check the terminators directly, it's easier than handling all responses + // from AnalyzeBranch. 
+ MachineBasicBlock *TB = 0, *FB = 0; + MachineBasicBlock::const_iterator T1I = B->getFirstTerminator(); + if (T1I == B->end()) + return false; + unsigned Opc = T1I->getOpcode(); + if (Opc != Hexagon::J2_jumpt && Opc != Hexagon::J2_jumpf) + return false; + unsigned PredR = T1I->getOperand(0).getReg(); + + // Get the layout successor, or 0 if B does not have one. + MachineFunction::iterator NextBI = std::next(MachineFunction::iterator(B)); + MachineBasicBlock *NextB = (NextBI != MFN->end()) ? &*NextBI : 0; + + MachineBasicBlock *T1B = T1I->getOperand(1).getMBB(); + MachineBasicBlock::const_iterator T2I = std::next(T1I); + // The second terminator should be an unconditional branch. + assert(T2I == B->end() || T2I->getOpcode() == Hexagon::J2_jump); + MachineBasicBlock *T2B = (T2I == B->end()) ? NextB + : T2I->getOperand(0).getMBB(); + if (T1B == T2B) { + // XXX merge if T1B == NextB, or convert branch to unconditional. + // mark as diamond with both sides equal? + return false; + } + // Loop could be null for both. + if (MLI->getLoopFor(T1B) != L || MLI->getLoopFor(T2B) != L) + return false; + + // Record the true/false blocks in such a way that "true" means "if (PredR)", + // and "false" means "if (!PredR)". + if (Opc == Hexagon::J2_jumpt) + TB = T1B, FB = T2B; + else + TB = T2B, FB = T1B; + + if (!MDT->properlyDominates(B, TB) || !MDT->properlyDominates(B, FB)) + return false; + + // Detect triangle first. In case of a triangle, one of the blocks TB/FB + // can fall through into the other, in other words, it will be executed + // in both cases. We only want to predicate the block that is executed + // conditionally. + unsigned TNP = TB->pred_size(), FNP = FB->pred_size(); + unsigned TNS = TB->succ_size(), FNS = FB->succ_size(); + + // A block is predicable if it has one predecessor (it must be B), and + // it has a single successor. In fact, the block has to end either with + // an unconditional branch (which can be predicated), or with a fall- + // through. + bool TOk = (TNP == 1) && (TNS == 1); + bool FOk = (FNP == 1) && (FNS == 1); + + // If neither is predicable, there is nothing interesting. + if (!TOk && !FOk) + return false; + + MachineBasicBlock *TSB = (TNS > 0) ? *TB->succ_begin() : 0; + MachineBasicBlock *FSB = (FNS > 0) ? *FB->succ_begin() : 0; + MachineBasicBlock *JB = 0; + + if (TOk) { + if (FOk) { + if (TSB == FSB) + JB = TSB; + // Diamond: "if (P) then TB; else FB;". + } else { + // TOk && !FOk + if (TSB == FB) { + JB = FB; + FB = 0; + } + } + } else { + // !TOk && FOk (at least one must be true by now). + if (FSB == TB) { + JB = TB; + TB = 0; + } + } + // Don't try to predicate loop preheaders. + if ((TB && isPreheader(TB)) || (FB && isPreheader(FB))) { + DEBUG(dbgs() << "One of blocks " << PrintMB(TB) << ", " << PrintMB(FB) + << " is a loop preheader. Skipping.\n"); + return false; + } + + FP = FlowPattern(B, PredR, TB, FB, JB); + DEBUG(dbgs() << "Detected " << PrintFP(FP, *TRI) << "\n"); + return true; +} + + +// KLUDGE: HexagonInstrInfo::AnalyzeBranch won't work on a block that +// contains EH_LABEL. +bool HexagonEarlyIfConversion::hasEHLabel(const MachineBasicBlock *B) const { + for (auto &I : *B) + if (I.isEHLabel()) + return true; + return false; +} + + +// KLUDGE: HexagonInstrInfo::AnalyzeBranch may be unable to recognize +// that a block can never fall-through. 
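+// The test used instead is deliberately simple: if one of the terminators is
+// a barrier (for example an unconditional J2_jump), the block cannot fall
+// through, so no fall-through successor needs to be considered.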
+bool HexagonEarlyIfConversion::hasUncondBranch(const MachineBasicBlock *B) + const { + MachineBasicBlock::const_iterator I = B->getFirstTerminator(), E = B->end(); + while (I != E) { + if (I->isBarrier()) + return true; + ++I; + } + return false; +} + + +bool HexagonEarlyIfConversion::isValidCandidate(const MachineBasicBlock *B) + const { + if (!B) + return true; + if (B->isEHPad() || B->hasAddressTaken()) + return false; + if (B->succ_size() == 0) + return false; + + for (auto &MI : *B) { + if (MI.isDebugValue()) + continue; + if (MI.isConditionalBranch()) + return false; + unsigned Opc = MI.getOpcode(); + bool IsJMP = (Opc == Hexagon::J2_jump); + if (!isPredicableStore(&MI) && !IsJMP && !isSafeToSpeculate(&MI)) + return false; + // Look for predicate registers defined by this instruction. It's ok + // to speculate such an instruction, but the predicate register cannot + // be used outside of this block (or else it won't be possible to + // update the use of it after predication). PHI uses will be updated + // to use a result of a MUX, and a MUX cannot be created for predicate + // registers. + for (ConstMIOperands MO(&MI); MO.isValid(); ++MO) { + if (!MO->isReg() || !MO->isDef()) + continue; + unsigned R = MO->getReg(); + if (!TargetRegisterInfo::isVirtualRegister(R)) + continue; + if (MRI->getRegClass(R) != &Hexagon::PredRegsRegClass) + continue; + for (auto U = MRI->use_begin(R); U != MRI->use_end(); ++U) + if (U->getParent()->isPHI()) + return false; + } + } + return true; +} + + +bool HexagonEarlyIfConversion::usesUndefVReg(const MachineInstr *MI) const { + for (ConstMIOperands MO(MI); MO.isValid(); ++MO) { + if (!MO->isReg() || !MO->isUse()) + continue; + unsigned R = MO->getReg(); + if (!TargetRegisterInfo::isVirtualRegister(R)) + continue; + const MachineInstr *DefI = MRI->getVRegDef(R); + // "Undefined" virtual registers are actually defined via IMPLICIT_DEF. + assert(DefI && "Expecting a reaching def in MRI"); + if (DefI->isImplicitDef()) + return true; + } + return false; +} + + +bool HexagonEarlyIfConversion::isValid(const FlowPattern &FP) const { + if (hasEHLabel(FP.SplitB)) // KLUDGE: see function definition + return false; + if (FP.TrueB && !isValidCandidate(FP.TrueB)) + return false; + if (FP.FalseB && !isValidCandidate(FP.FalseB)) + return false; + // Check the PHIs in the join block. If any of them use a register + // that is defined as IMPLICIT_DEF, do not convert this. This can + // legitimately happen if one side of the split never executes, but + // the compiler is unable to prove it. That side may then seem to + // provide an "undef" value to the join block, however it will never + // execute at run-time. If we convert this case, the "undef" will + // be used in a MUX instruction, and that may seem like actually + // using an undefined value to other optimizations. This could lead + // to trouble further down the optimization stream, cause assertions + // to fail, etc. 
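+  // A made-up example of the case being rejected:
+  //   BB#4 (TrueB):  %vreg20<def> = IMPLICIT_DEF
+  //   BB#5 (JoinB):  %vreg21<def> = PHI %vreg20, <BB#4>, %vreg8, <BB#3>
+  // Converting this would materialize a MUX that reads %vreg20, turning a
+  // value that is never actually consumed at run-time into an explicit use
+  // of an undefined register.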
+  if (FP.JoinB) {
+    const MachineBasicBlock &B = *FP.JoinB;
+    for (auto &MI : B) {
+      if (!MI.isPHI())
+        break;
+      if (usesUndefVReg(&MI))
+        return false;
+      unsigned DefR = MI.getOperand(0).getReg();
+      const TargetRegisterClass *RC = MRI->getRegClass(DefR);
+      if (RC == &Hexagon::PredRegsRegClass)
+        return false;
+    }
+  }
+  return true;
+}
+
+
+unsigned HexagonEarlyIfConversion::computePhiCost(MachineBasicBlock *B) const {
+  assert(B->pred_size() <= 2);
+  if (B->pred_size() < 2)
+    return 0;
+
+  unsigned Cost = 0;
+  MachineBasicBlock::const_iterator I, E = B->getFirstNonPHI();
+  for (I = B->begin(); I != E; ++I) {
+    const MachineOperand &RO1 = I->getOperand(1);
+    const MachineOperand &RO3 = I->getOperand(3);
+    assert(RO1.isReg() && RO3.isReg());
+    // Must have a MUX if the phi uses a subregister.
+    if (RO1.getSubReg() != 0 || RO3.getSubReg() != 0) {
+      Cost++;
+      continue;
+    }
+    MachineInstr *Def1 = MRI->getVRegDef(RO1.getReg());
+    MachineInstr *Def3 = MRI->getVRegDef(RO3.getReg());
+    if (!TII->isPredicable(Def1) || !TII->isPredicable(Def3))
+      Cost++;
+  }
+  return Cost;
+}
+
+
+unsigned HexagonEarlyIfConversion::countPredicateDefs(
+      const MachineBasicBlock *B) const {
+  unsigned PredDefs = 0;
+  for (auto &MI : *B) {
+    for (ConstMIOperands MO(&MI); MO.isValid(); ++MO) {
+      if (!MO->isReg() || !MO->isDef())
+        continue;
+      unsigned R = MO->getReg();
+      if (!TargetRegisterInfo::isVirtualRegister(R))
+        continue;
+      if (MRI->getRegClass(R) == &Hexagon::PredRegsRegClass)
+        PredDefs++;
+    }
+  }
+  return PredDefs;
+}
+
+
+bool HexagonEarlyIfConversion::isProfitable(const FlowPattern &FP) const {
+  if (FP.TrueB && FP.FalseB) {
+
+    // Do not if-convert if the branch is one-sided.
+    if (MBPI) {
+      BranchProbability Prob(9, 10);
+      if (MBPI->getEdgeProbability(FP.SplitB, FP.TrueB) > Prob)
+        return false;
+      if (MBPI->getEdgeProbability(FP.SplitB, FP.FalseB) > Prob)
+        return false;
+    }
+
+    // If both sides are predicable, convert them if they join, and the
+    // join block has no other predecessors.
+    MachineBasicBlock *TSB = *FP.TrueB->succ_begin();
+    MachineBasicBlock *FSB = *FP.FalseB->succ_begin();
+    if (TSB != FSB)
+      return false;
+    if (TSB->pred_size() != 2)
+      return false;
+  }
+
+  // Calculate the total size of the predicated blocks.
+  // Assume instruction counts without branches to be the approximation of
+  // the code size. If the predicated blocks are smaller than a packet size,
+  // approximate the spare room in the packet that could be filled with the
+  // predicated/speculated instructions.
+  unsigned TS = 0, FS = 0, Spare = 0;
+  if (FP.TrueB) {
+    TS = std::distance(FP.TrueB->begin(), FP.TrueB->getFirstTerminator());
+    if (TS < HEXAGON_PACKET_SIZE)
+      Spare += HEXAGON_PACKET_SIZE-TS;
+  }
+  if (FP.FalseB) {
+    FS = std::distance(FP.FalseB->begin(), FP.FalseB->getFirstTerminator());
+    if (FS < HEXAGON_PACKET_SIZE)
+      Spare += HEXAGON_PACKET_SIZE-FS;
+  }
+  unsigned TotalIn = TS+FS;
+  DEBUG(dbgs() << "Total number of instructions to be predicated/speculated: "
+               << TotalIn << ", spare room: " << Spare << "\n");
+  if (TotalIn >= SizeLimit+Spare)
+    return false;
+
+  // Count the number of PHI nodes that will need to be updated (converted
+  // to MUX). Those can be later converted to predicated instructions, so
+  // they aren't always adding extra cost.
+  // KLUDGE: Also, count the number of predicate register definitions in
+  // each block. The scheduler may increase the pressure of these and cause
+  // expensive spills (e.g. bitmnp01).
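+  // A worked example with invented block sizes, the default SizeLimit of 6
+  // and a packet size of 4: TrueB and FalseB with two non-branch instructions
+  // each give TS = FS = 2, Spare = (4-2)+(4-2) = 4 and TotalIn = 4, so the
+  // check above passes (4 < 6+4). If the phis then require six extra muxes,
+  // TotalIn+TotalPh = 10 is no longer below SizeLimit+Spare = 10 and the
+  // conversion is rejected below.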
+ unsigned TotalPh = 0; + unsigned PredDefs = countPredicateDefs(FP.SplitB); + if (FP.JoinB) { + TotalPh = computePhiCost(FP.JoinB); + PredDefs += countPredicateDefs(FP.JoinB); + } else { + if (FP.TrueB && FP.TrueB->succ_size() > 0) { + MachineBasicBlock *SB = *FP.TrueB->succ_begin(); + TotalPh += computePhiCost(SB); + PredDefs += countPredicateDefs(SB); + } + if (FP.FalseB && FP.FalseB->succ_size() > 0) { + MachineBasicBlock *SB = *FP.FalseB->succ_begin(); + TotalPh += computePhiCost(SB); + PredDefs += countPredicateDefs(SB); + } + } + DEBUG(dbgs() << "Total number of extra muxes from converted phis: " + << TotalPh << "\n"); + if (TotalIn+TotalPh >= SizeLimit+Spare) + return false; + + DEBUG(dbgs() << "Total number of predicate registers: " << PredDefs << "\n"); + if (PredDefs > 4) + return false; + + return true; +} + + +bool HexagonEarlyIfConversion::visitBlock(MachineBasicBlock *B, + MachineLoop *L) { + bool Changed = false; + + // Visit all dominated blocks from the same loop first, then process B. + MachineDomTreeNode *N = MDT->getNode(B); + typedef GraphTraits<MachineDomTreeNode*> GTN; + // We will change CFG/DT during this traversal, so take precautions to + // avoid problems related to invalidated iterators. In fact, processing + // a child C of B cannot cause another child to be removed, but it can + // cause a new child to be added (which was a child of C before C itself + // was removed. This new child C, however, would have been processed + // prior to processing B, so there is no need to process it again. + // Simply keep a list of children of B, and traverse that list. + typedef SmallVector<MachineDomTreeNode*,4> DTNodeVectType; + DTNodeVectType Cn(GTN::child_begin(N), GTN::child_end(N)); + for (DTNodeVectType::iterator I = Cn.begin(), E = Cn.end(); I != E; ++I) { + MachineBasicBlock *SB = (*I)->getBlock(); + if (!Deleted.count(SB)) + Changed |= visitBlock(SB, L); + } + // When walking down the dominator tree, we want to traverse through + // blocks from nested (other) loops, because they can dominate blocks + // that are in L. Skip the non-L blocks only after the tree traversal. + if (MLI->getLoopFor(B) != L) + return Changed; + + FlowPattern FP; + if (!matchFlowPattern(B, L, FP)) + return Changed; + + if (!isValid(FP)) { + DEBUG(dbgs() << "Conversion is not valid\n"); + return Changed; + } + if (!isProfitable(FP)) { + DEBUG(dbgs() << "Conversion is not profitable\n"); + return Changed; + } + + convert(FP); + simplifyFlowGraph(FP); + return true; +} + + +bool HexagonEarlyIfConversion::visitLoop(MachineLoop *L) { + MachineBasicBlock *HB = L ? L->getHeader() : 0; + DEBUG((L ? dbgs() << "Visiting loop H:" << PrintMB(HB) + : dbgs() << "Visiting function") << "\n"); + bool Changed = false; + if (L) { + for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) + Changed |= visitLoop(*I); + } + + MachineBasicBlock *EntryB = GraphTraits<MachineFunction*>::getEntryNode(MFN); + Changed |= visitBlock(L ? HB : EntryB, L); + return Changed; +} + + +bool HexagonEarlyIfConversion::isPredicableStore(const MachineInstr *MI) + const { + // Exclude post-increment stores. Those return a value, so we cannot + // predicate them. 
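+  // For example, a post-increment store such as S2_storerb_pi also defines
+  // the updated base register and is therefore not listed below, while the
+  // plain S2_storerb_io is accepted because predicated forms of it exist
+  // (see getCondStoreOpcode).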
+ unsigned Opc = MI->getOpcode(); + using namespace Hexagon; + switch (Opc) { + // Store byte: + case S2_storerb_io: case S4_storerb_rr: + case S2_storerbabs: case S4_storeirb_io: case S2_storerbgp: + // Store halfword: + case S2_storerh_io: case S4_storerh_rr: + case S2_storerhabs: case S4_storeirh_io: case S2_storerhgp: + // Store upper halfword: + case S2_storerf_io: case S4_storerf_rr: + case S2_storerfabs: case S2_storerfgp: + // Store word: + case S2_storeri_io: case S4_storeri_rr: + case S2_storeriabs: case S4_storeiri_io: case S2_storerigp: + // Store doubleword: + case S2_storerd_io: case S4_storerd_rr: + case S2_storerdabs: case S2_storerdgp: + return true; + } + return false; +} + + +bool HexagonEarlyIfConversion::isSafeToSpeculate(const MachineInstr *MI) + const { + if (MI->mayLoad() || MI->mayStore()) + return false; + if (MI->isCall() || MI->isBarrier() || MI->isBranch()) + return false; + if (MI->hasUnmodeledSideEffects()) + return false; + + return true; +} + + +unsigned HexagonEarlyIfConversion::getCondStoreOpcode(unsigned Opc, + bool IfTrue) const { + // Exclude post-increment stores. + using namespace Hexagon; + switch (Opc) { + case S2_storerb_io: + return IfTrue ? S2_pstorerbt_io : S2_pstorerbf_io; + case S4_storerb_rr: + return IfTrue ? S4_pstorerbt_rr : S4_pstorerbf_rr; + case S2_storerbabs: + case S2_storerbgp: + return IfTrue ? S4_pstorerbt_abs : S4_pstorerbf_abs; + case S4_storeirb_io: + return IfTrue ? S4_storeirbt_io : S4_storeirbf_io; + case S2_storerh_io: + return IfTrue ? S2_pstorerht_io : S2_pstorerhf_io; + case S4_storerh_rr: + return IfTrue ? S4_pstorerht_rr : S4_pstorerhf_rr; + case S2_storerhabs: + case S2_storerhgp: + return IfTrue ? S4_pstorerht_abs : S4_pstorerhf_abs; + case S2_storerf_io: + return IfTrue ? S2_pstorerft_io : S2_pstorerff_io; + case S4_storerf_rr: + return IfTrue ? S4_pstorerft_rr : S4_pstorerff_rr; + case S2_storerfabs: + case S2_storerfgp: + return IfTrue ? S4_pstorerft_abs : S4_pstorerff_abs; + case S4_storeirh_io: + return IfTrue ? S4_storeirht_io : S4_storeirhf_io; + case S2_storeri_io: + return IfTrue ? S2_pstorerit_io : S2_pstorerif_io; + case S4_storeri_rr: + return IfTrue ? S4_pstorerit_rr : S4_pstorerif_rr; + case S2_storeriabs: + case S2_storerigp: + return IfTrue ? S4_pstorerit_abs : S4_pstorerif_abs; + case S4_storeiri_io: + return IfTrue ? S4_storeirit_io : S4_storeirif_io; + case S2_storerd_io: + return IfTrue ? S2_pstorerdt_io : S2_pstorerdf_io; + case S4_storerd_rr: + return IfTrue ? S4_pstorerdt_rr : S4_pstorerdf_rr; + case S2_storerdabs: + case S2_storerdgp: + return IfTrue ? S4_pstorerdt_abs : S4_pstorerdf_abs; + } + llvm_unreachable("Unexpected opcode"); + return 0; +} + + +void HexagonEarlyIfConversion::predicateInstr(MachineBasicBlock *ToB, + MachineBasicBlock::iterator At, MachineInstr *MI, + unsigned PredR, bool IfTrue) { + DebugLoc DL; + if (At != ToB->end()) + DL = At->getDebugLoc(); + else if (!ToB->empty()) + DL = ToB->back().getDebugLoc(); + + unsigned Opc = MI->getOpcode(); + + if (isPredicableStore(MI)) { + unsigned COpc = getCondStoreOpcode(Opc, IfTrue); + assert(COpc); + MachineInstrBuilder MIB = BuildMI(*ToB, At, DL, TII->get(COpc)) + .addReg(PredR); + for (MIOperands MO(MI); MO.isValid(); ++MO) + MIB.addOperand(*MO); + + // Set memory references. 
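+    // Carrying the original memory operands over keeps the alias information
+    // visible to later passes. Using the values from the file header example,
+    //   S2_storerd_io %vreg32, 16, %vreg11
+    // predicated on %vreg41 being false becomes
+    //   S2_pstorerdf_io %vreg41, %vreg32, 16, %vreg11
+    // with the same memory operands attached.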
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); + MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); + MIB.setMemRefs(MMOBegin, MMOEnd); + + MI->eraseFromParent(); + return; + } + + if (Opc == Hexagon::J2_jump) { + MachineBasicBlock *TB = MI->getOperand(0).getMBB(); + const MCInstrDesc &D = TII->get(IfTrue ? Hexagon::J2_jumpt + : Hexagon::J2_jumpf); + BuildMI(*ToB, At, DL, D) + .addReg(PredR) + .addMBB(TB); + MI->eraseFromParent(); + return; + } + + // Print the offending instruction unconditionally as we are about to + // abort. + dbgs() << *MI; + llvm_unreachable("Unexpected instruction"); +} + + +// Predicate/speculate non-branch instructions from FromB into block ToB. +// Leave the branches alone, they will be handled later. Btw, at this point +// FromB should have at most one branch, and it should be unconditional. +void HexagonEarlyIfConversion::predicateBlockNB(MachineBasicBlock *ToB, + MachineBasicBlock::iterator At, MachineBasicBlock *FromB, + unsigned PredR, bool IfTrue) { + DEBUG(dbgs() << "Predicating block " << PrintMB(FromB) << "\n"); + MachineBasicBlock::iterator End = FromB->getFirstTerminator(); + MachineBasicBlock::iterator I, NextI; + + for (I = FromB->begin(); I != End; I = NextI) { + assert(!I->isPHI()); + NextI = std::next(I); + if (isSafeToSpeculate(&*I)) + ToB->splice(At, FromB, I); + else + predicateInstr(ToB, At, &*I, PredR, IfTrue); + } +} + + +void HexagonEarlyIfConversion::updatePhiNodes(MachineBasicBlock *WhereB, + const FlowPattern &FP) { + // Visit all PHI nodes in the WhereB block and generate MUX instructions + // in the split block. Update the PHI nodes with the values of the MUX. + auto NonPHI = WhereB->getFirstNonPHI(); + for (auto I = WhereB->begin(); I != NonPHI; ++I) { + MachineInstr *PN = &*I; + // Registers and subregisters corresponding to TrueB, FalseB and SplitB. + unsigned TR = 0, TSR = 0, FR = 0, FSR = 0, SR = 0, SSR = 0; + for (int i = PN->getNumOperands()-2; i > 0; i -= 2) { + const MachineOperand &RO = PN->getOperand(i), &BO = PN->getOperand(i+1); + if (BO.getMBB() == FP.SplitB) + SR = RO.getReg(), SSR = RO.getSubReg(); + else if (BO.getMBB() == FP.TrueB) + TR = RO.getReg(), TSR = RO.getSubReg(); + else if (BO.getMBB() == FP.FalseB) + FR = RO.getReg(), FSR = RO.getSubReg(); + else + continue; + PN->RemoveOperand(i+1); + PN->RemoveOperand(i); + } + if (TR == 0) + TR = SR, TSR = SSR; + else if (FR == 0) + FR = SR, FSR = SSR; + assert(TR && FR); + + using namespace Hexagon; + unsigned DR = PN->getOperand(0).getReg(); + const TargetRegisterClass *RC = MRI->getRegClass(DR); + const MCInstrDesc &D = RC == &IntRegsRegClass ? 
TII->get(C2_mux) + : TII->get(MUX64_rr); + + MachineBasicBlock::iterator MuxAt = FP.SplitB->getFirstTerminator(); + DebugLoc DL; + if (MuxAt != FP.SplitB->end()) + DL = MuxAt->getDebugLoc(); + unsigned MuxR = MRI->createVirtualRegister(RC); + BuildMI(*FP.SplitB, MuxAt, DL, D, MuxR) + .addReg(FP.PredR) + .addReg(TR, 0, TSR) + .addReg(FR, 0, FSR); + + PN->addOperand(MachineOperand::CreateReg(MuxR, false)); + PN->addOperand(MachineOperand::CreateMBB(FP.SplitB)); + } +} + + +void HexagonEarlyIfConversion::convert(const FlowPattern &FP) { + MachineBasicBlock *TSB = 0, *FSB = 0; + MachineBasicBlock::iterator OldTI = FP.SplitB->getFirstTerminator(); + assert(OldTI != FP.SplitB->end()); + DebugLoc DL = OldTI->getDebugLoc(); + + if (FP.TrueB) { + TSB = *FP.TrueB->succ_begin(); + predicateBlockNB(FP.SplitB, OldTI, FP.TrueB, FP.PredR, true); + } + if (FP.FalseB) { + FSB = *FP.FalseB->succ_begin(); + MachineBasicBlock::iterator At = FP.SplitB->getFirstTerminator(); + predicateBlockNB(FP.SplitB, At, FP.FalseB, FP.PredR, false); + } + + // Regenerate new terminators in the split block and update the successors. + // First, remember any information that may be needed later and remove the + // existing terminators/successors from the split block. + MachineBasicBlock *SSB = 0; + FP.SplitB->erase(OldTI, FP.SplitB->end()); + while (FP.SplitB->succ_size() > 0) { + MachineBasicBlock *T = *FP.SplitB->succ_begin(); + // It's possible that the split block had a successor that is not a pre- + // dicated block. This could only happen if there was only one block to + // be predicated. Example: + // split_b: + // if (p) jump true_b + // jump unrelated2_b + // unrelated1_b: + // ... + // unrelated2_b: ; can have other predecessors, so it's not "false_b" + // jump other_b + // true_b: ; only reachable from split_b, can be predicated + // ... + // + // Find this successor (SSB) if it exists. + if (T != FP.TrueB && T != FP.FalseB) { + assert(!SSB); + SSB = T; + } + FP.SplitB->removeSuccessor(FP.SplitB->succ_begin()); + } + + // Insert new branches and update the successors of the split block. This + // may create unconditional branches to the layout successor, etc., but + // that will be cleaned up later. For now, make sure that correct code is + // generated. + if (FP.JoinB) { + assert(!SSB || SSB == FP.JoinB); + BuildMI(*FP.SplitB, FP.SplitB->end(), DL, TII->get(Hexagon::J2_jump)) + .addMBB(FP.JoinB); + FP.SplitB->addSuccessor(FP.JoinB); + } else { + bool HasBranch = false; + if (TSB) { + BuildMI(*FP.SplitB, FP.SplitB->end(), DL, TII->get(Hexagon::J2_jumpt)) + .addReg(FP.PredR) + .addMBB(TSB); + FP.SplitB->addSuccessor(TSB); + HasBranch = true; + } + if (FSB) { + const MCInstrDesc &D = HasBranch ? TII->get(Hexagon::J2_jump) + : TII->get(Hexagon::J2_jumpf); + MachineInstrBuilder MIB = BuildMI(*FP.SplitB, FP.SplitB->end(), DL, D); + if (!HasBranch) + MIB.addReg(FP.PredR); + MIB.addMBB(FSB); + FP.SplitB->addSuccessor(FSB); + } + if (SSB) { + // This cannot happen if both TSB and FSB are set. [TF]SB are the + // successor blocks of the TrueB and FalseB (or null of the TrueB + // or FalseB block is null). SSB is the potential successor block + // of the SplitB that is neither TrueB nor FalseB. + BuildMI(*FP.SplitB, FP.SplitB->end(), DL, TII->get(Hexagon::J2_jump)) + .addMBB(SSB); + FP.SplitB->addSuccessor(SSB); + } + } + + // What is left to do is to update the PHI nodes that could have entries + // referring to predicated blocks. 
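+  // In the example from the file header, the join-block PHI
+  //   %vreg12<def> = PHI %vreg6, <BB#3>, %vreg11, <BB#4>
+  // has its incoming entries replaced by a single value produced by a
+  //   MUX64_rr %vreg41, %vreg6, %vreg11
+  // inserted into the split block by updatePhiNodes below.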
+ if (FP.JoinB) { + updatePhiNodes(FP.JoinB, FP); + } else { + if (TSB) + updatePhiNodes(TSB, FP); + if (FSB) + updatePhiNodes(FSB, FP); + // Nothing to update in SSB, since SSB's predecessors haven't changed. + } +} + + +void HexagonEarlyIfConversion::removeBlock(MachineBasicBlock *B) { + DEBUG(dbgs() << "Removing block " << PrintMB(B) << "\n"); + + // Transfer the immediate dominator information from B to its descendants. + MachineDomTreeNode *N = MDT->getNode(B); + MachineDomTreeNode *IDN = N->getIDom(); + if (IDN) { + MachineBasicBlock *IDB = IDN->getBlock(); + typedef GraphTraits<MachineDomTreeNode*> GTN; + typedef SmallVector<MachineDomTreeNode*,4> DTNodeVectType; + DTNodeVectType Cn(GTN::child_begin(N), GTN::child_end(N)); + for (DTNodeVectType::iterator I = Cn.begin(), E = Cn.end(); I != E; ++I) { + MachineBasicBlock *SB = (*I)->getBlock(); + MDT->changeImmediateDominator(SB, IDB); + } + } + + while (B->succ_size() > 0) + B->removeSuccessor(B->succ_begin()); + + for (auto I = B->pred_begin(), E = B->pred_end(); I != E; ++I) + (*I)->removeSuccessor(B, true); + + Deleted.insert(B); + MDT->eraseNode(B); + MFN->erase(B->getIterator()); +} + + +void HexagonEarlyIfConversion::eliminatePhis(MachineBasicBlock *B) { + DEBUG(dbgs() << "Removing phi nodes from block " << PrintMB(B) << "\n"); + MachineBasicBlock::iterator I, NextI, NonPHI = B->getFirstNonPHI(); + for (I = B->begin(); I != NonPHI; I = NextI) { + NextI = std::next(I); + MachineInstr *PN = &*I; + assert(PN->getNumOperands() == 3 && "Invalid phi node"); + MachineOperand &UO = PN->getOperand(1); + unsigned UseR = UO.getReg(), UseSR = UO.getSubReg(); + unsigned DefR = PN->getOperand(0).getReg(); + unsigned NewR = UseR; + if (UseSR) { + // MRI.replaceVregUsesWith does not allow to update the subregister, + // so instead of doing the use-iteration here, create a copy into a + // "non-subregistered" register. 
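+      // Sketch with invented registers: for the single-entry PHI
+      //   %vreg12<def> = PHI %vreg9:subreg_loreg, <BB#2>
+      // a copy  %vreg21<def> = COPY %vreg9:subreg_loreg  is emitted here,
+      // and all uses of %vreg12 are then rewritten to use %vreg21.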
+ DebugLoc DL = PN->getDebugLoc(); + const TargetRegisterClass *RC = MRI->getRegClass(DefR); + NewR = MRI->createVirtualRegister(RC); + NonPHI = BuildMI(*B, NonPHI, DL, TII->get(TargetOpcode::COPY), NewR) + .addReg(UseR, 0, UseSR); + } + MRI->replaceRegWith(DefR, NewR); + B->erase(I); + } +} + + +void HexagonEarlyIfConversion::replacePhiEdges(MachineBasicBlock *OldB, + MachineBasicBlock *NewB) { + for (auto I = OldB->succ_begin(), E = OldB->succ_end(); I != E; ++I) { + MachineBasicBlock *SB = *I; + MachineBasicBlock::iterator P, N = SB->getFirstNonPHI(); + for (P = SB->begin(); P != N; ++P) { + MachineInstr *PN = &*P; + for (MIOperands MO(PN); MO.isValid(); ++MO) + if (MO->isMBB() && MO->getMBB() == OldB) + MO->setMBB(NewB); + } + } +} + + +void HexagonEarlyIfConversion::mergeBlocks(MachineBasicBlock *PredB, + MachineBasicBlock *SuccB) { + DEBUG(dbgs() << "Merging blocks " << PrintMB(PredB) << " and " + << PrintMB(SuccB) << "\n"); + bool TermOk = hasUncondBranch(SuccB); + eliminatePhis(SuccB); + TII->RemoveBranch(*PredB); + PredB->removeSuccessor(SuccB); + PredB->splice(PredB->end(), SuccB, SuccB->begin(), SuccB->end()); + MachineBasicBlock::succ_iterator I, E = SuccB->succ_end(); + for (I = SuccB->succ_begin(); I != E; ++I) + PredB->addSuccessor(*I); + PredB->normalizeSuccProbs(); + replacePhiEdges(SuccB, PredB); + removeBlock(SuccB); + if (!TermOk) + PredB->updateTerminator(); +} + + +void HexagonEarlyIfConversion::simplifyFlowGraph(const FlowPattern &FP) { + if (FP.TrueB) + removeBlock(FP.TrueB); + if (FP.FalseB) + removeBlock(FP.FalseB); + + FP.SplitB->updateTerminator(); + if (FP.SplitB->succ_size() != 1) + return; + + MachineBasicBlock *SB = *FP.SplitB->succ_begin(); + if (SB->pred_size() != 1) + return; + + // By now, the split block has only one successor (SB), and SB has only + // one predecessor. We can try to merge them. We will need to update ter- + // minators in FP.Split+SB, and that requires working AnalyzeBranch, which + // fails on Hexagon for blocks that have EH_LABELs. However, if SB ends + // with an unconditional branch, we won't need to touch the terminators. + if (!hasEHLabel(SB) || hasUncondBranch(SB)) + mergeBlocks(FP.SplitB, SB); +} + + +bool HexagonEarlyIfConversion::runOnMachineFunction(MachineFunction &MF) { + auto &ST = MF.getSubtarget(); + TII = ST.getInstrInfo(); + TRI = ST.getRegisterInfo(); + MFN = &MF; + MRI = &MF.getRegInfo(); + MDT = &getAnalysis<MachineDominatorTree>(); + MLI = &getAnalysis<MachineLoopInfo>(); + MBPI = EnableHexagonBP ? 
&getAnalysis<MachineBranchProbabilityInfo>() : + nullptr; + + Deleted.clear(); + bool Changed = false; + + for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); I != E; ++I) + Changed |= visitLoop(*I); + Changed |= visitLoop(0); + + return Changed; +} + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// +FunctionPass *llvm::createHexagonEarlyIfConversion() { + return new HexagonEarlyIfConversion(); +} + diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp new file mode 100644 index 0000000..ce10aea --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp @@ -0,0 +1,1357 @@ +//===--- HexagonExpandCondsets.cpp ----------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// Replace mux instructions with the corresponding legal instructions. +// It is meant to work post-SSA, but still on virtual registers. It was +// originally placed between register coalescing and machine instruction +// scheduler. +// In this place in the optimization sequence, live interval analysis had +// been performed, and the live intervals should be preserved. A large part +// of the code deals with preserving the liveness information. +// +// Liveness tracking aside, the main functionality of this pass is divided +// into two steps. The first step is to replace an instruction +// vreg0 = C2_mux vreg0, vreg1, vreg2 +// with a pair of conditional transfers +// vreg0 = A2_tfrt vreg0, vreg1 +// vreg0 = A2_tfrf vreg0, vreg2 +// It is the intention that the execution of this pass could be terminated +// after this step, and the code generated would be functionally correct. +// +// If the uses of the source values vreg1 and vreg2 are kills, and their +// definitions are predicable, then in the second step, the conditional +// transfers will then be rewritten as predicated instructions. E.g. +// vreg0 = A2_or vreg1, vreg2 +// vreg3 = A2_tfrt vreg99, vreg0<kill> +// will be rewritten as +// vreg3 = A2_port vreg99, vreg1, vreg2 +// +// This replacement has two variants: "up" and "down". Consider this case: +// vreg0 = A2_or vreg1, vreg2 +// ... [intervening instructions] ... +// vreg3 = A2_tfrt vreg99, vreg0<kill> +// variant "up": +// vreg3 = A2_port vreg99, vreg1, vreg2 +// ... [intervening instructions, vreg0->vreg3] ... +// [deleted] +// variant "down": +// [deleted] +// ... [intervening instructions] ... +// vreg3 = A2_port vreg99, vreg1, vreg2 +// +// Both, one or none of these variants may be valid, and checks are made +// to rule out inapplicable variants. +// +// As an additional optimization, before either of the two steps above is +// executed, the pass attempts to coalesce the target register with one of +// the source registers, e.g. given an instruction +// vreg3 = C2_mux vreg0, vreg1, vreg2 +// vreg3 will be coalesced with either vreg1 or vreg2. 
If this succeeds, +// the instruction would then be (for example) +// vreg3 = C2_mux vreg0, vreg3, vreg2 +// and, under certain circumstances, this could result in only one predicated +// instruction: +// vreg3 = A2_tfrf vreg0, vreg2 +// + +#define DEBUG_TYPE "expand-condsets" +#include "HexagonTargetMachine.h" + +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +static cl::opt<unsigned> OptTfrLimit("expand-condsets-tfr-limit", + cl::init(~0U), cl::Hidden, cl::desc("Max number of mux expansions")); +static cl::opt<unsigned> OptCoaLimit("expand-condsets-coa-limit", + cl::init(~0U), cl::Hidden, cl::desc("Max number of segment coalescings")); + +namespace llvm { + void initializeHexagonExpandCondsetsPass(PassRegistry&); + FunctionPass *createHexagonExpandCondsets(); +} + +namespace { + class HexagonExpandCondsets : public MachineFunctionPass { + public: + static char ID; + HexagonExpandCondsets() : + MachineFunctionPass(ID), HII(0), TRI(0), MRI(0), + LIS(0), CoaLimitActive(false), + TfrLimitActive(false), CoaCounter(0), TfrCounter(0) { + if (OptCoaLimit.getPosition()) + CoaLimitActive = true, CoaLimit = OptCoaLimit; + if (OptTfrLimit.getPosition()) + TfrLimitActive = true, TfrLimit = OptTfrLimit; + initializeHexagonExpandCondsetsPass(*PassRegistry::getPassRegistry()); + } + + virtual const char *getPassName() const { + return "Hexagon Expand Condsets"; + } + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<LiveIntervals>(); + AU.addPreserved<LiveIntervals>(); + AU.addPreserved<SlotIndexes>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + virtual bool runOnMachineFunction(MachineFunction &MF); + + private: + const HexagonInstrInfo *HII; + const TargetRegisterInfo *TRI; + MachineRegisterInfo *MRI; + LiveIntervals *LIS; + + bool CoaLimitActive, TfrLimitActive; + unsigned CoaLimit, TfrLimit, CoaCounter, TfrCounter; + + struct RegisterRef { + RegisterRef(const MachineOperand &Op) : Reg(Op.getReg()), + Sub(Op.getSubReg()) {} + RegisterRef(unsigned R = 0, unsigned S = 0) : Reg(R), Sub(S) {} + bool operator== (RegisterRef RR) const { + return Reg == RR.Reg && Sub == RR.Sub; + } + bool operator!= (RegisterRef RR) const { return !operator==(RR); } + unsigned Reg, Sub; + }; + + typedef DenseMap<unsigned,unsigned> ReferenceMap; + enum { Sub_Low = 0x1, Sub_High = 0x2, Sub_None = (Sub_Low | Sub_High) }; + enum { Exec_Then = 0x10, Exec_Else = 0x20 }; + unsigned getMaskForSub(unsigned Sub); + bool isCondset(const MachineInstr *MI); + + void addRefToMap(RegisterRef RR, ReferenceMap &Map, unsigned Exec); + bool isRefInMap(RegisterRef, ReferenceMap &Map, unsigned Exec); + + LiveInterval::iterator nextSegment(LiveInterval &LI, SlotIndex S); + LiveInterval::iterator prevSegment(LiveInterval &LI, SlotIndex S); + void makeDefined(unsigned Reg, SlotIndex S, bool SetDef); + void makeUndead(unsigned Reg, SlotIndex S); + void shrinkToUses(unsigned Reg, LiveInterval &LI); + void updateKillFlags(unsigned Reg, LiveInterval &LI); + void terminateSegment(LiveInterval::iterator LT, SlotIndex S, + LiveInterval &LI); + void 
addInstrToLiveness(MachineInstr *MI); + void removeInstrFromLiveness(MachineInstr *MI); + + unsigned getCondTfrOpcode(const MachineOperand &SO, bool Cond); + MachineInstr *genTfrFor(MachineOperand &SrcOp, unsigned DstR, + unsigned DstSR, const MachineOperand &PredOp, bool Cond); + bool split(MachineInstr *MI); + bool splitInBlock(MachineBasicBlock &B); + + bool isPredicable(MachineInstr *MI); + MachineInstr *getReachingDefForPred(RegisterRef RD, + MachineBasicBlock::iterator UseIt, unsigned PredR, bool Cond); + bool canMoveOver(MachineInstr *MI, ReferenceMap &Defs, ReferenceMap &Uses); + bool canMoveMemTo(MachineInstr *MI, MachineInstr *ToI, bool IsDown); + void predicateAt(RegisterRef RD, MachineInstr *MI, + MachineBasicBlock::iterator Where, unsigned PredR, bool Cond); + void renameInRange(RegisterRef RO, RegisterRef RN, unsigned PredR, + bool Cond, MachineBasicBlock::iterator First, + MachineBasicBlock::iterator Last); + bool predicate(MachineInstr *TfrI, bool Cond); + bool predicateInBlock(MachineBasicBlock &B); + + void postprocessUndefImplicitUses(MachineBasicBlock &B); + void removeImplicitUses(MachineInstr *MI); + void removeImplicitUses(MachineBasicBlock &B); + + bool isIntReg(RegisterRef RR, unsigned &BW); + bool isIntraBlocks(LiveInterval &LI); + bool coalesceRegisters(RegisterRef R1, RegisterRef R2); + bool coalesceSegments(MachineFunction &MF); + }; +} + +char HexagonExpandCondsets::ID = 0; + + +unsigned HexagonExpandCondsets::getMaskForSub(unsigned Sub) { + switch (Sub) { + case Hexagon::subreg_loreg: + return Sub_Low; + case Hexagon::subreg_hireg: + return Sub_High; + case Hexagon::NoSubRegister: + return Sub_None; + } + llvm_unreachable("Invalid subregister"); +} + + +bool HexagonExpandCondsets::isCondset(const MachineInstr *MI) { + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case Hexagon::C2_mux: + case Hexagon::C2_muxii: + case Hexagon::C2_muxir: + case Hexagon::C2_muxri: + case Hexagon::MUX64_rr: + return true; + break; + } + return false; +} + + +void HexagonExpandCondsets::addRefToMap(RegisterRef RR, ReferenceMap &Map, + unsigned Exec) { + unsigned Mask = getMaskForSub(RR.Sub) | Exec; + ReferenceMap::iterator F = Map.find(RR.Reg); + if (F == Map.end()) + Map.insert(std::make_pair(RR.Reg, Mask)); + else + F->second |= Mask; +} + + +bool HexagonExpandCondsets::isRefInMap(RegisterRef RR, ReferenceMap &Map, + unsigned Exec) { + ReferenceMap::iterator F = Map.find(RR.Reg); + if (F == Map.end()) + return false; + unsigned Mask = getMaskForSub(RR.Sub) | Exec; + if (Mask & F->second) + return true; + return false; +} + + +LiveInterval::iterator HexagonExpandCondsets::nextSegment(LiveInterval &LI, + SlotIndex S) { + for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) { + if (I->start >= S) + return I; + } + return LI.end(); +} + + +LiveInterval::iterator HexagonExpandCondsets::prevSegment(LiveInterval &LI, + SlotIndex S) { + LiveInterval::iterator P = LI.end(); + for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) { + if (I->end > S) + return P; + P = I; + } + return P; +} + + +/// Find the implicit use of register Reg in slot index S, and make sure +/// that the "defined" flag is set to SetDef. While the mux expansion is +/// going on, predicated instructions will have implicit uses of the +/// registers that are being defined. This is to keep any preceding +/// definitions live. If there is no preceding definition, the implicit +/// use will be marked as "undef", otherwise it will be "defined". 
This +/// function is used to update the flag. +void HexagonExpandCondsets::makeDefined(unsigned Reg, SlotIndex S, + bool SetDef) { + if (!S.isRegister()) + return; + MachineInstr *MI = LIS->getInstructionFromIndex(S); + assert(MI && "Expecting instruction"); + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isUse() || Op.getReg() != Reg) + continue; + bool IsDef = !Op.isUndef(); + if (Op.isImplicit() && IsDef != SetDef) + Op.setIsUndef(!SetDef); + } +} + + +void HexagonExpandCondsets::makeUndead(unsigned Reg, SlotIndex S) { + // If S is a block boundary, then there can still be a dead def reaching + // this point. Instead of traversing the CFG, queue start points of all + // live segments that begin with a register, and end at a block boundary. + // This may "resurrect" some truly dead definitions, but doing so is + // harmless. + SmallVector<MachineInstr*,8> Defs; + if (S.isBlock()) { + LiveInterval &LI = LIS->getInterval(Reg); + for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) { + if (!I->start.isRegister() || !I->end.isBlock()) + continue; + MachineInstr *MI = LIS->getInstructionFromIndex(I->start); + Defs.push_back(MI); + } + } else if (S.isRegister()) { + MachineInstr *MI = LIS->getInstructionFromIndex(S); + Defs.push_back(MI); + } + + for (unsigned i = 0, n = Defs.size(); i < n; ++i) { + MachineInstr *MI = Defs[i]; + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isDef() || Op.getReg() != Reg) + continue; + Op.setIsDead(false); + } + } +} + + +/// Shrink the segments in the live interval for a given register to the last +/// use before each subsequent def. Unlike LiveIntervals::shrinkToUses, this +/// function will not mark any definitions of Reg as dead. The reason for this +/// is that this function is used while a MUX instruction is being expanded, +/// or while a conditional copy is undergoing predication. During these +/// processes, there may be defs present in the instruction sequence that have +/// not yet been removed, or there may be missing uses that have not yet been +/// added. We want to utilize LiveIntervals::shrinkToUses as much as possible, +/// but since it does not extend any intervals that are too short, we need to +/// pre-emptively extend them here in anticipation of further changes. +void HexagonExpandCondsets::shrinkToUses(unsigned Reg, LiveInterval &LI) { + SmallVector<MachineInstr*,4> Deads; + LIS->shrinkToUses(&LI, &Deads); + // Need to undo the deadification made by "shrinkToUses". It's easier to + // do it here, since we have a list of all instructions that were just + // marked as dead. + for (unsigned i = 0, n = Deads.size(); i < n; ++i) { + MachineInstr *MI = Deads[i]; + // Clear the "dead" flag. + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isDef() || Op.getReg() != Reg) + continue; + Op.setIsDead(false); + } + // Extend the live segment to the beginning of the next one. + LiveInterval::iterator End = LI.end(); + SlotIndex S = LIS->getInstructionIndex(MI).getRegSlot(); + LiveInterval::iterator T = LI.FindSegmentContaining(S); + assert(T != End); + LiveInterval::iterator N = std::next(T); + if (N != End) + T->end = N->start; + else + T->end = LIS->getMBBEndIdx(MI->getParent()); + } + updateKillFlags(Reg, LI); +} + + +/// Given an updated live interval LI for register Reg, update the kill flags +/// in instructions using Reg to reflect the liveness changes. 
+void HexagonExpandCondsets::updateKillFlags(unsigned Reg, LiveInterval &LI) { + MRI->clearKillFlags(Reg); + for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) { + SlotIndex EX = I->end; + if (!EX.isRegister()) + continue; + MachineInstr *MI = LIS->getInstructionFromIndex(EX); + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isUse() || Op.getReg() != Reg) + continue; + // Only set the kill flag on the first encountered use of Reg in this + // instruction. + Op.setIsKill(true); + break; + } + } +} + + +/// When adding a new instruction to liveness, the newly added definition +/// will start a new live segment. This may happen at a position that falls +/// within an existing live segment. In such case that live segment needs to +/// be truncated to make room for the new segment. Ultimately, the truncation +/// will occur at the last use, but for now the segment can be terminated +/// right at the place where the new segment will start. The segments will be +/// shrunk-to-uses later. +void HexagonExpandCondsets::terminateSegment(LiveInterval::iterator LT, + SlotIndex S, LiveInterval &LI) { + // Terminate the live segment pointed to by LT within a live interval LI. + if (LT == LI.end()) + return; + + VNInfo *OldVN = LT->valno; + SlotIndex EX = LT->end; + LT->end = S; + // If LT does not end at a block boundary, the termination is done. + if (!EX.isBlock()) + return; + + // If LT ended at a block boundary, it's possible that its value number + // is picked up at the beginning other blocks. Create a new value number + // and change such blocks to use it instead. + VNInfo *NewVN = 0; + for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) { + if (!I->start.isBlock() || I->valno != OldVN) + continue; + // Generate on-demand a new value number that is defined by the + // block beginning (i.e. -phi). + if (!NewVN) + NewVN = LI.getNextValue(I->start, LIS->getVNInfoAllocator()); + I->valno = NewVN; + } +} + + +/// Add the specified instruction to live intervals. This function is used +/// to update the live intervals while the program code is being changed. +/// Neither the expansion of a MUX, nor the predication are atomic, and this +/// function is used to update the live intervals while these transformations +/// are being done. +void HexagonExpandCondsets::addInstrToLiveness(MachineInstr *MI) { + SlotIndex MX = LIS->isNotInMIMap(MI) ? LIS->InsertMachineInstrInMaps(MI) + : LIS->getInstructionIndex(MI); + DEBUG(dbgs() << "adding liveness info for instr\n " << MX << " " << *MI); + + MX = MX.getRegSlot(); + bool Predicated = HII->isPredicated(MI); + MachineBasicBlock *MB = MI->getParent(); + + // Strip all implicit uses from predicated instructions. They will be + // added again, according to the updated information. + if (Predicated) + removeImplicitUses(MI); + + // For each def in MI we need to insert a new live segment starting at MX + // into the interval. If there already exists a live segment in the interval + // that contains MX, we need to terminate it at MX. + SmallVector<RegisterRef,2> Defs; + for (auto &Op : MI->operands()) + if (Op.isReg() && Op.isDef()) + Defs.push_back(RegisterRef(Op)); + + for (unsigned i = 0, n = Defs.size(); i < n; ++i) { + unsigned DefR = Defs[i].Reg; + LiveInterval &LID = LIS->getInterval(DefR); + DEBUG(dbgs() << "adding def " << PrintReg(DefR, TRI) + << " with interval\n " << LID << "\n"); + // If MX falls inside of an existing live segment, terminate it. 
+ LiveInterval::iterator LT = LID.FindSegmentContaining(MX); + if (LT != LID.end()) + terminateSegment(LT, MX, LID); + DEBUG(dbgs() << "after terminating segment\n " << LID << "\n"); + + // Create a new segment starting from MX. + LiveInterval::iterator P = prevSegment(LID, MX), N = nextSegment(LID, MX); + SlotIndex EX; + VNInfo *VN = LID.getNextValue(MX, LIS->getVNInfoAllocator()); + if (N == LID.end()) { + // There is no live segment after MX. End this segment at the end of + // the block. + EX = LIS->getMBBEndIdx(MB); + } else { + // If the next segment starts at the block boundary, end the new segment + // at the boundary of the preceding block (i.e. the previous index). + // Otherwise, end the segment at the beginning of the next segment. In + // either case it will be "shrunk-to-uses" later. + EX = N->start.isBlock() ? N->start.getPrevIndex() : N->start; + } + if (Predicated) { + // Predicated instruction will have an implicit use of the defined + // register. This is necessary so that this definition will not make + // any previous definitions dead. If there are no previous live + // segments, still add the implicit use, but make it "undef". + // Because of the implicit use, the preceding definition is not + // dead. Mark is as such (if necessary). + MachineOperand ImpUse = MachineOperand::CreateReg(DefR, false, true); + ImpUse.setSubReg(Defs[i].Sub); + bool Undef = false; + if (P == LID.end()) + Undef = true; + else { + // If the previous segment extends to the end of the previous block, + // the end index may actually be the beginning of this block. If + // the previous segment ends at a block boundary, move it back by one, + // to get the proper block for it. + SlotIndex PE = P->end.isBlock() ? P->end.getPrevIndex() : P->end; + MachineBasicBlock *PB = LIS->getMBBFromIndex(PE); + if (PB != MB && !LIS->isLiveInToMBB(LID, MB)) + Undef = true; + } + if (!Undef) { + makeUndead(DefR, P->valno->def); + // We are adding a live use, so extend the previous segment to + // include it. + P->end = MX; + } else { + ImpUse.setIsUndef(true); + } + + if (!MI->readsRegister(DefR)) + MI->addOperand(ImpUse); + if (N != LID.end()) + makeDefined(DefR, N->start, true); + } + LiveRange::Segment NR = LiveRange::Segment(MX, EX, VN); + LID.addSegment(NR); + DEBUG(dbgs() << "added a new segment " << NR << "\n " << LID << "\n"); + shrinkToUses(DefR, LID); + DEBUG(dbgs() << "updated imp-uses: " << *MI); + LID.verify(); + } + + // For each use in MI: + // - If there is no live segment that contains MX for the used register, + // extend the previous one. Ignore implicit uses. + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isUse() || Op.isImplicit() || Op.isUndef()) + continue; + unsigned UseR = Op.getReg(); + LiveInterval &LIU = LIS->getInterval(UseR); + // Find the last segment P that starts before MX. + LiveInterval::iterator P = LIU.FindSegmentContaining(MX); + if (P == LIU.end()) + P = prevSegment(LIU, MX); + + assert(P != LIU.end() && "MI uses undefined register?"); + SlotIndex EX = P->end; + // If P contains MX, there is not much to do. + if (EX > MX) { + Op.setIsKill(false); + continue; + } + // Otherwise, extend P to "next(MX)". + P->end = MX.getNextIndex(); + Op.setIsKill(true); + // Get the old "kill" instruction, and remove the kill flag. 
+ if (MachineInstr *KI = LIS->getInstructionFromIndex(MX)) + KI->clearRegisterKills(UseR, nullptr); + shrinkToUses(UseR, LIU); + LIU.verify(); + } +} + + +/// Update the live interval information to reflect the removal of the given +/// instruction from the program. As with "addInstrToLiveness", this function +/// is called while the program code is being changed. +void HexagonExpandCondsets::removeInstrFromLiveness(MachineInstr *MI) { + SlotIndex MX = LIS->getInstructionIndex(MI).getRegSlot(); + DEBUG(dbgs() << "removing instr\n " << MX << " " << *MI); + + // For each def in MI: + // If MI starts a live segment, merge this segment with the previous segment. + // + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isDef()) + continue; + unsigned DefR = Op.getReg(); + LiveInterval &LID = LIS->getInterval(DefR); + LiveInterval::iterator LT = LID.FindSegmentContaining(MX); + assert(LT != LID.end() && "Expecting live segments"); + DEBUG(dbgs() << "removing def at " << MX << " of " << PrintReg(DefR, TRI) + << " with interval\n " << LID << "\n"); + if (LT->start != MX) + continue; + + VNInfo *MVN = LT->valno; + if (LT != LID.begin()) { + // If the current live segment is not the first, the task is easy. If + // the previous segment continues into the current block, extend it to + // the end of the current one, and merge the value numbers. + // Otherwise, remove the current segment, and make the end of it "undef". + LiveInterval::iterator P = std::prev(LT); + SlotIndex PE = P->end.isBlock() ? P->end.getPrevIndex() : P->end; + MachineBasicBlock *MB = MI->getParent(); + MachineBasicBlock *PB = LIS->getMBBFromIndex(PE); + if (PB != MB && !LIS->isLiveInToMBB(LID, MB)) { + makeDefined(DefR, LT->end, false); + LID.removeSegment(*LT); + } else { + // Make the segments adjacent, so that merge-vn can also merge the + // segments. + P->end = LT->start; + makeUndead(DefR, P->valno->def); + LID.MergeValueNumberInto(MVN, P->valno); + } + } else { + LiveInterval::iterator N = std::next(LT); + LiveInterval::iterator RmB = LT, RmE = N; + while (N != LID.end()) { + // Iterate until the first register-based definition is found + // (i.e. skip all block-boundary entries). + LiveInterval::iterator Next = std::next(N); + if (N->start.isRegister()) { + makeDefined(DefR, N->start, false); + break; + } + if (N->end.isRegister()) { + makeDefined(DefR, N->end, false); + RmE = Next; + break; + } + RmE = Next; + N = Next; + } + // Erase the segments in one shot to avoid invalidating iterators. + LID.segments.erase(RmB, RmE); + } + + bool VNUsed = false; + for (LiveInterval::iterator I = LID.begin(), E = LID.end(); I != E; ++I) { + if (I->valno != MVN) + continue; + VNUsed = true; + break; + } + if (!VNUsed) + MVN->markUnused(); + + DEBUG(dbgs() << "new interval: "); + if (!LID.empty()) { + DEBUG(dbgs() << LID << "\n"); + LID.verify(); + } else { + DEBUG(dbgs() << "<empty>\n"); + LIS->removeInterval(DefR); + } + } + + // For uses there is nothing to do. The intervals will be updated via + // shrinkToUses. 
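+  // E.g. after erasing  %vreg0<def> = A2_tfrt %vreg1, %vreg2<kill>  (made-up
+  // registers), the intervals of %vreg1 and %vreg2 still reach the erased
+  // slot; the used registers are collected first so that shrinkToUses can
+  // trim each interval back to its last remaining real use.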
+  SmallVector<unsigned,4> Uses;
+  for (auto &Op : MI->operands()) {
+    if (!Op.isReg() || !Op.isUse())
+      continue;
+    unsigned R = Op.getReg();
+    if (!TargetRegisterInfo::isVirtualRegister(R))
+      continue;
+    Uses.push_back(R);
+  }
+  LIS->RemoveMachineInstrFromMaps(MI);
+  MI->eraseFromParent();
+  for (unsigned i = 0, n = Uses.size(); i < n; ++i) {
+    LiveInterval &LI = LIS->getInterval(Uses[i]);
+    shrinkToUses(Uses[i], LI);
+  }
+}
+
+
+/// Get the opcode for a conditional transfer of the value in SO (source
+/// operand). The condition (true/false) is given in Cond.
+unsigned HexagonExpandCondsets::getCondTfrOpcode(const MachineOperand &SO,
+      bool Cond) {
+  using namespace Hexagon;
+  if (SO.isReg()) {
+    unsigned PhysR;
+    RegisterRef RS = SO;
+    if (TargetRegisterInfo::isVirtualRegister(RS.Reg)) {
+      const TargetRegisterClass *VC = MRI->getRegClass(RS.Reg);
+      assert(VC->begin() != VC->end() && "Empty register class");
+      PhysR = *VC->begin();
+    } else {
+      assert(TargetRegisterInfo::isPhysicalRegister(RS.Reg));
+      PhysR = RS.Reg;
+    }
+    unsigned PhysS = (RS.Sub == 0) ? PhysR : TRI->getSubReg(PhysR, RS.Sub);
+    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(PhysS);
+    switch (RC->getSize()) {
+      case 4:
+        return Cond ? A2_tfrt : A2_tfrf;
+      case 8:
+        return Cond ? A2_tfrpt : A2_tfrpf;
+    }
+    llvm_unreachable("Invalid register operand");
+  }
+  if (SO.isImm() || SO.isFPImm())
+    return Cond ? C2_cmoveit : C2_cmoveif;
+  llvm_unreachable("Unexpected source operand");
+}
+
+
+/// Generate a conditional transfer, copying the value SrcOp to the
+/// destination register DstR:DstSR, and using the predicate register from
+/// PredOp. The Cond argument specifies whether the predicate is to be
+/// if(PredOp), or if(!PredOp).
+MachineInstr *HexagonExpandCondsets::genTfrFor(MachineOperand &SrcOp,
+      unsigned DstR, unsigned DstSR, const MachineOperand &PredOp, bool Cond) {
+  MachineInstr *MI = SrcOp.getParent();
+  MachineBasicBlock &B = *MI->getParent();
+  MachineBasicBlock::iterator At = MI;
+  DebugLoc DL = MI->getDebugLoc();
+
+  // Don't avoid identity copies here (i.e. if the source and the destination
+  // are the same registers). It is actually better to generate them here,
+  // since this would cause the copy to potentially be predicated in the next
+  // step. The predication will remove such a copy if it is unable to
+  // predicate.
+
+  unsigned Opc = getCondTfrOpcode(SrcOp, Cond);
+  MachineInstr *TfrI = BuildMI(B, At, DL, HII->get(Opc))
+        .addReg(DstR, RegState::Define, DstSR)
+        .addOperand(PredOp)
+        .addOperand(SrcOp);
+  // We don't want any kills yet.
+  TfrI->clearKillInfo();
+  DEBUG(dbgs() << "created an initial copy: " << *TfrI);
+  return TfrI;
+}
+
+
+/// Replace a MUX instruction MI with a pair A2_tfrt/A2_tfrf. This function
+/// performs all necessary changes to complete the replacement.
+bool HexagonExpandCondsets::split(MachineInstr *MI) {
+  if (TfrLimitActive) {
+    if (TfrCounter >= TfrLimit)
+      return false;
+    TfrCounter++;
+  }
+  DEBUG(dbgs() << "\nsplitting BB#" << MI->getParent()->getNumber()
+               << ": " << *MI);
+  MachineOperand &MD = MI->getOperand(0);  // Definition
+  MachineOperand &MP = MI->getOperand(1);  // Predicate register
+  assert(MD.isDef());
+  unsigned DR = MD.getReg(), DSR = MD.getSubReg();
+
+  // First, create the two individual conditional transfers, and add each
+  // of them to the live intervals information. Do that first and then remove
+  // the old instruction from live intervals.
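+  // As in the file header example, a
+  //   %vreg0 = C2_mux %vreg0, %vreg1, %vreg2
+  // becomes the pair
+  //   %vreg0 = A2_tfrt %vreg0, %vreg1
+  //   %vreg0 = A2_tfrf %vreg0, %vreg2
+  // and each new transfer is registered with LiveIntervals before the mux
+  // itself is removed.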
+ if (MachineInstr *TfrT = genTfrFor(MI->getOperand(2), DR, DSR, MP, true)) + addInstrToLiveness(TfrT); + if (MachineInstr *TfrF = genTfrFor(MI->getOperand(3), DR, DSR, MP, false)) + addInstrToLiveness(TfrF); + removeInstrFromLiveness(MI); + + return true; +} + + +/// Split all MUX instructions in the given block into pairs of contitional +/// transfers. +bool HexagonExpandCondsets::splitInBlock(MachineBasicBlock &B) { + bool Changed = false; + MachineBasicBlock::iterator I, E, NextI; + for (I = B.begin(), E = B.end(); I != E; I = NextI) { + NextI = std::next(I); + if (isCondset(I)) + Changed |= split(I); + } + return Changed; +} + + +bool HexagonExpandCondsets::isPredicable(MachineInstr *MI) { + if (HII->isPredicated(MI) || !HII->isPredicable(MI)) + return false; + if (MI->hasUnmodeledSideEffects() || MI->mayStore()) + return false; + // Reject instructions with multiple defs (e.g. post-increment loads). + bool HasDef = false; + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isDef()) + continue; + if (HasDef) + return false; + HasDef = true; + } + for (auto &Mo : MI->memoperands()) + if (Mo->isVolatile()) + return false; + return true; +} + + +/// Find the reaching definition for a predicated use of RD. The RD is used +/// under the conditions given by PredR and Cond, and this function will ignore +/// definitions that set RD under the opposite conditions. +MachineInstr *HexagonExpandCondsets::getReachingDefForPred(RegisterRef RD, + MachineBasicBlock::iterator UseIt, unsigned PredR, bool Cond) { + MachineBasicBlock &B = *UseIt->getParent(); + MachineBasicBlock::iterator I = UseIt, S = B.begin(); + if (I == S) + return 0; + + bool PredValid = true; + do { + --I; + MachineInstr *MI = &*I; + // Check if this instruction can be ignored, i.e. if it is predicated + // on the complementary condition. + if (PredValid && HII->isPredicated(MI)) { + if (MI->readsRegister(PredR) && (Cond != HII->isPredicatedTrue(MI))) + continue; + } + + // Check the defs. If the PredR is defined, invalidate it. If RD is + // defined, return the instruction or 0, depending on the circumstances. + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isDef()) + continue; + RegisterRef RR = Op; + if (RR.Reg == PredR) { + PredValid = false; + continue; + } + if (RR.Reg != RD.Reg) + continue; + // If the "Reg" part agrees, there is still the subregister to check. + // If we are looking for vreg1:loreg, we can skip vreg1:hireg, but + // not vreg1 (w/o subregisters). + if (RR.Sub == RD.Sub) + return MI; + if (RR.Sub == 0 || RD.Sub == 0) + return 0; + // We have different subregisters, so we can continue looking. + } + } while (I != S); + + return 0; +} + + +/// Check if the instruction MI can be safely moved over a set of instructions +/// whose side-effects (in terms of register defs and uses) are expressed in +/// the maps Defs and Uses. These maps reflect the conditional defs and uses +/// that depend on the same predicate register to allow moving instructions +/// over instructions predicated on the opposite condition. +bool HexagonExpandCondsets::canMoveOver(MachineInstr *MI, ReferenceMap &Defs, + ReferenceMap &Uses) { + // In order to be able to safely move MI over instructions that define + // "Defs" and use "Uses", no def operand from MI can be defined or used + // and no use operand can be defined. + for (auto &Op : MI->operands()) { + if (!Op.isReg()) + continue; + RegisterRef RR = Op; + // For physical register we would need to check register aliases, etc. 
+ // and we don't want to bother with that. It would be of little value + // before the actual register rewriting (from virtual to physical). + if (!TargetRegisterInfo::isVirtualRegister(RR.Reg)) + return false; + // No redefs for any operand. + if (isRefInMap(RR, Defs, Exec_Then)) + return false; + // For defs, there cannot be uses. + if (Op.isDef() && isRefInMap(RR, Uses, Exec_Then)) + return false; + } + return true; +} + + +/// Check if the instruction accessing memory (TheI) can be moved to the +/// location ToI. +bool HexagonExpandCondsets::canMoveMemTo(MachineInstr *TheI, MachineInstr *ToI, + bool IsDown) { + bool IsLoad = TheI->mayLoad(), IsStore = TheI->mayStore(); + if (!IsLoad && !IsStore) + return true; + if (HII->areMemAccessesTriviallyDisjoint(TheI, ToI)) + return true; + if (TheI->hasUnmodeledSideEffects()) + return false; + + MachineBasicBlock::iterator StartI = IsDown ? TheI : ToI; + MachineBasicBlock::iterator EndI = IsDown ? ToI : TheI; + bool Ordered = TheI->hasOrderedMemoryRef(); + + // Search for aliased memory reference in (StartI, EndI). + for (MachineBasicBlock::iterator I = std::next(StartI); I != EndI; ++I) { + MachineInstr *MI = &*I; + if (MI->hasUnmodeledSideEffects()) + return false; + bool L = MI->mayLoad(), S = MI->mayStore(); + if (!L && !S) + continue; + if (Ordered && MI->hasOrderedMemoryRef()) + return false; + + bool Conflict = (L && IsStore) || S; + if (Conflict) + return false; + } + return true; +} + + +/// Generate a predicated version of MI (where the condition is given via +/// PredR and Cond) at the point indicated by Where. +void HexagonExpandCondsets::predicateAt(RegisterRef RD, MachineInstr *MI, + MachineBasicBlock::iterator Where, unsigned PredR, bool Cond) { + // The problem with updating live intervals is that we can move one def + // past another def. In particular, this can happen when moving an A2_tfrt + // over an A2_tfrf defining the same register. From the point of view of + // live intervals, these two instructions are two separate definitions, + // and each one starts another live segment. LiveIntervals's "handleMove" + // does not allow such moves, so we need to handle it ourselves. To avoid + // invalidating liveness data while we are using it, the move will be + // implemented in 4 steps: (1) add a clone of the instruction MI at the + // target location, (2) update liveness, (3) delete the old instruction, + // and (4) update liveness again. + + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = Where->getDebugLoc(); // "Where" points to an instruction. + unsigned Opc = MI->getOpcode(); + unsigned PredOpc = HII->getCondOpcode(Opc, !Cond); + MachineInstrBuilder MB = BuildMI(B, Where, DL, HII->get(PredOpc)); + unsigned Ox = 0, NP = MI->getNumOperands(); + // Skip all defs from MI first. + while (Ox < NP) { + MachineOperand &MO = MI->getOperand(Ox); + if (!MO.isReg() || !MO.isDef()) + break; + Ox++; + } + // Add the new def, then the predicate register, then the rest of the + // operands. 
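+ // For illustration (hypothetical operands): predicating
+ //   vreg0 = L2_loadri_io vreg1, 0
+ // on vreg2 yields the conditional opcode with its operands ordered as
+ //   vreg0 = <cond-opcode> vreg2, vreg1, 0
+ // i.e. the def first, then the predicate register, then the original
+ // non-implicit inputs.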
+ MB.addReg(RD.Reg, RegState::Define, RD.Sub); + MB.addReg(PredR); + while (Ox < NP) { + MachineOperand &MO = MI->getOperand(Ox); + if (!MO.isReg() || !MO.isImplicit()) + MB.addOperand(MO); + Ox++; + } + + MachineFunction &MF = *B.getParent(); + MachineInstr::mmo_iterator I = MI->memoperands_begin(); + unsigned NR = std::distance(I, MI->memoperands_end()); + MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(NR); + for (unsigned i = 0; i < NR; ++i) + MemRefs[i] = *I++; + MB.setMemRefs(MemRefs, MemRefs+NR); + + MachineInstr *NewI = MB; + NewI->clearKillInfo(); + addInstrToLiveness(NewI); +} + + +/// In the range [First, Last], rename all references to the "old" register RO +/// to the "new" register RN, but only in instructions predicated on the given +/// condition. +void HexagonExpandCondsets::renameInRange(RegisterRef RO, RegisterRef RN, + unsigned PredR, bool Cond, MachineBasicBlock::iterator First, + MachineBasicBlock::iterator Last) { + MachineBasicBlock::iterator End = std::next(Last); + for (MachineBasicBlock::iterator I = First; I != End; ++I) { + MachineInstr *MI = &*I; + // Do not touch instructions that are not predicated, or are predicated + // on the opposite condition. + if (!HII->isPredicated(MI)) + continue; + if (!MI->readsRegister(PredR) || (Cond != HII->isPredicatedTrue(MI))) + continue; + + for (auto &Op : MI->operands()) { + if (!Op.isReg() || RO != RegisterRef(Op)) + continue; + Op.setReg(RN.Reg); + Op.setSubReg(RN.Sub); + // In practice, this isn't supposed to see any defs. + assert(!Op.isDef() && "Not expecting a def"); + } + } +} + + +/// For a given conditional copy, predicate the definition of the source of +/// the copy under the given condition (using the same predicate register as +/// the copy). +bool HexagonExpandCondsets::predicate(MachineInstr *TfrI, bool Cond) { + // TfrI - A2_tfr[tf] Instruction (not A2_tfrsi). + unsigned Opc = TfrI->getOpcode(); + (void)Opc; + assert(Opc == Hexagon::A2_tfrt || Opc == Hexagon::A2_tfrf); + DEBUG(dbgs() << "\nattempt to predicate if-" << (Cond ? "true" : "false") + << ": " << *TfrI); + + MachineOperand &MD = TfrI->getOperand(0); + MachineOperand &MP = TfrI->getOperand(1); + MachineOperand &MS = TfrI->getOperand(2); + // The source operand should be a <kill>. This is not strictly necessary, + // but it makes things a lot simpler. Otherwise, we would need to rename + // some registers, which would complicate the transformation considerably. + if (!MS.isKill()) + return false; + + RegisterRef RT(MS); + unsigned PredR = MP.getReg(); + MachineInstr *DefI = getReachingDefForPred(RT, TfrI, PredR, Cond); + if (!DefI || !isPredicable(DefI)) + return false; + + DEBUG(dbgs() << "Source def: " << *DefI); + + // Collect the information about registers defined and used between the + // DefI and the TfrI. + // Map: reg -> bitmask of subregs + ReferenceMap Uses, Defs; + MachineBasicBlock::iterator DefIt = DefI, TfrIt = TfrI; + + // Check if the predicate register is valid between DefI and TfrI. + // If it is, we can then ignore instructions predicated on the negated + // conditions when collecting def and use information. + bool PredValid = true; + for (MachineBasicBlock::iterator I = std::next(DefIt); I != TfrIt; ++I) { + if (!I->modifiesRegister(PredR, 0)) + continue; + PredValid = false; + break; + } + + for (MachineBasicBlock::iterator I = std::next(DefIt); I != TfrIt; ++I) { + MachineInstr *MI = &*I; + // If this instruction is predicated on the same register, it could + // potentially be ignored. 
+ // By default assume that the instruction executes on the same condition + // as TfrI (Exec_Then), and also on the opposite one (Exec_Else). + unsigned Exec = Exec_Then | Exec_Else; + if (PredValid && HII->isPredicated(MI) && MI->readsRegister(PredR)) + Exec = (Cond == HII->isPredicatedTrue(MI)) ? Exec_Then : Exec_Else; + + for (auto &Op : MI->operands()) { + if (!Op.isReg()) + continue; + // We don't want to deal with physical registers. The reason is that + // they can be aliased with other physical registers. Aliased virtual + // registers must share the same register number, and can only differ + // in the subregisters, which we are keeping track of. Physical + // registers ters no longer have subregisters---their super- and + // subregisters are other physical registers, and we are not checking + // that. + RegisterRef RR = Op; + if (!TargetRegisterInfo::isVirtualRegister(RR.Reg)) + return false; + + ReferenceMap &Map = Op.isDef() ? Defs : Uses; + addRefToMap(RR, Map, Exec); + } + } + + // The situation: + // RT = DefI + // ... + // RD = TfrI ..., RT + + // If the register-in-the-middle (RT) is used or redefined between + // DefI and TfrI, we may not be able proceed with this transformation. + // We can ignore a def that will not execute together with TfrI, and a + // use that will. If there is such a use (that does execute together with + // TfrI), we will not be able to move DefI down. If there is a use that + // executed if TfrI's condition is false, then RT must be available + // unconditionally (cannot be predicated). + // Essentially, we need to be able to rename RT to RD in this segment. + if (isRefInMap(RT, Defs, Exec_Then) || isRefInMap(RT, Uses, Exec_Else)) + return false; + RegisterRef RD = MD; + // If the predicate register is defined between DefI and TfrI, the only + // potential thing to do would be to move the DefI down to TfrI, and then + // predicate. The reaching def (DefI) must be movable down to the location + // of the TfrI. + // If the target register of the TfrI (RD) is not used or defined between + // DefI and TfrI, consider moving TfrI up to DefI. + bool CanUp = canMoveOver(TfrI, Defs, Uses); + bool CanDown = canMoveOver(DefI, Defs, Uses); + // The TfrI does not access memory, but DefI could. Check if it's safe + // to move DefI down to TfrI. + if (DefI->mayLoad() || DefI->mayStore()) + if (!canMoveMemTo(DefI, TfrI, true)) + CanDown = false; + + DEBUG(dbgs() << "Can move up: " << (CanUp ? "yes" : "no") + << ", can move down: " << (CanDown ? "yes\n" : "no\n")); + MachineBasicBlock::iterator PastDefIt = std::next(DefIt); + if (CanUp) + predicateAt(RD, DefI, PastDefIt, PredR, Cond); + else if (CanDown) + predicateAt(RD, DefI, TfrIt, PredR, Cond); + else + return false; + + if (RT != RD) + renameInRange(RT, RD, PredR, Cond, PastDefIt, TfrIt); + + // Delete the user of RT first (it should work either way, but this order + // of deleting is more natural). + removeInstrFromLiveness(TfrI); + removeInstrFromLiveness(DefI); + return true; +} + + +/// Predicate all cases of conditional copies in the specified block. 
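+/// For illustration (register numbers are only illustrative), a sequence
+/// such as
+///   vreg3 = A2_addi vreg1, 1
+///   vreg0 = A2_tfrt vreg2, vreg3<kill>
+/// can, when the checks in predicate() succeed, be collapsed into a single
+/// add predicated on vreg2.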
+bool HexagonExpandCondsets::predicateInBlock(MachineBasicBlock &B) { + bool Changed = false; + MachineBasicBlock::iterator I, E, NextI; + for (I = B.begin(), E = B.end(); I != E; I = NextI) { + NextI = std::next(I); + unsigned Opc = I->getOpcode(); + if (Opc == Hexagon::A2_tfrt || Opc == Hexagon::A2_tfrf) { + bool Done = predicate(I, (Opc == Hexagon::A2_tfrt)); + if (!Done) { + // If we didn't predicate I, we may need to remove it in case it is + // an "identity" copy, e.g. vreg1 = A2_tfrt vreg2, vreg1. + if (RegisterRef(I->getOperand(0)) == RegisterRef(I->getOperand(2))) + removeInstrFromLiveness(I); + } + Changed |= Done; + } + } + return Changed; +} + + +void HexagonExpandCondsets::removeImplicitUses(MachineInstr *MI) { + for (unsigned i = MI->getNumOperands(); i > 0; --i) { + MachineOperand &MO = MI->getOperand(i-1); + if (MO.isReg() && MO.isUse() && MO.isImplicit()) + MI->RemoveOperand(i-1); + } +} + + +void HexagonExpandCondsets::removeImplicitUses(MachineBasicBlock &B) { + for (MachineBasicBlock::iterator I = B.begin(), E = B.end(); I != E; ++I) { + MachineInstr *MI = &*I; + if (HII->isPredicated(MI)) + removeImplicitUses(MI); + } +} + + +void HexagonExpandCondsets::postprocessUndefImplicitUses(MachineBasicBlock &B) { + // Implicit uses that are "undef" are only meaningful (outside of the + // internals of this pass) when the instruction defines a subregister, + // and the implicit-undef use applies to the defined register. In such + // cases, the proper way to record the information in the IR is to mark + // the definition as "undef", which will be interpreted as "read-undef". + typedef SmallSet<unsigned,2> RegisterSet; + for (MachineBasicBlock::iterator I = B.begin(), E = B.end(); I != E; ++I) { + MachineInstr *MI = &*I; + RegisterSet Undefs; + for (unsigned i = MI->getNumOperands(); i > 0; --i) { + MachineOperand &MO = MI->getOperand(i-1); + if (MO.isReg() && MO.isUse() && MO.isImplicit() && MO.isUndef()) { + MI->RemoveOperand(i-1); + Undefs.insert(MO.getReg()); + } + } + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isDef() || !Op.getSubReg()) + continue; + if (Undefs.count(Op.getReg())) + Op.setIsUndef(true); + } + } +} + + +bool HexagonExpandCondsets::isIntReg(RegisterRef RR, unsigned &BW) { + if (!TargetRegisterInfo::isVirtualRegister(RR.Reg)) + return false; + const TargetRegisterClass *RC = MRI->getRegClass(RR.Reg); + if (RC == &Hexagon::IntRegsRegClass) { + BW = 32; + return true; + } + if (RC == &Hexagon::DoubleRegsRegClass) { + BW = (RR.Sub != 0) ? 32 : 64; + return true; + } + return false; +} + + +bool HexagonExpandCondsets::isIntraBlocks(LiveInterval &LI) { + for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) { + LiveRange::Segment &LR = *I; + // Range must start at a register... + if (!LR.start.isRegister()) + return false; + // ...and end in a register or in a dead slot. + if (!LR.end.isRegister() && !LR.end.isDead()) + return false; + } + return true; +} + + +bool HexagonExpandCondsets::coalesceRegisters(RegisterRef R1, RegisterRef R2) { + if (CoaLimitActive) { + if (CoaCounter >= CoaLimit) + return false; + CoaCounter++; + } + unsigned BW1, BW2; + if (!isIntReg(R1, BW1) || !isIntReg(R2, BW2) || BW1 != BW2) + return false; + if (MRI->isLiveIn(R1.Reg)) + return false; + if (MRI->isLiveIn(R2.Reg)) + return false; + + LiveInterval &L1 = LIS->getInterval(R1.Reg); + LiveInterval &L2 = LIS->getInterval(R2.Reg); + bool Overlap = L1.overlaps(L2); + + DEBUG(dbgs() << "compatible registers: (" + << (Overlap ? 
"overlap" : "disjoint") << ")\n " + << PrintReg(R1.Reg, TRI, R1.Sub) << " " << L1 << "\n " + << PrintReg(R2.Reg, TRI, R2.Sub) << " " << L2 << "\n"); + if (R1.Sub || R2.Sub) + return false; + if (Overlap) + return false; + + // Coalescing could have a negative impact on scheduling, so try to limit + // to some reasonable extent. Only consider coalescing segments, when one + // of them does not cross basic block boundaries. + if (!isIntraBlocks(L1) && !isIntraBlocks(L2)) + return false; + + MRI->replaceRegWith(R2.Reg, R1.Reg); + + // Move all live segments from L2 to L1. + typedef DenseMap<VNInfo*,VNInfo*> ValueInfoMap; + ValueInfoMap VM; + for (LiveInterval::iterator I = L2.begin(), E = L2.end(); I != E; ++I) { + VNInfo *NewVN, *OldVN = I->valno; + ValueInfoMap::iterator F = VM.find(OldVN); + if (F == VM.end()) { + NewVN = L1.getNextValue(I->valno->def, LIS->getVNInfoAllocator()); + VM.insert(std::make_pair(OldVN, NewVN)); + } else { + NewVN = F->second; + } + L1.addSegment(LiveRange::Segment(I->start, I->end, NewVN)); + } + while (L2.begin() != L2.end()) + L2.removeSegment(*L2.begin()); + + updateKillFlags(R1.Reg, L1); + DEBUG(dbgs() << "coalesced: " << L1 << "\n"); + L1.verify(); + + return true; +} + + +/// Attempt to coalesce one of the source registers to a MUX intruction with +/// the destination register. This could lead to having only one predicated +/// instruction in the end instead of two. +bool HexagonExpandCondsets::coalesceSegments(MachineFunction &MF) { + SmallVector<MachineInstr*,16> Condsets; + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { + MachineBasicBlock &B = *I; + for (MachineBasicBlock::iterator J = B.begin(), F = B.end(); J != F; ++J) { + MachineInstr *MI = &*J; + if (!isCondset(MI)) + continue; + MachineOperand &S1 = MI->getOperand(2), &S2 = MI->getOperand(3); + if (!S1.isReg() && !S2.isReg()) + continue; + Condsets.push_back(MI); + } + } + + bool Changed = false; + for (unsigned i = 0, n = Condsets.size(); i < n; ++i) { + MachineInstr *CI = Condsets[i]; + RegisterRef RD = CI->getOperand(0); + RegisterRef RP = CI->getOperand(1); + MachineOperand &S1 = CI->getOperand(2), &S2 = CI->getOperand(3); + bool Done = false; + // Consider this case: + // vreg1 = instr1 ... + // vreg2 = instr2 ... + // vreg0 = C2_mux ..., vreg1, vreg2 + // If vreg0 was coalesced with vreg1, we could end up with the following + // code: + // vreg0 = instr1 ... + // vreg2 = instr2 ... + // vreg0 = A2_tfrf ..., vreg2 + // which will later become: + // vreg0 = instr1 ... + // vreg0 = instr2_cNotPt ... + // i.e. there will be an unconditional definition (instr1) of vreg0 + // followed by a conditional one. The output dependency was there before + // and it unavoidable, but if instr1 is predicable, we will no longer be + // able to predicate it here. + // To avoid this scenario, don't coalesce the destination register with + // a source register that is defined by a predicable instruction. 
+ if (S1.isReg()) { + RegisterRef RS = S1; + MachineInstr *RDef = getReachingDefForPred(RS, CI, RP.Reg, true); + if (!RDef || !HII->isPredicable(RDef)) + Done = coalesceRegisters(RD, RegisterRef(S1)); + } + if (!Done && S2.isReg()) { + RegisterRef RS = S2; + MachineInstr *RDef = getReachingDefForPred(RS, CI, RP.Reg, false); + if (!RDef || !HII->isPredicable(RDef)) + Done = coalesceRegisters(RD, RegisterRef(S2)); + } + Changed |= Done; + } + return Changed; +} + + +bool HexagonExpandCondsets::runOnMachineFunction(MachineFunction &MF) { + HII = static_cast<const HexagonInstrInfo*>(MF.getSubtarget().getInstrInfo()); + TRI = MF.getSubtarget().getRegisterInfo(); + LIS = &getAnalysis<LiveIntervals>(); + MRI = &MF.getRegInfo(); + + bool Changed = false; + + // Try to coalesce the target of a mux with one of its sources. + // This could eliminate a register copy in some circumstances. + Changed |= coalesceSegments(MF); + + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { + // First, simply split all muxes into a pair of conditional transfers + // and update the live intervals to reflect the new arrangement. + // This is done mainly to make the live interval update simpler, than it + // would be while trying to predicate instructions at the same time. + Changed |= splitInBlock(*I); + // Traverse all blocks and collapse predicable instructions feeding + // conditional transfers into predicated instructions. + // Walk over all the instructions again, so we may catch pre-existing + // cases that were not created in the previous step. + Changed |= predicateInBlock(*I); + } + + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) + postprocessUndefImplicitUses(*I); + return Changed; +} + + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +static void initializePassOnce(PassRegistry &Registry) { + const char *Name = "Hexagon Expand Condsets"; + PassInfo *PI = new PassInfo(Name, "expand-condsets", + &HexagonExpandCondsets::ID, 0, false, false); + Registry.registerPass(*PI, true); +} + +void llvm::initializeHexagonExpandCondsetsPass(PassRegistry &Registry) { + CALL_ONCE_INITIALIZATION(initializePassOnce) +} + + +FunctionPass *llvm::createHexagonExpandCondsets() { + return new HexagonExpandCondsets(); +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp new file mode 100644 index 0000000..6e2dbc0 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp @@ -0,0 +1,357 @@ +//===-- HexagonExpandPredSpillCode.cpp - Expand Predicate Spill Code ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// The Hexagon processor has no instructions that load or store predicate +// registers directly. So, when these registers must be spilled a general +// purpose register must be found and the value copied to/from it from/to +// the predicate register. This code currently does not use the register +// scavenger mechanism available in the allocator. There are two registers +// reserved to allow spilling/restoring predicate registers. One is used to +// hold the predicate value. 
The other is used when stack frame offsets are +// too large. +// +//===----------------------------------------------------------------------===// + +#include "Hexagon.h" +#include "HexagonMachineFunctionInfo.h" +#include "HexagonSubtarget.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LatencyPriorityQueue.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" + +using namespace llvm; + + +namespace llvm { + FunctionPass *createHexagonExpandPredSpillCode(); + void initializeHexagonExpandPredSpillCodePass(PassRegistry&); +} + + +namespace { + +class HexagonExpandPredSpillCode : public MachineFunctionPass { + public: + static char ID; + HexagonExpandPredSpillCode() : MachineFunctionPass(ID) { + PassRegistry &Registry = *PassRegistry::getPassRegistry(); + initializeHexagonExpandPredSpillCodePass(Registry); + } + + const char *getPassName() const override { + return "Hexagon Expand Predicate Spill Code"; + } + bool runOnMachineFunction(MachineFunction &Fn) override; +}; + + +char HexagonExpandPredSpillCode::ID = 0; + + +bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) { + + const HexagonSubtarget &QST = Fn.getSubtarget<HexagonSubtarget>(); + const HexagonInstrInfo *TII = QST.getInstrInfo(); + + // Loop over all of the basic blocks. + for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end(); + MBBb != MBBe; ++MBBb) { + MachineBasicBlock *MBB = &*MBBb; + // Traverse the basic block. + for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end(); + ++MII) { + MachineInstr *MI = MII; + int Opc = MI->getOpcode(); + if (Opc == Hexagon::S2_storerb_pci_pseudo || + Opc == Hexagon::S2_storerh_pci_pseudo || + Opc == Hexagon::S2_storeri_pci_pseudo || + Opc == Hexagon::S2_storerd_pci_pseudo || + Opc == Hexagon::S2_storerf_pci_pseudo) { + unsigned Opcode; + if (Opc == Hexagon::S2_storerd_pci_pseudo) + Opcode = Hexagon::S2_storerd_pci; + else if (Opc == Hexagon::S2_storeri_pci_pseudo) + Opcode = Hexagon::S2_storeri_pci; + else if (Opc == Hexagon::S2_storerh_pci_pseudo) + Opcode = Hexagon::S2_storerh_pci; + else if (Opc == Hexagon::S2_storerf_pci_pseudo) + Opcode = Hexagon::S2_storerf_pci; + else if (Opc == Hexagon::S2_storerb_pci_pseudo) + Opcode = Hexagon::S2_storerb_pci; + else + llvm_unreachable("wrong Opc"); + MachineOperand &Op0 = MI->getOperand(0); + MachineOperand &Op1 = MI->getOperand(1); + MachineOperand &Op2 = MI->getOperand(2); + MachineOperand &Op3 = MI->getOperand(3); // Modifier value. + MachineOperand &Op4 = MI->getOperand(4); + // Emit a "C6 = Rn, C6 is the control register for M0". + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr), + Hexagon::C6)->addOperand(Op3); + // Replace the pseude circ_ldd by the real circ_ldd. 
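+ // For illustration: the modifier value from the pseudo is first copied
+ // into the circular-addressing control register (C6) via A2_tfrrcr, and
+ // the real S2_storer*_pci built below then reads it through M0, which is
+ // added as an implicit (killed) use.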
+ MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Opcode)); + NewMI->addOperand(Op0); + NewMI->addOperand(Op1); + NewMI->addOperand(Op4); + NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0, + false, /*isDef*/ + false, /*isImpl*/ + true /*isKill*/)); + NewMI->addOperand(Op2); + MII = MBB->erase(MI); + --MII; + } else if (Opc == Hexagon::L2_loadrd_pci_pseudo || + Opc == Hexagon::L2_loadri_pci_pseudo || + Opc == Hexagon::L2_loadrh_pci_pseudo || + Opc == Hexagon::L2_loadruh_pci_pseudo|| + Opc == Hexagon::L2_loadrb_pci_pseudo || + Opc == Hexagon::L2_loadrub_pci_pseudo) { + unsigned Opcode; + if (Opc == Hexagon::L2_loadrd_pci_pseudo) + Opcode = Hexagon::L2_loadrd_pci; + else if (Opc == Hexagon::L2_loadri_pci_pseudo) + Opcode = Hexagon::L2_loadri_pci; + else if (Opc == Hexagon::L2_loadrh_pci_pseudo) + Opcode = Hexagon::L2_loadrh_pci; + else if (Opc == Hexagon::L2_loadruh_pci_pseudo) + Opcode = Hexagon::L2_loadruh_pci; + else if (Opc == Hexagon::L2_loadrb_pci_pseudo) + Opcode = Hexagon::L2_loadrb_pci; + else if (Opc == Hexagon::L2_loadrub_pci_pseudo) + Opcode = Hexagon::L2_loadrub_pci; + else + llvm_unreachable("wrong Opc"); + + MachineOperand &Op0 = MI->getOperand(0); + MachineOperand &Op1 = MI->getOperand(1); + MachineOperand &Op2 = MI->getOperand(2); + MachineOperand &Op4 = MI->getOperand(4); // Modifier value. + MachineOperand &Op5 = MI->getOperand(5); + // Emit a "C6 = Rn, C6 is the control register for M0". + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr), + Hexagon::C6)->addOperand(Op4); + // Replace the pseude circ_ldd by the real circ_ldd. + MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Opcode)); + NewMI->addOperand(Op1); + NewMI->addOperand(Op0); + NewMI->addOperand(Op2); + NewMI->addOperand(Op5); + NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0, + false, /*isDef*/ + false, /*isImpl*/ + true /*isKill*/)); + MII = MBB->erase(MI); + --MII; + } else if (Opc == Hexagon::L2_loadrd_pbr_pseudo || + Opc == Hexagon::L2_loadri_pbr_pseudo || + Opc == Hexagon::L2_loadrh_pbr_pseudo || + Opc == Hexagon::L2_loadruh_pbr_pseudo|| + Opc == Hexagon::L2_loadrb_pbr_pseudo || + Opc == Hexagon::L2_loadrub_pbr_pseudo) { + unsigned Opcode; + if (Opc == Hexagon::L2_loadrd_pbr_pseudo) + Opcode = Hexagon::L2_loadrd_pbr; + else if (Opc == Hexagon::L2_loadri_pbr_pseudo) + Opcode = Hexagon::L2_loadri_pbr; + else if (Opc == Hexagon::L2_loadrh_pbr_pseudo) + Opcode = Hexagon::L2_loadrh_pbr; + else if (Opc == Hexagon::L2_loadruh_pbr_pseudo) + Opcode = Hexagon::L2_loadruh_pbr; + else if (Opc == Hexagon::L2_loadrb_pbr_pseudo) + Opcode = Hexagon::L2_loadrb_pbr; + else if (Opc == Hexagon::L2_loadrub_pbr_pseudo) + Opcode = Hexagon::L2_loadrub_pbr; + else + llvm_unreachable("wrong Opc"); + MachineOperand &Op0 = MI->getOperand(0); + MachineOperand &Op1 = MI->getOperand(1); + MachineOperand &Op2 = MI->getOperand(2); + MachineOperand &Op4 = MI->getOperand(4); // Modifier value. + // Emit a "C6 = Rn, C6 is the control register for M0". + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr), + Hexagon::C6)->addOperand(Op4); + // Replace the pseudo brev_ldd by the real brev_ldd. 
+ MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Opcode)); + NewMI->addOperand(Op1); + NewMI->addOperand(Op0); + NewMI->addOperand(Op2); + NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0, + false, /*isDef*/ + false, /*isImpl*/ + true /*isKill*/)); + MII = MBB->erase(MI); + --MII; + } else if (Opc == Hexagon::S2_storerd_pbr_pseudo || + Opc == Hexagon::S2_storeri_pbr_pseudo || + Opc == Hexagon::S2_storerh_pbr_pseudo || + Opc == Hexagon::S2_storerb_pbr_pseudo || + Opc == Hexagon::S2_storerf_pbr_pseudo) { + unsigned Opcode; + if (Opc == Hexagon::S2_storerd_pbr_pseudo) + Opcode = Hexagon::S2_storerd_pbr; + else if (Opc == Hexagon::S2_storeri_pbr_pseudo) + Opcode = Hexagon::S2_storeri_pbr; + else if (Opc == Hexagon::S2_storerh_pbr_pseudo) + Opcode = Hexagon::S2_storerh_pbr; + else if (Opc == Hexagon::S2_storerf_pbr_pseudo) + Opcode = Hexagon::S2_storerf_pbr; + else if (Opc == Hexagon::S2_storerb_pbr_pseudo) + Opcode = Hexagon::S2_storerb_pbr; + else + llvm_unreachable("wrong Opc"); + MachineOperand &Op0 = MI->getOperand(0); + MachineOperand &Op1 = MI->getOperand(1); + MachineOperand &Op2 = MI->getOperand(2); + MachineOperand &Op3 = MI->getOperand(3); // Modifier value. + // Emit a "C6 = Rn, C6 is the control register for M0". + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr), + Hexagon::C6)->addOperand(Op3); + // Replace the pseudo brev_ldd by the real brev_ldd. + MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Opcode)); + NewMI->addOperand(Op0); + NewMI->addOperand(Op1); + NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0, + false, /*isDef*/ + false, /*isImpl*/ + true /*isKill*/)); + NewMI->addOperand(Op2); + MII = MBB->erase(MI); + --MII; + } else if (Opc == Hexagon::STriw_pred) { + // STriw_pred [R30], ofst, SrcReg; + unsigned FP = MI->getOperand(0).getReg(); + assert(FP == QST.getRegisterInfo()->getFrameRegister() && + "Not a Frame Pointer, Nor a Spill Slot"); + assert(MI->getOperand(1).isImm() && "Not an offset"); + int Offset = MI->getOperand(1).getImm(); + int SrcReg = MI->getOperand(2).getReg(); + assert(Hexagon::PredRegsRegClass.contains(SrcReg) && + "Not a predicate register"); + if (!TII->isValidOffset(Hexagon::S2_storeri_io, Offset)) { + if (!TII->isValidOffset(Hexagon::A2_addi, Offset)) { + BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::CONST32_Int_Real), + HEXAGON_RESERVED_REG_1).addImm(Offset); + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_add), + HEXAGON_RESERVED_REG_1) + .addReg(FP).addReg(HEXAGON_RESERVED_REG_1); + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::C2_tfrpr), + HEXAGON_RESERVED_REG_2).addReg(SrcReg); + BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::S2_storeri_io)) + .addReg(HEXAGON_RESERVED_REG_1) + .addImm(0).addReg(HEXAGON_RESERVED_REG_2); + } else { + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_addi), + HEXAGON_RESERVED_REG_1).addReg(FP).addImm(Offset); + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::C2_tfrpr), + HEXAGON_RESERVED_REG_2).addReg(SrcReg); + BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::S2_storeri_io)) + .addReg(HEXAGON_RESERVED_REG_1) + .addImm(0) + .addReg(HEXAGON_RESERVED_REG_2); + } + } else { + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::C2_tfrpr), + HEXAGON_RESERVED_REG_2).addReg(SrcReg); + BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::S2_storeri_io)). 
+ addReg(FP).addImm(Offset).addReg(HEXAGON_RESERVED_REG_2); + } + MII = MBB->erase(MI); + --MII; + } else if (Opc == Hexagon::LDriw_pred) { + // DstReg = LDriw_pred [R30], ofst. + int DstReg = MI->getOperand(0).getReg(); + assert(Hexagon::PredRegsRegClass.contains(DstReg) && + "Not a predicate register"); + unsigned FP = MI->getOperand(1).getReg(); + assert(FP == QST.getRegisterInfo()->getFrameRegister() && + "Not a Frame Pointer, Nor a Spill Slot"); + assert(MI->getOperand(2).isImm() && "Not an offset"); + int Offset = MI->getOperand(2).getImm(); + if (!TII->isValidOffset(Hexagon::L2_loadri_io, Offset)) { + if (!TII->isValidOffset(Hexagon::A2_addi, Offset)) { + BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::CONST32_Int_Real), + HEXAGON_RESERVED_REG_1).addImm(Offset); + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_add), + HEXAGON_RESERVED_REG_1) + .addReg(FP) + .addReg(HEXAGON_RESERVED_REG_1); + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::L2_loadri_io), + HEXAGON_RESERVED_REG_2) + .addReg(HEXAGON_RESERVED_REG_1) + .addImm(0); + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::C2_tfrrp), + DstReg).addReg(HEXAGON_RESERVED_REG_2); + } else { + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_addi), + HEXAGON_RESERVED_REG_1).addReg(FP).addImm(Offset); + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::L2_loadri_io), + HEXAGON_RESERVED_REG_2) + .addReg(HEXAGON_RESERVED_REG_1) + .addImm(0); + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::C2_tfrrp), + DstReg).addReg(HEXAGON_RESERVED_REG_2); + } + } else { + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::L2_loadri_io), + HEXAGON_RESERVED_REG_2).addReg(FP).addImm(Offset); + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::C2_tfrrp), + DstReg).addReg(HEXAGON_RESERVED_REG_2); + } + MII = MBB->erase(MI); + --MII; + } + } + } + + return true; +} + +} + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +static void initializePassOnce(PassRegistry &Registry) { + const char *Name = "Hexagon Expand Predicate Spill Code"; + PassInfo *PI = new PassInfo(Name, "hexagon-spill-pred", + &HexagonExpandPredSpillCode::ID, + nullptr, false, false); + Registry.registerPass(*PI, true); +} + +void llvm::initializeHexagonExpandPredSpillCodePass(PassRegistry &Registry) { + CALL_ONCE_INITIALIZATION(initializePassOnce) +} + +FunctionPass* +llvm::createHexagonExpandPredSpillCode() { + return new HexagonExpandPredSpillCode(); +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonFixupHwLoops.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonFixupHwLoops.cpp new file mode 100644 index 0000000..d0c7f9c --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonFixupHwLoops.cpp @@ -0,0 +1,188 @@ +//===---- HexagonFixupHwLoops.cpp - Fixup HW loops too far from LOOPn. ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +// The loop start address in the LOOPn instruction is encoded as a distance +// from the LOOPn instruction itself. If the start address is too far from +// the LOOPn instruction, the instruction needs to use a constant extender. +// This pass will identify and convert such LOOPn instructions to a proper +// form. 
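+// For illustration: a J2_loop0r/J2_loop0i whose start label ends up farther
+// away than the allowed range is replaced by its constant-extended form
+// (J2_loop0rext/J2_loop0iext), which can encode the larger distance.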
+//===----------------------------------------------------------------------===// + + +#include "llvm/ADT/DenseMap.h" +#include "Hexagon.h" +#include "HexagonTargetMachine.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/PassSupport.h" +#include "llvm/Target/TargetInstrInfo.h" + +using namespace llvm; + +static cl::opt<unsigned> MaxLoopRange( + "hexagon-loop-range", cl::Hidden, cl::init(200), + cl::desc("Restrict range of loopN instructions (testing only)")); + +namespace llvm { + FunctionPass *createHexagonFixupHwLoops(); + void initializeHexagonFixupHwLoopsPass(PassRegistry&); +} + +namespace { + struct HexagonFixupHwLoops : public MachineFunctionPass { + public: + static char ID; + + HexagonFixupHwLoops() : MachineFunctionPass(ID) { + initializeHexagonFixupHwLoopsPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + const char *getPassName() const override { + return "Hexagon Hardware Loop Fixup"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + private: + /// \brief Check the offset between each loop instruction and + /// the loop basic block to determine if we can use the LOOP instruction + /// or if we need to set the LC/SA registers explicitly. + bool fixupLoopInstrs(MachineFunction &MF); + + /// \brief Replace loop instruction with the constant extended + /// version if the loop label is too far from the loop instruction. + void useExtLoopInstr(MachineFunction &MF, + MachineBasicBlock::iterator &MII); + }; + + char HexagonFixupHwLoops::ID = 0; +} + +INITIALIZE_PASS(HexagonFixupHwLoops, "hwloopsfixup", + "Hexagon Hardware Loops Fixup", false, false) + +FunctionPass *llvm::createHexagonFixupHwLoops() { + return new HexagonFixupHwLoops(); +} + +/// \brief Returns true if the instruction is a hardware loop instruction. +static bool isHardwareLoop(const MachineInstr *MI) { + return MI->getOpcode() == Hexagon::J2_loop0r || + MI->getOpcode() == Hexagon::J2_loop0i || + MI->getOpcode() == Hexagon::J2_loop1r || + MI->getOpcode() == Hexagon::J2_loop1i; +} + +bool HexagonFixupHwLoops::runOnMachineFunction(MachineFunction &MF) { + return fixupLoopInstrs(MF); +} + +/// \brief For Hexagon, if the loop label is to far from the +/// loop instruction then we need to set the LC0 and SA0 registers +/// explicitly instead of using LOOP(start,count). This function +/// checks the distance, and generates register assignments if needed. +/// +/// This function makes two passes over the basic blocks. The first +/// pass computes the offset of the basic block from the start. +/// The second pass checks all the loop instructions. +bool HexagonFixupHwLoops::fixupLoopInstrs(MachineFunction &MF) { + + // Offset of the current instruction from the start. + unsigned InstOffset = 0; + // Map for each basic block to it's first instruction. + DenseMap<const MachineBasicBlock *, unsigned> BlockToInstOffset; + + const HexagonInstrInfo *HII = + static_cast<const HexagonInstrInfo *>(MF.getSubtarget().getInstrInfo()); + + // First pass - compute the offset of each basic block. + for (const MachineBasicBlock &MBB : MF) { + if (MBB.getAlignment()) { + // Although we don't know the exact layout of the final code, we need + // to account for alignment padding somehow. 
This heuristic pads each + // aligned basic block according to the alignment value. + int ByteAlign = (1u << MBB.getAlignment()) - 1; + InstOffset = (InstOffset + ByteAlign) & ~(ByteAlign); + } + + BlockToInstOffset[&MBB] = InstOffset; + for (const MachineInstr &MI : MBB) + InstOffset += HII->getSize(&MI); + } + + // Second pass - check each loop instruction to see if it needs to be + // converted. + InstOffset = 0; + bool Changed = false; + for (MachineBasicBlock &MBB : MF) { + InstOffset = BlockToInstOffset[&MBB]; + + // Loop over all the instructions. + MachineBasicBlock::iterator MII = MBB.begin(); + MachineBasicBlock::iterator MIE = MBB.end(); + while (MII != MIE) { + InstOffset += HII->getSize(&*MII); + if (MII->isDebugValue()) { + ++MII; + continue; + } + if (isHardwareLoop(MII)) { + assert(MII->getOperand(0).isMBB() && + "Expect a basic block as loop operand"); + int diff = InstOffset - BlockToInstOffset[MII->getOperand(0).getMBB()]; + if ((unsigned)abs(diff) > MaxLoopRange) { + useExtLoopInstr(MF, MII); + MII = MBB.erase(MII); + Changed = true; + } else { + ++MII; + } + } else { + ++MII; + } + } + } + + return Changed; +} + +/// \brief Replace loop instructions with the constant extended version. +void HexagonFixupHwLoops::useExtLoopInstr(MachineFunction &MF, + MachineBasicBlock::iterator &MII) { + const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + MachineBasicBlock *MBB = MII->getParent(); + DebugLoc DL = MII->getDebugLoc(); + MachineInstrBuilder MIB; + unsigned newOp; + switch (MII->getOpcode()) { + case Hexagon::J2_loop0r: + newOp = Hexagon::J2_loop0rext; + break; + case Hexagon::J2_loop0i: + newOp = Hexagon::J2_loop0iext; + break; + case Hexagon::J2_loop1r: + newOp = Hexagon::J2_loop1rext; + break; + case Hexagon::J2_loop1i: + newOp = Hexagon::J2_loop1iext; + break; + default: + llvm_unreachable("Invalid Hardware Loop Instruction."); + } + MIB = BuildMI(*MBB, MII, DL, TII->get(newOp)); + + for (unsigned i = 0; i < MII->getNumOperands(); ++i) + MIB.addOperand(MII->getOperand(i)); +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp new file mode 100644 index 0000000..7a52a1c --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -0,0 +1,1479 @@ +//===-- HexagonFrameLowering.cpp - Define frame lowering ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "hexagon-pei" + +#include "HexagonFrameLowering.h" +#include "Hexagon.h" +#include "HexagonInstrInfo.h" +#include "HexagonMachineFunctionInfo.h" +#include "HexagonRegisterInfo.h" +#include "HexagonSubtarget.h" +#include "HexagonTargetMachine.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachinePostDominators.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" + +// Hexagon stack frame layout as defined by the ABI: +// +// Incoming arguments +// passed via stack +// | +// | +// SP during function's FP during function's | +// +-- runtime (top of stack) runtime (bottom) --+ | +// | | | +// --++---------------------+------------------+-----------------++-+------- +// | parameter area for | variable-size | fixed-size |LR| arg +// | called functions | local objects | local objects |FP| +// --+----------------------+------------------+-----------------+--+------- +// <- size known -> <- size unknown -> <- size known -> +// +// Low address High address +// +// <--- stack growth +// +// +// - In any circumstances, the outgoing function arguments are always accessi- +// ble using the SP, and the incoming arguments are accessible using the FP. +// - If the local objects are not aligned, they can always be accessed using +// the FP. +// - If there are no variable-sized objects, the local objects can always be +// accessed using the SP, regardless whether they are aligned or not. (The +// alignment padding will be at the bottom of the stack (highest address), +// and so the offset with respect to the SP will be known at the compile- +// -time.) +// +// The only complication occurs if there are both, local aligned objects, and +// dynamically allocated (variable-sized) objects. The alignment pad will be +// placed between the FP and the local objects, thus preventing the use of the +// FP to access the local objects. At the same time, the variable-sized objects +// will be between the SP and the local objects, thus introducing an unknown +// distance from the SP to the locals. +// +// To avoid this problem, a new register is created that holds the aligned +// address of the bottom of the stack, referred in the sources as AP (aligned +// pointer). The AP will be equal to "FP-p", where "p" is the smallest pad +// that aligns AP to the required boundary (a maximum of the alignments of +// all stack objects, fixed- and variable-sized). All local objects[1] will +// then use AP as the base pointer. +// [1] The exception is with "fixed" stack objects. "Fixed" stack objects get +// their name from being allocated at fixed locations on the stack, relative +// to the FP. In the presence of dynamic allocation and local alignment, such +// objects can only be accessed through the FP. 
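+// For example (numbers are only illustrative): with FP = 0x1004 and a
+// required alignment of 16 bytes, the pad is p = 4 and AP = FP-4 = 0x1000.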
+// +// Illustration of the AP: +// FP --+ +// | +// ---------------+---------------------+-----+-----------------------++-+-- +// Rest of the | Local stack objects | Pad | Fixed stack objects |LR| +// stack frame | (aligned) | | (CSR, spills, etc.) |FP| +// ---------------+---------------------+-----+-----------------+-----+--+-- +// |<-- Multiple of the -->| +// stack alignment +-- AP +// +// The AP is set up at the beginning of the function. Since it is not a dedi- +// cated (reserved) register, it needs to be kept live throughout the function +// to be available as the base register for local object accesses. +// Normally, an address of a stack objects is obtained by a pseudo-instruction +// TFR_FI. To access local objects with the AP register present, a different +// pseudo-instruction needs to be used: TFR_FIA. The TFR_FIA takes one extra +// argument compared to TFR_FI: the first input register is the AP register. +// This keeps the register live between its definition and its uses. + +// The AP register is originally set up using pseudo-instruction ALIGNA: +// AP = ALIGNA A +// where +// A - required stack alignment +// The alignment value must be the maximum of all alignments required by +// any stack object. + +// The dynamic allocation uses a pseudo-instruction ALLOCA: +// Rd = ALLOCA Rs, A +// where +// Rd - address of the allocated space +// Rs - minimum size (the actual allocated can be larger to accommodate +// alignment) +// A - required alignment + + +using namespace llvm; + +static cl::opt<bool> DisableDeallocRet("disable-hexagon-dealloc-ret", + cl::Hidden, cl::desc("Disable Dealloc Return for Hexagon target")); + + +static cl::opt<int> NumberScavengerSlots("number-scavenger-slots", + cl::Hidden, cl::desc("Set the number of scavenger slots"), cl::init(2), + cl::ZeroOrMore); + +static cl::opt<int> SpillFuncThreshold("spill-func-threshold", + cl::Hidden, cl::desc("Specify O2(not Os) spill func threshold"), + cl::init(6), cl::ZeroOrMore); + +static cl::opt<int> SpillFuncThresholdOs("spill-func-threshold-Os", + cl::Hidden, cl::desc("Specify Os spill func threshold"), + cl::init(1), cl::ZeroOrMore); + +static cl::opt<bool> EnableShrinkWrapping("hexagon-shrink-frame", + cl::init(true), cl::Hidden, cl::ZeroOrMore, + cl::desc("Enable stack frame shrink wrapping")); + +static cl::opt<unsigned> ShrinkLimit("shrink-frame-limit", cl::init(UINT_MAX), + cl::Hidden, cl::ZeroOrMore, cl::desc("Max count of stack frame " + "shrink-wraps")); + +static cl::opt<bool> UseAllocframe("use-allocframe", cl::init(true), + cl::Hidden, cl::desc("Use allocframe more conservatively")); + + +namespace llvm { + void initializeHexagonCallFrameInformationPass(PassRegistry&); + FunctionPass *createHexagonCallFrameInformation(); +} + +namespace { + class HexagonCallFrameInformation : public MachineFunctionPass { + public: + static char ID; + HexagonCallFrameInformation() : MachineFunctionPass(ID) { + PassRegistry &PR = *PassRegistry::getPassRegistry(); + initializeHexagonCallFrameInformationPass(PR); + } + bool runOnMachineFunction(MachineFunction &MF) override; + }; + + char HexagonCallFrameInformation::ID = 0; +} + +bool HexagonCallFrameInformation::runOnMachineFunction(MachineFunction &MF) { + auto &HFI = *MF.getSubtarget<HexagonSubtarget>().getFrameLowering(); + bool NeedCFI = MF.getMMI().hasDebugInfo() || + MF.getFunction()->needsUnwindTableEntry(); + + if (!NeedCFI) + return false; + HFI.insertCFIInstructions(MF); + return true; +} + +INITIALIZE_PASS(HexagonCallFrameInformation, "hexagon-cfi", + 
"Hexagon call frame information", false, false) + +FunctionPass *llvm::createHexagonCallFrameInformation() { + return new HexagonCallFrameInformation(); +} + + +namespace { + /// Map a register pair Reg to the subregister that has the greater "number", + /// i.e. D3 (aka R7:6) will be mapped to R7, etc. + unsigned getMax32BitSubRegister(unsigned Reg, const TargetRegisterInfo &TRI, + bool hireg = true) { + if (Reg < Hexagon::D0 || Reg > Hexagon::D15) + return Reg; + + unsigned RegNo = 0; + for (MCSubRegIterator SubRegs(Reg, &TRI); SubRegs.isValid(); ++SubRegs) { + if (hireg) { + if (*SubRegs > RegNo) + RegNo = *SubRegs; + } else { + if (!RegNo || *SubRegs < RegNo) + RegNo = *SubRegs; + } + } + return RegNo; + } + + /// Returns the callee saved register with the largest id in the vector. + unsigned getMaxCalleeSavedReg(const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo &TRI) { + assert(Hexagon::R1 > 0 && + "Assume physical registers are encoded as positive integers"); + if (CSI.empty()) + return 0; + + unsigned Max = getMax32BitSubRegister(CSI[0].getReg(), TRI); + for (unsigned I = 1, E = CSI.size(); I < E; ++I) { + unsigned Reg = getMax32BitSubRegister(CSI[I].getReg(), TRI); + if (Reg > Max) + Max = Reg; + } + return Max; + } + + /// Checks if the basic block contains any instruction that needs a stack + /// frame to be already in place. + bool needsStackFrame(const MachineBasicBlock &MBB, const BitVector &CSR) { + for (auto &I : MBB) { + const MachineInstr *MI = &I; + if (MI->isCall()) + return true; + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case Hexagon::ALLOCA: + case Hexagon::ALIGNA: + return true; + default: + break; + } + // Check individual operands. + for (const MachineOperand &MO : MI->operands()) { + // While the presence of a frame index does not prove that a stack + // frame will be required, all frame indexes should be within alloc- + // frame/deallocframe. Otherwise, the code that translates a frame + // index into an offset would have to be aware of the placement of + // the frame creation/destruction instructions. + if (MO.isFI()) + return true; + if (!MO.isReg()) + continue; + unsigned R = MO.getReg(); + // Virtual registers will need scavenging, which then may require + // a stack slot. + if (TargetRegisterInfo::isVirtualRegister(R)) + return true; + if (CSR[R]) + return true; + } + } + return false; + } + + /// Returns true if MBB has a machine instructions that indicates a tail call + /// in the block. + bool hasTailCall(const MachineBasicBlock &MBB) { + MachineBasicBlock::const_iterator I = MBB.getLastNonDebugInstr(); + unsigned RetOpc = I->getOpcode(); + return RetOpc == Hexagon::TCRETURNi || RetOpc == Hexagon::TCRETURNr; + } + + /// Returns true if MBB contains an instruction that returns. + bool hasReturn(const MachineBasicBlock &MBB) { + for (auto I = MBB.getFirstTerminator(), E = MBB.end(); I != E; ++I) + if (I->isReturn()) + return true; + return false; + } +} + + +/// Implements shrink-wrapping of the stack frame. By default, stack frame +/// is created in the function entry block, and is cleaned up in every block +/// that returns. This function finds alternate blocks: one for the frame +/// setup (prolog) and one for the cleanup (epilog). 
+void HexagonFrameLowering::findShrunkPrologEpilog(MachineFunction &MF, + MachineBasicBlock *&PrologB, MachineBasicBlock *&EpilogB) const { + static unsigned ShrinkCounter = 0; + + if (ShrinkLimit.getPosition()) { + if (ShrinkCounter >= ShrinkLimit) + return; + ShrinkCounter++; + } + + auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget()); + auto &HRI = *HST.getRegisterInfo(); + + MachineDominatorTree MDT; + MDT.runOnMachineFunction(MF); + MachinePostDominatorTree MPT; + MPT.runOnMachineFunction(MF); + + typedef DenseMap<unsigned,unsigned> UnsignedMap; + UnsignedMap RPO; + typedef ReversePostOrderTraversal<const MachineFunction*> RPOTType; + RPOTType RPOT(&MF); + unsigned RPON = 0; + for (RPOTType::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I) + RPO[(*I)->getNumber()] = RPON++; + + // Don't process functions that have loops, at least for now. Placement + // of prolog and epilog must take loop structure into account. For simpli- + // city don't do it right now. + for (auto &I : MF) { + unsigned BN = RPO[I.getNumber()]; + for (auto SI = I.succ_begin(), SE = I.succ_end(); SI != SE; ++SI) { + // If found a back-edge, return. + if (RPO[(*SI)->getNumber()] <= BN) + return; + } + } + + // Collect the set of blocks that need a stack frame to execute. Scan + // each block for uses/defs of callee-saved registers, calls, etc. + SmallVector<MachineBasicBlock*,16> SFBlocks; + BitVector CSR(Hexagon::NUM_TARGET_REGS); + for (const MCPhysReg *P = HRI.getCalleeSavedRegs(&MF); *P; ++P) + CSR[*P] = true; + + for (auto &I : MF) + if (needsStackFrame(I, CSR)) + SFBlocks.push_back(&I); + + DEBUG({ + dbgs() << "Blocks needing SF: {"; + for (auto &B : SFBlocks) + dbgs() << " BB#" << B->getNumber(); + dbgs() << " }\n"; + }); + // No frame needed? + if (SFBlocks.empty()) + return; + + // Pick a common dominator and a common post-dominator. + MachineBasicBlock *DomB = SFBlocks[0]; + for (unsigned i = 1, n = SFBlocks.size(); i < n; ++i) { + DomB = MDT.findNearestCommonDominator(DomB, SFBlocks[i]); + if (!DomB) + break; + } + MachineBasicBlock *PDomB = SFBlocks[0]; + for (unsigned i = 1, n = SFBlocks.size(); i < n; ++i) { + PDomB = MPT.findNearestCommonDominator(PDomB, SFBlocks[i]); + if (!PDomB) + break; + } + DEBUG({ + dbgs() << "Computed dom block: BB#"; + if (DomB) dbgs() << DomB->getNumber(); + else dbgs() << "<null>"; + dbgs() << ", computed pdom block: BB#"; + if (PDomB) dbgs() << PDomB->getNumber(); + else dbgs() << "<null>"; + dbgs() << "\n"; + }); + if (!DomB || !PDomB) + return; + + // Make sure that DomB dominates PDomB and PDomB post-dominates DomB. + if (!MDT.dominates(DomB, PDomB)) { + DEBUG(dbgs() << "Dom block does not dominate pdom block\n"); + return; + } + if (!MPT.dominates(PDomB, DomB)) { + DEBUG(dbgs() << "PDom block does not post-dominate dom block\n"); + return; + } + + // Finally, everything seems right. + PrologB = DomB; + EpilogB = PDomB; +} + +/// Perform most of the PEI work here: +/// - saving/restoring of the callee-saved registers, +/// - stack frame creation and destruction. +/// Normally, this work is distributed among various functions, but doing it +/// in one place allows shrink-wrapping of the stack frame. 
+void HexagonFrameLowering::emitPrologue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget()); + auto &HRI = *HST.getRegisterInfo(); + + assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported"); + MachineFrameInfo *MFI = MF.getFrameInfo(); + const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + + MachineBasicBlock *PrologB = &MF.front(), *EpilogB = nullptr; + if (EnableShrinkWrapping) + findShrunkPrologEpilog(MF, PrologB, EpilogB); + + insertCSRSpillsInBlock(*PrologB, CSI, HRI); + insertPrologueInBlock(*PrologB); + + if (EpilogB) { + insertCSRRestoresInBlock(*EpilogB, CSI, HRI); + insertEpilogueInBlock(*EpilogB); + } else { + for (auto &B : MF) + if (B.isReturnBlock()) + insertCSRRestoresInBlock(B, CSI, HRI); + + for (auto &B : MF) + if (B.isReturnBlock()) + insertEpilogueInBlock(B); + } +} + + +void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB) const { + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + auto &HST = MF.getSubtarget<HexagonSubtarget>(); + auto &HII = *HST.getInstrInfo(); + auto &HRI = *HST.getRegisterInfo(); + DebugLoc dl; + + unsigned MaxAlign = std::max(MFI->getMaxAlignment(), getStackAlignment()); + + // Calculate the total stack frame size. + // Get the number of bytes to allocate from the FrameInfo. + unsigned FrameSize = MFI->getStackSize(); + // Round up the max call frame size to the max alignment on the stack. + unsigned MaxCFA = RoundUpToAlignment(MFI->getMaxCallFrameSize(), MaxAlign); + MFI->setMaxCallFrameSize(MaxCFA); + + FrameSize = MaxCFA + RoundUpToAlignment(FrameSize, MaxAlign); + MFI->setStackSize(FrameSize); + + bool AlignStack = (MaxAlign > getStackAlignment()); + + // Get the number of bytes to allocate from the FrameInfo. + unsigned NumBytes = MFI->getStackSize(); + unsigned SP = HRI.getStackRegister(); + unsigned MaxCF = MFI->getMaxCallFrameSize(); + MachineBasicBlock::iterator InsertPt = MBB.begin(); + + auto *FuncInfo = MF.getInfo<HexagonMachineFunctionInfo>(); + auto &AdjustRegs = FuncInfo->getAllocaAdjustInsts(); + + for (auto MI : AdjustRegs) { + assert((MI->getOpcode() == Hexagon::ALLOCA) && "Expected alloca"); + expandAlloca(MI, HII, SP, MaxCF); + MI->eraseFromParent(); + } + + if (!hasFP(MF)) + return; + + // Check for overflow. + // Hexagon_TODO: Ugh! hardcoding. Is there an API that can be used? + const unsigned int ALLOCFRAME_MAX = 16384; + + // Create a dummy memory operand to avoid allocframe from being treated as + // a volatile memory reference. + MachineMemOperand *MMO = + MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOStore, + 4, 4); + + if (NumBytes >= ALLOCFRAME_MAX) { + // Emit allocframe(#0). + BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe)) + .addImm(0) + .addMemOperand(MMO); + + // Subtract offset from frame pointer. + // We use a caller-saved non-parameter register for that. 
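+ // For illustration: for a frame of 20000 bytes this emits
+ //   allocframe(#0)
+ //   Rx = #20000        (CONST32 into the scratch register)
+ //   r29 = sub(r29, Rx)
+ // because allocframe cannot encode an immediate of 16384 or more.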
+ unsigned CallerSavedReg = HRI.getFirstCallerSavedNonParamReg(); + BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::CONST32_Int_Real), + CallerSavedReg).addImm(NumBytes); + BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_sub), SP) + .addReg(SP) + .addReg(CallerSavedReg); + } else { + BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe)) + .addImm(NumBytes) + .addMemOperand(MMO); + } + + if (AlignStack) { + BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_andir), SP) + .addReg(SP) + .addImm(-int64_t(MaxAlign)); + } +} + +void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const { + MachineFunction &MF = *MBB.getParent(); + if (!hasFP(MF)) + return; + + auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget()); + auto &HII = *HST.getInstrInfo(); + auto &HRI = *HST.getRegisterInfo(); + unsigned SP = HRI.getStackRegister(); + + MachineInstr *RetI = nullptr; + for (auto &I : MBB) { + if (!I.isReturn()) + continue; + RetI = &I; + break; + } + unsigned RetOpc = RetI ? RetI->getOpcode() : 0; + + MachineBasicBlock::iterator InsertPt = MBB.getFirstTerminator(); + DebugLoc DL; + if (InsertPt != MBB.end()) + DL = InsertPt->getDebugLoc(); + else if (!MBB.empty()) + DL = std::prev(MBB.end())->getDebugLoc(); + + // Handle EH_RETURN. + if (RetOpc == Hexagon::EH_RETURN_JMPR) { + BuildMI(MBB, InsertPt, DL, HII.get(Hexagon::L2_deallocframe)); + BuildMI(MBB, InsertPt, DL, HII.get(Hexagon::A2_add), SP) + .addReg(SP) + .addReg(Hexagon::R28); + return; + } + + // Check for RESTORE_DEALLOC_RET* tail call. Don't emit an extra dealloc- + // frame instruction if we encounter it. + if (RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4) { + MachineBasicBlock::iterator It = RetI; + ++It; + // Delete all instructions after the RESTORE (except labels). + while (It != MBB.end()) { + if (!It->isLabel()) + It = MBB.erase(It); + else + ++It; + } + return; + } + + // It is possible that the restoring code is a call to a library function. + // All of the restore* functions include "deallocframe", so we need to make + // sure that we don't add an extra one. + bool NeedsDeallocframe = true; + if (!MBB.empty() && InsertPt != MBB.begin()) { + MachineBasicBlock::iterator PrevIt = std::prev(InsertPt); + unsigned COpc = PrevIt->getOpcode(); + if (COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4) + NeedsDeallocframe = false; + } + + if (!NeedsDeallocframe) + return; + // If the returning instruction is JMPret, replace it with dealloc_return, + // otherwise just add deallocframe. The function could be returning via a + // tail call. + if (RetOpc != Hexagon::JMPret || DisableDeallocRet) { + BuildMI(MBB, InsertPt, DL, HII.get(Hexagon::L2_deallocframe)); + return; + } + unsigned NewOpc = Hexagon::L4_return; + MachineInstr *NewI = BuildMI(MBB, RetI, DL, HII.get(NewOpc)); + // Transfer the function live-out registers. 
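+ // For illustration: a "deallocframe; jumpr r31" ending collapses here into
+ // a single dealloc_return (L4_return); copyImplicitOps keeps the implicit
+ // live-out uses from the original return on the new instruction.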
+ NewI->copyImplicitOps(MF, RetI); + MBB.erase(RetI); +} + + +namespace { + bool IsAllocFrame(MachineBasicBlock::const_iterator It) { + if (!It->isBundle()) + return It->getOpcode() == Hexagon::S2_allocframe; + auto End = It->getParent()->instr_end(); + MachineBasicBlock::const_instr_iterator I = It.getInstrIterator(); + while (++I != End && I->isBundled()) + if (I->getOpcode() == Hexagon::S2_allocframe) + return true; + return false; + } + + MachineBasicBlock::iterator FindAllocFrame(MachineBasicBlock &B) { + for (auto &I : B) + if (IsAllocFrame(I)) + return I; + return B.end(); + } +} + + +void HexagonFrameLowering::insertCFIInstructions(MachineFunction &MF) const { + for (auto &B : MF) { + auto AF = FindAllocFrame(B); + if (AF == B.end()) + continue; + insertCFIInstructionsAt(B, ++AF); + } +} + + +void HexagonFrameLowering::insertCFIInstructionsAt(MachineBasicBlock &MBB, + MachineBasicBlock::iterator At) const { + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineModuleInfo &MMI = MF.getMMI(); + auto &HST = MF.getSubtarget<HexagonSubtarget>(); + auto &HII = *HST.getInstrInfo(); + auto &HRI = *HST.getRegisterInfo(); + + // If CFI instructions have debug information attached, something goes + // wrong with the final assembly generation: the prolog_end is placed + // in a wrong location. + DebugLoc DL; + const MCInstrDesc &CFID = HII.get(TargetOpcode::CFI_INSTRUCTION); + + MCSymbol *FrameLabel = MMI.getContext().createTempSymbol(); + + if (hasFP(MF)) { + unsigned DwFPReg = HRI.getDwarfRegNum(HRI.getFrameRegister(), true); + unsigned DwRAReg = HRI.getDwarfRegNum(HRI.getRARegister(), true); + + // Define CFA via an offset from the value of FP. + // + // -8 -4 0 (SP) + // --+----+----+--------------------- + // | FP | LR | increasing addresses --> + // --+----+----+--------------------- + // | +-- Old SP (before allocframe) + // +-- New FP (after allocframe) + // + // MCCFIInstruction::createDefCfa subtracts the offset from the register. + // MCCFIInstruction::createOffset takes the offset without sign change. + auto DefCfa = MCCFIInstruction::createDefCfa(FrameLabel, DwFPReg, -8); + BuildMI(MBB, At, DL, CFID) + .addCFIIndex(MMI.addFrameInst(DefCfa)); + // R31 (return addr) = CFA - 4 + auto OffR31 = MCCFIInstruction::createOffset(FrameLabel, DwRAReg, -4); + BuildMI(MBB, At, DL, CFID) + .addCFIIndex(MMI.addFrameInst(OffR31)); + // R30 (frame ptr) = CFA - 8 + auto OffR30 = MCCFIInstruction::createOffset(FrameLabel, DwFPReg, -8); + BuildMI(MBB, At, DL, CFID) + .addCFIIndex(MMI.addFrameInst(OffR30)); + } + + static unsigned int RegsToMove[] = { + Hexagon::R1, Hexagon::R0, Hexagon::R3, Hexagon::R2, + Hexagon::R17, Hexagon::R16, Hexagon::R19, Hexagon::R18, + Hexagon::R21, Hexagon::R20, Hexagon::R23, Hexagon::R22, + Hexagon::R25, Hexagon::R24, Hexagon::R27, Hexagon::R26, + Hexagon::D0, Hexagon::D1, Hexagon::D8, Hexagon::D9, + Hexagon::D10, Hexagon::D11, Hexagon::D12, Hexagon::D13, + Hexagon::NoRegister + }; + + const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + + for (unsigned i = 0; RegsToMove[i] != Hexagon::NoRegister; ++i) { + unsigned Reg = RegsToMove[i]; + auto IfR = [Reg] (const CalleeSavedInfo &C) -> bool { + return C.getReg() == Reg; + }; + auto F = std::find_if(CSI.begin(), CSI.end(), IfR); + if (F == CSI.end()) + continue; + + // Subtract 8 to make room for R30 and R31, which are added above. 
+    unsigned FrameReg;
+    int64_t Offset = getFrameIndexReference(MF, F->getFrameIdx(), FrameReg) - 8;
+
+    if (Reg < Hexagon::D0 || Reg > Hexagon::D15) {
+      unsigned DwarfReg = HRI.getDwarfRegNum(Reg, true);
+      auto OffReg = MCCFIInstruction::createOffset(FrameLabel, DwarfReg,
+                                                   Offset);
+      BuildMI(MBB, At, DL, CFID)
+          .addCFIIndex(MMI.addFrameInst(OffReg));
+    } else {
+      // Split the double regs into subregs, and generate appropriate
+      // cfi_offsets.
+      // The only reason we split the double regs is that llvm-mc does not
+      // understand paired registers for cfi_offset.
+      // E.g. .cfi_offset r1:0, -64
+
+      unsigned HiReg = HRI.getSubReg(Reg, Hexagon::subreg_hireg);
+      unsigned LoReg = HRI.getSubReg(Reg, Hexagon::subreg_loreg);
+      unsigned HiDwarfReg = HRI.getDwarfRegNum(HiReg, true);
+      unsigned LoDwarfReg = HRI.getDwarfRegNum(LoReg, true);
+      auto OffHi = MCCFIInstruction::createOffset(FrameLabel, HiDwarfReg,
+                                                  Offset+4);
+      BuildMI(MBB, At, DL, CFID)
+          .addCFIIndex(MMI.addFrameInst(OffHi));
+      auto OffLo = MCCFIInstruction::createOffset(FrameLabel, LoDwarfReg,
+                                                  Offset);
+      BuildMI(MBB, At, DL, CFID)
+          .addCFIIndex(MMI.addFrameInst(OffLo));
+    }
+  }
+}
+
+
+bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const {
+  auto &MFI = *MF.getFrameInfo();
+  auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
+
+  bool HasFixed = MFI.getNumFixedObjects();
+  bool HasPrealloc = const_cast<MachineFrameInfo&>(MFI)
+                        .getLocalFrameObjectCount();
+  bool HasExtraAlign = HRI.needsStackRealignment(MF);
+  bool HasAlloca = MFI.hasVarSizedObjects();
+
+  // Insert ALLOCFRAME if we need to, or at -O0 for the debugger. We think
+  // that this shouldn't be required, but we are doing so now because gcc
+  // does and gdb can't break at the start of the function without it. We
+  // will remove it if this turns out to be a gdb bug.
+  //
+  if (MF.getTarget().getOptLevel() == CodeGenOpt::None)
+    return true;
+
+  // By default we want to use SP (since it's always there). FP requires
+  // some setup (i.e. ALLOCFRAME).
+  // Fixed and preallocated objects need FP if the distance from them to
+  // the SP is unknown (as it is with alloca or aligna).
+ if ((HasFixed || HasPrealloc) && (HasAlloca || HasExtraAlign)) + return true; + + if (MFI.getStackSize() > 0) { + if (UseAllocframe) + return true; + } + + if (MFI.hasCalls() || + MF.getInfo<HexagonMachineFunctionInfo>()->hasClobberLR()) + return true; + + return false; +} + + +enum SpillKind { + SK_ToMem, + SK_FromMem, + SK_FromMemTailcall +}; + +static const char * +getSpillFunctionFor(unsigned MaxReg, SpillKind SpillType) { + const char * V4SpillToMemoryFunctions[] = { + "__save_r16_through_r17", + "__save_r16_through_r19", + "__save_r16_through_r21", + "__save_r16_through_r23", + "__save_r16_through_r25", + "__save_r16_through_r27" }; + + const char * V4SpillFromMemoryFunctions[] = { + "__restore_r16_through_r17_and_deallocframe", + "__restore_r16_through_r19_and_deallocframe", + "__restore_r16_through_r21_and_deallocframe", + "__restore_r16_through_r23_and_deallocframe", + "__restore_r16_through_r25_and_deallocframe", + "__restore_r16_through_r27_and_deallocframe" }; + + const char * V4SpillFromMemoryTailcallFunctions[] = { + "__restore_r16_through_r17_and_deallocframe_before_tailcall", + "__restore_r16_through_r19_and_deallocframe_before_tailcall", + "__restore_r16_through_r21_and_deallocframe_before_tailcall", + "__restore_r16_through_r23_and_deallocframe_before_tailcall", + "__restore_r16_through_r25_and_deallocframe_before_tailcall", + "__restore_r16_through_r27_and_deallocframe_before_tailcall" + }; + + const char **SpillFunc = nullptr; + + switch(SpillType) { + case SK_ToMem: + SpillFunc = V4SpillToMemoryFunctions; + break; + case SK_FromMem: + SpillFunc = V4SpillFromMemoryFunctions; + break; + case SK_FromMemTailcall: + SpillFunc = V4SpillFromMemoryTailcallFunctions; + break; + } + assert(SpillFunc && "Unknown spill kind"); + + // Spill all callee-saved registers up to the highest register used. + switch (MaxReg) { + case Hexagon::R17: + return SpillFunc[0]; + case Hexagon::R19: + return SpillFunc[1]; + case Hexagon::R21: + return SpillFunc[2]; + case Hexagon::R23: + return SpillFunc[3]; + case Hexagon::R25: + return SpillFunc[4]; + case Hexagon::R27: + return SpillFunc[5]; + default: + llvm_unreachable("Unhandled maximum callee save register"); + } + return 0; +} + +/// Adds all callee-saved registers up to MaxReg to the instruction. +static void addCalleeSaveRegistersAsImpOperand(MachineInstr *Inst, + unsigned MaxReg, bool IsDef) { + // Add the callee-saved registers as implicit uses. + for (unsigned R = Hexagon::R16; R <= MaxReg; ++R) { + MachineOperand ImpUse = MachineOperand::CreateReg(R, IsDef, true); + Inst->addOperand(ImpUse); + } +} + + +int HexagonFrameLowering::getFrameIndexReference(const MachineFunction &MF, + int FI, unsigned &FrameReg) const { + auto &MFI = *MF.getFrameInfo(); + auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); + + // Large parts of this code are shared with HRI::eliminateFrameIndex. + int Offset = MFI.getObjectOffset(FI); + bool HasAlloca = MFI.hasVarSizedObjects(); + bool HasExtraAlign = HRI.needsStackRealignment(MF); + bool NoOpt = MF.getTarget().getOptLevel() == CodeGenOpt::None; + + unsigned SP = HRI.getStackRegister(), FP = HRI.getFrameRegister(); + unsigned AP = 0; + if (const MachineInstr *AI = getAlignaInstr(MF)) + AP = AI->getOperand(0).getReg(); + unsigned FrameSize = MFI.getStackSize(); + + bool UseFP = false, UseAP = false; // Default: use SP (except at -O0). + // Use FP at -O0, except when there are objects with extra alignment. 
+ // That additional alignment requirement may cause a pad to be inserted, + // which will make it impossible to use FP to access objects located + // past the pad. + if (NoOpt && !HasExtraAlign) + UseFP = true; + if (MFI.isFixedObjectIndex(FI) || MFI.isObjectPreAllocated(FI)) { + // Fixed and preallocated objects will be located before any padding + // so FP must be used to access them. + UseFP |= (HasAlloca || HasExtraAlign); + } else { + if (HasAlloca) { + if (HasExtraAlign) + UseAP = true; + else + UseFP = true; + } + } + + // If FP was picked, then there had better be FP. + bool HasFP = hasFP(MF); + assert((HasFP || !UseFP) && "This function must have frame pointer"); + + // Having FP implies allocframe. Allocframe will store extra 8 bytes: + // FP/LR. If the base register is used to access an object across these + // 8 bytes, then the offset will need to be adjusted by 8. + // + // After allocframe: + // HexagonISelLowering adds 8 to ---+ + // the offsets of all stack-based | + // arguments (*) | + // | + // getObjectOffset < 0 0 8 getObjectOffset >= 8 + // ------------------------+-----+------------------------> increasing + // <local objects> |FP/LR| <input arguments> addresses + // -----------------+------+-----+------------------------> + // | | + // SP/AP point --+ +-- FP points here (**) + // somewhere on + // this side of FP/LR + // + // (*) See LowerFormalArguments. The FP/LR is assumed to be present. + // (**) *FP == old-FP. FP+0..7 are the bytes of FP/LR. + + // The lowering assumes that FP/LR is present, and so the offsets of + // the formal arguments start at 8. If FP/LR is not there we need to + // reduce the offset by 8. + if (Offset > 0 && !HasFP) + Offset -= 8; + + if (UseFP) + FrameReg = FP; + else if (UseAP) + FrameReg = AP; + else + FrameReg = SP; + + // Calculate the actual offset in the instruction. If there is no FP + // (in other words, no allocframe), then SP will not be adjusted (i.e. + // there will be no SP -= FrameSize), so the frame size should not be + // added to the calculated offset. + int RealOffset = Offset; + if (!UseFP && !UseAP && HasFP) + RealOffset = FrameSize+Offset; + return RealOffset; +} + + +bool HexagonFrameLowering::insertCSRSpillsInBlock(MachineBasicBlock &MBB, + const CSIVect &CSI, const HexagonRegisterInfo &HRI) const { + if (CSI.empty()) + return true; + + MachineBasicBlock::iterator MI = MBB.begin(); + MachineFunction &MF = *MBB.getParent(); + auto &HII = *MF.getSubtarget<HexagonSubtarget>().getInstrInfo(); + + if (useSpillFunction(MF, CSI)) { + unsigned MaxReg = getMaxCalleeSavedReg(CSI, HRI); + const char *SpillFun = getSpillFunctionFor(MaxReg, SK_ToMem); + // Call spill function. + DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc(); + MachineInstr *SaveRegsCall = + BuildMI(MBB, MI, DL, HII.get(Hexagon::SAVE_REGISTERS_CALL_V4)) + .addExternalSymbol(SpillFun); + // Add callee-saved registers as use. + addCalleeSaveRegistersAsImpOperand(SaveRegsCall, MaxReg, false); + // Add live in registers. + for (unsigned I = 0; I < CSI.size(); ++I) + MBB.addLiveIn(CSI[I].getReg()); + return true; + } + + for (unsigned i = 0, n = CSI.size(); i < n; ++i) { + unsigned Reg = CSI[i].getReg(); + // Add live in registers. We treat eh_return callee saved register r0 - r3 + // specially. They are not really callee saved registers as they are not + // supposed to be killed. 
+    bool IsKill = !HRI.isEHReturnCalleeSaveReg(Reg);
+    int FI = CSI[i].getFrameIdx();
+    const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg);
+    HII.storeRegToStackSlot(MBB, MI, Reg, IsKill, FI, RC, &HRI);
+    if (IsKill)
+      MBB.addLiveIn(Reg);
+  }
+  return true;
+}
+
+
+bool HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB,
+      const CSIVect &CSI, const HexagonRegisterInfo &HRI) const {
+  if (CSI.empty())
+    return false;
+
+  MachineBasicBlock::iterator MI = MBB.getFirstTerminator();
+  MachineFunction &MF = *MBB.getParent();
+  auto &HII = *MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
+
+  if (useRestoreFunction(MF, CSI)) {
+    bool HasTC = hasTailCall(MBB) || !hasReturn(MBB);
+    unsigned MaxR = getMaxCalleeSavedReg(CSI, HRI);
+    SpillKind Kind = HasTC ? SK_FromMemTailcall : SK_FromMem;
+    const char *RestoreFn = getSpillFunctionFor(MaxR, Kind);
+
+    // Call spill function.
+    DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc()
+                                  : MBB.getLastNonDebugInstr()->getDebugLoc();
+    MachineInstr *DeallocCall = nullptr;
+
+    if (HasTC) {
+      unsigned ROpc = Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4;
+      DeallocCall = BuildMI(MBB, MI, DL, HII.get(ROpc))
+          .addExternalSymbol(RestoreFn);
+    } else {
+      // The block has a return.
+      MachineBasicBlock::iterator It = MBB.getFirstTerminator();
+      assert(It->isReturn() && std::next(It) == MBB.end());
+      unsigned ROpc = Hexagon::RESTORE_DEALLOC_RET_JMP_V4;
+      DeallocCall = BuildMI(MBB, It, DL, HII.get(ROpc))
+          .addExternalSymbol(RestoreFn);
+      // Transfer the function live-out registers.
+      DeallocCall->copyImplicitOps(MF, It);
+    }
+    addCalleeSaveRegistersAsImpOperand(DeallocCall, MaxR, true);
+    return true;
+  }
+
+  for (unsigned i = 0; i < CSI.size(); ++i) {
+    unsigned Reg = CSI[i].getReg();
+    const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg);
+    int FI = CSI[i].getFrameIdx();
+    HII.loadRegFromStackSlot(MBB, MI, Reg, FI, RC, &HRI);
+  }
+  return true;
+}
+
+
+void HexagonFrameLowering::eliminateCallFramePseudoInstr(MachineFunction &MF,
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const {
+  MachineInstr &MI = *I;
+  unsigned Opc = MI.getOpcode();
+  (void)Opc; // Silence compiler warning.
+  assert((Opc == Hexagon::ADJCALLSTACKDOWN || Opc == Hexagon::ADJCALLSTACKUP) &&
+         "Cannot handle this call frame pseudo instruction");
+  MBB.erase(I);
+}
+
+
+void HexagonFrameLowering::processFunctionBeforeFrameFinalized(
+    MachineFunction &MF, RegScavenger *RS) const {
+  // If this function uses an aligned stack and also has variable-sized stack
+  // objects, then we need to map all spill slots to fixed positions, so that
+  // they can be accessed through FP. Otherwise they would have to be accessed
+  // via AP, which may not be available at the particular place in the program.
+ MachineFrameInfo *MFI = MF.getFrameInfo(); + bool HasAlloca = MFI->hasVarSizedObjects(); + bool NeedsAlign = (MFI->getMaxAlignment() > getStackAlignment()); + + if (!HasAlloca || !NeedsAlign) + return; + + unsigned LFS = MFI->getLocalFrameSize(); + int Offset = -LFS; + for (int i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { + if (!MFI->isSpillSlotObjectIndex(i) || MFI->isDeadObjectIndex(i)) + continue; + int S = MFI->getObjectSize(i); + LFS += S; + Offset -= S; + MFI->mapLocalFrameObject(i, Offset); + } + + MFI->setLocalFrameSize(LFS); + unsigned A = MFI->getLocalFrameMaxAlign(); + assert(A <= 8 && "Unexpected local frame alignment"); + if (A == 0) + MFI->setLocalFrameMaxAlign(8); + MFI->setUseLocalStackAllocationBlock(true); +} + +/// Returns true if there is no caller saved registers available. +static bool needToReserveScavengingSpillSlots(MachineFunction &MF, + const HexagonRegisterInfo &HRI) { + MachineRegisterInfo &MRI = MF.getRegInfo(); + const MCPhysReg *CallerSavedRegs = HRI.getCallerSavedRegs(&MF); + // Check for an unused caller-saved register. + for ( ; *CallerSavedRegs; ++CallerSavedRegs) { + MCPhysReg FreeReg = *CallerSavedRegs; + if (!MRI.reg_nodbg_empty(FreeReg)) + continue; + + // Check aliased register usage. + bool IsCurrentRegUsed = false; + for (MCRegAliasIterator AI(FreeReg, &HRI, false); AI.isValid(); ++AI) + if (!MRI.reg_nodbg_empty(*AI)) { + IsCurrentRegUsed = true; + break; + } + if (IsCurrentRegUsed) + continue; + + // Neither directly used nor used through an aliased register. + return false; + } + // All caller-saved registers are used. + return true; +} + + +/// Replaces the predicate spill code pseudo instructions by valid instructions. +bool HexagonFrameLowering::replacePredRegPseudoSpillCode(MachineFunction &MF) + const { + auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget()); + auto &HII = *HST.getInstrInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + bool HasReplacedPseudoInst = false; + // Replace predicate spill pseudo instructions by real code. + // Loop over all of the basic blocks. + for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end(); + MBBb != MBBe; ++MBBb) { + MachineBasicBlock *MBB = &*MBBb; + // Traverse the basic block. + MachineBasicBlock::iterator NextII; + for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end(); + MII = NextII) { + MachineInstr *MI = MII; + NextII = std::next(MII); + int Opc = MI->getOpcode(); + if (Opc == Hexagon::STriw_pred) { + HasReplacedPseudoInst = true; + // STriw_pred FI, 0, SrcReg; + unsigned VirtReg = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); + unsigned SrcReg = MI->getOperand(2).getReg(); + bool IsOrigSrcRegKilled = MI->getOperand(2).isKill(); + + assert(MI->getOperand(0).isFI() && "Expect a frame index"); + assert(Hexagon::PredRegsRegClass.contains(SrcReg) && + "Not a predicate register"); + + // Insert transfer to general purpose register. + // VirtReg = C2_tfrpr SrcPredReg + BuildMI(*MBB, MII, MI->getDebugLoc(), HII.get(Hexagon::C2_tfrpr), + VirtReg).addReg(SrcReg, getKillRegState(IsOrigSrcRegKilled)); + + // Change instruction to S2_storeri_io. 
+ // S2_storeri_io FI, 0, VirtReg + MI->setDesc(HII.get(Hexagon::S2_storeri_io)); + MI->getOperand(2).setReg(VirtReg); + MI->getOperand(2).setIsKill(); + + } else if (Opc == Hexagon::LDriw_pred) { + // DstReg = LDriw_pred FI, 0 + MachineOperand &M0 = MI->getOperand(0); + if (M0.isDead()) { + MBB->erase(MII); + continue; + } + + unsigned VirtReg = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); + unsigned DestReg = MI->getOperand(0).getReg(); + + assert(MI->getOperand(1).isFI() && "Expect a frame index"); + assert(Hexagon::PredRegsRegClass.contains(DestReg) && + "Not a predicate register"); + + // Change instruction to L2_loadri_io. + // VirtReg = L2_loadri_io FI, 0 + MI->setDesc(HII.get(Hexagon::L2_loadri_io)); + MI->getOperand(0).setReg(VirtReg); + + // Insert transfer to general purpose register. + // DestReg = C2_tfrrp VirtReg + const MCInstrDesc &D = HII.get(Hexagon::C2_tfrrp); + BuildMI(*MBB, std::next(MII), MI->getDebugLoc(), D, DestReg) + .addReg(VirtReg, getKillRegState(true)); + HasReplacedPseudoInst = true; + } + } + } + return HasReplacedPseudoInst; +} + + +void HexagonFrameLowering::determineCalleeSaves(MachineFunction &MF, + BitVector &SavedRegs, + RegScavenger *RS) const { + TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); + + auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget()); + auto &HRI = *HST.getRegisterInfo(); + + bool HasEHReturn = MF.getInfo<HexagonMachineFunctionInfo>()->hasEHReturn(); + + // If we have a function containing __builtin_eh_return we want to spill and + // restore all callee saved registers. Pretend that they are used. + if (HasEHReturn) { + for (const MCPhysReg *CSRegs = HRI.getCalleeSavedRegs(&MF); *CSRegs; + ++CSRegs) + SavedRegs.set(*CSRegs); + } + + const TargetRegisterClass &RC = Hexagon::IntRegsRegClass; + + // Replace predicate register pseudo spill code. + bool HasReplacedPseudoInst = replacePredRegPseudoSpillCode(MF); + + // We need to reserve a a spill slot if scavenging could potentially require + // spilling a scavenged register. + if (HasReplacedPseudoInst && needToReserveScavengingSpillSlots(MF, HRI)) { + MachineFrameInfo *MFI = MF.getFrameInfo(); + for (int i=0; i < NumberScavengerSlots; i++) + RS->addScavengingFrameIndex( + MFI->CreateSpillStackObject(RC.getSize(), RC.getAlignment())); + } +} + + +#ifndef NDEBUG +static void dump_registers(BitVector &Regs, const TargetRegisterInfo &TRI) { + dbgs() << '{'; + for (int x = Regs.find_first(); x >= 0; x = Regs.find_next(x)) { + unsigned R = x; + dbgs() << ' ' << PrintReg(R, &TRI); + } + dbgs() << " }"; +} +#endif + + +bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF, + const TargetRegisterInfo *TRI, std::vector<CalleeSavedInfo> &CSI) const { + DEBUG(dbgs() << LLVM_FUNCTION_NAME << " on " + << MF.getFunction()->getName() << '\n'); + MachineFrameInfo *MFI = MF.getFrameInfo(); + BitVector SRegs(Hexagon::NUM_TARGET_REGS); + + // Generate a set of unique, callee-saved registers (SRegs), where each + // register in the set is maximal in terms of sub-/super-register relation, + // i.e. for each R in SRegs, no proper super-register of R is also in SRegs. + + // (1) For each callee-saved register, add that register and all of its + // sub-registers to SRegs. 
+ DEBUG(dbgs() << "Initial CS registers: {"); + for (unsigned i = 0, n = CSI.size(); i < n; ++i) { + unsigned R = CSI[i].getReg(); + DEBUG(dbgs() << ' ' << PrintReg(R, TRI)); + for (MCSubRegIterator SR(R, TRI, true); SR.isValid(); ++SR) + SRegs[*SR] = true; + } + DEBUG(dbgs() << " }\n"); + DEBUG(dbgs() << "SRegs.1: "; dump_registers(SRegs, *TRI); dbgs() << "\n"); + + // (2) For each reserved register, remove that register and all of its + // sub- and super-registers from SRegs. + BitVector Reserved = TRI->getReservedRegs(MF); + for (int x = Reserved.find_first(); x >= 0; x = Reserved.find_next(x)) { + unsigned R = x; + for (MCSuperRegIterator SR(R, TRI, true); SR.isValid(); ++SR) + SRegs[*SR] = false; + } + DEBUG(dbgs() << "Res: "; dump_registers(Reserved, *TRI); dbgs() << "\n"); + DEBUG(dbgs() << "SRegs.2: "; dump_registers(SRegs, *TRI); dbgs() << "\n"); + + // (3) Collect all registers that have at least one sub-register in SRegs, + // and also have no sub-registers that are reserved. These will be the can- + // didates for saving as a whole instead of their individual sub-registers. + // (Saving R17:16 instead of R16 is fine, but only if R17 was not reserved.) + BitVector TmpSup(Hexagon::NUM_TARGET_REGS); + for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) { + unsigned R = x; + for (MCSuperRegIterator SR(R, TRI); SR.isValid(); ++SR) + TmpSup[*SR] = true; + } + for (int x = TmpSup.find_first(); x >= 0; x = TmpSup.find_next(x)) { + unsigned R = x; + for (MCSubRegIterator SR(R, TRI, true); SR.isValid(); ++SR) { + if (!Reserved[*SR]) + continue; + TmpSup[R] = false; + break; + } + } + DEBUG(dbgs() << "TmpSup: "; dump_registers(TmpSup, *TRI); dbgs() << "\n"); + + // (4) Include all super-registers found in (3) into SRegs. + SRegs |= TmpSup; + DEBUG(dbgs() << "SRegs.4: "; dump_registers(SRegs, *TRI); dbgs() << "\n"); + + // (5) For each register R in SRegs, if any super-register of R is in SRegs, + // remove R from SRegs. + for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) { + unsigned R = x; + for (MCSuperRegIterator SR(R, TRI); SR.isValid(); ++SR) { + if (!SRegs[*SR]) + continue; + SRegs[R] = false; + break; + } + } + DEBUG(dbgs() << "SRegs.5: "; dump_registers(SRegs, *TRI); dbgs() << "\n"); + + // Now, for each register that has a fixed stack slot, create the stack + // object for it. + CSI.clear(); + + typedef TargetFrameLowering::SpillSlot SpillSlot; + unsigned NumFixed; + int MinOffset = 0; // CS offsets are negative. + const SpillSlot *FixedSlots = getCalleeSavedSpillSlots(NumFixed); + for (const SpillSlot *S = FixedSlots; S != FixedSlots+NumFixed; ++S) { + if (!SRegs[S->Reg]) + continue; + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(S->Reg); + int FI = MFI->CreateFixedSpillStackObject(RC->getSize(), S->Offset); + MinOffset = std::min(MinOffset, S->Offset); + CSI.push_back(CalleeSavedInfo(S->Reg, FI)); + SRegs[S->Reg] = false; + } + + // There can be some registers that don't have fixed slots. For example, + // we need to store R0-R3 in functions with exception handling. For each + // such register, create a non-fixed stack object. 
+ for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) { + unsigned R = x; + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(R); + int Off = MinOffset - RC->getSize(); + unsigned Align = std::min(RC->getAlignment(), getStackAlignment()); + assert(isPowerOf2_32(Align)); + Off &= -Align; + int FI = MFI->CreateFixedSpillStackObject(RC->getSize(), Off); + MinOffset = std::min(MinOffset, Off); + CSI.push_back(CalleeSavedInfo(R, FI)); + SRegs[R] = false; + } + + DEBUG({ + dbgs() << "CS information: {"; + for (unsigned i = 0, n = CSI.size(); i < n; ++i) { + int FI = CSI[i].getFrameIdx(); + int Off = MFI->getObjectOffset(FI); + dbgs() << ' ' << PrintReg(CSI[i].getReg(), TRI) << ":fi#" << FI << ":sp"; + if (Off >= 0) + dbgs() << '+'; + dbgs() << Off; + } + dbgs() << " }\n"; + }); + +#ifndef NDEBUG + // Verify that all registers were handled. + bool MissedReg = false; + for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) { + unsigned R = x; + dbgs() << PrintReg(R, TRI) << ' '; + MissedReg = true; + } + if (MissedReg) + llvm_unreachable("...there are unhandled callee-saved registers!"); +#endif + + return true; +} + + +void HexagonFrameLowering::expandAlloca(MachineInstr *AI, + const HexagonInstrInfo &HII, unsigned SP, unsigned CF) const { + MachineBasicBlock &MB = *AI->getParent(); + DebugLoc DL = AI->getDebugLoc(); + unsigned A = AI->getOperand(2).getImm(); + + // Have + // Rd = alloca Rs, #A + // + // If Rs and Rd are different registers, use this sequence: + // Rd = sub(r29, Rs) + // r29 = sub(r29, Rs) + // Rd = and(Rd, #-A) ; if necessary + // r29 = and(r29, #-A) ; if necessary + // Rd = add(Rd, #CF) ; CF size aligned to at most A + // otherwise, do + // Rd = sub(r29, Rs) + // Rd = and(Rd, #-A) ; if necessary + // r29 = Rd + // Rd = add(Rd, #CF) ; CF size aligned to at most A + + MachineOperand &RdOp = AI->getOperand(0); + MachineOperand &RsOp = AI->getOperand(1); + unsigned Rd = RdOp.getReg(), Rs = RsOp.getReg(); + + // Rd = sub(r29, Rs) + BuildMI(MB, AI, DL, HII.get(Hexagon::A2_sub), Rd) + .addReg(SP) + .addReg(Rs); + if (Rs != Rd) { + // r29 = sub(r29, Rs) + BuildMI(MB, AI, DL, HII.get(Hexagon::A2_sub), SP) + .addReg(SP) + .addReg(Rs); + } + if (A > 8) { + // Rd = and(Rd, #-A) + BuildMI(MB, AI, DL, HII.get(Hexagon::A2_andir), Rd) + .addReg(Rd) + .addImm(-int64_t(A)); + if (Rs != Rd) + BuildMI(MB, AI, DL, HII.get(Hexagon::A2_andir), SP) + .addReg(SP) + .addImm(-int64_t(A)); + } + if (Rs == Rd) { + // r29 = Rd + BuildMI(MB, AI, DL, HII.get(TargetOpcode::COPY), SP) + .addReg(Rd); + } + if (CF > 0) { + // Rd = add(Rd, #CF) + BuildMI(MB, AI, DL, HII.get(Hexagon::A2_addi), Rd) + .addReg(Rd) + .addImm(CF); + } +} + + +bool HexagonFrameLowering::needsAligna(const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + if (!MFI->hasVarSizedObjects()) + return false; + unsigned MaxA = MFI->getMaxAlignment(); + if (MaxA <= getStackAlignment()) + return false; + return true; +} + + +const MachineInstr *HexagonFrameLowering::getAlignaInstr( + const MachineFunction &MF) const { + for (auto &B : MF) + for (auto &I : B) + if (I.getOpcode() == Hexagon::ALIGNA) + return &I; + return nullptr; +} + + +// FIXME: Use Function::optForSize(). 
+inline static bool isOptSize(const MachineFunction &MF) { + AttributeSet AF = MF.getFunction()->getAttributes(); + return AF.hasAttribute(AttributeSet::FunctionIndex, + Attribute::OptimizeForSize); +} + +inline static bool isMinSize(const MachineFunction &MF) { + return MF.getFunction()->optForMinSize(); +} + + +/// Determine whether the callee-saved register saves and restores should +/// be generated via inline code. If this function returns "true", inline +/// code will be generated. If this function returns "false", additional +/// checks are performed, which may still lead to the inline code. +bool HexagonFrameLowering::shouldInlineCSR(MachineFunction &MF, + const CSIVect &CSI) const { + if (MF.getInfo<HexagonMachineFunctionInfo>()->hasEHReturn()) + return true; + if (!isOptSize(MF) && !isMinSize(MF)) + if (MF.getTarget().getOptLevel() > CodeGenOpt::Default) + return true; + + // Check if CSI only has double registers, and if the registers form + // a contiguous block starting from D8. + BitVector Regs(Hexagon::NUM_TARGET_REGS); + for (unsigned i = 0, n = CSI.size(); i < n; ++i) { + unsigned R = CSI[i].getReg(); + if (!Hexagon::DoubleRegsRegClass.contains(R)) + return true; + Regs[R] = true; + } + int F = Regs.find_first(); + if (F != Hexagon::D8) + return true; + while (F >= 0) { + int N = Regs.find_next(F); + if (N >= 0 && N != F+1) + return true; + F = N; + } + + return false; +} + + +bool HexagonFrameLowering::useSpillFunction(MachineFunction &MF, + const CSIVect &CSI) const { + if (shouldInlineCSR(MF, CSI)) + return false; + unsigned NumCSI = CSI.size(); + if (NumCSI <= 1) + return false; + + unsigned Threshold = isOptSize(MF) ? SpillFuncThresholdOs + : SpillFuncThreshold; + return Threshold < NumCSI; +} + + +bool HexagonFrameLowering::useRestoreFunction(MachineFunction &MF, + const CSIVect &CSI) const { + if (shouldInlineCSR(MF, CSI)) + return false; + unsigned NumCSI = CSI.size(); + unsigned Threshold = isOptSize(MF) ? SpillFuncThresholdOs-1 + : SpillFuncThreshold; + return Threshold < NumCSI; +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h new file mode 100644 index 0000000..683b303 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h @@ -0,0 +1,109 @@ +//=- HexagonFrameLowering.h - Define frame lowering for Hexagon --*- C++ -*--=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONFRAMELOWERING_H +#define LLVM_LIB_TARGET_HEXAGON_HEXAGONFRAMELOWERING_H + +#include "Hexagon.h" +#include "llvm/Target/TargetFrameLowering.h" + +namespace llvm { + +class HexagonInstrInfo; +class HexagonRegisterInfo; + +class HexagonFrameLowering : public TargetFrameLowering { +public: + explicit HexagonFrameLowering() + : TargetFrameLowering(StackGrowsDown, 8, 0, 1, true) {} + + // All of the prolog/epilog functionality, including saving and restoring + // callee-saved registers is handled in emitPrologue. This is to have the + // logic for shrink-wrapping in one place. 
+ void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const + override; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const + override {} + bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const override { + return true; + } + bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const override { + return true; + } + + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const override; + void processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS = nullptr) const override; + void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, + RegScavenger *RS) const override; + + bool targetHandlesStackFrameRounding() const override { + return true; + } + int getFrameIndexReference(const MachineFunction &MF, int FI, + unsigned &FrameReg) const override; + bool hasFP(const MachineFunction &MF) const override; + + const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries) + const override { + static const SpillSlot Offsets[] = { + { Hexagon::R17, -4 }, { Hexagon::R16, -8 }, { Hexagon::D8, -8 }, + { Hexagon::R19, -12 }, { Hexagon::R18, -16 }, { Hexagon::D9, -16 }, + { Hexagon::R21, -20 }, { Hexagon::R20, -24 }, { Hexagon::D10, -24 }, + { Hexagon::R23, -28 }, { Hexagon::R22, -32 }, { Hexagon::D11, -32 }, + { Hexagon::R25, -36 }, { Hexagon::R24, -40 }, { Hexagon::D12, -40 }, + { Hexagon::R27, -44 }, { Hexagon::R26, -48 }, { Hexagon::D13, -48 } + }; + NumEntries = array_lengthof(Offsets); + return Offsets; + } + + bool assignCalleeSavedSpillSlots(MachineFunction &MF, + const TargetRegisterInfo *TRI, std::vector<CalleeSavedInfo> &CSI) + const override; + + bool needsAligna(const MachineFunction &MF) const; + const MachineInstr *getAlignaInstr(const MachineFunction &MF) const; + + void insertCFIInstructions(MachineFunction &MF) const; + +private: + typedef std::vector<CalleeSavedInfo> CSIVect; + + void expandAlloca(MachineInstr *AI, const HexagonInstrInfo &TII, + unsigned SP, unsigned CF) const; + void insertPrologueInBlock(MachineBasicBlock &MBB) const; + void insertEpilogueInBlock(MachineBasicBlock &MBB) const; + bool insertCSRSpillsInBlock(MachineBasicBlock &MBB, const CSIVect &CSI, + const HexagonRegisterInfo &HRI) const; + bool insertCSRRestoresInBlock(MachineBasicBlock &MBB, const CSIVect &CSI, + const HexagonRegisterInfo &HRI) const; + void insertCFIInstructionsAt(MachineBasicBlock &MBB, + MachineBasicBlock::iterator At) const; + + void adjustForCalleeSavedRegsSpillCall(MachineFunction &MF) const; + bool replacePredRegPseudoSpillCode(MachineFunction &MF) const; + bool replaceVecPredRegPseudoSpillCode(MachineFunction &MF) const; + + void findShrunkPrologEpilog(MachineFunction &MF, MachineBasicBlock *&PrologB, + MachineBasicBlock *&EpilogB) const; + + bool shouldInlineCSR(llvm::MachineFunction &MF, const CSIVect &CSI) const; + bool useSpillFunction(MachineFunction &MF, const CSIVect &CSI) const; + bool useRestoreFunction(MachineFunction &MF, const CSIVect &CSI) const; +}; + +} // End llvm namespace + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp new file mode 100644 index 0000000..f26e2ff --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp @@ -0,0 
+1,259 @@ +//===--- HexagonGenExtract.cpp --------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +static cl::opt<unsigned> ExtractCutoff("extract-cutoff", cl::init(~0U), + cl::Hidden, cl::desc("Cutoff for generating \"extract\"" + " instructions")); + +// This prevents generating extract instructions that have the offset of 0. +// One of the reasons for "extract" is to put a sequence of bits in a regis- +// ter, starting at offset 0 (so that these bits can then be used by an +// "insert"). If the bits are already at offset 0, it is better not to gene- +// rate "extract", since logical bit operations can be merged into compound +// instructions (as opposed to "extract"). +static cl::opt<bool> NoSR0("extract-nosr0", cl::init(true), cl::Hidden, + cl::desc("No extract instruction with offset 0")); + +static cl::opt<bool> NeedAnd("extract-needand", cl::init(true), cl::Hidden, + cl::desc("Require & in extract patterns")); + +namespace llvm { + void initializeHexagonGenExtractPass(PassRegistry&); + FunctionPass *createHexagonGenExtract(); +} + + +namespace { + class HexagonGenExtract : public FunctionPass { + public: + static char ID; + HexagonGenExtract() : FunctionPass(ID), ExtractCount(0) { + initializeHexagonGenExtractPass(*PassRegistry::getPassRegistry()); + } + virtual const char *getPassName() const override { + return "Hexagon generate \"extract\" instructions"; + } + virtual bool runOnFunction(Function &F) override; + virtual void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addPreserved<DominatorTreeWrapperPass>(); + AU.addPreserved<MachineFunctionAnalysis>(); + FunctionPass::getAnalysisUsage(AU); + } + private: + bool visitBlock(BasicBlock *B); + bool convert(Instruction *In); + + unsigned ExtractCount; + DominatorTree *DT; + }; + + char HexagonGenExtract::ID = 0; +} + +INITIALIZE_PASS_BEGIN(HexagonGenExtract, "hextract", "Hexagon generate " + "\"extract\" instructions", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_END(HexagonGenExtract, "hextract", "Hexagon generate " + "\"extract\" instructions", false, false) + + +bool HexagonGenExtract::convert(Instruction *In) { + using namespace PatternMatch; + Value *BF = 0; + ConstantInt *CSL = 0, *CSR = 0, *CM = 0; + BasicBlock *BB = In->getParent(); + LLVMContext &Ctx = BB->getContext(); + bool LogicalSR; + + // (and (shl (lshr x, #sr), #sl), #m) + LogicalSR = true; + bool Match = match(In, m_And(m_Shl(m_LShr(m_Value(BF), m_ConstantInt(CSR)), + m_ConstantInt(CSL)), + m_ConstantInt(CM))); + + if (!Match) { + // (and (shl (ashr x, #sr), #sl), #m) + LogicalSR = false; + Match = match(In, m_And(m_Shl(m_AShr(m_Value(BF), m_ConstantInt(CSR)), + m_ConstantInt(CSL)), + m_ConstantInt(CM))); + } + if 
(!Match) {
+    // (and (shl x, #sl), #m)
+    LogicalSR = true;
+    CSR = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
+    Match = match(In, m_And(m_Shl(m_Value(BF), m_ConstantInt(CSL)),
+                            m_ConstantInt(CM)));
+    if (Match && NoSR0)
+      return false;
+  }
+  if (!Match) {
+    // (and (lshr x, #sr), #m)
+    LogicalSR = true;
+    CSL = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
+    Match = match(In, m_And(m_LShr(m_Value(BF), m_ConstantInt(CSR)),
+                            m_ConstantInt(CM)));
+  }
+  if (!Match) {
+    // (and (ashr x, #sr), #m)
+    LogicalSR = false;
+    CSL = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
+    Match = match(In, m_And(m_AShr(m_Value(BF), m_ConstantInt(CSR)),
+                            m_ConstantInt(CM)));
+  }
+  if (!Match) {
+    CM = 0;
+    // (shl (lshr x, #sr), #sl)
+    LogicalSR = true;
+    Match = match(In, m_Shl(m_LShr(m_Value(BF), m_ConstantInt(CSR)),
+                            m_ConstantInt(CSL)));
+  }
+  if (!Match) {
+    CM = 0;
+    // (shl (ashr x, #sr), #sl)
+    LogicalSR = false;
+    Match = match(In, m_Shl(m_AShr(m_Value(BF), m_ConstantInt(CSR)),
+                            m_ConstantInt(CSL)));
+  }
+  if (!Match)
+    return false;
+
+  Type *Ty = BF->getType();
+  if (!Ty->isIntegerTy())
+    return false;
+  unsigned BW = Ty->getPrimitiveSizeInBits();
+  if (BW != 32 && BW != 64)
+    return false;
+
+  uint32_t SR = CSR->getZExtValue();
+  uint32_t SL = CSL->getZExtValue();
+
+  if (!CM) {
+    // If there was no and, and the shift left did not remove all potential
+    // sign bits created by the shift right, then extractu cannot reproduce
+    // this value.
+    if (!LogicalSR && (SR > SL))
+      return false;
+    APInt A = APInt(BW, ~0ULL).lshr(SR).shl(SL);
+    CM = ConstantInt::get(Ctx, A);
+  }
+
+  // CM is the shifted-left mask. Shift it back right to remove the zero
+  // bits on least-significant positions.
+  APInt M = CM->getValue().lshr(SL);
+  uint32_t T = M.countTrailingOnes();
+
+  // During the shifts some of the bits will be lost. Calculate how many
+  // of the original value will remain after shift right and then left.
+  uint32_t U = BW - std::max(SL, SR);
+  // The width of the extracted field is the minimum of the original bits
+  // that remain after the shifts and the number of contiguous 1s in the mask.
+  uint32_t W = std::min(U, T);
+  if (W == 0)
+    return false;
+
+  // Check if the extracted bits are contained within the mask that it is
+  // and-ed with. The extract operation will copy these bits, and so the
+  // mask cannot have any holes in it that would clear any of the bits of
+  // the extracted field.
+  if (!LogicalSR) {
+    // If the shift right was arithmetic, it could have included some 1 bits.
+    // It is still ok to generate extract, but only if the mask eliminates
+    // those bits (i.e. M does not have any bits set beyond U).
+    APInt C = APInt::getHighBitsSet(BW, BW-U);
+    if (M.intersects(C) || !APIntOps::isMask(W, M))
+      return false;
+  } else {
+    // Check if M starts with a contiguous sequence of W times 1 bits. Get
+    // the low U bits of M (which eliminates the 0 bits shifted in on the
+    // left), and check if the result is APInt's "mask":
+    if (!APIntOps::isMask(W, M.getLoBits(U)))
+      return false;
+  }
+
+  IRBuilder<> IRB(In);
+  Intrinsic::ID IntId = (BW == 32) ?
Intrinsic::hexagon_S2_extractu + : Intrinsic::hexagon_S2_extractup; + Module *Mod = BB->getParent()->getParent(); + Value *ExtF = Intrinsic::getDeclaration(Mod, IntId); + Value *NewIn = IRB.CreateCall(ExtF, {BF, IRB.getInt32(W), IRB.getInt32(SR)}); + if (SL != 0) + NewIn = IRB.CreateShl(NewIn, SL, CSL->getName()); + In->replaceAllUsesWith(NewIn); + return true; +} + + +bool HexagonGenExtract::visitBlock(BasicBlock *B) { + // Depth-first, bottom-up traversal. + DomTreeNode *DTN = DT->getNode(B); + typedef GraphTraits<DomTreeNode*> GTN; + typedef GTN::ChildIteratorType Iter; + for (Iter I = GTN::child_begin(DTN), E = GTN::child_end(DTN); I != E; ++I) + visitBlock((*I)->getBlock()); + + // Allow limiting the number of generated extracts for debugging purposes. + bool HasCutoff = ExtractCutoff.getPosition(); + unsigned Cutoff = ExtractCutoff; + + bool Changed = false; + BasicBlock::iterator I = std::prev(B->end()), NextI, Begin = B->begin(); + while (true) { + if (HasCutoff && (ExtractCount >= Cutoff)) + return Changed; + bool Last = (I == Begin); + if (!Last) + NextI = std::prev(I); + Instruction *In = &*I; + bool Done = convert(In); + if (HasCutoff && Done) + ExtractCount++; + Changed |= Done; + if (Last) + break; + I = NextI; + } + return Changed; +} + + +bool HexagonGenExtract::runOnFunction(Function &F) { + DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + bool Changed; + + // Traverse the function bottom-up, to see super-expressions before their + // sub-expressions. + BasicBlock *Entry = GraphTraits<Function*>::getEntryNode(&F); + Changed = visitBlock(Entry); + + return Changed; +} + + +FunctionPass *llvm::createHexagonGenExtract() { + return new HexagonGenExtract(); +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp new file mode 100644 index 0000000..64a2b6c --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp @@ -0,0 +1,1599 @@ +//===--- HexagonGenInsert.cpp ---------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "hexinsert" + +#include "llvm/Pass.h" +#include "llvm/PassRegistry.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Timer.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" + +#include "Hexagon.h" +#include "HexagonRegisterInfo.h" +#include "HexagonTargetMachine.h" +#include "HexagonBitTracker.h" + +#include <map> +#include <vector> + +using namespace llvm; + +static cl::opt<unsigned> VRegIndexCutoff("insert-vreg-cutoff", cl::init(~0U), + cl::Hidden, cl::ZeroOrMore, cl::desc("Vreg# cutoff for insert generation.")); +// The distance cutoff is selected based on the precheckin-perf results: +// cutoffs 20, 25, 35, and 40 are worse than 30. 
+static cl::opt<unsigned> VRegDistCutoff("insert-dist-cutoff", cl::init(30U), + cl::Hidden, cl::ZeroOrMore, cl::desc("Vreg distance cutoff for insert " + "generation.")); + +static cl::opt<bool> OptTiming("insert-timing", cl::init(false), cl::Hidden, + cl::ZeroOrMore, cl::desc("Enable timing of insert generation")); +static cl::opt<bool> OptTimingDetail("insert-timing-detail", cl::init(false), + cl::Hidden, cl::ZeroOrMore, cl::desc("Enable detailed timing of insert " + "generation")); + +static cl::opt<bool> OptSelectAll0("insert-all0", cl::init(false), cl::Hidden, + cl::ZeroOrMore); +static cl::opt<bool> OptSelectHas0("insert-has0", cl::init(false), cl::Hidden, + cl::ZeroOrMore); +// Whether to construct constant values via "insert". Could eliminate constant +// extenders, but often not practical. +static cl::opt<bool> OptConst("insert-const", cl::init(false), cl::Hidden, + cl::ZeroOrMore); + +namespace { + // The preprocessor gets confused when the DEBUG macro is passed larger + // chunks of code. Use this function to detect debugging. + inline bool isDebug() { +#ifndef NDEBUG + return ::llvm::DebugFlag && ::llvm::isCurrentDebugType(DEBUG_TYPE); +#else + return false; +#endif + } +} + + +namespace { + // Set of virtual registers, based on BitVector. + struct RegisterSet : private BitVector { + RegisterSet() = default; + explicit RegisterSet(unsigned s, bool t = false) : BitVector(s, t) {} + + using BitVector::clear; + + unsigned find_first() const { + int First = BitVector::find_first(); + if (First < 0) + return 0; + return x2v(First); + } + + unsigned find_next(unsigned Prev) const { + int Next = BitVector::find_next(v2x(Prev)); + if (Next < 0) + return 0; + return x2v(Next); + } + + RegisterSet &insert(unsigned R) { + unsigned Idx = v2x(R); + ensure(Idx); + return static_cast<RegisterSet&>(BitVector::set(Idx)); + } + RegisterSet &remove(unsigned R) { + unsigned Idx = v2x(R); + if (Idx >= size()) + return *this; + return static_cast<RegisterSet&>(BitVector::reset(Idx)); + } + + RegisterSet &insert(const RegisterSet &Rs) { + return static_cast<RegisterSet&>(BitVector::operator|=(Rs)); + } + RegisterSet &remove(const RegisterSet &Rs) { + return static_cast<RegisterSet&>(BitVector::reset(Rs)); + } + + reference operator[](unsigned R) { + unsigned Idx = v2x(R); + ensure(Idx); + return BitVector::operator[](Idx); + } + bool operator[](unsigned R) const { + unsigned Idx = v2x(R); + assert(Idx < size()); + return BitVector::operator[](Idx); + } + bool has(unsigned R) const { + unsigned Idx = v2x(R); + if (Idx >= size()) + return false; + return BitVector::test(Idx); + } + + bool empty() const { + return !BitVector::any(); + } + bool includes(const RegisterSet &Rs) const { + // A.BitVector::test(B) <=> A-B != {} + return !Rs.BitVector::test(*this); + } + bool intersects(const RegisterSet &Rs) const { + return BitVector::anyCommon(Rs); + } + + private: + void ensure(unsigned Idx) { + if (size() <= Idx) + resize(std::max(Idx+1, 32U)); + } + static inline unsigned v2x(unsigned v) { + return TargetRegisterInfo::virtReg2Index(v); + } + static inline unsigned x2v(unsigned x) { + return TargetRegisterInfo::index2VirtReg(x); + } + }; + + + struct PrintRegSet { + PrintRegSet(const RegisterSet &S, const TargetRegisterInfo *RI) + : RS(S), TRI(RI) {} + friend raw_ostream &operator<< (raw_ostream &OS, + const PrintRegSet &P); + private: + const RegisterSet &RS; + const TargetRegisterInfo *TRI; + }; + + raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) { + OS << '{'; + for (unsigned R = 
P.RS.find_first(); R; R = P.RS.find_next(R)) + OS << ' ' << PrintReg(R, P.TRI); + OS << " }"; + return OS; + } +} + + +namespace { + // A convenience class to associate unsigned numbers (such as virtual + // registers) with unsigned numbers. + struct UnsignedMap : public DenseMap<unsigned,unsigned> { + UnsignedMap() : BaseType() {} + private: + typedef DenseMap<unsigned,unsigned> BaseType; + }; + + // A utility to establish an ordering between virtual registers: + // VRegA < VRegB <=> RegisterOrdering[VRegA] < RegisterOrdering[VRegB] + // This is meant as a cache for the ordering of virtual registers defined + // by a potentially expensive comparison function, or obtained by a proce- + // dure that should not be repeated each time two registers are compared. + struct RegisterOrdering : public UnsignedMap { + RegisterOrdering() : UnsignedMap() {} + unsigned operator[](unsigned VR) const { + const_iterator F = find(VR); + assert(F != end()); + return F->second; + } + // Add operator(), so that objects of this class can be used as + // comparators in std::sort et al. + bool operator() (unsigned VR1, unsigned VR2) const { + return operator[](VR1) < operator[](VR2); + } + }; +} + + +namespace { + // Ordering of bit values. This class does not have operator[], but + // is supplies a comparison operator() for use in std:: algorithms. + // The order is as follows: + // - 0 < 1 < ref + // - ref1 < ref2, if ord(ref1.Reg) < ord(ref2.Reg), + // or ord(ref1.Reg) == ord(ref2.Reg), and ref1.Pos < ref2.Pos. + struct BitValueOrdering { + BitValueOrdering(const RegisterOrdering &RB) : BaseOrd(RB) {} + bool operator() (const BitTracker::BitValue &V1, + const BitTracker::BitValue &V2) const; + const RegisterOrdering &BaseOrd; + }; +} + + +bool BitValueOrdering::operator() (const BitTracker::BitValue &V1, + const BitTracker::BitValue &V2) const { + if (V1 == V2) + return false; + // V1==0 => true, V2==0 => false + if (V1.is(0) || V2.is(0)) + return V1.is(0); + // Neither of V1,V2 is 0, and V1!=V2. + // V2==1 => false, V1==1 => true + if (V2.is(1) || V1.is(1)) + return !V2.is(1); + // Both V1,V2 are refs. + unsigned Ind1 = BaseOrd[V1.RefI.Reg], Ind2 = BaseOrd[V2.RefI.Reg]; + if (Ind1 != Ind2) + return Ind1 < Ind2; + // If V1.Pos==V2.Pos + assert(V1.RefI.Pos != V2.RefI.Pos && "Bit values should be different"); + return V1.RefI.Pos < V2.RefI.Pos; +} + + +namespace { + // Cache for the BitTracker's cell map. Map lookup has a logarithmic + // complexity, this class will memoize the lookup results to reduce + // the access time for repeated lookups of the same cell. + struct CellMapShadow { + CellMapShadow(const BitTracker &T) : BT(T) {} + const BitTracker::RegisterCell &lookup(unsigned VR) { + unsigned RInd = TargetRegisterInfo::virtReg2Index(VR); + // Grow the vector to at least 32 elements. + if (RInd >= CVect.size()) + CVect.resize(std::max(RInd+16, 32U), 0); + const BitTracker::RegisterCell *CP = CVect[RInd]; + if (CP == 0) + CP = CVect[RInd] = &BT.lookup(VR); + return *CP; + } + + const BitTracker &BT; + + private: + typedef std::vector<const BitTracker::RegisterCell*> CellVectType; + CellVectType CVect; + }; +} + + +namespace { + // Comparator class for lexicographic ordering of virtual registers + // according to the corresponding BitTracker::RegisterCell objects. 
+ struct RegisterCellLexCompare { + RegisterCellLexCompare(const BitValueOrdering &BO, CellMapShadow &M) + : BitOrd(BO), CM(M) {} + bool operator() (unsigned VR1, unsigned VR2) const; + private: + const BitValueOrdering &BitOrd; + CellMapShadow &CM; + }; + + // Comparator class for lexicographic ordering of virtual registers + // according to the specified bits of the corresponding BitTracker:: + // RegisterCell objects. + // Specifically, this class will be used to compare bit B of a register + // cell for a selected virtual register R with bit N of any register + // other than R. + struct RegisterCellBitCompareSel { + RegisterCellBitCompareSel(unsigned R, unsigned B, unsigned N, + const BitValueOrdering &BO, CellMapShadow &M) + : SelR(R), SelB(B), BitN(N), BitOrd(BO), CM(M) {} + bool operator() (unsigned VR1, unsigned VR2) const; + private: + const unsigned SelR, SelB; + const unsigned BitN; + const BitValueOrdering &BitOrd; + CellMapShadow &CM; + }; +} + + +bool RegisterCellLexCompare::operator() (unsigned VR1, unsigned VR2) const { + // Ordering of registers, made up from two given orderings: + // - the ordering of the register numbers, and + // - the ordering of register cells. + // Def. R1 < R2 if: + // - cell(R1) < cell(R2), or + // - cell(R1) == cell(R2), and index(R1) < index(R2). + // + // For register cells, the ordering is lexicographic, with index 0 being + // the most significant. + if (VR1 == VR2) + return false; + + const BitTracker::RegisterCell &RC1 = CM.lookup(VR1), &RC2 = CM.lookup(VR2); + uint16_t W1 = RC1.width(), W2 = RC2.width(); + for (uint16_t i = 0, w = std::min(W1, W2); i < w; ++i) { + const BitTracker::BitValue &V1 = RC1[i], &V2 = RC2[i]; + if (V1 != V2) + return BitOrd(V1, V2); + } + // Cells are equal up until the common length. + if (W1 != W2) + return W1 < W2; + + return BitOrd.BaseOrd[VR1] < BitOrd.BaseOrd[VR2]; +} + + +bool RegisterCellBitCompareSel::operator() (unsigned VR1, unsigned VR2) const { + if (VR1 == VR2) + return false; + const BitTracker::RegisterCell &RC1 = CM.lookup(VR1); + const BitTracker::RegisterCell &RC2 = CM.lookup(VR2); + uint16_t W1 = RC1.width(), W2 = RC2.width(); + uint16_t Bit1 = (VR1 == SelR) ? SelB : BitN; + uint16_t Bit2 = (VR2 == SelR) ? SelB : BitN; + // If Bit1 exceeds the width of VR1, then: + // - return false, if at the same time Bit2 exceeds VR2, or + // - return true, otherwise. + // (I.e. "a bit value that does not exist is less than any bit value + // that does exist".) + if (W1 <= Bit1) + return Bit2 < W2; + // If Bit1 is within VR1, but Bit2 is not within VR2, return false. + if (W2 <= Bit2) + return false; + + const BitTracker::BitValue &V1 = RC1[Bit1], V2 = RC2[Bit2]; + if (V1 != V2) + return BitOrd(V1, V2); + return false; +} + + +namespace { + class OrderedRegisterList { + typedef std::vector<unsigned> ListType; + public: + OrderedRegisterList(const RegisterOrdering &RO) : Ord(RO) {} + void insert(unsigned VR); + void remove(unsigned VR); + unsigned operator[](unsigned Idx) const { + assert(Idx < Seq.size()); + return Seq[Idx]; + } + unsigned size() const { + return Seq.size(); + } + + typedef ListType::iterator iterator; + typedef ListType::const_iterator const_iterator; + iterator begin() { return Seq.begin(); } + iterator end() { return Seq.end(); } + const_iterator begin() const { return Seq.begin(); } + const_iterator end() const { return Seq.end(); } + + // Convenience function to convert an iterator to the corresponding index. 
+ unsigned idx(iterator It) const { return It-begin(); } + private: + ListType Seq; + const RegisterOrdering &Ord; + }; + + + struct PrintORL { + PrintORL(const OrderedRegisterList &L, const TargetRegisterInfo *RI) + : RL(L), TRI(RI) {} + friend raw_ostream &operator<< (raw_ostream &OS, const PrintORL &P); + private: + const OrderedRegisterList &RL; + const TargetRegisterInfo *TRI; + }; + + raw_ostream &operator<< (raw_ostream &OS, const PrintORL &P) { + OS << '('; + OrderedRegisterList::const_iterator B = P.RL.begin(), E = P.RL.end(); + for (OrderedRegisterList::const_iterator I = B; I != E; ++I) { + if (I != B) + OS << ", "; + OS << PrintReg(*I, P.TRI); + } + OS << ')'; + return OS; + } +} + + +void OrderedRegisterList::insert(unsigned VR) { + iterator L = std::lower_bound(Seq.begin(), Seq.end(), VR, Ord); + if (L == Seq.end()) + Seq.push_back(VR); + else + Seq.insert(L, VR); +} + + +void OrderedRegisterList::remove(unsigned VR) { + iterator L = std::lower_bound(Seq.begin(), Seq.end(), VR, Ord); + assert(L != Seq.end()); + Seq.erase(L); +} + + +namespace { + // A record of the insert form. The fields correspond to the operands + // of the "insert" instruction: + // ... = insert(SrcR, InsR, #Wdh, #Off) + struct IFRecord { + IFRecord(unsigned SR = 0, unsigned IR = 0, uint16_t W = 0, uint16_t O = 0) + : SrcR(SR), InsR(IR), Wdh(W), Off(O) {} + unsigned SrcR, InsR; + uint16_t Wdh, Off; + }; + + struct PrintIFR { + PrintIFR(const IFRecord &R, const TargetRegisterInfo *RI) + : IFR(R), TRI(RI) {} + private: + const IFRecord &IFR; + const TargetRegisterInfo *TRI; + friend raw_ostream &operator<< (raw_ostream &OS, const PrintIFR &P); + }; + + raw_ostream &operator<< (raw_ostream &OS, const PrintIFR &P) { + unsigned SrcR = P.IFR.SrcR, InsR = P.IFR.InsR; + OS << '(' << PrintReg(SrcR, P.TRI) << ',' << PrintReg(InsR, P.TRI) + << ",#" << P.IFR.Wdh << ",#" << P.IFR.Off << ')'; + return OS; + } + + typedef std::pair<IFRecord,RegisterSet> IFRecordWithRegSet; +} + + +namespace llvm { + void initializeHexagonGenInsertPass(PassRegistry&); + FunctionPass *createHexagonGenInsert(); +} + + +namespace { + class HexagonGenInsert : public MachineFunctionPass { + public: + static char ID; + HexagonGenInsert() : MachineFunctionPass(ID), HII(0), HRI(0) { + initializeHexagonGenInsertPass(*PassRegistry::getPassRegistry()); + } + virtual const char *getPassName() const { + return "Hexagon generate \"insert\" instructions"; + } + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + virtual bool runOnMachineFunction(MachineFunction &MF); + + private: + typedef DenseMap<std::pair<unsigned,unsigned>,unsigned> PairMapType; + + void buildOrderingMF(RegisterOrdering &RO) const; + void buildOrderingBT(RegisterOrdering &RB, RegisterOrdering &RO) const; + bool isIntClass(const TargetRegisterClass *RC) const; + bool isConstant(unsigned VR) const; + bool isSmallConstant(unsigned VR) const; + bool isValidInsertForm(unsigned DstR, unsigned SrcR, unsigned InsR, + uint16_t L, uint16_t S) const; + bool findSelfReference(unsigned VR) const; + bool findNonSelfReference(unsigned VR) const; + void getInstrDefs(const MachineInstr *MI, RegisterSet &Defs) const; + void getInstrUses(const MachineInstr *MI, RegisterSet &Uses) const; + unsigned distance(const MachineBasicBlock *FromB, + const MachineBasicBlock *ToB, const UnsignedMap &RPO, + PairMapType &M) const; + unsigned 
distance(MachineBasicBlock::const_iterator FromI, + MachineBasicBlock::const_iterator ToI, const UnsignedMap &RPO, + PairMapType &M) const; + bool findRecordInsertForms(unsigned VR, OrderedRegisterList &AVs); + void collectInBlock(MachineBasicBlock *B, OrderedRegisterList &AVs); + void findRemovableRegisters(unsigned VR, IFRecord IF, + RegisterSet &RMs) const; + void computeRemovableRegisters(); + + void pruneEmptyLists(); + void pruneCoveredSets(unsigned VR); + void pruneUsesTooFar(unsigned VR, const UnsignedMap &RPO, PairMapType &M); + void pruneRegCopies(unsigned VR); + void pruneCandidates(); + void selectCandidates(); + bool generateInserts(); + + bool removeDeadCode(MachineDomTreeNode *N); + + // IFRecord coupled with a set of potentially removable registers: + typedef std::vector<IFRecordWithRegSet> IFListType; + typedef DenseMap<unsigned,IFListType> IFMapType; // vreg -> IFListType + + void dump_map() const; + + const HexagonInstrInfo *HII; + const HexagonRegisterInfo *HRI; + + MachineFunction *MFN; + MachineRegisterInfo *MRI; + MachineDominatorTree *MDT; + CellMapShadow *CMS; + + RegisterOrdering BaseOrd; + RegisterOrdering CellOrd; + IFMapType IFMap; + }; + + char HexagonGenInsert::ID = 0; +} + + +void HexagonGenInsert::dump_map() const { + typedef IFMapType::const_iterator iterator; + for (iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { + dbgs() << " " << PrintReg(I->first, HRI) << ":\n"; + const IFListType &LL = I->second; + for (unsigned i = 0, n = LL.size(); i < n; ++i) + dbgs() << " " << PrintIFR(LL[i].first, HRI) << ", " + << PrintRegSet(LL[i].second, HRI) << '\n'; + } +} + + +void HexagonGenInsert::buildOrderingMF(RegisterOrdering &RO) const { + unsigned Index = 0; + typedef MachineFunction::const_iterator mf_iterator; + for (mf_iterator A = MFN->begin(), Z = MFN->end(); A != Z; ++A) { + const MachineBasicBlock &B = *A; + if (!CMS->BT.reached(&B)) + continue; + typedef MachineBasicBlock::const_iterator mb_iterator; + for (mb_iterator I = B.begin(), E = B.end(); I != E; ++I) { + const MachineInstr *MI = &*I; + for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isDef()) { + unsigned R = MO.getReg(); + assert(MO.getSubReg() == 0 && "Unexpected subregister in definition"); + if (TargetRegisterInfo::isVirtualRegister(R)) + RO.insert(std::make_pair(R, Index++)); + } + } + } + } + // Since some virtual registers may have had their def and uses eliminated, + // they are no longer referenced in the code, and so they will not appear + // in the map. +} + + +void HexagonGenInsert::buildOrderingBT(RegisterOrdering &RB, + RegisterOrdering &RO) const { + // Create a vector of all virtual registers (collect them from the base + // ordering RB), and then sort it using the RegisterCell comparator. + BitValueOrdering BVO(RB); + RegisterCellLexCompare LexCmp(BVO, *CMS); + typedef std::vector<unsigned> SortableVectorType; + SortableVectorType VRs; + for (RegisterOrdering::iterator I = RB.begin(), E = RB.end(); I != E; ++I) + VRs.push_back(I->first); + std::sort(VRs.begin(), VRs.end(), LexCmp); + // Transfer the results to the outgoing register ordering. 
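+ // The net effect is that RO maps each virtual register to its rank in
+ // the lexicographic ordering of the register cells.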
+ for (unsigned i = 0, n = VRs.size(); i < n; ++i)
+ RO.insert(std::make_pair(VRs[i], i));
+}
+
+
+inline bool HexagonGenInsert::isIntClass(const TargetRegisterClass *RC) const {
+ return RC == &Hexagon::IntRegsRegClass || RC == &Hexagon::DoubleRegsRegClass;
+}
+
+
+bool HexagonGenInsert::isConstant(unsigned VR) const {
+ const BitTracker::RegisterCell &RC = CMS->lookup(VR);
+ uint16_t W = RC.width();
+ for (uint16_t i = 0; i < W; ++i) {
+ const BitTracker::BitValue &BV = RC[i];
+ if (BV.is(0) || BV.is(1))
+ continue;
+ return false;
+ }
+ return true;
+}
+
+
+bool HexagonGenInsert::isSmallConstant(unsigned VR) const {
+ const BitTracker::RegisterCell &RC = CMS->lookup(VR);
+ uint16_t W = RC.width();
+ if (W > 64)
+ return false;
+ uint64_t V = 0, B = 1;
+ for (uint16_t i = 0; i < W; ++i) {
+ const BitTracker::BitValue &BV = RC[i];
+ if (BV.is(1))
+ V |= B;
+ else if (!BV.is(0))
+ return false;
+ B <<= 1;
+ }
+
+ // For 32-bit registers, consider: Rd = #s16.
+ if (W == 32)
+ return isInt<16>(V);
+
+ // For 64-bit registers, it's Rdd = #s8 or Rdd = combine(#s8,#s8).
+ return isInt<8>(Lo_32(V)) && isInt<8>(Hi_32(V));
+}
+
+
+bool HexagonGenInsert::isValidInsertForm(unsigned DstR, unsigned SrcR,
+ unsigned InsR, uint16_t L, uint16_t S) const {
+ const TargetRegisterClass *DstRC = MRI->getRegClass(DstR);
+ const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcR);
+ const TargetRegisterClass *InsRC = MRI->getRegClass(InsR);
+ // Only integer (32-/64-bit) register classes are allowed.
+ if (!isIntClass(DstRC) || !isIntClass(SrcRC) || !isIntClass(InsRC))
+ return false;
+ // The "source" register must be of the same class as DstR.
+ if (DstRC != SrcRC)
+ return false;
+ if (DstRC == InsRC)
+ return true;
+ // A 64-bit register can only be generated from other 64-bit registers.
+ if (DstRC == &Hexagon::DoubleRegsRegClass)
+ return false;
+ // Otherwise, the L and S cannot span a 32-bit word boundary.
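+ // (That is, the inserted bit range [S, S+L) must lie entirely within a
+ // single 32-bit half, which is what the check below enforces.)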
+ if (S < 32 && S+L > 32) + return false; + return true; +} + + +bool HexagonGenInsert::findSelfReference(unsigned VR) const { + const BitTracker::RegisterCell &RC = CMS->lookup(VR); + for (uint16_t i = 0, w = RC.width(); i < w; ++i) { + const BitTracker::BitValue &V = RC[i]; + if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg == VR) + return true; + } + return false; +} + + +bool HexagonGenInsert::findNonSelfReference(unsigned VR) const { + BitTracker::RegisterCell RC = CMS->lookup(VR); + for (uint16_t i = 0, w = RC.width(); i < w; ++i) { + const BitTracker::BitValue &V = RC[i]; + if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg != VR) + return true; + } + return false; +} + + +void HexagonGenInsert::getInstrDefs(const MachineInstr *MI, + RegisterSet &Defs) const { + for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned R = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(R)) + continue; + Defs.insert(R); + } +} + + +void HexagonGenInsert::getInstrUses(const MachineInstr *MI, + RegisterSet &Uses) const { + for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isUse()) + continue; + unsigned R = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(R)) + continue; + Uses.insert(R); + } +} + + +unsigned HexagonGenInsert::distance(const MachineBasicBlock *FromB, + const MachineBasicBlock *ToB, const UnsignedMap &RPO, + PairMapType &M) const { + // Forward distance from the end of a block to the beginning of it does + // not make sense. This function should not be called with FromB == ToB. + assert(FromB != ToB); + + unsigned FromN = FromB->getNumber(), ToN = ToB->getNumber(); + // If we have already computed it, return the cached result. + PairMapType::iterator F = M.find(std::make_pair(FromN, ToN)); + if (F != M.end()) + return F->second; + unsigned ToRPO = RPO.lookup(ToN); + + unsigned MaxD = 0; + typedef MachineBasicBlock::const_pred_iterator pred_iterator; + for (pred_iterator I = ToB->pred_begin(), E = ToB->pred_end(); I != E; ++I) { + const MachineBasicBlock *PB = *I; + // Skip back edges. Also, if FromB is a predecessor of ToB, the distance + // along that path will be 0, and we don't need to do any calculations + // on it. + if (PB == FromB || RPO.lookup(PB->getNumber()) >= ToRPO) + continue; + unsigned D = PB->size() + distance(FromB, PB, RPO, M); + if (D > MaxD) + MaxD = D; + } + + // Memoize the result for later lookup. 
+ M.insert(std::make_pair(std::make_pair(FromN, ToN), MaxD)); + return MaxD; +} + + +unsigned HexagonGenInsert::distance(MachineBasicBlock::const_iterator FromI, + MachineBasicBlock::const_iterator ToI, const UnsignedMap &RPO, + PairMapType &M) const { + const MachineBasicBlock *FB = FromI->getParent(), *TB = ToI->getParent(); + if (FB == TB) + return std::distance(FromI, ToI); + unsigned D1 = std::distance(TB->begin(), ToI); + unsigned D2 = distance(FB, TB, RPO, M); + unsigned D3 = std::distance(FromI, FB->end()); + return D1+D2+D3; +} + + +bool HexagonGenInsert::findRecordInsertForms(unsigned VR, + OrderedRegisterList &AVs) { + if (isDebug()) { + dbgs() << LLVM_FUNCTION_NAME << ": " << PrintReg(VR, HRI) + << " AVs: " << PrintORL(AVs, HRI) << "\n"; + } + if (AVs.size() == 0) + return false; + + typedef OrderedRegisterList::iterator iterator; + BitValueOrdering BVO(BaseOrd); + const BitTracker::RegisterCell &RC = CMS->lookup(VR); + uint16_t W = RC.width(); + + typedef std::pair<unsigned,uint16_t> RSRecord; // (reg,shift) + typedef std::vector<RSRecord> RSListType; + // Have a map, with key being the matching prefix length, and the value + // being the list of pairs (R,S), where R's prefix matches VR at S. + // (DenseMap<uint16_t,RSListType> fails to instantiate.) + typedef DenseMap<unsigned,RSListType> LRSMapType; + LRSMapType LM; + + // Conceptually, rotate the cell RC right (i.e. towards the LSB) by S, + // and find matching prefixes from AVs with the rotated RC. Such a prefix + // would match a string of bits (of length L) in RC starting at S. + for (uint16_t S = 0; S < W; ++S) { + iterator B = AVs.begin(), E = AVs.end(); + // The registers in AVs are ordered according to the lexical order of + // the corresponding register cells. This means that the range of regis- + // ters in AVs that match a prefix of length L+1 will be contained in + // the range that matches a prefix of length L. This means that we can + // keep narrowing the search space as the prefix length goes up. This + // helps reduce the overall complexity of the search. + uint16_t L; + for (L = 0; L < W-S; ++L) { + // Compare against VR's bits starting at S, which emulates rotation + // of VR by S. + RegisterCellBitCompareSel RCB(VR, S+L, L, BVO, *CMS); + iterator NewB = std::lower_bound(B, E, VR, RCB); + iterator NewE = std::upper_bound(NewB, E, VR, RCB); + // For the registers that are eliminated from the next range, L is + // the longest prefix matching VR at position S (their prefixes + // differ from VR at S+L). If L>0, record this information for later + // use. + if (L > 0) { + for (iterator I = B; I != NewB; ++I) + LM[L].push_back(std::make_pair(*I, S)); + for (iterator I = NewE; I != E; ++I) + LM[L].push_back(std::make_pair(*I, S)); + } + B = NewB, E = NewE; + if (B == E) + break; + } + // Record the final register range. If this range is non-empty, then + // L=W-S. + assert(B == E || L == W-S); + if (B != E) { + for (iterator I = B; I != E; ++I) + LM[L].push_back(std::make_pair(*I, S)); + // If B!=E, then we found a range of registers whose prefixes cover the + // rest of VR from position S. There is no need to further advance S. 
+ break; + } + } + + if (isDebug()) { + dbgs() << "Prefixes matching register " << PrintReg(VR, HRI) << "\n"; + for (LRSMapType::iterator I = LM.begin(), E = LM.end(); I != E; ++I) { + dbgs() << " L=" << I->first << ':'; + const RSListType &LL = I->second; + for (unsigned i = 0, n = LL.size(); i < n; ++i) + dbgs() << " (" << PrintReg(LL[i].first, HRI) << ",@" + << LL[i].second << ')'; + dbgs() << '\n'; + } + } + + + bool Recorded = false; + + for (iterator I = AVs.begin(), E = AVs.end(); I != E; ++I) { + unsigned SrcR = *I; + int FDi = -1, LDi = -1; // First/last different bit. + const BitTracker::RegisterCell &AC = CMS->lookup(SrcR); + uint16_t AW = AC.width(); + for (uint16_t i = 0, w = std::min(W, AW); i < w; ++i) { + if (RC[i] == AC[i]) + continue; + if (FDi == -1) + FDi = i; + LDi = i; + } + if (FDi == -1) + continue; // TODO (future): Record identical registers. + // Look for a register whose prefix could patch the range [FD..LD] + // where VR and SrcR differ. + uint16_t FD = FDi, LD = LDi; // Switch to unsigned type. + uint16_t MinL = LD-FD+1; + for (uint16_t L = MinL; L < W; ++L) { + LRSMapType::iterator F = LM.find(L); + if (F == LM.end()) + continue; + RSListType &LL = F->second; + for (unsigned i = 0, n = LL.size(); i < n; ++i) { + uint16_t S = LL[i].second; + // MinL is the minimum length of the prefix. Any length above MinL + // allows some flexibility as to where the prefix can start: + // given the extra length EL=L-MinL, the prefix must start between + // max(0,FD-EL) and FD. + if (S > FD) // Starts too late. + continue; + uint16_t EL = L-MinL; + uint16_t LowS = (EL < FD) ? FD-EL : 0; + if (S < LowS) // Starts too early. + continue; + unsigned InsR = LL[i].first; + if (!isValidInsertForm(VR, SrcR, InsR, L, S)) + continue; + if (isDebug()) { + dbgs() << PrintReg(VR, HRI) << " = insert(" << PrintReg(SrcR, HRI) + << ',' << PrintReg(InsR, HRI) << ",#" << L << ",#" + << S << ")\n"; + } + IFRecordWithRegSet RR(IFRecord(SrcR, InsR, L, S), RegisterSet()); + IFMap[VR].push_back(RR); + Recorded = true; + } + } + } + + return Recorded; +} + + +void HexagonGenInsert::collectInBlock(MachineBasicBlock *B, + OrderedRegisterList &AVs) { + if (isDebug()) + dbgs() << "visiting block BB#" << B->getNumber() << "\n"; + + // First, check if this block is reachable at all. If not, the bit tracker + // will not have any information about registers in it. + if (!CMS->BT.reached(B)) + return; + + bool DoConst = OptConst; + // Keep a separate set of registers defined in this block, so that we + // can remove them from the list of available registers once all DT + // successors have been processed. + RegisterSet BlockDefs, InsDefs; + for (MachineBasicBlock::iterator I = B->begin(), E = B->end(); I != E; ++I) { + MachineInstr *MI = &*I; + InsDefs.clear(); + getInstrDefs(MI, InsDefs); + // Leave those alone. They are more transparent than "insert". + bool Skip = MI->isCopy() || MI->isRegSequence(); + + if (!Skip) { + // Visit all defined registers, and attempt to find the corresponding + // "insert" representations. + for (unsigned VR = InsDefs.find_first(); VR; VR = InsDefs.find_next(VR)) { + // Do not collect registers that are known to be compile-time cons- + // tants, unless requested. + if (!DoConst && isConstant(VR)) + continue; + // If VR's cell contains a reference to VR, then VR cannot be defined + // via "insert". If VR is a constant that can be generated in a single + // instruction (without constant extenders), generating it via insert + // makes no sense. 
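+ // (A "small" constant is one that fits in #s16 for a 32-bit register,
+ // or in two #s8 halves for a 64-bit register; see isSmallConstant.)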
+ if (findSelfReference(VR) || isSmallConstant(VR)) + continue; + + findRecordInsertForms(VR, AVs); + } + } + + // Insert the defined registers into the list of available registers + // after they have been processed. + for (unsigned VR = InsDefs.find_first(); VR; VR = InsDefs.find_next(VR)) + AVs.insert(VR); + BlockDefs.insert(InsDefs); + } + + MachineDomTreeNode *N = MDT->getNode(B); + typedef GraphTraits<MachineDomTreeNode*> GTN; + typedef GTN::ChildIteratorType ChildIter; + for (ChildIter I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I) { + MachineBasicBlock *SB = (*I)->getBlock(); + collectInBlock(SB, AVs); + } + + for (unsigned VR = BlockDefs.find_first(); VR; VR = BlockDefs.find_next(VR)) + AVs.remove(VR); +} + + +void HexagonGenInsert::findRemovableRegisters(unsigned VR, IFRecord IF, + RegisterSet &RMs) const { + // For a given register VR and a insert form, find the registers that are + // used by the current definition of VR, and which would no longer be + // needed for it after the definition of VR is replaced with the insert + // form. These are the registers that could potentially become dead. + RegisterSet Regs[2]; + + unsigned S = 0; // Register set selector. + Regs[S].insert(VR); + + while (!Regs[S].empty()) { + // Breadth-first search. + unsigned OtherS = 1-S; + Regs[OtherS].clear(); + for (unsigned R = Regs[S].find_first(); R; R = Regs[S].find_next(R)) { + Regs[S].remove(R); + if (R == IF.SrcR || R == IF.InsR) + continue; + // Check if a given register has bits that are references to any other + // registers. This is to detect situations where the instruction that + // defines register R takes register Q as an operand, but R itself does + // not contain any bits from Q. Loads are examples of how this could + // happen: + // R = load Q + // In this case (assuming we do not have any knowledge about the loaded + // value), we must not treat R as a "conveyance" of the bits from Q. + // (The information in BT about R's bits would have them as constants, + // in case of zero-extending loads, or refs to R.) + if (!findNonSelfReference(R)) + continue; + RMs.insert(R); + const MachineInstr *DefI = MRI->getVRegDef(R); + assert(DefI); + // Do not iterate past PHI nodes to avoid infinite loops. This can + // make the final set a bit less accurate, but the removable register + // sets are an approximation anyway. + if (DefI->isPHI()) + continue; + getInstrUses(DefI, Regs[OtherS]); + } + S = OtherS; + } + // The register VR is added to the list as a side-effect of the algorithm, + // but it is not "potentially removable". A potentially removable register + // is one that may become unused (dead) after conversion to the insert form + // IF, and obviously VR (or its replacement) will not become dead by apply- + // ing IF. + RMs.remove(VR); +} + + +void HexagonGenInsert::computeRemovableRegisters() { + for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { + IFListType &LL = I->second; + for (unsigned i = 0, n = LL.size(); i < n; ++i) + findRemovableRegisters(I->first, LL[i].first, LL[i].second); + } +} + + +void HexagonGenInsert::pruneEmptyLists() { + // Remove all entries from the map, where the register has no insert forms + // associated with it. 
+ typedef SmallVector<IFMapType::iterator,16> IterListType; + IterListType Prune; + for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { + if (I->second.size() == 0) + Prune.push_back(I); + } + for (unsigned i = 0, n = Prune.size(); i < n; ++i) + IFMap.erase(Prune[i]); +} + + +void HexagonGenInsert::pruneCoveredSets(unsigned VR) { + IFMapType::iterator F = IFMap.find(VR); + assert(F != IFMap.end()); + IFListType &LL = F->second; + + // First, examine the IF candidates for register VR whose removable-regis- + // ter sets are empty. This means that a given candidate will not help eli- + // minate any registers, but since "insert" is not a constant-extendable + // instruction, using such a candidate may reduce code size if the defini- + // tion of VR is constant-extended. + // If there exists a candidate with a non-empty set, the ones with empty + // sets will not be used and can be removed. + MachineInstr *DefVR = MRI->getVRegDef(VR); + bool DefEx = HII->isConstExtended(DefVR); + bool HasNE = false; + for (unsigned i = 0, n = LL.size(); i < n; ++i) { + if (LL[i].second.empty()) + continue; + HasNE = true; + break; + } + if (!DefEx || HasNE) { + // The definition of VR is not constant-extended, or there is a candidate + // with a non-empty set. Remove all candidates with empty sets. + auto IsEmpty = [] (const IFRecordWithRegSet &IR) -> bool { + return IR.second.empty(); + }; + auto End = std::remove_if(LL.begin(), LL.end(), IsEmpty); + if (End != LL.end()) + LL.erase(End, LL.end()); + } else { + // The definition of VR is constant-extended, and all candidates have + // empty removable-register sets. Pick the maximum candidate, and remove + // all others. The "maximum" does not have any special meaning here, it + // is only so that the candidate that will remain on the list is selec- + // ted deterministically. + IFRecord MaxIF = LL[0].first; + for (unsigned i = 1, n = LL.size(); i < n; ++i) { + // If LL[MaxI] < LL[i], then MaxI = i. + const IFRecord &IF = LL[i].first; + unsigned M0 = BaseOrd[MaxIF.SrcR], M1 = BaseOrd[MaxIF.InsR]; + unsigned R0 = BaseOrd[IF.SrcR], R1 = BaseOrd[IF.InsR]; + if (M0 > R0) + continue; + if (M0 == R0) { + if (M1 > R1) + continue; + if (M1 == R1) { + if (MaxIF.Wdh > IF.Wdh) + continue; + if (MaxIF.Wdh == IF.Wdh && MaxIF.Off >= IF.Off) + continue; + } + } + // MaxIF < IF. + MaxIF = IF; + } + // Remove everything except the maximum candidate. All register sets + // are empty, so no need to preserve anything. + LL.clear(); + LL.push_back(std::make_pair(MaxIF, RegisterSet())); + } + + // Now, remove those whose sets of potentially removable registers are + // contained in another IF candidate for VR. For example, given these + // candidates for vreg45, + // %vreg45: + // (%vreg44,%vreg41,#9,#8), { %vreg42 } + // (%vreg43,%vreg41,#9,#8), { %vreg42 %vreg44 } + // remove the first one, since it is contained in the second one. + for (unsigned i = 0, n = LL.size(); i < n; ) { + const RegisterSet &RMi = LL[i].second; + unsigned j = 0; + while (j < n) { + if (j != i && LL[j].second.includes(RMi)) + break; + j++; + } + if (j == n) { // RMi not contained in anything else. 
+ i++; + continue; + } + LL.erase(LL.begin()+i); + n = LL.size(); + } +} + + +void HexagonGenInsert::pruneUsesTooFar(unsigned VR, const UnsignedMap &RPO, + PairMapType &M) { + IFMapType::iterator F = IFMap.find(VR); + assert(F != IFMap.end()); + IFListType &LL = F->second; + unsigned Cutoff = VRegDistCutoff; + const MachineInstr *DefV = MRI->getVRegDef(VR); + + for (unsigned i = LL.size(); i > 0; --i) { + unsigned SR = LL[i-1].first.SrcR, IR = LL[i-1].first.InsR; + const MachineInstr *DefS = MRI->getVRegDef(SR); + const MachineInstr *DefI = MRI->getVRegDef(IR); + unsigned DSV = distance(DefS, DefV, RPO, M); + if (DSV < Cutoff) { + unsigned DIV = distance(DefI, DefV, RPO, M); + if (DIV < Cutoff) + continue; + } + LL.erase(LL.begin()+(i-1)); + } +} + + +void HexagonGenInsert::pruneRegCopies(unsigned VR) { + IFMapType::iterator F = IFMap.find(VR); + assert(F != IFMap.end()); + IFListType &LL = F->second; + + auto IsCopy = [] (const IFRecordWithRegSet &IR) -> bool { + return IR.first.Wdh == 32 && (IR.first.Off == 0 || IR.first.Off == 32); + }; + auto End = std::remove_if(LL.begin(), LL.end(), IsCopy); + if (End != LL.end()) + LL.erase(End, LL.end()); +} + + +void HexagonGenInsert::pruneCandidates() { + // Remove candidates that are not beneficial, regardless of the final + // selection method. + // First, remove candidates whose potentially removable set is a subset + // of another candidate's set. + for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) + pruneCoveredSets(I->first); + + UnsignedMap RPO; + typedef ReversePostOrderTraversal<const MachineFunction*> RPOTType; + RPOTType RPOT(MFN); + unsigned RPON = 0; + for (RPOTType::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I) + RPO[(*I)->getNumber()] = RPON++; + + PairMapType Memo; // Memoization map for distance calculation. + // Remove candidates that would use registers defined too far away. + for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) + pruneUsesTooFar(I->first, RPO, Memo); + + pruneEmptyLists(); + + for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) + pruneRegCopies(I->first); +} + + +namespace { + // Class for comparing IF candidates for registers that have multiple of + // them. The smaller the candidate, according to this ordering, the better. + // First, compare the number of zeros in the associated potentially remova- + // ble register sets. "Zero" indicates that the register is very likely to + // become dead after this transformation. + // Second, compare "averages", i.e. use-count per size. The lower wins. + // After that, it does not really matter which one is smaller. Resolve + // the tie in some deterministic way. + struct IFOrdering { + IFOrdering(const UnsignedMap &UC, const RegisterOrdering &BO) + : UseC(UC), BaseOrd(BO) {} + bool operator() (const IFRecordWithRegSet &A, + const IFRecordWithRegSet &B) const; + private: + void stats(const RegisterSet &Rs, unsigned &Size, unsigned &Zero, + unsigned &Sum) const; + const UnsignedMap &UseC; + const RegisterOrdering &BaseOrd; + }; +} + + +bool IFOrdering::operator() (const IFRecordWithRegSet &A, + const IFRecordWithRegSet &B) const { + unsigned SizeA = 0, ZeroA = 0, SumA = 0; + unsigned SizeB = 0, ZeroB = 0, SumB = 0; + stats(A.second, SizeA, ZeroA, SumA); + stats(B.second, SizeB, ZeroB, SumB); + + // We will pick the minimum element. The more zeros, the better. + if (ZeroA != ZeroB) + return ZeroA > ZeroB; + // Compare SumA/SizeA with SumB/SizeB, lower is better. 
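+ // Cross-multiply instead of dividing, so the comparison is exact and is
+ // not affected by integer-division truncation:
+ //   SumA/SizeA < SumB/SizeB  <=>  SumA*SizeB < SumB*SizeA  (for sizes > 0).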
+ uint64_t AvgA = SumA*SizeB, AvgB = SumB*SizeA; + if (AvgA != AvgB) + return AvgA < AvgB; + + // The sets compare identical so far. Resort to comparing the IF records. + // The actual values don't matter, this is only for determinism. + unsigned OSA = BaseOrd[A.first.SrcR], OSB = BaseOrd[B.first.SrcR]; + if (OSA != OSB) + return OSA < OSB; + unsigned OIA = BaseOrd[A.first.InsR], OIB = BaseOrd[B.first.InsR]; + if (OIA != OIB) + return OIA < OIB; + if (A.first.Wdh != B.first.Wdh) + return A.first.Wdh < B.first.Wdh; + return A.first.Off < B.first.Off; +} + + +void IFOrdering::stats(const RegisterSet &Rs, unsigned &Size, unsigned &Zero, + unsigned &Sum) const { + for (unsigned R = Rs.find_first(); R; R = Rs.find_next(R)) { + UnsignedMap::const_iterator F = UseC.find(R); + assert(F != UseC.end()); + unsigned UC = F->second; + if (UC == 0) + Zero++; + Sum += UC; + Size++; + } +} + + +void HexagonGenInsert::selectCandidates() { + // Some registers may have multiple valid candidates. Pick the best one + // (or decide not to use any). + + // Compute the "removability" measure of R: + // For each potentially removable register R, record the number of regis- + // ters with IF candidates, where R appears in at least one set. + RegisterSet AllRMs; + UnsignedMap UseC, RemC; + IFMapType::iterator End = IFMap.end(); + + for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) { + const IFListType &LL = I->second; + RegisterSet TT; + for (unsigned i = 0, n = LL.size(); i < n; ++i) + TT.insert(LL[i].second); + for (unsigned R = TT.find_first(); R; R = TT.find_next(R)) + RemC[R]++; + AllRMs.insert(TT); + } + + for (unsigned R = AllRMs.find_first(); R; R = AllRMs.find_next(R)) { + typedef MachineRegisterInfo::use_nodbg_iterator use_iterator; + typedef SmallSet<const MachineInstr*,16> InstrSet; + InstrSet UIs; + // Count as the number of instructions in which R is used, not the + // number of operands. + use_iterator E = MRI->use_nodbg_end(); + for (use_iterator I = MRI->use_nodbg_begin(R); I != E; ++I) + UIs.insert(I->getParent()); + unsigned C = UIs.size(); + // Calculate a measure, which is the number of instructions using R, + // minus the "removability" count computed earlier. + unsigned D = RemC[R]; + UseC[R] = (C > D) ? C-D : 0; // doz + } + + + bool SelectAll0 = OptSelectAll0, SelectHas0 = OptSelectHas0; + if (!SelectAll0 && !SelectHas0) + SelectAll0 = true; + + // The smaller the number UseC for a given register R, the "less used" + // R is aside from the opportunities for removal offered by generating + // "insert" instructions. + // Iterate over the IF map, and for those registers that have multiple + // candidates, pick the minimum one according to IFOrdering. + IFOrdering IFO(UseC, BaseOrd); + for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) { + IFListType &LL = I->second; + if (LL.empty()) + continue; + // Get the minimum element, remember it and clear the list. If the + // element found is adequate, we will put it back on the list, other- + // wise the list will remain empty, and the entry for this register + // will be removed (i.e. this register will not be replaced by insert). + IFListType::iterator MinI = std::min_element(LL.begin(), LL.end(), IFO); + assert(MinI != LL.end()); + IFRecordWithRegSet M = *MinI; + LL.clear(); + + // We want to make sure that this replacement will have a chance to be + // beneficial, and that means that we want to have indication that some + // register will be removed. 
The most likely registers to be eliminated + // are the use operands in the definition of I->first. Accept/reject a + // candidate based on how many of its uses it can potentially eliminate. + + RegisterSet Us; + const MachineInstr *DefI = MRI->getVRegDef(I->first); + getInstrUses(DefI, Us); + bool Accept = false; + + if (SelectAll0) { + bool All0 = true; + for (unsigned R = Us.find_first(); R; R = Us.find_next(R)) { + if (UseC[R] == 0) + continue; + All0 = false; + break; + } + Accept = All0; + } else if (SelectHas0) { + bool Has0 = false; + for (unsigned R = Us.find_first(); R; R = Us.find_next(R)) { + if (UseC[R] != 0) + continue; + Has0 = true; + break; + } + Accept = Has0; + } + if (Accept) + LL.push_back(M); + } + + // Remove candidates that add uses of removable registers, unless the + // removable registers are among replacement candidates. + // Recompute the removable registers, since some candidates may have + // been eliminated. + AllRMs.clear(); + for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) { + const IFListType &LL = I->second; + if (LL.size() > 0) + AllRMs.insert(LL[0].second); + } + for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) { + IFListType &LL = I->second; + if (LL.size() == 0) + continue; + unsigned SR = LL[0].first.SrcR, IR = LL[0].first.InsR; + if (AllRMs[SR] || AllRMs[IR]) + LL.clear(); + } + + pruneEmptyLists(); +} + + +bool HexagonGenInsert::generateInserts() { + // Create a new register for each one from IFMap, and store them in the + // map. + UnsignedMap RegMap; + for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { + unsigned VR = I->first; + const TargetRegisterClass *RC = MRI->getRegClass(VR); + unsigned NewVR = MRI->createVirtualRegister(RC); + RegMap[VR] = NewVR; + } + + // We can generate the "insert" instructions using potentially stale re- + // gisters: SrcR and InsR for a given VR may be among other registers that + // are also replaced. This is fine, we will do the mass "rauw" a bit later. + for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { + MachineInstr *MI = MRI->getVRegDef(I->first); + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + unsigned NewR = RegMap[I->first]; + bool R32 = MRI->getRegClass(NewR) == &Hexagon::IntRegsRegClass; + const MCInstrDesc &D = R32 ? HII->get(Hexagon::S2_insert) + : HII->get(Hexagon::S2_insertp); + IFRecord IF = I->second[0].first; + unsigned Wdh = IF.Wdh, Off = IF.Off; + unsigned InsS = 0; + if (R32 && MRI->getRegClass(IF.InsR) == &Hexagon::DoubleRegsRegClass) { + InsS = Hexagon::subreg_loreg; + if (Off >= 32) { + InsS = Hexagon::subreg_hireg; + Off -= 32; + } + } + // Advance to the proper location for inserting instructions. This could + // be B.end(). 
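+ // If MI is a PHI, new instructions cannot be placed among the PHI nodes,
+ // so insert at the first non-PHI instruction instead.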
+ MachineBasicBlock::iterator At = MI; + if (MI->isPHI()) + At = B.getFirstNonPHI(); + + BuildMI(B, At, DL, D, NewR) + .addReg(IF.SrcR) + .addReg(IF.InsR, 0, InsS) + .addImm(Wdh) + .addImm(Off); + + MRI->clearKillFlags(IF.SrcR); + MRI->clearKillFlags(IF.InsR); + } + + for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { + MachineInstr *DefI = MRI->getVRegDef(I->first); + MRI->replaceRegWith(I->first, RegMap[I->first]); + DefI->eraseFromParent(); + } + + return true; +} + + +bool HexagonGenInsert::removeDeadCode(MachineDomTreeNode *N) { + bool Changed = false; + typedef GraphTraits<MachineDomTreeNode*> GTN; + for (auto I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I) + Changed |= removeDeadCode(*I); + + MachineBasicBlock *B = N->getBlock(); + std::vector<MachineInstr*> Instrs; + for (auto I = B->rbegin(), E = B->rend(); I != E; ++I) + Instrs.push_back(&*I); + + for (auto I = Instrs.begin(), E = Instrs.end(); I != E; ++I) { + MachineInstr *MI = *I; + unsigned Opc = MI->getOpcode(); + // Do not touch lifetime markers. This is why the target-independent DCE + // cannot be used. + if (Opc == TargetOpcode::LIFETIME_START || + Opc == TargetOpcode::LIFETIME_END) + continue; + bool Store = false; + if (MI->isInlineAsm() || !MI->isSafeToMove(nullptr, Store)) + continue; + + bool AllDead = true; + SmallVector<unsigned,2> Regs; + for (ConstMIOperands Op(MI); Op.isValid(); ++Op) { + if (!Op->isReg() || !Op->isDef()) + continue; + unsigned R = Op->getReg(); + if (!TargetRegisterInfo::isVirtualRegister(R) || + !MRI->use_nodbg_empty(R)) { + AllDead = false; + break; + } + Regs.push_back(R); + } + if (!AllDead) + continue; + + B->erase(MI); + for (unsigned I = 0, N = Regs.size(); I != N; ++I) + MRI->markUsesInDebugValueAsUndef(Regs[I]); + Changed = true; + } + + return Changed; +} + + +bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) { + bool Timing = OptTiming, TimingDetail = Timing && OptTimingDetail; + bool Changed = false; + TimerGroup __G("hexinsert"); + NamedRegionTimer __T("hexinsert", Timing && !TimingDetail); + + // Sanity check: one, but not both. + assert(!OptSelectAll0 || !OptSelectHas0); + + IFMap.clear(); + BaseOrd.clear(); + CellOrd.clear(); + + const auto &ST = MF.getSubtarget<HexagonSubtarget>(); + HII = ST.getInstrInfo(); + HRI = ST.getRegisterInfo(); + MFN = &MF; + MRI = &MF.getRegInfo(); + MDT = &getAnalysis<MachineDominatorTree>(); + + // Clean up before any further processing, so that dead code does not + // get used in a newly generated "insert" instruction. Have a custom + // version of DCE that preserves lifetime markers. Without it, merging + // of stack objects can fail to recognize and merge disjoint objects + // leading to unnecessary stack growth. + Changed = removeDeadCode(MDT->getRootNode()); + + const HexagonEvaluator HE(*HRI, *MRI, *HII, MF); + BitTracker BTLoc(HE, MF); + BTLoc.trace(isDebug()); + BTLoc.run(); + CellMapShadow MS(BTLoc); + CMS = &MS; + + buildOrderingMF(BaseOrd); + buildOrderingBT(BaseOrd, CellOrd); + + if (isDebug()) { + dbgs() << "Cell ordering:\n"; + for (RegisterOrdering::iterator I = CellOrd.begin(), E = CellOrd.end(); + I != E; ++I) { + unsigned VR = I->first, Pos = I->second; + dbgs() << PrintReg(VR, HRI) << " -> " << Pos << "\n"; + } + } + + // Collect candidates for conversion into the insert forms. 
+ MachineBasicBlock *RootB = MDT->getRoot(); + OrderedRegisterList AvailR(CellOrd); + + { + NamedRegionTimer _T("collection", "hexinsert", TimingDetail); + collectInBlock(RootB, AvailR); + // Complete the information gathered in IFMap. + computeRemovableRegisters(); + } + + if (isDebug()) { + dbgs() << "Candidates after collection:\n"; + dump_map(); + } + + if (IFMap.empty()) + return Changed; + + { + NamedRegionTimer _T("pruning", "hexinsert", TimingDetail); + pruneCandidates(); + } + + if (isDebug()) { + dbgs() << "Candidates after pruning:\n"; + dump_map(); + } + + if (IFMap.empty()) + return Changed; + + { + NamedRegionTimer _T("selection", "hexinsert", TimingDetail); + selectCandidates(); + } + + if (isDebug()) { + dbgs() << "Candidates after selection:\n"; + dump_map(); + } + + // Filter out vregs beyond the cutoff. + if (VRegIndexCutoff.getPosition()) { + unsigned Cutoff = VRegIndexCutoff; + typedef SmallVector<IFMapType::iterator,16> IterListType; + IterListType Out; + for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { + unsigned Idx = TargetRegisterInfo::virtReg2Index(I->first); + if (Idx >= Cutoff) + Out.push_back(I); + } + for (unsigned i = 0, n = Out.size(); i < n; ++i) + IFMap.erase(Out[i]); + } + if (IFMap.empty()) + return Changed; + + { + NamedRegionTimer _T("generation", "hexinsert", TimingDetail); + generateInserts(); + } + + return true; +} + + +FunctionPass *llvm::createHexagonGenInsert() { + return new HexagonGenInsert(); +} + + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +INITIALIZE_PASS_BEGIN(HexagonGenInsert, "hexinsert", + "Hexagon generate \"insert\" instructions", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_END(HexagonGenInsert, "hexinsert", + "Hexagon generate \"insert\" instructions", false, false) diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonGenMux.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonGenMux.cpp new file mode 100644 index 0000000..c059d56 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonGenMux.cpp @@ -0,0 +1,319 @@ +//===--- HexagonGenMux.cpp ------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// During instruction selection, MUX instructions are generated for +// conditional assignments. Since such assignments often present an +// opportunity to predicate instructions, HexagonExpandCondsets +// expands MUXes into pairs of conditional transfers, and then proceeds +// with predication of the producers/consumers of the registers involved. +// This happens after exiting from the SSA form, but before the machine +// instruction scheduler. After the scheduler and after the register +// allocation there can be cases of pairs of conditional transfers +// resulting from a MUX where neither of them was further predicated. If +// these transfers are now placed far enough from the instruction defining +// the predicate register, they cannot use the .new form. In such cases it +// is better to collapse them back to a single MUX instruction. 
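+// For example, a leftover pair of conditional transfers such as
+//     if (p0) r5 = r3
+//     if (!p0) r5 = r4
+// that ended up too far from the definition of p0 to use the .new form
+// can be rewritten as a single instruction:
+//     r5 = mux(p0, r3, r4)
+// (Register names above are illustrative only.)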
+ +#define DEBUG_TYPE "hexmux" + +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "HexagonTargetMachine.h" + +using namespace llvm; + +namespace llvm { + FunctionPass *createHexagonGenMux(); + void initializeHexagonGenMuxPass(PassRegistry& Registry); +} + +namespace { + class HexagonGenMux : public MachineFunctionPass { + public: + static char ID; + HexagonGenMux() : MachineFunctionPass(ID), HII(0), HRI(0) { + initializeHexagonGenMuxPass(*PassRegistry::getPassRegistry()); + } + const char *getPassName() const override { + return "Hexagon generate mux instructions"; + } + void getAnalysisUsage(AnalysisUsage &AU) const override { + MachineFunctionPass::getAnalysisUsage(AU); + } + bool runOnMachineFunction(MachineFunction &MF) override; + + private: + const HexagonInstrInfo *HII; + const HexagonRegisterInfo *HRI; + + struct CondsetInfo { + unsigned PredR; + unsigned TrueX, FalseX; + CondsetInfo() : PredR(0), TrueX(UINT_MAX), FalseX(UINT_MAX) {} + }; + struct DefUseInfo { + BitVector Defs, Uses; + DefUseInfo() : Defs(), Uses() {} + DefUseInfo(const BitVector &D, const BitVector &U) : Defs(D), Uses(U) {} + }; + struct MuxInfo { + MachineBasicBlock::iterator At; + unsigned DefR, PredR; + MachineOperand *SrcT, *SrcF; + MachineInstr *Def1, *Def2; + MuxInfo(MachineBasicBlock::iterator It, unsigned DR, unsigned PR, + MachineOperand *TOp, MachineOperand *FOp, + MachineInstr *D1, MachineInstr *D2) + : At(It), DefR(DR), PredR(PR), SrcT(TOp), SrcF(FOp), Def1(D1), + Def2(D2) {} + }; + typedef DenseMap<MachineInstr*,unsigned> InstrIndexMap; + typedef DenseMap<unsigned,DefUseInfo> DefUseInfoMap; + typedef SmallVector<MuxInfo,4> MuxInfoList; + + bool isRegPair(unsigned Reg) const { + return Hexagon::DoubleRegsRegClass.contains(Reg); + } + void getSubRegs(unsigned Reg, BitVector &SRs) const; + void expandReg(unsigned Reg, BitVector &Set) const; + void getDefsUses(const MachineInstr *MI, BitVector &Defs, + BitVector &Uses) const; + void buildMaps(MachineBasicBlock &B, InstrIndexMap &I2X, + DefUseInfoMap &DUM); + bool isCondTransfer(unsigned Opc) const; + unsigned getMuxOpcode(const MachineOperand &Src1, + const MachineOperand &Src2) const; + bool genMuxInBlock(MachineBasicBlock &B); + }; + + char HexagonGenMux::ID = 0; +} + +INITIALIZE_PASS(HexagonGenMux, "hexagon-mux", + "Hexagon generate mux instructions", false, false) + + +void HexagonGenMux::getSubRegs(unsigned Reg, BitVector &SRs) const { + for (MCSubRegIterator I(Reg, HRI); I.isValid(); ++I) + SRs[*I] = true; +} + + +void HexagonGenMux::expandReg(unsigned Reg, BitVector &Set) const { + if (isRegPair(Reg)) + getSubRegs(Reg, Set); + else + Set[Reg] = true; +} + + +void HexagonGenMux::getDefsUses(const MachineInstr *MI, BitVector &Defs, + BitVector &Uses) const { + // First, get the implicit defs and uses for this instruction. + unsigned Opc = MI->getOpcode(); + const MCInstrDesc &D = HII->get(Opc); + if (const MCPhysReg *R = D.ImplicitDefs) + while (*R) + expandReg(*R++, Defs); + if (const MCPhysReg *R = D.ImplicitUses) + while (*R) + expandReg(*R++, Uses); + + // Look over all operands, and collect explicit defs and uses. + for (ConstMIOperands Mo(MI); Mo.isValid(); ++Mo) { + if (!Mo->isReg() || Mo->isImplicit()) + continue; + unsigned R = Mo->getReg(); + BitVector &Set = Mo->isDef() ? 
Defs : Uses; + expandReg(R, Set); + } +} + + +void HexagonGenMux::buildMaps(MachineBasicBlock &B, InstrIndexMap &I2X, + DefUseInfoMap &DUM) { + unsigned Index = 0; + unsigned NR = HRI->getNumRegs(); + BitVector Defs(NR), Uses(NR); + + for (MachineBasicBlock::iterator I = B.begin(), E = B.end(); I != E; ++I) { + MachineInstr *MI = &*I; + I2X.insert(std::make_pair(MI, Index)); + Defs.reset(); + Uses.reset(); + getDefsUses(MI, Defs, Uses); + DUM.insert(std::make_pair(Index, DefUseInfo(Defs, Uses))); + Index++; + } +} + + +bool HexagonGenMux::isCondTransfer(unsigned Opc) const { + switch (Opc) { + case Hexagon::A2_tfrt: + case Hexagon::A2_tfrf: + case Hexagon::C2_cmoveit: + case Hexagon::C2_cmoveif: + return true; + } + return false; +} + + +unsigned HexagonGenMux::getMuxOpcode(const MachineOperand &Src1, + const MachineOperand &Src2) const { + bool IsReg1 = Src1.isReg(), IsReg2 = Src2.isReg(); + if (IsReg1) + return IsReg2 ? Hexagon::C2_mux : Hexagon::C2_muxir; + if (IsReg2) + return Hexagon::C2_muxri; + + // Neither is a register. The first source is extendable, but the second + // is not (s8). + if (Src2.isImm() && isInt<8>(Src2.getImm())) + return Hexagon::C2_muxii; + + return 0; +} + + +bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) { + bool Changed = false; + InstrIndexMap I2X; + DefUseInfoMap DUM; + buildMaps(B, I2X, DUM); + + typedef DenseMap<unsigned,CondsetInfo> CondsetMap; + CondsetMap CM; + MuxInfoList ML; + + MachineBasicBlock::iterator NextI, End = B.end(); + for (MachineBasicBlock::iterator I = B.begin(); I != End; I = NextI) { + MachineInstr *MI = &*I; + NextI = std::next(I); + unsigned Opc = MI->getOpcode(); + if (!isCondTransfer(Opc)) + continue; + unsigned DR = MI->getOperand(0).getReg(); + if (isRegPair(DR)) + continue; + + unsigned PR = MI->getOperand(1).getReg(); + unsigned Idx = I2X.lookup(MI); + CondsetMap::iterator F = CM.find(DR); + bool IfTrue = HII->isPredicatedTrue(Opc); + + // If there is no record of a conditional transfer for this register, + // or the predicate register differs, create a new record for it. + if (F != CM.end() && F->second.PredR != PR) { + CM.erase(F); + F = CM.end(); + } + if (F == CM.end()) { + auto It = CM.insert(std::make_pair(DR, CondsetInfo())); + F = It.first; + F->second.PredR = PR; + } + CondsetInfo &CI = F->second; + if (IfTrue) + CI.TrueX = Idx; + else + CI.FalseX = Idx; + if (CI.TrueX == UINT_MAX || CI.FalseX == UINT_MAX) + continue; + + // There is now a complete definition of DR, i.e. we have the predicate + // register, the definition if-true, and definition if-false. + + // First, check if both definitions are far enough from the definition + // of the predicate register. + unsigned MinX = std::min(CI.TrueX, CI.FalseX); + unsigned MaxX = std::max(CI.TrueX, CI.FalseX); + unsigned SearchX = (MaxX > 4) ? MaxX-4 : 0; + bool NearDef = false; + for (unsigned X = SearchX; X < MaxX; ++X) { + const DefUseInfo &DU = DUM.lookup(X); + if (!DU.Defs[PR]) + continue; + NearDef = true; + break; + } + if (NearDef) + continue; + + // The predicate register is not defined in the last few instructions. + // Check if the conversion to MUX is possible (either "up", i.e. at the + // place of the earlier partial definition, or "down", where the later + // definition is located). Examine all defs and uses between these two + // definitions. + // SR1, SR2 - source registers from the first and the second definition. 
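+ // Schematically, with Def1 at MinX and Def2 at MaxX:
+ //     DR = SR1 (predicated)   <- "up" would place the mux here
+ //     ...                     <- no def of PR or DR, no use of DR;
+ //                                a def of SR1 blocks "down", a def of
+ //                                SR2 blocks "up"
+ //     DR = SR2 (predicated)   <- "down" would place the mux here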
+ MachineBasicBlock::iterator It1 = B.begin(), It2 = B.begin(); + std::advance(It1, MinX); + std::advance(It2, MaxX); + MachineInstr *Def1 = It1, *Def2 = It2; + MachineOperand *Src1 = &Def1->getOperand(2), *Src2 = &Def2->getOperand(2); + unsigned SR1 = Src1->isReg() ? Src1->getReg() : 0; + unsigned SR2 = Src2->isReg() ? Src2->getReg() : 0; + bool Failure = false, CanUp = true, CanDown = true; + for (unsigned X = MinX+1; X < MaxX; X++) { + const DefUseInfo &DU = DUM.lookup(X); + if (DU.Defs[PR] || DU.Defs[DR] || DU.Uses[DR]) { + Failure = true; + break; + } + if (CanDown && DU.Defs[SR1]) + CanDown = false; + if (CanUp && DU.Defs[SR2]) + CanUp = false; + } + if (Failure || (!CanUp && !CanDown)) + continue; + + MachineOperand *SrcT = (MinX == CI.TrueX) ? Src1 : Src2; + MachineOperand *SrcF = (MinX == CI.FalseX) ? Src1 : Src2; + // Prefer "down", since this will move the MUX farther away from the + // predicate definition. + MachineBasicBlock::iterator At = CanDown ? Def2 : Def1; + ML.push_back(MuxInfo(At, DR, PR, SrcT, SrcF, Def1, Def2)); + } + + for (unsigned I = 0, N = ML.size(); I < N; ++I) { + MuxInfo &MX = ML[I]; + MachineBasicBlock &B = *MX.At->getParent(); + DebugLoc DL = MX.At->getDebugLoc(); + unsigned MxOpc = getMuxOpcode(*MX.SrcT, *MX.SrcF); + if (!MxOpc) + continue; + BuildMI(B, MX.At, DL, HII->get(MxOpc), MX.DefR) + .addReg(MX.PredR) + .addOperand(*MX.SrcT) + .addOperand(*MX.SrcF); + B.erase(MX.Def1); + B.erase(MX.Def2); + Changed = true; + } + + return Changed; +} + +bool HexagonGenMux::runOnMachineFunction(MachineFunction &MF) { + HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo(); + HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); + bool Changed = false; + for (auto &I : MF) + Changed |= genMuxInBlock(I); + return Changed; +} + +FunctionPass *llvm::createHexagonGenMux() { + return new HexagonGenMux(); +} + diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp new file mode 100644 index 0000000..d9675b5 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp @@ -0,0 +1,525 @@ +//===--- HexagonGenPredicate.cpp ------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "gen-pred" + +#include "llvm/ADT/SetVector.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "HexagonTargetMachine.h" + +#include <functional> +#include <queue> +#include <set> +#include <vector> + +using namespace llvm; + +namespace llvm { + void initializeHexagonGenPredicatePass(PassRegistry& Registry); + FunctionPass *createHexagonGenPredicate(); +} + +namespace { + struct Register { + unsigned R, S; + Register(unsigned r = 0, unsigned s = 0) : R(r), S(s) {} + Register(const MachineOperand &MO) : R(MO.getReg()), S(MO.getSubReg()) {} + bool operator== (const Register &Reg) const { + return R == Reg.R && S == Reg.S; + } + bool operator< (const Register &Reg) const { + return R < Reg.R || (R == Reg.R && S < Reg.S); + } + }; + struct PrintRegister { + PrintRegister(Register R, const TargetRegisterInfo &I) : Reg(R), TRI(I) {} + friend raw_ostream &operator<< (raw_ostream &OS, const PrintRegister &PR); + private: + Register Reg; + const TargetRegisterInfo &TRI; + }; + raw_ostream &operator<< (raw_ostream &OS, const PrintRegister &PR) + LLVM_ATTRIBUTE_UNUSED; + raw_ostream &operator<< (raw_ostream &OS, const PrintRegister &PR) { + return OS << PrintReg(PR.Reg.R, &PR.TRI, PR.Reg.S); + } + + class HexagonGenPredicate : public MachineFunctionPass { + public: + static char ID; + HexagonGenPredicate() : MachineFunctionPass(ID), TII(0), TRI(0), MRI(0) { + initializeHexagonGenPredicatePass(*PassRegistry::getPassRegistry()); + } + virtual const char *getPassName() const { + return "Hexagon generate predicate operations"; + } + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + virtual bool runOnMachineFunction(MachineFunction &MF); + + private: + typedef SetVector<MachineInstr*> VectOfInst; + typedef std::set<Register> SetOfReg; + typedef std::map<Register,Register> RegToRegMap; + + const HexagonInstrInfo *TII; + const HexagonRegisterInfo *TRI; + MachineRegisterInfo *MRI; + SetOfReg PredGPRs; + VectOfInst PUsers; + RegToRegMap G2P; + + bool isPredReg(unsigned R); + void collectPredicateGPR(MachineFunction &MF); + void processPredicateGPR(const Register &Reg); + unsigned getPredForm(unsigned Opc); + bool isConvertibleToPredForm(const MachineInstr *MI); + bool isScalarCmp(unsigned Opc); + bool isScalarPred(Register PredReg); + Register getPredRegFor(const Register &Reg); + bool convertToPredForm(MachineInstr *MI); + bool eliminatePredCopies(MachineFunction &MF); + }; + + char HexagonGenPredicate::ID = 0; +} + +INITIALIZE_PASS_BEGIN(HexagonGenPredicate, "hexagon-gen-pred", + "Hexagon generate predicate operations", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_END(HexagonGenPredicate, "hexagon-gen-pred", + "Hexagon generate predicate operations", false, false) + +bool HexagonGenPredicate::isPredReg(unsigned R) { + if (!TargetRegisterInfo::isVirtualRegister(R)) + return false; + const TargetRegisterClass *RC = 
MRI->getRegClass(R); + return RC == &Hexagon::PredRegsRegClass; +} + + +unsigned HexagonGenPredicate::getPredForm(unsigned Opc) { + using namespace Hexagon; + + switch (Opc) { + case A2_and: + case A2_andp: + return C2_and; + case A4_andn: + case A4_andnp: + return C2_andn; + case M4_and_and: + return C4_and_and; + case M4_and_andn: + return C4_and_andn; + case M4_and_or: + return C4_and_or; + + case A2_or: + case A2_orp: + return C2_or; + case A4_orn: + case A4_ornp: + return C2_orn; + case M4_or_and: + return C4_or_and; + case M4_or_andn: + return C4_or_andn; + case M4_or_or: + return C4_or_or; + + case A2_xor: + case A2_xorp: + return C2_xor; + + case C2_tfrrp: + return COPY; + } + // The opcode corresponding to 0 is TargetOpcode::PHI. We can use 0 here + // to denote "none", but we need to make sure that none of the valid opcodes + // that we return will ever be 0. + assert(PHI == 0 && "Use different value for <none>"); + return 0; +} + + +bool HexagonGenPredicate::isConvertibleToPredForm(const MachineInstr *MI) { + unsigned Opc = MI->getOpcode(); + if (getPredForm(Opc) != 0) + return true; + + // Comparisons against 0 are also convertible. This does not apply to + // A4_rcmpeqi or A4_rcmpneqi, since they produce values 0 or 1, which + // may not match the value that the predicate register would have if + // it was converted to a predicate form. + switch (Opc) { + case Hexagon::C2_cmpeqi: + case Hexagon::C4_cmpneqi: + if (MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) + return true; + break; + } + return false; +} + + +void HexagonGenPredicate::collectPredicateGPR(MachineFunction &MF) { + for (MachineFunction::iterator A = MF.begin(), Z = MF.end(); A != Z; ++A) { + MachineBasicBlock &B = *A; + for (MachineBasicBlock::iterator I = B.begin(), E = B.end(); I != E; ++I) { + MachineInstr *MI = &*I; + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case Hexagon::C2_tfrpr: + case TargetOpcode::COPY: + if (isPredReg(MI->getOperand(1).getReg())) { + Register RD = MI->getOperand(0); + if (TargetRegisterInfo::isVirtualRegister(RD.R)) + PredGPRs.insert(RD); + } + break; + } + } + } +} + + +void HexagonGenPredicate::processPredicateGPR(const Register &Reg) { + DEBUG(dbgs() << LLVM_FUNCTION_NAME << ": " + << PrintReg(Reg.R, TRI, Reg.S) << "\n"); + typedef MachineRegisterInfo::use_iterator use_iterator; + use_iterator I = MRI->use_begin(Reg.R), E = MRI->use_end(); + if (I == E) { + DEBUG(dbgs() << "Dead reg: " << PrintReg(Reg.R, TRI, Reg.S) << '\n'); + MachineInstr *DefI = MRI->getVRegDef(Reg.R); + DefI->eraseFromParent(); + return; + } + + for (; I != E; ++I) { + MachineInstr *UseI = I->getParent(); + if (isConvertibleToPredForm(UseI)) + PUsers.insert(UseI); + } +} + + +Register HexagonGenPredicate::getPredRegFor(const Register &Reg) { + // Create a predicate register for a given Reg. The newly created register + // will have its value copied from Reg, so that it can be later used as + // an operand in other instructions. 
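+ // If Reg was defined by a transfer out of a predicate (C2_tfrpr or a
+ // COPY from a predicate register), that predicate register is reused.
+ // Otherwise a new predicate virtual register is created and a COPY from
+ // Reg into it is placed right after Reg's (convertible) definition.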
+ assert(TargetRegisterInfo::isVirtualRegister(Reg.R)); + RegToRegMap::iterator F = G2P.find(Reg); + if (F != G2P.end()) + return F->second; + + DEBUG(dbgs() << LLVM_FUNCTION_NAME << ": " << PrintRegister(Reg, *TRI)); + MachineInstr *DefI = MRI->getVRegDef(Reg.R); + assert(DefI); + unsigned Opc = DefI->getOpcode(); + if (Opc == Hexagon::C2_tfrpr || Opc == TargetOpcode::COPY) { + assert(DefI->getOperand(0).isDef() && DefI->getOperand(1).isUse()); + Register PR = DefI->getOperand(1); + G2P.insert(std::make_pair(Reg, PR)); + DEBUG(dbgs() << " -> " << PrintRegister(PR, *TRI) << '\n'); + return PR; + } + + MachineBasicBlock &B = *DefI->getParent(); + DebugLoc DL = DefI->getDebugLoc(); + const TargetRegisterClass *PredRC = &Hexagon::PredRegsRegClass; + unsigned NewPR = MRI->createVirtualRegister(PredRC); + + // For convertible instructions, do not modify them, so that they can + // be converted later. Generate a copy from Reg to NewPR. + if (isConvertibleToPredForm(DefI)) { + MachineBasicBlock::iterator DefIt = DefI; + BuildMI(B, std::next(DefIt), DL, TII->get(TargetOpcode::COPY), NewPR) + .addReg(Reg.R, 0, Reg.S); + G2P.insert(std::make_pair(Reg, Register(NewPR))); + DEBUG(dbgs() << " -> !" << PrintRegister(Register(NewPR), *TRI) << '\n'); + return Register(NewPR); + } + + llvm_unreachable("Invalid argument"); +} + + +bool HexagonGenPredicate::isScalarCmp(unsigned Opc) { + switch (Opc) { + case Hexagon::C2_cmpeq: + case Hexagon::C2_cmpgt: + case Hexagon::C2_cmpgtu: + case Hexagon::C2_cmpeqp: + case Hexagon::C2_cmpgtp: + case Hexagon::C2_cmpgtup: + case Hexagon::C2_cmpeqi: + case Hexagon::C2_cmpgti: + case Hexagon::C2_cmpgtui: + case Hexagon::C2_cmpgei: + case Hexagon::C2_cmpgeui: + case Hexagon::C4_cmpneqi: + case Hexagon::C4_cmpltei: + case Hexagon::C4_cmplteui: + case Hexagon::C4_cmpneq: + case Hexagon::C4_cmplte: + case Hexagon::C4_cmplteu: + case Hexagon::A4_cmpbeq: + case Hexagon::A4_cmpbeqi: + case Hexagon::A4_cmpbgtu: + case Hexagon::A4_cmpbgtui: + case Hexagon::A4_cmpbgt: + case Hexagon::A4_cmpbgti: + case Hexagon::A4_cmpheq: + case Hexagon::A4_cmphgt: + case Hexagon::A4_cmphgtu: + case Hexagon::A4_cmpheqi: + case Hexagon::A4_cmphgti: + case Hexagon::A4_cmphgtui: + return true; + } + return false; +} + + +bool HexagonGenPredicate::isScalarPred(Register PredReg) { + std::queue<Register> WorkQ; + WorkQ.push(PredReg); + + while (!WorkQ.empty()) { + Register PR = WorkQ.front(); + WorkQ.pop(); + const MachineInstr *DefI = MRI->getVRegDef(PR.R); + if (!DefI) + return false; + unsigned DefOpc = DefI->getOpcode(); + switch (DefOpc) { + case TargetOpcode::COPY: { + const TargetRegisterClass *PredRC = &Hexagon::PredRegsRegClass; + if (MRI->getRegClass(PR.R) != PredRC) + return false; + // If it is a copy between two predicate registers, fall through. + } + case Hexagon::C2_and: + case Hexagon::C2_andn: + case Hexagon::C4_and_and: + case Hexagon::C4_and_andn: + case Hexagon::C4_and_or: + case Hexagon::C2_or: + case Hexagon::C2_orn: + case Hexagon::C4_or_and: + case Hexagon::C4_or_andn: + case Hexagon::C4_or_or: + case Hexagon::C4_or_orn: + case Hexagon::C2_xor: + // Add operands to the queue. + for (ConstMIOperands Mo(DefI); Mo.isValid(); ++Mo) + if (Mo->isReg() && Mo->isUse()) + WorkQ.push(Register(Mo->getReg())); + break; + + // All non-vector compares are ok, everything else is bad. 
+ default: + return isScalarCmp(DefOpc); + } + } + + return true; +} + + +bool HexagonGenPredicate::convertToPredForm(MachineInstr *MI) { + DEBUG(dbgs() << LLVM_FUNCTION_NAME << ": " << MI << " " << *MI); + + unsigned Opc = MI->getOpcode(); + assert(isConvertibleToPredForm(MI)); + unsigned NumOps = MI->getNumOperands(); + for (unsigned i = 0; i < NumOps; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isUse()) + continue; + Register Reg(MO); + if (Reg.S && Reg.S != Hexagon::subreg_loreg) + return false; + if (!PredGPRs.count(Reg)) + return false; + } + + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + + unsigned NewOpc = getPredForm(Opc); + // Special case for comparisons against 0. + if (NewOpc == 0) { + switch (Opc) { + case Hexagon::C2_cmpeqi: + NewOpc = Hexagon::C2_not; + break; + case Hexagon::C4_cmpneqi: + NewOpc = TargetOpcode::COPY; + break; + default: + return false; + } + + // If it's a scalar predicate register, then all bits in it are + // the same. Otherwise, to determine whether all bits are 0 or not + // we would need to use any8. + Register PR = getPredRegFor(MI->getOperand(1)); + if (!isScalarPred(PR)) + return false; + // This will skip the immediate argument when creating the predicate + // version instruction. + NumOps = 2; + } + + // Some sanity: check that def is in operand #0. + MachineOperand &Op0 = MI->getOperand(0); + assert(Op0.isDef()); + Register OutR(Op0); + + // Don't use getPredRegFor, since it will create an association between + // the argument and a created predicate register (i.e. it will insert a + // copy if a new predicate register is created). + const TargetRegisterClass *PredRC = &Hexagon::PredRegsRegClass; + Register NewPR = MRI->createVirtualRegister(PredRC); + MachineInstrBuilder MIB = BuildMI(B, MI, DL, TII->get(NewOpc), NewPR.R); + + // Add predicate counterparts of the GPRs. + for (unsigned i = 1; i < NumOps; ++i) { + Register GPR = MI->getOperand(i); + Register Pred = getPredRegFor(GPR); + MIB.addReg(Pred.R, 0, Pred.S); + } + DEBUG(dbgs() << "generated: " << *MIB); + + // Generate a copy-out: NewGPR = NewPR, and replace all uses of OutR + // with NewGPR. + const TargetRegisterClass *RC = MRI->getRegClass(OutR.R); + unsigned NewOutR = MRI->createVirtualRegister(RC); + BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), NewOutR) + .addReg(NewPR.R, 0, NewPR.S); + MRI->replaceRegWith(OutR.R, NewOutR); + MI->eraseFromParent(); + + // If the processed instruction was C2_tfrrp (i.e. Rn = Pm; Pk = Rn), + // then the output will be a predicate register. Do not visit the + // users of it. + if (!isPredReg(NewOutR)) { + Register R(NewOutR); + PredGPRs.insert(R); + processPredicateGPR(R); + } + return true; +} + + +bool HexagonGenPredicate::eliminatePredCopies(MachineFunction &MF) { + DEBUG(dbgs() << LLVM_FUNCTION_NAME << "\n"); + const TargetRegisterClass *PredRC = &Hexagon::PredRegsRegClass; + bool Changed = false; + VectOfInst Erase; + + // First, replace copies + // IntR = PredR1 + // PredR2 = IntR + // with + // PredR2 = PredR1 + // Such sequences can be generated when a copy-into-pred is generated from + // a gpr register holding a result of a convertible instruction. After + // the convertible instruction is converted, its predicate result will be + // copied back into the original gpr. 
+ + for (MachineFunction::iterator A = MF.begin(), Z = MF.end(); A != Z; ++A) { + MachineBasicBlock &B = *A; + for (MachineBasicBlock::iterator I = B.begin(), E = B.end(); I != E; ++I) { + if (I->getOpcode() != TargetOpcode::COPY) + continue; + Register DR = I->getOperand(0); + Register SR = I->getOperand(1); + if (!TargetRegisterInfo::isVirtualRegister(DR.R)) + continue; + if (!TargetRegisterInfo::isVirtualRegister(SR.R)) + continue; + if (MRI->getRegClass(DR.R) != PredRC) + continue; + if (MRI->getRegClass(SR.R) != PredRC) + continue; + assert(!DR.S && !SR.S && "Unexpected subregister"); + MRI->replaceRegWith(DR.R, SR.R); + Erase.insert(I); + Changed = true; + } + } + + for (VectOfInst::iterator I = Erase.begin(), E = Erase.end(); I != E; ++I) + (*I)->eraseFromParent(); + + return Changed; +} + + +bool HexagonGenPredicate::runOnMachineFunction(MachineFunction &MF) { + TII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo(); + TRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); + MRI = &MF.getRegInfo(); + PredGPRs.clear(); + PUsers.clear(); + G2P.clear(); + + bool Changed = false; + collectPredicateGPR(MF); + for (SetOfReg::iterator I = PredGPRs.begin(), E = PredGPRs.end(); I != E; ++I) + processPredicateGPR(*I); + + bool Again; + do { + Again = false; + VectOfInst Processed, Copy; + + typedef VectOfInst::iterator iterator; + Copy = PUsers; + for (iterator I = Copy.begin(), E = Copy.end(); I != E; ++I) { + MachineInstr *MI = *I; + bool Done = convertToPredForm(MI); + if (Done) { + Processed.insert(MI); + Again = true; + } + } + Changed |= Again; + + auto Done = [Processed] (MachineInstr *MI) -> bool { + return Processed.count(MI); + }; + PUsers.remove_if(Done); + } while (Again); + + Changed |= eliminatePredCopies(MF); + return Changed; +} + + +FunctionPass *llvm::createHexagonGenPredicate() { + return new HexagonGenPredicate(); +} + diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp new file mode 100644 index 0000000..d20a809 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp @@ -0,0 +1,1965 @@ +//===-- HexagonHardwareLoops.cpp - Identify and generate hardware loops ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass identifies loops where we can generate the Hexagon hardware +// loop instruction. The hardware loop can perform loop branches with a +// zero-cycle overhead. +// +// The pattern that defines the induction variable can changed depending on +// prior optimizations. For example, the IndVarSimplify phase run by 'opt' +// normalizes induction variables, and the Loop Strength Reduction pass +// run by 'llc' may also make changes to the induction variable. +// The pattern detected by this phase is due to running Strength Reduction. +// +// Criteria for hardware loops: +// - Countable loops (w/ ind. var for a trip count) +// - Assumes loops are normalized by IndVarSimplify +// - Try inner-most loops first +// - No function calls in loops. 
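+//    (Calls that are known to never return are the one exception; see
+//    isInvalidLoopOperation.)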
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallSet.h" +#include "Hexagon.h" +#include "HexagonSubtarget.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/PassSupport.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include <algorithm> +#include <vector> + +using namespace llvm; + +#define DEBUG_TYPE "hwloops" + +#ifndef NDEBUG +static cl::opt<int> HWLoopLimit("hexagon-max-hwloop", cl::Hidden, cl::init(-1)); + +// Option to create preheader only for a specific function. +static cl::opt<std::string> PHFn("hexagon-hwloop-phfn", cl::Hidden, + cl::init("")); +#endif + +// Option to create a preheader if one doesn't exist. +static cl::opt<bool> HWCreatePreheader("hexagon-hwloop-preheader", + cl::Hidden, cl::init(true), + cl::desc("Add a preheader to a hardware loop if one doesn't exist")); + +STATISTIC(NumHWLoops, "Number of loops converted to hardware loops"); + +namespace llvm { + FunctionPass *createHexagonHardwareLoops(); + void initializeHexagonHardwareLoopsPass(PassRegistry&); +} + +namespace { + class CountValue; + struct HexagonHardwareLoops : public MachineFunctionPass { + MachineLoopInfo *MLI; + MachineRegisterInfo *MRI; + MachineDominatorTree *MDT; + const HexagonInstrInfo *TII; +#ifndef NDEBUG + static int Counter; +#endif + + public: + static char ID; + + HexagonHardwareLoops() : MachineFunctionPass(ID) { + initializeHexagonHardwareLoopsPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + const char *getPassName() const override { return "Hexagon Hardware Loops"; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<MachineDominatorTree>(); + AU.addRequired<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + private: + typedef std::map<unsigned, MachineInstr *> LoopFeederMap; + + /// Kinds of comparisons in the compare instructions. + struct Comparison { + enum Kind { + EQ = 0x01, + NE = 0x02, + L = 0x04, + G = 0x08, + U = 0x40, + LTs = L, + LEs = L | EQ, + GTs = G, + GEs = G | EQ, + LTu = L | U, + LEu = L | EQ | U, + GTu = G | U, + GEu = G | EQ | U + }; + + static Kind getSwappedComparison(Kind Cmp) { + assert ((!((Cmp & L) && (Cmp & G))) && "Malformed comparison operator"); + if ((Cmp & L) || (Cmp & G)) + return (Kind)(Cmp ^ (L|G)); + return Cmp; + } + + static Kind getNegatedComparison(Kind Cmp) { + if ((Cmp & L) || (Cmp & G)) + return (Kind)((Cmp ^ (L | G)) ^ EQ); + if ((Cmp & NE) || (Cmp & EQ)) + return (Kind)(Cmp ^ (EQ | NE)); + return (Kind)0; + } + + static bool isSigned(Kind Cmp) { + return (Cmp & (L | G) && !(Cmp & U)); + } + + static bool isUnsigned(Kind Cmp) { + return (Cmp & U); + } + + }; + + /// \brief Find the register that contains the loop controlling + /// induction variable. + /// If successful, it will return true and set the \p Reg, \p IVBump + /// and \p IVOp arguments. Otherwise it will return false. 
+ /// The returned induction register is the register R that follows the + /// following induction pattern: + /// loop: + /// R = phi ..., [ R.next, LatchBlock ] + /// R.next = R + #bump + /// if (R.next < #N) goto loop + /// IVBump is the immediate value added to R, and IVOp is the instruction + /// "R.next = R + #bump". + bool findInductionRegister(MachineLoop *L, unsigned &Reg, + int64_t &IVBump, MachineInstr *&IVOp) const; + + /// \brief Return the comparison kind for the specified opcode. + Comparison::Kind getComparisonKind(unsigned CondOpc, + MachineOperand *InitialValue, + const MachineOperand *Endvalue, + int64_t IVBump) const; + + /// \brief Analyze the statements in a loop to determine if the loop + /// has a computable trip count and, if so, return a value that represents + /// the trip count expression. + CountValue *getLoopTripCount(MachineLoop *L, + SmallVectorImpl<MachineInstr *> &OldInsts); + + /// \brief Return the expression that represents the number of times + /// a loop iterates. The function takes the operands that represent the + /// loop start value, loop end value, and induction value. Based upon + /// these operands, the function attempts to compute the trip count. + /// If the trip count is not directly available (as an immediate value, + /// or a register), the function will attempt to insert computation of it + /// to the loop's preheader. + CountValue *computeCount(MachineLoop *Loop, const MachineOperand *Start, + const MachineOperand *End, unsigned IVReg, + int64_t IVBump, Comparison::Kind Cmp) const; + + /// \brief Return true if the instruction is not valid within a hardware + /// loop. + bool isInvalidLoopOperation(const MachineInstr *MI, + bool IsInnerHWLoop) const; + + /// \brief Return true if the loop contains an instruction that inhibits + /// using the hardware loop. + bool containsInvalidInstruction(MachineLoop *L, bool IsInnerHWLoop) const; + + /// \brief Given a loop, check if we can convert it to a hardware loop. + /// If so, then perform the conversion and return true. + bool convertToHardwareLoop(MachineLoop *L, bool &L0used, bool &L1used); + + /// \brief Return true if the instruction is now dead. + bool isDead(const MachineInstr *MI, + SmallVectorImpl<MachineInstr *> &DeadPhis) const; + + /// \brief Remove the instruction if it is now dead. + void removeIfDead(MachineInstr *MI); + + /// \brief Make sure that the "bump" instruction executes before the + /// compare. We need that for the IV fixup, so that the compare + /// instruction would not use a bumped value that has not yet been + /// defined. If the instructions are out of order, try to reorder them. + bool orderBumpCompare(MachineInstr *BumpI, MachineInstr *CmpI); + + /// \brief Return true if MO and MI pair is visited only once. If visited + /// more than once, this indicates there is recursion. In such a case, + /// return false. + bool isLoopFeeder(MachineLoop *L, MachineBasicBlock *A, MachineInstr *MI, + const MachineOperand *MO, + LoopFeederMap &LoopFeederPhi) const; + + /// \brief Return true if the Phi may generate a value that may underflow, + /// or may wrap. + bool phiMayWrapOrUnderflow(MachineInstr *Phi, const MachineOperand *EndVal, + MachineBasicBlock *MBB, MachineLoop *L, + LoopFeederMap &LoopFeederPhi) const; + + /// \brief Return true if the induction variable may underflow an unsigned + /// value in the first iteration. 
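+    /// For example, an unsigned value that starts at 0 and is decremented
+    /// before the exit compare of a do-while loop.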
+ bool loopCountMayWrapOrUnderFlow(const MachineOperand *InitVal, + const MachineOperand *EndVal, + MachineBasicBlock *MBB, MachineLoop *L, + LoopFeederMap &LoopFeederPhi) const; + + /// \brief Check if the given operand has a compile-time known constant + /// value. Return true if yes, and false otherwise. When returning true, set + /// Val to the corresponding constant value. + bool checkForImmediate(const MachineOperand &MO, int64_t &Val) const; + + /// \brief Check if the operand has a compile-time known constant value. + bool isImmediate(const MachineOperand &MO) const { + int64_t V; + return checkForImmediate(MO, V); + } + + /// \brief Return the immediate for the specified operand. + int64_t getImmediate(const MachineOperand &MO) const { + int64_t V; + if (!checkForImmediate(MO, V)) + llvm_unreachable("Invalid operand"); + return V; + } + + /// \brief Reset the given machine operand to now refer to a new immediate + /// value. Assumes that the operand was already referencing an immediate + /// value, either directly, or via a register. + void setImmediate(MachineOperand &MO, int64_t Val); + + /// \brief Fix the data flow of the induction varible. + /// The desired flow is: phi ---> bump -+-> comparison-in-latch. + /// | + /// +-> back to phi + /// where "bump" is the increment of the induction variable: + /// iv = iv + #const. + /// Due to some prior code transformations, the actual flow may look + /// like this: + /// phi -+-> bump ---> back to phi + /// | + /// +-> comparison-in-latch (against upper_bound-bump), + /// i.e. the comparison that controls the loop execution may be using + /// the value of the induction variable from before the increment. + /// + /// Return true if the loop's flow is the desired one (i.e. it's + /// either been fixed, or no fixing was necessary). + /// Otherwise, return false. This can happen if the induction variable + /// couldn't be identified, or if the value in the latch's comparison + /// cannot be adjusted to reflect the post-bump value. + bool fixupInductionVariable(MachineLoop *L); + + /// \brief Given a loop, if it does not have a preheader, create one. + /// Return the block that is the preheader. + MachineBasicBlock *createPreheaderForLoop(MachineLoop *L); + }; + + char HexagonHardwareLoops::ID = 0; +#ifndef NDEBUG + int HexagonHardwareLoops::Counter = 0; +#endif + + /// \brief Abstraction for a trip count of a loop. A smaller version + /// of the MachineOperand class without the concerns of changing the + /// operand representation. 
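+  /// A CountValue is either a (virtual register, subregister) pair or an
+  /// immediate trip count.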
+ class CountValue { + public: + enum CountValueType { + CV_Register, + CV_Immediate + }; + private: + CountValueType Kind; + union Values { + struct { + unsigned Reg; + unsigned Sub; + } R; + unsigned ImmVal; + } Contents; + + public: + explicit CountValue(CountValueType t, unsigned v, unsigned u = 0) { + Kind = t; + if (Kind == CV_Register) { + Contents.R.Reg = v; + Contents.R.Sub = u; + } else { + Contents.ImmVal = v; + } + } + bool isReg() const { return Kind == CV_Register; } + bool isImm() const { return Kind == CV_Immediate; } + + unsigned getReg() const { + assert(isReg() && "Wrong CountValue accessor"); + return Contents.R.Reg; + } + unsigned getSubReg() const { + assert(isReg() && "Wrong CountValue accessor"); + return Contents.R.Sub; + } + unsigned getImm() const { + assert(isImm() && "Wrong CountValue accessor"); + return Contents.ImmVal; + } + + void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr) const { + if (isReg()) { OS << PrintReg(Contents.R.Reg, TRI, Contents.R.Sub); } + if (isImm()) { OS << Contents.ImmVal; } + } + }; +} // end anonymous namespace + + +INITIALIZE_PASS_BEGIN(HexagonHardwareLoops, "hwloops", + "Hexagon Hardware Loops", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(HexagonHardwareLoops, "hwloops", + "Hexagon Hardware Loops", false, false) + +FunctionPass *llvm::createHexagonHardwareLoops() { + return new HexagonHardwareLoops(); +} + +bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) { + DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n"); + + bool Changed = false; + + MLI = &getAnalysis<MachineLoopInfo>(); + MRI = &MF.getRegInfo(); + MDT = &getAnalysis<MachineDominatorTree>(); + TII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo(); + + for (auto &L : *MLI) + if (!L->getParentLoop()) { + bool L0Used = false; + bool L1Used = false; + Changed |= convertToHardwareLoop(L, L0Used, L1Used); + } + + return Changed; +} + +/// \brief Return the latch block if it's one of the exiting blocks. Otherwise, +/// return the exiting block. Return 'null' when multiple exiting blocks are +/// present. +static MachineBasicBlock* getExitingBlock(MachineLoop *L) { + if (MachineBasicBlock *Latch = L->getLoopLatch()) { + if (L->isLoopExiting(Latch)) + return Latch; + else + return L->getExitingBlock(); + } + return nullptr; +} + +bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L, + unsigned &Reg, + int64_t &IVBump, + MachineInstr *&IVOp + ) const { + MachineBasicBlock *Header = L->getHeader(); + MachineBasicBlock *Preheader = L->getLoopPreheader(); + MachineBasicBlock *Latch = L->getLoopLatch(); + MachineBasicBlock *ExitingBlock = getExitingBlock(L); + if (!Header || !Preheader || !Latch || !ExitingBlock) + return false; + + // This pair represents an induction register together with an immediate + // value that will be added to it in each loop iteration. + typedef std::pair<unsigned,int64_t> RegisterBump; + + // Mapping: R.next -> (R, bump), where R, R.next and bump are derived + // from an induction operation + // R.next = R + bump + // where bump is an immediate value. + typedef std::map<unsigned,RegisterBump> InductionMap; + + InductionMap IndMap; + + typedef MachineBasicBlock::instr_iterator instr_iterator; + for (instr_iterator I = Header->instr_begin(), E = Header->instr_end(); + I != E && I->isPHI(); ++I) { + MachineInstr *Phi = &*I; + + // Have a PHI instruction. 
Get the operand that corresponds to the + // latch block, and see if is a result of an addition of form "reg+imm", + // where the "reg" is defined by the PHI node we are looking at. + for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) { + if (Phi->getOperand(i+1).getMBB() != Latch) + continue; + + unsigned PhiOpReg = Phi->getOperand(i).getReg(); + MachineInstr *DI = MRI->getVRegDef(PhiOpReg); + unsigned UpdOpc = DI->getOpcode(); + bool isAdd = (UpdOpc == Hexagon::A2_addi || UpdOpc == Hexagon::A2_addp); + + if (isAdd) { + // If the register operand to the add is the PHI we're looking at, this + // meets the induction pattern. + unsigned IndReg = DI->getOperand(1).getReg(); + MachineOperand &Opnd2 = DI->getOperand(2); + int64_t V; + if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) { + unsigned UpdReg = DI->getOperand(0).getReg(); + IndMap.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V))); + } + } + } // for (i) + } // for (instr) + + SmallVector<MachineOperand,2> Cond; + MachineBasicBlock *TB = nullptr, *FB = nullptr; + bool NotAnalyzed = TII->AnalyzeBranch(*ExitingBlock, TB, FB, Cond, false); + if (NotAnalyzed) + return false; + + unsigned PredR, PredPos, PredRegFlags; + if (!TII->getPredReg(Cond, PredR, PredPos, PredRegFlags)) + return false; + + MachineInstr *PredI = MRI->getVRegDef(PredR); + if (!PredI->isCompare()) + return false; + + unsigned CmpReg1 = 0, CmpReg2 = 0; + int CmpImm = 0, CmpMask = 0; + bool CmpAnalyzed = TII->analyzeCompare(PredI, CmpReg1, CmpReg2, + CmpMask, CmpImm); + // Fail if the compare was not analyzed, or it's not comparing a register + // with an immediate value. Not checking the mask here, since we handle + // the individual compare opcodes (including A4_cmpb*) later on. + if (!CmpAnalyzed) + return false; + + // Exactly one of the input registers to the comparison should be among + // the induction registers. + InductionMap::iterator IndMapEnd = IndMap.end(); + InductionMap::iterator F = IndMapEnd; + if (CmpReg1 != 0) { + InductionMap::iterator F1 = IndMap.find(CmpReg1); + if (F1 != IndMapEnd) + F = F1; + } + if (CmpReg2 != 0) { + InductionMap::iterator F2 = IndMap.find(CmpReg2); + if (F2 != IndMapEnd) { + if (F != IndMapEnd) + return false; + F = F2; + } + } + if (F == IndMapEnd) + return false; + + Reg = F->second.first; + IVBump = F->second.second; + IVOp = MRI->getVRegDef(F->first); + return true; +} + +// Return the comparison kind for the specified opcode. +HexagonHardwareLoops::Comparison::Kind +HexagonHardwareLoops::getComparisonKind(unsigned CondOpc, + MachineOperand *InitialValue, + const MachineOperand *EndValue, + int64_t IVBump) const { + Comparison::Kind Cmp = (Comparison::Kind)0; + switch (CondOpc) { + case Hexagon::C2_cmpeqi: + case Hexagon::C2_cmpeq: + case Hexagon::C2_cmpeqp: + Cmp = Comparison::EQ; + break; + case Hexagon::C4_cmpneq: + case Hexagon::C4_cmpneqi: + Cmp = Comparison::NE; + break; + case Hexagon::C4_cmplte: + Cmp = Comparison::LEs; + break; + case Hexagon::C4_cmplteu: + Cmp = Comparison::LEu; + break; + case Hexagon::C2_cmpgtui: + case Hexagon::C2_cmpgtu: + case Hexagon::C2_cmpgtup: + Cmp = Comparison::GTu; + break; + case Hexagon::C2_cmpgti: + case Hexagon::C2_cmpgt: + case Hexagon::C2_cmpgtp: + Cmp = Comparison::GTs; + break; + default: + return (Comparison::Kind)0; + } + return Cmp; +} + +/// \brief Analyze the statements in a loop to determine if the loop has +/// a computable trip count and, if so, return a value that represents +/// the trip count expression. 
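+/// Returns nullptr if no usable trip count can be found.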
+/// +/// This function iterates over the phi nodes in the loop to check for +/// induction variable patterns that are used in the calculation for +/// the number of time the loop is executed. +CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L, + SmallVectorImpl<MachineInstr *> &OldInsts) { + MachineBasicBlock *TopMBB = L->getTopBlock(); + MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin(); + assert(PI != TopMBB->pred_end() && + "Loop must have more than one incoming edge!"); + MachineBasicBlock *Backedge = *PI++; + if (PI == TopMBB->pred_end()) // dead loop? + return nullptr; + MachineBasicBlock *Incoming = *PI++; + if (PI != TopMBB->pred_end()) // multiple backedges? + return nullptr; + + // Make sure there is one incoming and one backedge and determine which + // is which. + if (L->contains(Incoming)) { + if (L->contains(Backedge)) + return nullptr; + std::swap(Incoming, Backedge); + } else if (!L->contains(Backedge)) + return nullptr; + + // Look for the cmp instruction to determine if we can get a useful trip + // count. The trip count can be either a register or an immediate. The + // location of the value depends upon the type (reg or imm). + MachineBasicBlock *ExitingBlock = getExitingBlock(L); + if (!ExitingBlock) + return nullptr; + + unsigned IVReg = 0; + int64_t IVBump = 0; + MachineInstr *IVOp; + bool FoundIV = findInductionRegister(L, IVReg, IVBump, IVOp); + if (!FoundIV) + return nullptr; + + MachineBasicBlock *Preheader = L->getLoopPreheader(); + + MachineOperand *InitialValue = nullptr; + MachineInstr *IV_Phi = MRI->getVRegDef(IVReg); + MachineBasicBlock *Latch = L->getLoopLatch(); + for (unsigned i = 1, n = IV_Phi->getNumOperands(); i < n; i += 2) { + MachineBasicBlock *MBB = IV_Phi->getOperand(i+1).getMBB(); + if (MBB == Preheader) + InitialValue = &IV_Phi->getOperand(i); + else if (MBB == Latch) + IVReg = IV_Phi->getOperand(i).getReg(); // Want IV reg after bump. + } + if (!InitialValue) + return nullptr; + + SmallVector<MachineOperand,2> Cond; + MachineBasicBlock *TB = nullptr, *FB = nullptr; + bool NotAnalyzed = TII->AnalyzeBranch(*ExitingBlock, TB, FB, Cond, false); + if (NotAnalyzed) + return nullptr; + + MachineBasicBlock *Header = L->getHeader(); + // TB must be non-null. If FB is also non-null, one of them must be + // the header. Otherwise, branch to TB could be exiting the loop, and + // the fall through can go to the header. + assert (TB && "Exit block without a branch?"); + if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) { + MachineBasicBlock *LTB = 0, *LFB = 0; + SmallVector<MachineOperand,2> LCond; + bool NotAnalyzed = TII->AnalyzeBranch(*Latch, LTB, LFB, LCond, false); + if (NotAnalyzed) + return nullptr; + if (TB == Latch) + TB = (LTB == Header) ? LTB : LFB; + else + FB = (LTB == Header) ? LTB: LFB; + } + assert ((!FB || TB == Header || FB == Header) && "Branches not to header?"); + if (!TB || (FB && TB != Header && FB != Header)) + return nullptr; + + // Branches of form "if (!P) ..." cause HexagonInstrInfo::AnalyzeBranch + // to put imm(0), followed by P in the vector Cond. + // If TB is not the header, it means that the "not-taken" path must lead + // to the header. 
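+  // The comparison therefore has to be negated when exactly one of these
+  // two conditions holds, which is what the XOR below computes.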
+ bool Negated = TII->predOpcodeHasNot(Cond) ^ (TB != Header); + unsigned PredReg, PredPos, PredRegFlags; + if (!TII->getPredReg(Cond, PredReg, PredPos, PredRegFlags)) + return nullptr; + MachineInstr *CondI = MRI->getVRegDef(PredReg); + unsigned CondOpc = CondI->getOpcode(); + + unsigned CmpReg1 = 0, CmpReg2 = 0; + int Mask = 0, ImmValue = 0; + bool AnalyzedCmp = TII->analyzeCompare(CondI, CmpReg1, CmpReg2, + Mask, ImmValue); + if (!AnalyzedCmp) + return nullptr; + + // The comparison operator type determines how we compute the loop + // trip count. + OldInsts.push_back(CondI); + OldInsts.push_back(IVOp); + + // Sadly, the following code gets information based on the position + // of the operands in the compare instruction. This has to be done + // this way, because the comparisons check for a specific relationship + // between the operands (e.g. is-less-than), rather than to find out + // what relationship the operands are in (as on PPC). + Comparison::Kind Cmp; + bool isSwapped = false; + const MachineOperand &Op1 = CondI->getOperand(1); + const MachineOperand &Op2 = CondI->getOperand(2); + const MachineOperand *EndValue = nullptr; + + if (Op1.isReg()) { + if (Op2.isImm() || Op1.getReg() == IVReg) + EndValue = &Op2; + else { + EndValue = &Op1; + isSwapped = true; + } + } + + if (!EndValue) + return nullptr; + + Cmp = getComparisonKind(CondOpc, InitialValue, EndValue, IVBump); + if (!Cmp) + return nullptr; + if (Negated) + Cmp = Comparison::getNegatedComparison(Cmp); + if (isSwapped) + Cmp = Comparison::getSwappedComparison(Cmp); + + if (InitialValue->isReg()) { + unsigned R = InitialValue->getReg(); + MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent(); + if (!MDT->properlyDominates(DefBB, Header)) + return nullptr; + OldInsts.push_back(MRI->getVRegDef(R)); + } + if (EndValue->isReg()) { + unsigned R = EndValue->getReg(); + MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent(); + if (!MDT->properlyDominates(DefBB, Header)) + return nullptr; + OldInsts.push_back(MRI->getVRegDef(R)); + } + + return computeCount(L, InitialValue, EndValue, IVReg, IVBump, Cmp); +} + +/// \brief Helper function that returns the expression that represents the +/// number of times a loop iterates. The function takes the operands that +/// represent the loop start value, loop end value, and induction value. +/// Based upon these operands, the function attempts to compute the trip count. +CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop, + const MachineOperand *Start, + const MachineOperand *End, + unsigned IVReg, + int64_t IVBump, + Comparison::Kind Cmp) const { + // Cannot handle comparison EQ, i.e. while (A == B). + if (Cmp == Comparison::EQ) + return nullptr; + + // Check if either the start or end values are an assignment of an immediate. + // If so, use the immediate value rather than the register. 
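+  // (A2_tfrsi and A2_tfrpi assign an immediate to a register, so their
+  // operand(1) is the constant being assigned.)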
+ if (Start->isReg()) { + const MachineInstr *StartValInstr = MRI->getVRegDef(Start->getReg()); + if (StartValInstr && (StartValInstr->getOpcode() == Hexagon::A2_tfrsi || + StartValInstr->getOpcode() == Hexagon::A2_tfrpi)) + Start = &StartValInstr->getOperand(1); + } + if (End->isReg()) { + const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg()); + if (EndValInstr && (EndValInstr->getOpcode() == Hexagon::A2_tfrsi || + EndValInstr->getOpcode() == Hexagon::A2_tfrpi)) + End = &EndValInstr->getOperand(1); + } + + if (!Start->isReg() && !Start->isImm()) + return nullptr; + if (!End->isReg() && !End->isImm()) + return nullptr; + + bool CmpLess = Cmp & Comparison::L; + bool CmpGreater = Cmp & Comparison::G; + bool CmpHasEqual = Cmp & Comparison::EQ; + + // Avoid certain wrap-arounds. This doesn't detect all wrap-arounds. + if (CmpLess && IVBump < 0) + // Loop going while iv is "less" with the iv value going down. Must wrap. + return nullptr; + + if (CmpGreater && IVBump > 0) + // Loop going while iv is "greater" with the iv value going up. Must wrap. + return nullptr; + + // Phis that may feed into the loop. + LoopFeederMap LoopFeederPhi; + + // Check if the initial value may be zero and can be decremented in the first + // iteration. If the value is zero, the endloop instruction will not decrement + // the loop counter, so we shouldn't generate a hardware loop in this case. + if (loopCountMayWrapOrUnderFlow(Start, End, Loop->getLoopPreheader(), Loop, + LoopFeederPhi)) + return nullptr; + + if (Start->isImm() && End->isImm()) { + // Both, start and end are immediates. + int64_t StartV = Start->getImm(); + int64_t EndV = End->getImm(); + int64_t Dist = EndV - StartV; + if (Dist == 0) + return nullptr; + + bool Exact = (Dist % IVBump) == 0; + + if (Cmp == Comparison::NE) { + if (!Exact) + return nullptr; + if ((Dist < 0) ^ (IVBump < 0)) + return nullptr; + } + + // For comparisons that include the final value (i.e. include equality + // with the final value), we need to increase the distance by 1. + if (CmpHasEqual) + Dist = Dist > 0 ? Dist+1 : Dist-1; + + // For the loop to iterate, CmpLess should imply Dist > 0. Similarly, + // CmpGreater should imply Dist < 0. These conditions could actually + // fail, for example, in unreachable code (which may still appear to be + // reachable in the CFG). + if ((CmpLess && Dist < 0) || (CmpGreater && Dist > 0)) + return nullptr; + + // "Normalized" distance, i.e. with the bump set to +-1. + int64_t Dist1 = (IVBump > 0) ? (Dist + (IVBump - 1)) / IVBump + : (-Dist + (-IVBump - 1)) / (-IVBump); + assert (Dist1 > 0 && "Fishy thing. Both operands have the same sign."); + + uint64_t Count = Dist1; + + if (Count > 0xFFFFFFFFULL) + return nullptr; + + return new CountValue(CountValue::CV_Immediate, Count); + } + + // A general case: Start and End are some values, but the actual + // iteration count may not be available. If it is not, insert + // a computation of it into the preheader. + + // If the induction variable bump is not a power of 2, quit. + // Othwerise we'd need a general integer division. + if (!isPowerOf2_64(std::abs(IVBump))) + return nullptr; + + MachineBasicBlock *PH = Loop->getLoopPreheader(); + assert (PH && "Should have a preheader by now"); + MachineBasicBlock::iterator InsertPos = PH->getFirstTerminator(); + DebugLoc DL; + if (InsertPos != PH->end()) + DL = InsertPos->getDebugLoc(); + + // If Start is an immediate and End is a register, the trip count + // will be "reg - imm". 
Hexagon's "subtract immediate" instruction + // is actually "reg + -imm". + + // If the loop IV is going downwards, i.e. if the bump is negative, + // then the iteration count (computed as End-Start) will need to be + // negated. To avoid the negation, just swap Start and End. + if (IVBump < 0) { + std::swap(Start, End); + IVBump = -IVBump; + } + // Cmp may now have a wrong direction, e.g. LEs may now be GEs. + // Signedness, and "including equality" are preserved. + + bool RegToImm = Start->isReg() && End->isImm(); // for (reg..imm) + bool RegToReg = Start->isReg() && End->isReg(); // for (reg..reg) + + int64_t StartV = 0, EndV = 0; + if (Start->isImm()) + StartV = Start->getImm(); + if (End->isImm()) + EndV = End->getImm(); + + int64_t AdjV = 0; + // To compute the iteration count, we would need this computation: + // Count = (End - Start + (IVBump-1)) / IVBump + // or, when CmpHasEqual: + // Count = (End - Start + (IVBump-1)+1) / IVBump + // The "IVBump-1" part is the adjustment (AdjV). We can avoid + // generating an instruction specifically to add it if we can adjust + // the immediate values for Start or End. + + if (CmpHasEqual) { + // Need to add 1 to the total iteration count. + if (Start->isImm()) + StartV--; + else if (End->isImm()) + EndV++; + else + AdjV += 1; + } + + if (Cmp != Comparison::NE) { + if (Start->isImm()) + StartV -= (IVBump-1); + else if (End->isImm()) + EndV += (IVBump-1); + else + AdjV += (IVBump-1); + } + + unsigned R = 0, SR = 0; + if (Start->isReg()) { + R = Start->getReg(); + SR = Start->getSubReg(); + } else { + R = End->getReg(); + SR = End->getSubReg(); + } + const TargetRegisterClass *RC = MRI->getRegClass(R); + // Hardware loops cannot handle 64-bit registers. If it's a double + // register, it has to have a subregister. + if (!SR && RC == &Hexagon::DoubleRegsRegClass) + return nullptr; + const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass; + + // Compute DistR (register with the distance between Start and End). + unsigned DistR, DistSR; + + // Avoid special case, where the start value is an imm(0). + if (Start->isImm() && StartV == 0) { + DistR = End->getReg(); + DistSR = End->getSubReg(); + } else { + const MCInstrDesc &SubD = RegToReg ? TII->get(Hexagon::A2_sub) : + (RegToImm ? TII->get(Hexagon::A2_subri) : + TII->get(Hexagon::A2_addi)); + if (RegToReg || RegToImm) { + unsigned SubR = MRI->createVirtualRegister(IntRC); + MachineInstrBuilder SubIB = + BuildMI(*PH, InsertPos, DL, SubD, SubR); + + if (RegToReg) + SubIB.addReg(End->getReg(), 0, End->getSubReg()) + .addReg(Start->getReg(), 0, Start->getSubReg()); + else + SubIB.addImm(EndV) + .addReg(Start->getReg(), 0, Start->getSubReg()); + DistR = SubR; + } else { + // If the loop has been unrolled, we should use the original loop count + // instead of recalculating the value. This will avoid additional + // 'Add' instruction. + const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg()); + if (EndValInstr->getOpcode() == Hexagon::A2_addi && + EndValInstr->getOperand(2).getImm() == StartV) { + DistR = EndValInstr->getOperand(1).getReg(); + } else { + unsigned SubR = MRI->createVirtualRegister(IntRC); + MachineInstrBuilder SubIB = + BuildMI(*PH, InsertPos, DL, SubD, SubR); + SubIB.addReg(End->getReg(), 0, End->getSubReg()) + .addImm(-StartV); + DistR = SubR; + } + } + DistSR = 0; + } + + // From DistR, compute AdjR (register with the adjusted distance). 
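+  // For example, for a loop "for (i = 0; i < n; i += 4)" the IVBump-1
+  // adjustment has already been folded into StartV above, so DistR holds
+  // n + 3, AdjV is 0, and the count computed below ends up as (n + 3) >> 2.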
+ unsigned AdjR, AdjSR; + + if (AdjV == 0) { + AdjR = DistR; + AdjSR = DistSR; + } else { + // Generate CountR = ADD DistR, AdjVal + unsigned AddR = MRI->createVirtualRegister(IntRC); + MCInstrDesc const &AddD = TII->get(Hexagon::A2_addi); + BuildMI(*PH, InsertPos, DL, AddD, AddR) + .addReg(DistR, 0, DistSR) + .addImm(AdjV); + + AdjR = AddR; + AdjSR = 0; + } + + // From AdjR, compute CountR (register with the final count). + unsigned CountR, CountSR; + + if (IVBump == 1) { + CountR = AdjR; + CountSR = AdjSR; + } else { + // The IV bump is a power of two. Log_2(IV bump) is the shift amount. + unsigned Shift = Log2_32(IVBump); + + // Generate NormR = LSR DistR, Shift. + unsigned LsrR = MRI->createVirtualRegister(IntRC); + const MCInstrDesc &LsrD = TII->get(Hexagon::S2_lsr_i_r); + BuildMI(*PH, InsertPos, DL, LsrD, LsrR) + .addReg(AdjR, 0, AdjSR) + .addImm(Shift); + + CountR = LsrR; + CountSR = 0; + } + + return new CountValue(CountValue::CV_Register, CountR, CountSR); +} + +/// \brief Return true if the operation is invalid within hardware loop. +bool HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI, + bool IsInnerHWLoop) const { + + // Call is not allowed because the callee may use a hardware loop except for + // the case when the call never returns. + if (MI->getDesc().isCall() && MI->getOpcode() != Hexagon::CALLv3nr) + return true; + + // Check if the instruction defines a hardware loop register. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned R = MO.getReg(); + if (IsInnerHWLoop && (R == Hexagon::LC0 || R == Hexagon::SA0 || + R == Hexagon::LC1 || R == Hexagon::SA1)) + return true; + if (!IsInnerHWLoop && (R == Hexagon::LC1 || R == Hexagon::SA1)) + return true; + } + return false; +} + +/// \brief Return true if the loop contains an instruction that inhibits +/// the use of the hardware loop instruction. +bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L, + bool IsInnerHWLoop) const { + const std::vector<MachineBasicBlock *> &Blocks = L->getBlocks(); + DEBUG(dbgs() << "\nhw_loop head, BB#" << Blocks[0]->getNumber();); + for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { + MachineBasicBlock *MBB = Blocks[i]; + for (MachineBasicBlock::iterator + MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) { + const MachineInstr *MI = &*MII; + if (isInvalidLoopOperation(MI, IsInnerHWLoop)) { + DEBUG(dbgs()<< "\nCannot convert to hw_loop due to:"; MI->dump();); + return true; + } + } + } + return false; +} + +/// \brief Returns true if the instruction is dead. This was essentially +/// copied from DeadMachineInstructionElim::isDead, but with special cases +/// for inline asm, physical registers and instructions with side effects +/// removed. +bool HexagonHardwareLoops::isDead(const MachineInstr *MI, + SmallVectorImpl<MachineInstr *> &DeadPhis) const { + // Examine each operand. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + + unsigned Reg = MO.getReg(); + if (MRI->use_nodbg_empty(Reg)) + continue; + + typedef MachineRegisterInfo::use_nodbg_iterator use_nodbg_iterator; + + // This instruction has users, but if the only user is the phi node for the + // parent block, and the only use of that phi node is this instruction, then + // this instruction is dead: both it (and the phi node) can be removed. 
+ use_nodbg_iterator I = MRI->use_nodbg_begin(Reg); + use_nodbg_iterator End = MRI->use_nodbg_end(); + if (std::next(I) != End || !I->getParent()->isPHI()) + return false; + + MachineInstr *OnePhi = I->getParent(); + for (unsigned j = 0, f = OnePhi->getNumOperands(); j != f; ++j) { + const MachineOperand &OPO = OnePhi->getOperand(j); + if (!OPO.isReg() || !OPO.isDef()) + continue; + + unsigned OPReg = OPO.getReg(); + use_nodbg_iterator nextJ; + for (use_nodbg_iterator J = MRI->use_nodbg_begin(OPReg); + J != End; J = nextJ) { + nextJ = std::next(J); + MachineOperand &Use = *J; + MachineInstr *UseMI = Use.getParent(); + + // If the phi node has a user that is not MI, bail. + if (MI != UseMI) + return false; + } + } + DeadPhis.push_back(OnePhi); + } + + // If there are no defs with uses, the instruction is dead. + return true; +} + +void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) { + // This procedure was essentially copied from DeadMachineInstructionElim. + + SmallVector<MachineInstr*, 1> DeadPhis; + if (isDead(MI, DeadPhis)) { + DEBUG(dbgs() << "HW looping will remove: " << *MI); + + // It is possible that some DBG_VALUE instructions refer to this + // instruction. Examine each def operand for such references; + // if found, mark the DBG_VALUE as undef (but don't delete it). + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned Reg = MO.getReg(); + MachineRegisterInfo::use_iterator nextI; + for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg), + E = MRI->use_end(); I != E; I = nextI) { + nextI = std::next(I); // I is invalidated by the setReg + MachineOperand &Use = *I; + MachineInstr *UseMI = I->getParent(); + if (UseMI == MI) + continue; + if (Use.isDebug()) + UseMI->getOperand(0).setReg(0U); + } + } + + MI->eraseFromParent(); + for (unsigned i = 0; i < DeadPhis.size(); ++i) + DeadPhis[i]->eraseFromParent(); + } +} + +/// \brief Check if the loop is a candidate for converting to a hardware +/// loop. If so, then perform the transformation. +/// +/// This function works on innermost loops first. A loop can be converted +/// if it is a counting loop; either a register value or an immediate. +/// +/// The code makes several assumptions about the representation of the loop +/// in llvm. +bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L, + bool &RecL0used, + bool &RecL1used) { + // This is just for sanity. + assert(L->getHeader() && "Loop without a header?"); + + bool Changed = false; + bool L0Used = false; + bool L1Used = false; + + // Process nested loops first. + for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) { + Changed |= convertToHardwareLoop(*I, RecL0used, RecL1used); + L0Used |= RecL0used; + L1Used |= RecL1used; + } + + // If a nested loop has been converted, then we can't convert this loop. + if (Changed && L0Used && L1Used) + return Changed; + + unsigned LOOP_i; + unsigned LOOP_r; + unsigned ENDLOOP; + + // Flag used to track loopN instruction: + // 1 - Hardware loop is being generated for the inner most loop. + // 0 - Hardware loop is being generated for the outer loop. + unsigned IsInnerHWLoop = 1; + + if (L0Used) { + LOOP_i = Hexagon::J2_loop1i; + LOOP_r = Hexagon::J2_loop1r; + ENDLOOP = Hexagon::ENDLOOP1; + IsInnerHWLoop = 0; + } else { + LOOP_i = Hexagon::J2_loop0i; + LOOP_r = Hexagon::J2_loop0r; + ENDLOOP = Hexagon::ENDLOOP0; + } + +#ifndef NDEBUG + // Stop trying after reaching the limit (if any). 
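+  // (HWLoopLimit is -1 by default, which means no limit.)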
+ int Limit = HWLoopLimit; + if (Limit >= 0) { + if (Counter >= HWLoopLimit) + return false; + Counter++; + } +#endif + + // Does the loop contain any invalid instructions? + if (containsInvalidInstruction(L, IsInnerHWLoop)) + return false; + + MachineBasicBlock *LastMBB = getExitingBlock(L); + // Don't generate hw loop if the loop has more than one exit. + if (!LastMBB) + return false; + + MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator(); + if (LastI == LastMBB->end()) + return false; + + // Is the induction variable bump feeding the latch condition? + if (!fixupInductionVariable(L)) + return false; + + // Ensure the loop has a preheader: the loop instruction will be + // placed there. + MachineBasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) { + Preheader = createPreheaderForLoop(L); + if (!Preheader) + return false; + } + + MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator(); + + SmallVector<MachineInstr*, 2> OldInsts; + // Are we able to determine the trip count for the loop? + CountValue *TripCount = getLoopTripCount(L, OldInsts); + if (!TripCount) + return false; + + // Is the trip count available in the preheader? + if (TripCount->isReg()) { + // There will be a use of the register inserted into the preheader, + // so make sure that the register is actually defined at that point. + MachineInstr *TCDef = MRI->getVRegDef(TripCount->getReg()); + MachineBasicBlock *BBDef = TCDef->getParent(); + if (!MDT->dominates(BBDef, Preheader)) + return false; + } + + // Determine the loop start. + MachineBasicBlock *TopBlock = L->getTopBlock(); + MachineBasicBlock *ExitingBlock = getExitingBlock(L); + MachineBasicBlock *LoopStart = 0; + if (ExitingBlock != L->getLoopLatch()) { + MachineBasicBlock *TB = 0, *FB = 0; + SmallVector<MachineOperand, 2> Cond; + + if (TII->AnalyzeBranch(*ExitingBlock, TB, FB, Cond, false)) + return false; + + if (L->contains(TB)) + LoopStart = TB; + else if (L->contains(FB)) + LoopStart = FB; + else + return false; + } + else + LoopStart = TopBlock; + + // Convert the loop to a hardware loop. + DEBUG(dbgs() << "Change to hardware loop at "; L->dump()); + DebugLoc DL; + if (InsertPos != Preheader->end()) + DL = InsertPos->getDebugLoc(); + + if (TripCount->isReg()) { + // Create a copy of the loop count register. + unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass); + BuildMI(*Preheader, InsertPos, DL, TII->get(TargetOpcode::COPY), CountReg) + .addReg(TripCount->getReg(), 0, TripCount->getSubReg()); + // Add the Loop instruction to the beginning of the loop. + BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r)).addMBB(LoopStart) + .addReg(CountReg); + } else { + assert(TripCount->isImm() && "Expecting immediate value for trip count"); + // Add the Loop immediate instruction to the beginning of the loop, + // if the immediate fits in the instructions. Otherwise, we need to + // create a new virtual register. + int64_t CountImm = TripCount->getImm(); + if (!TII->isValidOffset(LOOP_i, CountImm)) { + unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass); + BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::A2_tfrsi), CountReg) + .addImm(CountImm); + BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r)) + .addMBB(LoopStart).addReg(CountReg); + } else + BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_i)) + .addMBB(LoopStart).addImm(CountImm); + } + + // Make sure the loop start always has a reference in the CFG. 
We need
+  // to create a BlockAddress operand to get this mechanism to work; both the
+  // MachineBasicBlock and BasicBlock objects need the flag set.
+  LoopStart->setHasAddressTaken();
+  // This line is needed to set the hasAddressTaken flag on the BasicBlock
+  // object.
+  BlockAddress::get(const_cast<BasicBlock *>(LoopStart->getBasicBlock()));
+
+  // Replace the loop branch with an endloop instruction.
+  DebugLoc LastIDL = LastI->getDebugLoc();
+  BuildMI(*LastMBB, LastI, LastIDL, TII->get(ENDLOOP)).addMBB(LoopStart);
+
+  // The loop ends with either:
+  //  - a conditional branch followed by an unconditional branch, or
+  //  - a conditional branch to the loop start.
+  if (LastI->getOpcode() == Hexagon::J2_jumpt ||
+      LastI->getOpcode() == Hexagon::J2_jumpf) {
+    // Delete one and change/add an uncond. branch out of the loop.
+    MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB();
+    LastI = LastMBB->erase(LastI);
+    if (!L->contains(BranchTarget)) {
+      if (LastI != LastMBB->end())
+        LastI = LastMBB->erase(LastI);
+      SmallVector<MachineOperand, 0> Cond;
+      TII->InsertBranch(*LastMBB, BranchTarget, nullptr, Cond, LastIDL);
+    }
+  } else {
+    // Conditional branch to loop start; just delete it.
+    LastMBB->erase(LastI);
+  }
+  delete TripCount;
+
+  // The induction operation and the comparison may now be
+  // unneeded. If these are unneeded, then remove them.
+  for (unsigned i = 0; i < OldInsts.size(); ++i)
+    removeIfDead(OldInsts[i]);
+
+  ++NumHWLoops;
+
+  // Set RecL1used and RecL0used only after the hardware loop has been
+  // successfully generated. Doing it earlier can cause the wrong loop
+  // instruction to be used.
+  if (L0Used) // Loop0 was already used. So, the correct loop must be loop1.
+    RecL1used = true;
+  else
+    RecL0used = true;
+
+  return true;
+}
+
+bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI,
+                                            MachineInstr *CmpI) {
+  assert (BumpI != CmpI && "Bump and compare in the same instruction?");
+
+  MachineBasicBlock *BB = BumpI->getParent();
+  if (CmpI->getParent() != BB)
+    return false;
+
+  typedef MachineBasicBlock::instr_iterator instr_iterator;
+  // Check if things are in order to begin with.
+  for (instr_iterator I(BumpI), E = BB->instr_end(); I != E; ++I)
+    if (&*I == CmpI)
+      return true;
+
+  // Out of order.
+  unsigned PredR = CmpI->getOperand(0).getReg();
+  bool FoundBump = false;
+  instr_iterator CmpIt = CmpI->getIterator(), NextIt = std::next(CmpIt);
+  for (instr_iterator I = NextIt, E = BB->instr_end(); I != E; ++I) {
+    MachineInstr *In = &*I;
+    for (unsigned i = 0, n = In->getNumOperands(); i < n; ++i) {
+      MachineOperand &MO = In->getOperand(i);
+      if (MO.isReg() && MO.isUse()) {
+        if (MO.getReg() == PredR) // Found an intervening use of PredR.
+          return false;
+      }
+    }
+
+    if (In == BumpI) {
+      BB->splice(++BumpI->getIterator(), BB, CmpI->getIterator());
+      FoundBump = true;
+      break;
+    }
+  }
+  assert (FoundBump && "Cannot determine instruction order");
+  return FoundBump;
+}
+
+/// This function is required to break recursion. Visiting phis in a loop may
+/// result in recursion during compilation. We break the recursion by making
+/// sure that we visit a MachineOperand and its definition in a
+/// MachineInstruction only once. If we attempt to visit more than once, then
+/// there is recursion, and we will return false.
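+/// The LoopFeederPhi map also serves as the visited set: each register is
+/// recorded together with its defining instruction on the first visit.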
+bool HexagonHardwareLoops::isLoopFeeder(MachineLoop *L, MachineBasicBlock *A,
+                                        MachineInstr *MI,
+                                        const MachineOperand *MO,
+                                        LoopFeederMap &LoopFeederPhi) const {
+  if (LoopFeederPhi.find(MO->getReg()) == LoopFeederPhi.end()) {
+    const std::vector<MachineBasicBlock *> &Blocks = L->getBlocks();
+    DEBUG(dbgs() << "\nhw_loop head, BB#" << Blocks[0]->getNumber(););
+    // Ignore all BBs that form the loop.
+    for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+      MachineBasicBlock *MBB = Blocks[i];
+      if (A == MBB)
+        return false;
+    }
+    MachineInstr *Def = MRI->getVRegDef(MO->getReg());
+    LoopFeederPhi.insert(std::make_pair(MO->getReg(), Def));
+    return true;
+  } else
+    // Already visited node.
+    return false;
+}
+
+/// Return true if a Phi may generate a value that can underflow.
+/// This function calls loopCountMayWrapOrUnderFlow for each Phi operand.
+bool HexagonHardwareLoops::phiMayWrapOrUnderflow(
+    MachineInstr *Phi, const MachineOperand *EndVal, MachineBasicBlock *MBB,
+    MachineLoop *L, LoopFeederMap &LoopFeederPhi) const {
+  assert(Phi->isPHI() && "Expecting a Phi.");
+  // Walk through each Phi, and its used operands. Make sure that
+  // if there is recursion in Phi, we won't generate hardware loops.
+  for (int i = 1, n = Phi->getNumOperands(); i < n; i += 2)
+    if (isLoopFeeder(L, MBB, Phi, &(Phi->getOperand(i)), LoopFeederPhi))
+      if (loopCountMayWrapOrUnderFlow(&(Phi->getOperand(i)), EndVal,
+                                      Phi->getParent(), L, LoopFeederPhi))
+        return true;
+  return false;
+}
+
+/// Return true if the induction variable can underflow in the first iteration.
+/// An example is an initial unsigned value that is 0 and is decremented in the
+/// first iteration of a do-while loop. In this case, we cannot generate a
+/// hardware loop because the endloop instruction does not decrement the loop
+/// counter if it is <= 1. We only need to perform this analysis if the
+/// initial value is a register.
+///
+/// This function assumes the initial value may underflow unless proven
+/// otherwise. If the type is signed, then we don't care because signed
+/// underflow is undefined. We attempt to prove the initial value is not
+/// zero by performing a crude analysis of the loop counter. This function
+/// checks if the initial value is used in any comparison prior to the loop
+/// and, if so, assumes the comparison is a range check. This is inexact,
+/// but will catch the simple cases.
+bool HexagonHardwareLoops::loopCountMayWrapOrUnderFlow(
+    const MachineOperand *InitVal, const MachineOperand *EndVal,
+    MachineBasicBlock *MBB, MachineLoop *L,
+    LoopFeederMap &LoopFeederPhi) const {
+  // Only check register values since they are unknown.
+  if (!InitVal->isReg())
+    return false;
+
+  if (!EndVal->isImm())
+    return false;
+
+  // A register value that is assigned an immediate is a known value, and it
+  // won't underflow in the first iteration.
+  int64_t Imm;
+  if (checkForImmediate(*InitVal, Imm))
+    return (EndVal->getImm() == Imm);
+
+  unsigned Reg = InitVal->getReg();
+
+  // We don't know the value of a physical register.
+  if (!TargetRegisterInfo::isVirtualRegister(Reg))
+    return true;
+
+  MachineInstr *Def = MRI->getVRegDef(Reg);
+  if (!Def)
+    return true;
+
+  // If the initial value is a Phi or copy and the operands may not underflow,
+  // then the definition cannot underflow either.
+ if (Def->isPHI() && !phiMayWrapOrUnderflow(Def, EndVal, Def->getParent(), + L, LoopFeederPhi)) + return false; + if (Def->isCopy() && !loopCountMayWrapOrUnderFlow(&(Def->getOperand(1)), + EndVal, Def->getParent(), + L, LoopFeederPhi)) + return false; + + // Iterate over the uses of the initial value. If the initial value is used + // in a compare, then we assume this is a range check that ensures the loop + // doesn't underflow. This is not an exact test and should be improved. + for (MachineRegisterInfo::use_instr_nodbg_iterator I = MRI->use_instr_nodbg_begin(Reg), + E = MRI->use_instr_nodbg_end(); I != E; ++I) { + MachineInstr *MI = &*I; + unsigned CmpReg1 = 0, CmpReg2 = 0; + int CmpMask = 0, CmpValue = 0; + + if (!TII->analyzeCompare(MI, CmpReg1, CmpReg2, CmpMask, CmpValue)) + continue; + + MachineBasicBlock *TBB = 0, *FBB = 0; + SmallVector<MachineOperand, 2> Cond; + if (TII->AnalyzeBranch(*MI->getParent(), TBB, FBB, Cond, false)) + continue; + + Comparison::Kind Cmp = getComparisonKind(MI->getOpcode(), 0, 0, 0); + if (Cmp == 0) + continue; + if (TII->predOpcodeHasNot(Cond) ^ (TBB != MBB)) + Cmp = Comparison::getNegatedComparison(Cmp); + if (CmpReg2 != 0 && CmpReg2 == Reg) + Cmp = Comparison::getSwappedComparison(Cmp); + + // Signed underflow is undefined. + if (Comparison::isSigned(Cmp)) + return false; + + // Check if there is a comparison of the initial value. If the initial value + // is greater than or not equal to another value, then assume this is a + // range check. + if ((Cmp & Comparison::G) || Cmp == Comparison::NE) + return false; + } + + // OK - this is a hack that needs to be improved. We really need to analyze + // the instructions performed on the initial value. This works on the simplest + // cases only. + if (!Def->isCopy() && !Def->isPHI()) + return false; + + return true; +} + +bool HexagonHardwareLoops::checkForImmediate(const MachineOperand &MO, + int64_t &Val) const { + if (MO.isImm()) { + Val = MO.getImm(); + return true; + } + if (!MO.isReg()) + return false; + + // MO is a register. Check whether it is defined as an immediate value, + // and if so, get the value of it in TV. That value will then need to be + // processed to handle potential subregisters in MO. + int64_t TV; + + unsigned R = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(R)) + return false; + MachineInstr *DI = MRI->getVRegDef(R); + unsigned DOpc = DI->getOpcode(); + switch (DOpc) { + case TargetOpcode::COPY: + case Hexagon::A2_tfrsi: + case Hexagon::A2_tfrpi: + case Hexagon::CONST32_Int_Real: + case Hexagon::CONST64_Int_Real: { + // Call recursively to avoid an extra check whether operand(1) is + // indeed an immediate (it could be a global address, for example), + // plus we can handle COPY at the same time. 
+ if (!checkForImmediate(DI->getOperand(1), TV)) + return false; + break; + } + case Hexagon::A2_combineii: + case Hexagon::A4_combineir: + case Hexagon::A4_combineii: + case Hexagon::A4_combineri: + case Hexagon::A2_combinew: { + const MachineOperand &S1 = DI->getOperand(1); + const MachineOperand &S2 = DI->getOperand(2); + int64_t V1, V2; + if (!checkForImmediate(S1, V1) || !checkForImmediate(S2, V2)) + return false; + TV = V2 | (V1 << 32); + break; + } + case TargetOpcode::REG_SEQUENCE: { + const MachineOperand &S1 = DI->getOperand(1); + const MachineOperand &S3 = DI->getOperand(3); + int64_t V1, V3; + if (!checkForImmediate(S1, V1) || !checkForImmediate(S3, V3)) + return false; + unsigned Sub2 = DI->getOperand(2).getImm(); + unsigned Sub4 = DI->getOperand(4).getImm(); + if (Sub2 == Hexagon::subreg_loreg && Sub4 == Hexagon::subreg_hireg) + TV = V1 | (V3 << 32); + else if (Sub2 == Hexagon::subreg_hireg && Sub4 == Hexagon::subreg_loreg) + TV = V3 | (V1 << 32); + else + llvm_unreachable("Unexpected form of REG_SEQUENCE"); + break; + } + + default: + return false; + } + + // By now, we should have successfuly obtained the immediate value defining + // the register referenced in MO. Handle a potential use of a subregister. + switch (MO.getSubReg()) { + case Hexagon::subreg_loreg: + Val = TV & 0xFFFFFFFFULL; + break; + case Hexagon::subreg_hireg: + Val = (TV >> 32) & 0xFFFFFFFFULL; + break; + default: + Val = TV; + break; + } + return true; +} + +void HexagonHardwareLoops::setImmediate(MachineOperand &MO, int64_t Val) { + if (MO.isImm()) { + MO.setImm(Val); + return; + } + + assert(MO.isReg()); + unsigned R = MO.getReg(); + MachineInstr *DI = MRI->getVRegDef(R); + + const TargetRegisterClass *RC = MRI->getRegClass(R); + unsigned NewR = MRI->createVirtualRegister(RC); + MachineBasicBlock &B = *DI->getParent(); + DebugLoc DL = DI->getDebugLoc(); + BuildMI(B, DI, DL, TII->get(DI->getOpcode()), NewR).addImm(Val); + MO.setReg(NewR); +} + +static bool isImmValidForOpcode(unsigned CmpOpc, int64_t Imm) { + // These two instructions are not extendable. + if (CmpOpc == Hexagon::A4_cmpbeqi) + return isUInt<8>(Imm); + if (CmpOpc == Hexagon::A4_cmpbgti) + return isInt<8>(Imm); + // The rest of the comparison-with-immediate instructions are extendable. + return true; +} + +bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) { + MachineBasicBlock *Header = L->getHeader(); + MachineBasicBlock *Latch = L->getLoopLatch(); + MachineBasicBlock *ExitingBlock = getExitingBlock(L); + + if (!(Header && Latch && ExitingBlock)) + return false; + + // These data structures follow the same concept as the corresponding + // ones in findInductionRegister (where some comments are). + typedef std::pair<unsigned,int64_t> RegisterBump; + typedef std::pair<unsigned,RegisterBump> RegisterInduction; + typedef std::set<RegisterInduction> RegisterInductionSet; + + // Register candidates for induction variables, with their associated bumps. + RegisterInductionSet IndRegs; + + // Look for induction patterns: + // vreg1 = PHI ..., [ latch, vreg2 ] + // vreg2 = ADD vreg1, imm + typedef MachineBasicBlock::instr_iterator instr_iterator; + for (instr_iterator I = Header->instr_begin(), E = Header->instr_end(); + I != E && I->isPHI(); ++I) { + MachineInstr *Phi = &*I; + + // Have a PHI instruction. 
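+    // As in findInductionRegister, find the value coming from the latch and
+    // check that it is defined by an add-immediate of the PHI itself.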
+ for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) { + if (Phi->getOperand(i+1).getMBB() != Latch) + continue; + + unsigned PhiReg = Phi->getOperand(i).getReg(); + MachineInstr *DI = MRI->getVRegDef(PhiReg); + unsigned UpdOpc = DI->getOpcode(); + bool isAdd = (UpdOpc == Hexagon::A2_addi || UpdOpc == Hexagon::A2_addp); + + if (isAdd) { + // If the register operand to the add/sub is the PHI we are looking + // at, this meets the induction pattern. + unsigned IndReg = DI->getOperand(1).getReg(); + MachineOperand &Opnd2 = DI->getOperand(2); + int64_t V; + if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) { + unsigned UpdReg = DI->getOperand(0).getReg(); + IndRegs.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V))); + } + } + } // for (i) + } // for (instr) + + if (IndRegs.empty()) + return false; + + MachineBasicBlock *TB = nullptr, *FB = nullptr; + SmallVector<MachineOperand,2> Cond; + // AnalyzeBranch returns true if it fails to analyze branch. + bool NotAnalyzed = TII->AnalyzeBranch(*ExitingBlock, TB, FB, Cond, false); + if (NotAnalyzed || Cond.empty()) + return false; + + if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) { + MachineBasicBlock *LTB = 0, *LFB = 0; + SmallVector<MachineOperand,2> LCond; + bool NotAnalyzed = TII->AnalyzeBranch(*Latch, LTB, LFB, LCond, false); + if (NotAnalyzed) + return false; + + // Since latch is not the exiting block, the latch branch should be an + // unconditional branch to the loop header. + if (TB == Latch) + TB = (LTB == Header) ? LTB : LFB; + else + FB = (LTB == Header) ? LTB : LFB; + } + if (TB != Header) { + if (FB != Header) { + // The latch/exit block does not go back to the header. + return false; + } + // FB is the header (i.e., uncond. jump to branch header) + // In this case, the LoopBody -> TB should not be a back edge otherwise + // it could result in an infinite loop after conversion to hw_loop. + // This case can happen when the Latch has two jumps like this: + // Jmp_c OuterLoopHeader <-- TB + // Jmp InnerLoopHeader <-- FB + if (MDT->dominates(TB, FB)) + return false; + } + + // Expecting a predicate register as a condition. It won't be a hardware + // predicate register at this point yet, just a vreg. + // HexagonInstrInfo::AnalyzeBranch for negated branches inserts imm(0) + // into Cond, followed by the predicate register. For non-negated branches + // it's just the register. + unsigned CSz = Cond.size(); + if (CSz != 1 && CSz != 2) + return false; + + if (!Cond[CSz-1].isReg()) + return false; + + unsigned P = Cond[CSz-1].getReg(); + MachineInstr *PredDef = MRI->getVRegDef(P); + + if (!PredDef->isCompare()) + return false; + + SmallSet<unsigned,2> CmpRegs; + MachineOperand *CmpImmOp = nullptr; + + // Go over all operands to the compare and look for immediate and register + // operands. Assume that if the compare has a single register use and a + // single immediate operand, then the register is being compared with the + // immediate value. + for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) { + MachineOperand &MO = PredDef->getOperand(i); + if (MO.isReg()) { + // Skip all implicit references. In one case there was: + // %vreg140<def> = FCMPUGT32_rr %vreg138, %vreg139, %USR<imp-use> + if (MO.isImplicit()) + continue; + if (MO.isUse()) { + if (!isImmediate(MO)) { + CmpRegs.insert(MO.getReg()); + continue; + } + // Consider the register to be the "immediate" operand. 
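+          // (i.e. a register whose value checkForImmediate has shown to be a
+          // compile-time constant)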
+ if (CmpImmOp) + return false; + CmpImmOp = &MO; + } + } else if (MO.isImm()) { + if (CmpImmOp) // A second immediate argument? Confusing. Bail out. + return false; + CmpImmOp = &MO; + } + } + + if (CmpRegs.empty()) + return false; + + // Check if the compared register follows the order we want. Fix if needed. + for (RegisterInductionSet::iterator I = IndRegs.begin(), E = IndRegs.end(); + I != E; ++I) { + // This is a success. If the register used in the comparison is one that + // we have identified as a bumped (updated) induction register, there is + // nothing to do. + if (CmpRegs.count(I->first)) + return true; + + // Otherwise, if the register being compared comes out of a PHI node, + // and has been recognized as following the induction pattern, and is + // compared against an immediate, we can fix it. + const RegisterBump &RB = I->second; + if (CmpRegs.count(RB.first)) { + if (!CmpImmOp) { + // If both operands to the compare instruction are registers, see if + // it can be changed to use induction register as one of the operands. + MachineInstr *IndI = nullptr; + MachineInstr *nonIndI = nullptr; + MachineOperand *IndMO = nullptr; + MachineOperand *nonIndMO = nullptr; + + for (unsigned i = 1, n = PredDef->getNumOperands(); i < n; ++i) { + MachineOperand &MO = PredDef->getOperand(i); + if (MO.isReg() && MO.getReg() == RB.first) { + DEBUG(dbgs() << "\n DefMI(" << i << ") = " + << *(MRI->getVRegDef(I->first))); + if (IndI) + return false; + + IndI = MRI->getVRegDef(I->first); + IndMO = &MO; + } else if (MO.isReg()) { + DEBUG(dbgs() << "\n DefMI(" << i << ") = " + << *(MRI->getVRegDef(MO.getReg()))); + if (nonIndI) + return false; + + nonIndI = MRI->getVRegDef(MO.getReg()); + nonIndMO = &MO; + } + } + if (IndI && nonIndI && + nonIndI->getOpcode() == Hexagon::A2_addi && + nonIndI->getOperand(2).isImm() && + nonIndI->getOperand(2).getImm() == - RB.second) { + bool Order = orderBumpCompare(IndI, PredDef); + if (Order) { + IndMO->setReg(I->first); + nonIndMO->setReg(nonIndI->getOperand(1).getReg()); + return true; + } + } + return false; + } + + // It is not valid to do this transformation on an unsigned comparison + // because it may underflow. + Comparison::Kind Cmp = getComparisonKind(PredDef->getOpcode(), 0, 0, 0); + if (!Cmp || Comparison::isUnsigned(Cmp)) + return false; + + // If the register is being compared against an immediate, try changing + // the compare instruction to use induction register and adjust the + // immediate operand. + int64_t CmpImm = getImmediate(*CmpImmOp); + int64_t V = RB.second; + // Handle Overflow (64-bit). + if (((V > 0) && (CmpImm > INT64_MAX - V)) || + ((V < 0) && (CmpImm < INT64_MIN - V))) + return false; + CmpImm += V; + // Most comparisons of register against an immediate value allow + // the immediate to be constant-extended. There are some exceptions + // though. Make sure the new combination will work. + if (CmpImmOp->isImm()) + if (!isImmValidForOpcode(PredDef->getOpcode(), CmpImm)) + return false; + + // Make sure that the compare happens after the bump. Otherwise, + // after the fixup, the compare would use a yet-undefined register. + MachineInstr *BumpI = MRI->getVRegDef(I->first); + bool Order = orderBumpCompare(BumpI, PredDef); + if (!Order) + return false; + + // Finally, fix the compare instruction. 
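+      // Illustrative sketch of the rewrite (virtual register numbers and
+      // constants are invented): with a bump of +1,
+      //   %vreg2 = A2_addi %vreg1, 1
+      //   %vreg5 = C2_cmpgti %vreg1, 10    ; compares the pre-bump register
+      // becomes, once the bump has been ordered before the compare,
+      //   %vreg5 = C2_cmpgti %vreg2, 11    ; post-bump register, immediate + bump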
+ setImmediate(*CmpImmOp, CmpImm); + for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) { + MachineOperand &MO = PredDef->getOperand(i); + if (MO.isReg() && MO.getReg() == RB.first) { + MO.setReg(I->first); + return true; + } + } + } + } + + return false; +} + +/// \brief Create a preheader for a given loop. +MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop( + MachineLoop *L) { + if (MachineBasicBlock *TmpPH = L->getLoopPreheader()) + return TmpPH; + + if (!HWCreatePreheader) + return nullptr; + + MachineBasicBlock *Header = L->getHeader(); + MachineBasicBlock *Latch = L->getLoopLatch(); + MachineBasicBlock *ExitingBlock = getExitingBlock(L); + MachineFunction *MF = Header->getParent(); + DebugLoc DL; + +#ifndef NDEBUG + if ((PHFn != "") && (PHFn != MF->getName())) + return nullptr; +#endif + + if (!Latch || !ExitingBlock || Header->hasAddressTaken()) + return nullptr; + + typedef MachineBasicBlock::instr_iterator instr_iterator; + + // Verify that all existing predecessors have analyzable branches + // (or no branches at all). + typedef std::vector<MachineBasicBlock*> MBBVector; + MBBVector Preds(Header->pred_begin(), Header->pred_end()); + SmallVector<MachineOperand,2> Tmp1; + MachineBasicBlock *TB = nullptr, *FB = nullptr; + + if (TII->AnalyzeBranch(*ExitingBlock, TB, FB, Tmp1, false)) + return nullptr; + + for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) { + MachineBasicBlock *PB = *I; + bool NotAnalyzed = TII->AnalyzeBranch(*PB, TB, FB, Tmp1, false); + if (NotAnalyzed) + return nullptr; + } + + MachineBasicBlock *NewPH = MF->CreateMachineBasicBlock(); + MF->insert(Header->getIterator(), NewPH); + + if (Header->pred_size() > 2) { + // Ensure that the header has only two predecessors: the preheader and + // the loop latch. Any additional predecessors of the header should + // join at the newly created preheader. Inspect all PHI nodes from the + // header and create appropriate corresponding PHI nodes in the preheader. + + for (instr_iterator I = Header->instr_begin(), E = Header->instr_end(); + I != E && I->isPHI(); ++I) { + MachineInstr *PN = &*I; + + const MCInstrDesc &PD = TII->get(TargetOpcode::PHI); + MachineInstr *NewPN = MF->CreateMachineInstr(PD, DL); + NewPH->insert(NewPH->end(), NewPN); + + unsigned PR = PN->getOperand(0).getReg(); + const TargetRegisterClass *RC = MRI->getRegClass(PR); + unsigned NewPR = MRI->createVirtualRegister(RC); + NewPN->addOperand(MachineOperand::CreateReg(NewPR, true)); + + // Copy all non-latch operands of a header's PHI node to the newly + // created PHI node in the preheader. + for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) { + unsigned PredR = PN->getOperand(i).getReg(); + unsigned PredRSub = PN->getOperand(i).getSubReg(); + MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB(); + if (PredB == Latch) + continue; + + MachineOperand MO = MachineOperand::CreateReg(PredR, false); + MO.setSubReg(PredRSub); + NewPN->addOperand(MO); + NewPN->addOperand(MachineOperand::CreateMBB(PredB)); + } + + // Remove copied operands from the old PHI node and add the value + // coming from the preheader's PHI. 
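+      // A sketch of the intended PHI rewrite (block and value names invented):
+      //   %v = PHI [ %a, %bb1 ], [ %b, %bb2 ], [ %c, %latch ]
+      // in the header keeps only its latch input, while the other inputs move
+      // into a new PHI in the preheader:
+      //   %p = PHI [ %a, %bb1 ], [ %b, %bb2 ]          ; in the new preheader
+      //   %v = PHI [ %c, %latch ], [ %p, %preheader ]  ; updated header PHI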
+ for (int i = PN->getNumOperands()-2; i > 0; i -= 2) { + MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB(); + if (PredB != Latch) { + PN->RemoveOperand(i+1); + PN->RemoveOperand(i); + } + } + PN->addOperand(MachineOperand::CreateReg(NewPR, false)); + PN->addOperand(MachineOperand::CreateMBB(NewPH)); + } + + } else { + assert(Header->pred_size() == 2); + + // The header has only two predecessors, but the non-latch predecessor + // is not a preheader (e.g. it has other successors, etc.) + // In such a case we don't need any extra PHI nodes in the new preheader, + // all we need is to adjust existing PHIs in the header to now refer to + // the new preheader. + for (instr_iterator I = Header->instr_begin(), E = Header->instr_end(); + I != E && I->isPHI(); ++I) { + MachineInstr *PN = &*I; + for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) { + MachineOperand &MO = PN->getOperand(i+1); + if (MO.getMBB() != Latch) + MO.setMBB(NewPH); + } + } + } + + // "Reroute" the CFG edges to link in the new preheader. + // If any of the predecessors falls through to the header, insert a branch + // to the new preheader in that place. + SmallVector<MachineOperand,1> Tmp2; + SmallVector<MachineOperand,1> EmptyCond; + + TB = FB = nullptr; + + for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) { + MachineBasicBlock *PB = *I; + if (PB != Latch) { + Tmp2.clear(); + bool NotAnalyzed = TII->AnalyzeBranch(*PB, TB, FB, Tmp2, false); + (void)NotAnalyzed; // suppress compiler warning + assert (!NotAnalyzed && "Should be analyzable!"); + if (TB != Header && (Tmp2.empty() || FB != Header)) + TII->InsertBranch(*PB, NewPH, nullptr, EmptyCond, DL); + PB->ReplaceUsesOfBlockWith(Header, NewPH); + } + } + + // It can happen that the latch block will fall through into the header. + // Insert an unconditional branch to the header. + TB = FB = nullptr; + bool LatchNotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Tmp2, false); + (void)LatchNotAnalyzed; // suppress compiler warning + assert (!LatchNotAnalyzed && "Should be analyzable!"); + if (!TB && !FB) + TII->InsertBranch(*Latch, Header, nullptr, EmptyCond, DL); + + // Finally, the branch from the preheader to the header. + TII->InsertBranch(*NewPH, Header, nullptr, EmptyCond, DL); + NewPH->addSuccessor(Header); + + MachineLoop *ParentLoop = L->getParentLoop(); + if (ParentLoop) + ParentLoop->addBasicBlockToLoop(NewPH, MLI->getBase()); + + // Update the dominator information with the new preheader. + if (MDT) { + MachineDomTreeNode *HDom = MDT->getNode(Header); + MDT->addNewBlock(NewPH, HDom->getIDom()->getBlock()); + MDT->changeImmediateDominator(Header, NewPH); + } + + return NewPH; +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp new file mode 100644 index 0000000..a0da945 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp @@ -0,0 +1,1563 @@ +//===-- HexagonISelDAGToDAG.cpp - A dag to dag inst selector for Hexagon --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines an instruction selector for the Hexagon target. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "Hexagon.h"
+#include "HexagonISelLowering.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "HexagonTargetMachine.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "hexagon-isel"
+
+static
+cl::opt<unsigned>
+MaxNumOfUsesForConstExtenders("ga-max-num-uses-for-constant-extenders",
+  cl::Hidden, cl::init(2),
+  cl::desc("Maximum number of uses of a global address such that we still use a "
+  "constant extended instruction"));
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+  void initializeHexagonDAGToDAGISelPass(PassRegistry&);
+}
+
+//===--------------------------------------------------------------------===//
+/// HexagonDAGToDAGISel - Hexagon specific code to select Hexagon machine
+/// instructions for SelectionDAG operations.
+///
+namespace {
+class HexagonDAGToDAGISel : public SelectionDAGISel {
+  const HexagonTargetMachine& HTM;
+  const HexagonSubtarget *HST;
+  const HexagonInstrInfo *HII;
+  const HexagonRegisterInfo *HRI;
+public:
+  explicit HexagonDAGToDAGISel(HexagonTargetMachine &tm,
+                               CodeGenOpt::Level OptLevel)
+      : SelectionDAGISel(tm, OptLevel), HTM(tm), HST(nullptr), HII(nullptr),
+        HRI(nullptr) {
+    initializeHexagonDAGToDAGISelPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override {
+    // Reset the subtarget each time through.
+    HST = &MF.getSubtarget<HexagonSubtarget>();
+    HII = HST->getInstrInfo();
+    HRI = HST->getRegisterInfo();
+    SelectionDAGISel::runOnMachineFunction(MF);
+    return true;
+  }
+
+  virtual void PreprocessISelDAG() override;
+  virtual void EmitFunctionEntryCode() override;
+
+  SDNode *Select(SDNode *N) override;
+
+  // Complex Pattern Selectors.
+  inline bool SelectAddrGA(SDValue &N, SDValue &R);
+  inline bool SelectAddrGP(SDValue &N, SDValue &R);
+  bool SelectGlobalAddress(SDValue &N, SDValue &R, bool UseGP);
+  bool SelectAddrFI(SDValue &N, SDValue &R);
+
+  const char *getPassName() const override {
+    return "Hexagon DAG->DAG Pattern Instruction Selection";
+  }
+
+  SDNode *SelectFrameIndex(SDNode *N);
+  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+  /// inline asm expressions.
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op, + unsigned ConstraintID, + std::vector<SDValue> &OutOps) override; + SDNode *SelectLoad(SDNode *N); + SDNode *SelectBaseOffsetLoad(LoadSDNode *LD, SDLoc dl); + SDNode *SelectIndexedLoad(LoadSDNode *LD, SDLoc dl); + SDNode *SelectIndexedLoadZeroExtend64(LoadSDNode *LD, unsigned Opcode, + SDLoc dl); + SDNode *SelectIndexedLoadSignExtend64(LoadSDNode *LD, unsigned Opcode, + SDLoc dl); + SDNode *SelectBaseOffsetStore(StoreSDNode *ST, SDLoc dl); + SDNode *SelectIndexedStore(StoreSDNode *ST, SDLoc dl); + SDNode *SelectStore(SDNode *N); + SDNode *SelectSHL(SDNode *N); + SDNode *SelectMul(SDNode *N); + SDNode *SelectZeroExtend(SDNode *N); + SDNode *SelectIntrinsicWChain(SDNode *N); + SDNode *SelectIntrinsicWOChain(SDNode *N); + SDNode *SelectConstant(SDNode *N); + SDNode *SelectConstantFP(SDNode *N); + SDNode *SelectAdd(SDNode *N); + SDNode *SelectBitOp(SDNode *N); + + // XformMskToBitPosU5Imm - Returns the bit position which + // the single bit 32 bit mask represents. + // Used in Clr and Set bit immediate memops. + SDValue XformMskToBitPosU5Imm(uint32_t Imm, SDLoc DL) { + int32_t bitPos; + bitPos = Log2_32(Imm); + assert(bitPos >= 0 && bitPos < 32 && + "Constant out of range for 32 BitPos Memops"); + return CurDAG->getTargetConstant(bitPos, DL, MVT::i32); + } + + // XformMskToBitPosU4Imm - Returns the bit position which the single-bit + // 16 bit mask represents. Used in Clr and Set bit immediate memops. + SDValue XformMskToBitPosU4Imm(uint16_t Imm, SDLoc DL) { + return XformMskToBitPosU5Imm(Imm, DL); + } + + // XformMskToBitPosU3Imm - Returns the bit position which the single-bit + // 8 bit mask represents. Used in Clr and Set bit immediate memops. + SDValue XformMskToBitPosU3Imm(uint8_t Imm, SDLoc DL) { + return XformMskToBitPosU5Imm(Imm, DL); + } + + // Return true if there is exactly one bit set in V, i.e., if V is one of the + // following integers: 2^0, 2^1, ..., 2^31. + bool ImmIsSingleBit(uint32_t v) const { + return isPowerOf2_32(v); + } + + // XformM5ToU5Imm - Return a target constant with the specified value, of + // type i32 where the negative literal is transformed into a positive literal + // for use in -= memops. + inline SDValue XformM5ToU5Imm(signed Imm, SDLoc DL) { + assert((Imm >= -31 && Imm <= -1) && "Constant out of range for Memops"); + return CurDAG->getTargetConstant(-Imm, DL, MVT::i32); + } + + // XformU7ToU7M1Imm - Return a target constant decremented by 1, in range + // [1..128], used in cmpb.gtu instructions. + inline SDValue XformU7ToU7M1Imm(signed Imm, SDLoc DL) { + assert((Imm >= 1 && Imm <= 128) && "Constant out of range for cmpb op"); + return CurDAG->getTargetConstant(Imm - 1, DL, MVT::i8); + } + + // XformS8ToS8M1Imm - Return a target constant decremented by 1. + inline SDValue XformSToSM1Imm(signed Imm, SDLoc DL) { + return CurDAG->getTargetConstant(Imm - 1, DL, MVT::i32); + } + + // XformU8ToU8M1Imm - Return a target constant decremented by 1. + inline SDValue XformUToUM1Imm(unsigned Imm, SDLoc DL) { + assert((Imm >= 1) && "Cannot decrement unsigned int less than 1"); + return CurDAG->getTargetConstant(Imm - 1, DL, MVT::i32); + } + + // XformSToSM2Imm - Return a target constant decremented by 2. + inline SDValue XformSToSM2Imm(unsigned Imm, SDLoc DL) { + return CurDAG->getTargetConstant(Imm - 2, DL, MVT::i32); + } + + // XformSToSM3Imm - Return a target constant decremented by 3. 
+ inline SDValue XformSToSM3Imm(unsigned Imm, SDLoc DL) { + return CurDAG->getTargetConstant(Imm - 3, DL, MVT::i32); + } + + // Include the pieces autogenerated from the target description. + #include "HexagonGenDAGISel.inc" + +private: + bool isValueExtension(const SDValue &Val, unsigned FromBits, SDValue &Src); +}; // end HexagonDAGToDAGISel +} // end anonymous namespace + + +/// createHexagonISelDag - This pass converts a legalized DAG into a +/// Hexagon-specific DAG, ready for instruction scheduling. +/// +namespace llvm { +FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM, + CodeGenOpt::Level OptLevel) { + return new HexagonDAGToDAGISel(TM, OptLevel); +} +} + +static void initializePassOnce(PassRegistry &Registry) { + const char *Name = "Hexagon DAG->DAG Pattern Instruction Selection"; + PassInfo *PI = new PassInfo(Name, "hexagon-isel", + &SelectionDAGISel::ID, nullptr, false, false); + Registry.registerPass(*PI, true); +} + +void llvm::initializeHexagonDAGToDAGISelPass(PassRegistry &Registry) { + CALL_ONCE_INITIALIZATION(initializePassOnce) +} + + +// Intrinsics that return a a predicate. +static bool doesIntrinsicReturnPredicate(unsigned ID) { + switch (ID) { + default: + return false; + case Intrinsic::hexagon_C2_cmpeq: + case Intrinsic::hexagon_C2_cmpgt: + case Intrinsic::hexagon_C2_cmpgtu: + case Intrinsic::hexagon_C2_cmpgtup: + case Intrinsic::hexagon_C2_cmpgtp: + case Intrinsic::hexagon_C2_cmpeqp: + case Intrinsic::hexagon_C2_bitsset: + case Intrinsic::hexagon_C2_bitsclr: + case Intrinsic::hexagon_C2_cmpeqi: + case Intrinsic::hexagon_C2_cmpgti: + case Intrinsic::hexagon_C2_cmpgtui: + case Intrinsic::hexagon_C2_cmpgei: + case Intrinsic::hexagon_C2_cmpgeui: + case Intrinsic::hexagon_C2_cmplt: + case Intrinsic::hexagon_C2_cmpltu: + case Intrinsic::hexagon_C2_bitsclri: + case Intrinsic::hexagon_C2_and: + case Intrinsic::hexagon_C2_or: + case Intrinsic::hexagon_C2_xor: + case Intrinsic::hexagon_C2_andn: + case Intrinsic::hexagon_C2_not: + case Intrinsic::hexagon_C2_orn: + case Intrinsic::hexagon_C2_pxfer_map: + case Intrinsic::hexagon_C2_any8: + case Intrinsic::hexagon_C2_all8: + case Intrinsic::hexagon_A2_vcmpbeq: + case Intrinsic::hexagon_A2_vcmpbgtu: + case Intrinsic::hexagon_A2_vcmpheq: + case Intrinsic::hexagon_A2_vcmphgt: + case Intrinsic::hexagon_A2_vcmphgtu: + case Intrinsic::hexagon_A2_vcmpweq: + case Intrinsic::hexagon_A2_vcmpwgt: + case Intrinsic::hexagon_A2_vcmpwgtu: + case Intrinsic::hexagon_C2_tfrrp: + case Intrinsic::hexagon_S2_tstbit_i: + case Intrinsic::hexagon_S2_tstbit_r: + return true; + } +} + +SDNode *HexagonDAGToDAGISel::SelectIndexedLoadSignExtend64(LoadSDNode *LD, + unsigned Opcode, + SDLoc dl) { + SDValue Chain = LD->getChain(); + EVT LoadedVT = LD->getMemoryVT(); + SDValue Base = LD->getBasePtr(); + SDValue Offset = LD->getOffset(); + SDNode *OffsetNode = Offset.getNode(); + int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue(); + + if (HII->isValidAutoIncImm(LoadedVT, Val)) { + SDValue TargetConst = CurDAG->getTargetConstant(Val, dl, MVT::i32); + SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::i32, + MVT::Other, Base, TargetConst, + Chain); + SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_sxtw, dl, MVT::i64, + SDValue(Result_1, 0)); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = LD->getMemOperand(); + cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1); + const SDValue Froms[] = { SDValue(LD, 0), + SDValue(LD, 1), + SDValue(LD, 2) }; + const SDValue Tos[] = { 
SDValue(Result_2, 0), + SDValue(Result_1, 1), + SDValue(Result_1, 2) }; + ReplaceUses(Froms, Tos, 3); + return Result_2; + } + + SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32); + SDValue TargetConstVal = CurDAG->getTargetConstant(Val, dl, MVT::i32); + SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::Other, + Base, TargetConst0, Chain); + SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_sxtw, dl, MVT::i64, + SDValue(Result_1, 0)); + SDNode* Result_3 = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32, + Base, TargetConstVal, + SDValue(Result_1, 1)); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = LD->getMemOperand(); + cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1); + const SDValue Froms[] = { SDValue(LD, 0), + SDValue(LD, 1), + SDValue(LD, 2) }; + const SDValue Tos[] = { SDValue(Result_2, 0), + SDValue(Result_3, 0), + SDValue(Result_1, 1) }; + ReplaceUses(Froms, Tos, 3); + return Result_2; +} + + +SDNode *HexagonDAGToDAGISel::SelectIndexedLoadZeroExtend64(LoadSDNode *LD, + unsigned Opcode, + SDLoc dl) { + SDValue Chain = LD->getChain(); + EVT LoadedVT = LD->getMemoryVT(); + SDValue Base = LD->getBasePtr(); + SDValue Offset = LD->getOffset(); + SDNode *OffsetNode = Offset.getNode(); + int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue(); + + if (HII->isValidAutoIncImm(LoadedVT, Val)) { + SDValue TargetConstVal = CurDAG->getTargetConstant(Val, dl, MVT::i32); + SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32); + SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, + MVT::i32, MVT::Other, Base, + TargetConstVal, Chain); + SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A4_combineir, dl, + MVT::i64, MVT::Other, + TargetConst0, + SDValue(Result_1,0)); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = LD->getMemOperand(); + cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1); + const SDValue Froms[] = { SDValue(LD, 0), + SDValue(LD, 1), + SDValue(LD, 2) }; + const SDValue Tos[] = { SDValue(Result_2, 0), + SDValue(Result_1, 1), + SDValue(Result_1, 2) }; + ReplaceUses(Froms, Tos, 3); + return Result_2; + } + + // Generate an indirect load. + SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32); + SDValue TargetConstVal = CurDAG->getTargetConstant(Val, dl, MVT::i32); + SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, + MVT::Other, Base, TargetConst0, + Chain); + SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A4_combineir, dl, + MVT::i64, MVT::Other, + TargetConst0, + SDValue(Result_1,0)); + // Add offset to base. + SDNode* Result_3 = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32, + Base, TargetConstVal, + SDValue(Result_1, 1)); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = LD->getMemOperand(); + cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1); + const SDValue Froms[] = { SDValue(LD, 0), + SDValue(LD, 1), + SDValue(LD, 2) }; + const SDValue Tos[] = { SDValue(Result_2, 0), // Load value. + SDValue(Result_3, 0), // New address. + SDValue(Result_1, 1) }; + ReplaceUses(Froms, Tos, 3); + return Result_2; +} + + +SDNode *HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, SDLoc dl) { + SDValue Chain = LD->getChain(); + SDValue Base = LD->getBasePtr(); + SDValue Offset = LD->getOffset(); + SDNode *OffsetNode = Offset.getNode(); + // Get the constant value. 
+ int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue(); + EVT LoadedVT = LD->getMemoryVT(); + unsigned Opcode = 0; + + // Check for zero extended loads. Treat any-extend loads as zero extended + // loads. + ISD::LoadExtType ExtType = LD->getExtensionType(); + bool IsZeroExt = (ExtType == ISD::ZEXTLOAD || ExtType == ISD::EXTLOAD); + bool HasVecOffset = false; + + // Figure out the opcode. + if (LoadedVT == MVT::i64) { + if (HII->isValidAutoIncImm(LoadedVT, Val)) + Opcode = Hexagon::L2_loadrd_pi; + else + Opcode = Hexagon::L2_loadrd_io; + } else if (LoadedVT == MVT::i32) { + if (HII->isValidAutoIncImm(LoadedVT, Val)) + Opcode = Hexagon::L2_loadri_pi; + else + Opcode = Hexagon::L2_loadri_io; + } else if (LoadedVT == MVT::i16) { + if (HII->isValidAutoIncImm(LoadedVT, Val)) + Opcode = IsZeroExt ? Hexagon::L2_loadruh_pi : Hexagon::L2_loadrh_pi; + else + Opcode = IsZeroExt ? Hexagon::L2_loadruh_io : Hexagon::L2_loadrh_io; + } else if (LoadedVT == MVT::i8) { + if (HII->isValidAutoIncImm(LoadedVT, Val)) + Opcode = IsZeroExt ? Hexagon::L2_loadrub_pi : Hexagon::L2_loadrb_pi; + else + Opcode = IsZeroExt ? Hexagon::L2_loadrub_io : Hexagon::L2_loadrb_io; + } else if (LoadedVT == MVT::v16i32 || LoadedVT == MVT::v8i64 || + LoadedVT == MVT::v32i16 || LoadedVT == MVT::v64i8) { + HasVecOffset = true; + if (HII->isValidAutoIncImm(LoadedVT, Val)) { + Opcode = Hexagon::V6_vL32b_pi; + } + else + Opcode = Hexagon::V6_vL32b_ai; + // 128B + } else if (LoadedVT == MVT::v32i32 || LoadedVT == MVT::v16i64 || + LoadedVT == MVT::v64i16 || LoadedVT == MVT::v128i8) { + HasVecOffset = true; + if (HII->isValidAutoIncImm(LoadedVT, Val)) { + Opcode = Hexagon::V6_vL32b_pi_128B; + } + else + Opcode = Hexagon::V6_vL32b_ai_128B; + } else + llvm_unreachable("unknown memory type"); + + // For zero extended i64 loads, we need to add combine instructions. + if (LD->getValueType(0) == MVT::i64 && IsZeroExt) + return SelectIndexedLoadZeroExtend64(LD, Opcode, dl); + // Handle sign extended i64 loads. 
+ if (LD->getValueType(0) == MVT::i64 && ExtType == ISD::SEXTLOAD) + return SelectIndexedLoadSignExtend64(LD, Opcode, dl); + + if (HII->isValidAutoIncImm(LoadedVT, Val)) { + SDValue TargetConstVal = CurDAG->getTargetConstant(Val, dl, MVT::i32); + SDNode* Result = CurDAG->getMachineNode(Opcode, dl, + LD->getValueType(0), + MVT::i32, MVT::Other, Base, + TargetConstVal, Chain); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = LD->getMemOperand(); + cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1); + if (HasVecOffset) { + const SDValue Froms[] = { SDValue(LD, 0), + SDValue(LD, 2) + }; + const SDValue Tos[] = { SDValue(Result, 0), + SDValue(Result, 2) + }; + ReplaceUses(Froms, Tos, 2); + } else { + const SDValue Froms[] = { SDValue(LD, 0), + SDValue(LD, 1), + SDValue(LD, 2) + }; + const SDValue Tos[] = { SDValue(Result, 0), + SDValue(Result, 1), + SDValue(Result, 2) + }; + ReplaceUses(Froms, Tos, 3); + } + return Result; + } else { + SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32); + SDValue TargetConstVal = CurDAG->getTargetConstant(Val, dl, MVT::i32); + SDNode* Result_1 = CurDAG->getMachineNode(Opcode, dl, + LD->getValueType(0), + MVT::Other, Base, TargetConst0, + Chain); + SDNode* Result_2 = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32, + Base, TargetConstVal, + SDValue(Result_1, 1)); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = LD->getMemOperand(); + cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1); + const SDValue Froms[] = { SDValue(LD, 0), + SDValue(LD, 1), + SDValue(LD, 2) + }; + const SDValue Tos[] = { SDValue(Result_1, 0), + SDValue(Result_2, 0), + SDValue(Result_1, 1) + }; + ReplaceUses(Froms, Tos, 3); + return Result_1; + } +} + + +SDNode *HexagonDAGToDAGISel::SelectLoad(SDNode *N) { + SDNode *result; + SDLoc dl(N); + LoadSDNode *LD = cast<LoadSDNode>(N); + ISD::MemIndexedMode AM = LD->getAddressingMode(); + + // Handle indexed loads. + if (AM != ISD::UNINDEXED) { + result = SelectIndexedLoad(LD, dl); + } else { + result = SelectCode(LD); + } + + return result; +} + + +SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) { + SDValue Chain = ST->getChain(); + SDValue Base = ST->getBasePtr(); + SDValue Offset = ST->getOffset(); + SDValue Value = ST->getValue(); + SDNode *OffsetNode = Offset.getNode(); + // Get the constant value. + int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue(); + EVT StoredVT = ST->getMemoryVT(); + EVT ValueVT = Value.getValueType(); + + // Offset value must be within representable range + // and must have correct alignment properties. + if (HII->isValidAutoIncImm(StoredVT, Val)) { + unsigned Opcode = 0; + + // Figure out the post inc version of opcode. 
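+    // A rough sketch with invented operands: when the offset is a valid
+    // auto-increment immediate, a word store becomes
+    //   memw(r0++#4) = r1        // S2_storeri_pi, base updated in place
+    // Otherwise the code after this block emits the base+offset form plus a
+    // separate A2_addi of the base:
+    //   memw(r0+#0) = r1
+    //   r0 = add(r0, #4)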
+    if (StoredVT == MVT::i64) Opcode = Hexagon::S2_storerd_pi;
+    else if (StoredVT == MVT::i32) Opcode = Hexagon::S2_storeri_pi;
+    else if (StoredVT == MVT::i16) Opcode = Hexagon::S2_storerh_pi;
+    else if (StoredVT == MVT::i8) Opcode = Hexagon::S2_storerb_pi;
+    else if (StoredVT == MVT::v16i32 || StoredVT == MVT::v8i64 ||
+             StoredVT == MVT::v32i16 || StoredVT == MVT::v64i8) {
+      Opcode = Hexagon::V6_vS32b_pi;
+    }
+    // 128B
+    else if (StoredVT == MVT::v32i32 || StoredVT == MVT::v16i64 ||
+             StoredVT == MVT::v64i16 || StoredVT == MVT::v128i8) {
+      Opcode = Hexagon::V6_vS32b_pi_128B;
+    } else llvm_unreachable("unknown memory type");
+
+    if (ST->isTruncatingStore() && ValueVT.getSizeInBits() == 64) {
+      assert(StoredVT.getSizeInBits() < 64 && "Not a truncating store");
+      Value = CurDAG->getTargetExtractSubreg(Hexagon::subreg_loreg,
+                                             dl, MVT::i32, Value);
+    }
+    SDValue Ops[] = {Base, CurDAG->getTargetConstant(Val, dl, MVT::i32), Value,
+                     Chain};
+    // Build post increment store.
+    SDNode* Result = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
+                                            MVT::Other, Ops);
+    MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+    MemOp[0] = ST->getMemOperand();
+    cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
+
+    ReplaceUses(ST, Result);
+    ReplaceUses(SDValue(ST,1), SDValue(Result,1));
+    return Result;
+  }
+
+  // Note: Order of operands matches the def of instruction:
+  // def S2_storerd_io
+  //   : STInst<(outs), (ins IntRegs:$base, imm:$offset, DoubleRegs:$src1), ...
+  // and it differs for POST_ST* for instance.
+  SDValue Ops[] = { Base, CurDAG->getTargetConstant(0, dl, MVT::i32), Value,
+                    Chain};
+  unsigned Opcode = 0;
+
+  // Figure out the opcode.
+  if (StoredVT == MVT::i64) Opcode = Hexagon::S2_storerd_io;
+  else if (StoredVT == MVT::i32) Opcode = Hexagon::S2_storeri_io;
+  else if (StoredVT == MVT::i16) Opcode = Hexagon::S2_storerh_io;
+  else if (StoredVT == MVT::i8) Opcode = Hexagon::S2_storerb_io;
+  else if (StoredVT == MVT::v16i32 || StoredVT == MVT::v8i64 ||
+           StoredVT == MVT::v32i16 || StoredVT == MVT::v64i8)
+    Opcode = Hexagon::V6_vS32b_ai;
+  // 128B
+  else if (StoredVT == MVT::v32i32 || StoredVT == MVT::v16i64 ||
+           StoredVT == MVT::v64i16 || StoredVT == MVT::v128i8)
+    Opcode = Hexagon::V6_vS32b_ai_128B;
+  else llvm_unreachable("unknown memory type");
+
+  // Build regular store.
+  SDValue TargetConstVal = CurDAG->getTargetConstant(Val, dl, MVT::i32);
+  SDNode* Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
+  // Build the split increment instruction.
+  SDNode* Result_2 = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32,
+                                            Base,
+                                            TargetConstVal,
+                                            SDValue(Result_1, 0));
+  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+  MemOp[0] = ST->getMemOperand();
+  cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+
+  ReplaceUses(SDValue(ST,0), SDValue(Result_2,0));
+  ReplaceUses(SDValue(ST,1), SDValue(Result_1,0));
+  return Result_2;
+}
+
+SDNode *HexagonDAGToDAGISel::SelectStore(SDNode *N) {
+  SDLoc dl(N);
+  StoreSDNode *ST = cast<StoreSDNode>(N);
+  ISD::MemIndexedMode AM = ST->getAddressingMode();
+
+  // Handle indexed stores.
+ if (AM != ISD::UNINDEXED) { + return SelectIndexedStore(ST, dl); + } + + return SelectCode(ST); +} + +SDNode *HexagonDAGToDAGISel::SelectMul(SDNode *N) { + SDLoc dl(N); + + // + // %conv.i = sext i32 %tmp1 to i64 + // %conv2.i = sext i32 %add to i64 + // %mul.i = mul nsw i64 %conv2.i, %conv.i + // + // --- match with the following --- + // + // %mul.i = mpy (%tmp1, %add) + // + + if (N->getValueType(0) == MVT::i64) { + // Shifting a i64 signed multiply. + SDValue MulOp0 = N->getOperand(0); + SDValue MulOp1 = N->getOperand(1); + + SDValue OP0; + SDValue OP1; + + // Handle sign_extend and sextload. + if (MulOp0.getOpcode() == ISD::SIGN_EXTEND) { + SDValue Sext0 = MulOp0.getOperand(0); + if (Sext0.getNode()->getValueType(0) != MVT::i32) { + return SelectCode(N); + } + + OP0 = Sext0; + } else if (MulOp0.getOpcode() == ISD::LOAD) { + LoadSDNode *LD = cast<LoadSDNode>(MulOp0.getNode()); + if (LD->getMemoryVT() != MVT::i32 || + LD->getExtensionType() != ISD::SEXTLOAD || + LD->getAddressingMode() != ISD::UNINDEXED) { + return SelectCode(N); + } + + SDValue Chain = LD->getChain(); + SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32); + OP0 = SDValue(CurDAG->getMachineNode(Hexagon::L2_loadri_io, dl, MVT::i32, + MVT::Other, + LD->getBasePtr(), TargetConst0, + Chain), 0); + } else { + return SelectCode(N); + } + + // Same goes for the second operand. + if (MulOp1.getOpcode() == ISD::SIGN_EXTEND) { + SDValue Sext1 = MulOp1.getOperand(0); + if (Sext1.getNode()->getValueType(0) != MVT::i32) { + return SelectCode(N); + } + + OP1 = Sext1; + } else if (MulOp1.getOpcode() == ISD::LOAD) { + LoadSDNode *LD = cast<LoadSDNode>(MulOp1.getNode()); + if (LD->getMemoryVT() != MVT::i32 || + LD->getExtensionType() != ISD::SEXTLOAD || + LD->getAddressingMode() != ISD::UNINDEXED) { + return SelectCode(N); + } + + SDValue Chain = LD->getChain(); + SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32); + OP1 = SDValue(CurDAG->getMachineNode(Hexagon::L2_loadri_io, dl, MVT::i32, + MVT::Other, + LD->getBasePtr(), TargetConst0, + Chain), 0); + } else { + return SelectCode(N); + } + + // Generate a mpy instruction. + SDNode *Result = CurDAG->getMachineNode(Hexagon::M2_dpmpyss_s0, dl, MVT::i64, + OP0, OP1); + ReplaceUses(N, Result); + return Result; + } + + return SelectCode(N); +} + +SDNode *HexagonDAGToDAGISel::SelectSHL(SDNode *N) { + SDLoc dl(N); + if (N->getValueType(0) == MVT::i32) { + SDValue Shl_0 = N->getOperand(0); + SDValue Shl_1 = N->getOperand(1); + // RHS is const. + if (Shl_1.getOpcode() == ISD::Constant) { + if (Shl_0.getOpcode() == ISD::MUL) { + SDValue Mul_0 = Shl_0.getOperand(0); // Val + SDValue Mul_1 = Shl_0.getOperand(1); // Const + // RHS of mul is const. 
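+        // For example (constants invented), (shl (mul %x, 3), 2) can fold to
+        // M2_mpysmi(%x, 12), as long as the scaled constant still fits the
+        // signed 9-bit immediate of M2_mpysmi; the (sub 0, (shl ...)) case
+        // below is handled the same way with a negated power-of-two constant.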
+        if (Mul_1.getOpcode() == ISD::Constant) {
+          int32_t ShlConst =
+            cast<ConstantSDNode>(Shl_1.getNode())->getSExtValue();
+          int32_t MulConst =
+            cast<ConstantSDNode>(Mul_1.getNode())->getSExtValue();
+          int32_t ValConst = MulConst << ShlConst;
+          SDValue Val = CurDAG->getTargetConstant(ValConst, dl,
+                                                  MVT::i32);
+          if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val.getNode()))
+            if (isInt<9>(CN->getSExtValue())) {
+              SDNode* Result =
+                CurDAG->getMachineNode(Hexagon::M2_mpysmi, dl,
+                                       MVT::i32, Mul_0, Val);
+              ReplaceUses(N, Result);
+              return Result;
+            }
+
+        }
+      } else if (Shl_0.getOpcode() == ISD::SUB) {
+        SDValue Sub_0 = Shl_0.getOperand(0); // Const 0
+        SDValue Sub_1 = Shl_0.getOperand(1); // Val
+        if (Sub_0.getOpcode() == ISD::Constant) {
+          int32_t SubConst =
+            cast<ConstantSDNode>(Sub_0.getNode())->getSExtValue();
+          if (SubConst == 0) {
+            if (Sub_1.getOpcode() == ISD::SHL) {
+              SDValue Shl2_0 = Sub_1.getOperand(0); // Val
+              SDValue Shl2_1 = Sub_1.getOperand(1); // Const
+              if (Shl2_1.getOpcode() == ISD::Constant) {
+                int32_t ShlConst =
+                  cast<ConstantSDNode>(Shl_1.getNode())->getSExtValue();
+                int32_t Shl2Const =
+                  cast<ConstantSDNode>(Shl2_1.getNode())->getSExtValue();
+                int32_t ValConst = 1 << (ShlConst+Shl2Const);
+                SDValue Val = CurDAG->getTargetConstant(-ValConst, dl,
+                                                        MVT::i32);
+                if (ConstantSDNode *CN =
+                    dyn_cast<ConstantSDNode>(Val.getNode()))
+                  if (isInt<9>(CN->getSExtValue())) {
+                    SDNode* Result =
+                      CurDAG->getMachineNode(Hexagon::M2_mpysmi, dl, MVT::i32,
+                                             Shl2_0, Val);
+                    ReplaceUses(N, Result);
+                    return Result;
+                  }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  return SelectCode(N);
+}
+
+
+//
+// If there is a zero_extend following an intrinsic in the DAG (meaning the
+// result of the intrinsic is a predicate), convert the zero_extend to a
+// transfer instruction.
+//
+// Zero extend -> transfer is lowered here. Otherwise, zero_extend will be
+// converted into a MUX, as predicate registers are defined as 1 bit in the
+// compiler. The architecture defines them as 8-bit registers.
+// We want to preserve all of the lower 8 bits, not just the LSB.
+//
+SDNode *HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) {
+  SDLoc dl(N);
+
+  SDValue Op0 = N->getOperand(0);
+  EVT OpVT = Op0.getValueType();
+  unsigned OpBW = OpVT.getSizeInBits();
+
+  // Special handling for zero-extending a vector of booleans.
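+  // A sketch of the idea: zero-extending v4i1 to v4i8, C2_mask expands each
+  // true lane to 0xff within a 64-bit register; ANDing with the generated
+  // constant 0x01010101 then leaves a 0 or 1 per 8-bit lane, and the low
+  // 32-bit subregister is extracted as the final v4i8 value.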
+  if (OpVT.isVector() && OpVT.getVectorElementType() == MVT::i1 && OpBW <= 64) {
+    SDNode *Mask = CurDAG->getMachineNode(Hexagon::C2_mask, dl, MVT::i64, Op0);
+    unsigned NE = OpVT.getVectorNumElements();
+    EVT ExVT = N->getValueType(0);
+    unsigned ES = ExVT.getVectorElementType().getSizeInBits();
+    uint64_t MV = 0, Bit = 1;
+    for (unsigned i = 0; i < NE; ++i) {
+      MV |= Bit;
+      Bit <<= ES;
+    }
+    SDValue Ones = CurDAG->getTargetConstant(MV, dl, MVT::i64);
+    SDNode *OnesReg = CurDAG->getMachineNode(Hexagon::CONST64_Int_Real, dl,
+                                             MVT::i64, Ones);
+    if (ExVT.getSizeInBits() == 32) {
+      SDNode *And = CurDAG->getMachineNode(Hexagon::A2_andp, dl, MVT::i64,
+                                           SDValue(Mask,0), SDValue(OnesReg,0));
+      SDValue SubR = CurDAG->getTargetConstant(Hexagon::subreg_loreg, dl,
+                                               MVT::i32);
+      return CurDAG->getMachineNode(Hexagon::EXTRACT_SUBREG, dl, ExVT,
+                                    SDValue(And,0), SubR);
+    }
+    return CurDAG->getMachineNode(Hexagon::A2_andp, dl, ExVT,
+                                  SDValue(Mask,0), SDValue(OnesReg,0));
+  }
+
+  SDNode *IsIntrinsic = N->getOperand(0).getNode();
+  if ((IsIntrinsic->getOpcode() == ISD::INTRINSIC_WO_CHAIN)) {
+    unsigned ID =
+      cast<ConstantSDNode>(IsIntrinsic->getOperand(0))->getZExtValue();
+    if (doesIntrinsicReturnPredicate(ID)) {
+      // Now we need to differentiate target data types.
+      if (N->getValueType(0) == MVT::i64) {
+        // Convert the zero_extend to Rs = Pd followed by A2_combinew(0,Rs).
+        SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32);
+        SDNode *Result_1 = CurDAG->getMachineNode(Hexagon::C2_tfrpr, dl,
+                                                  MVT::i32,
+                                                  SDValue(IsIntrinsic, 0));
+        SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl,
+                                                  MVT::i32,
+                                                  TargetConst0);
+        SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::A2_combinew, dl,
+                                                  MVT::i64, MVT::Other,
+                                                  SDValue(Result_2, 0),
+                                                  SDValue(Result_1, 0));
+        ReplaceUses(N, Result_3);
+        return Result_3;
+      }
+      if (N->getValueType(0) == MVT::i32) {
+        // Convert the zero_extend to Rs = Pd
+        SDNode* RsPd = CurDAG->getMachineNode(Hexagon::C2_tfrpr, dl,
+                                              MVT::i32,
+                                              SDValue(IsIntrinsic, 0));
+        ReplaceUses(N, RsPd);
+        return RsPd;
+      }
+      llvm_unreachable("Unexpected value type");
+    }
+  }
+  return SelectCode(N);
+}
+
+//
+// Checking for circular load/store and bitreverse load/store intrinsics in
+// order to select the correct lowered operation.
+//
+SDNode *HexagonDAGToDAGISel::SelectIntrinsicWChain(SDNode *N) {
+  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+  if (IntNo == Intrinsic::hexagon_circ_ldd ||
+      IntNo == Intrinsic::hexagon_circ_ldw ||
+      IntNo == Intrinsic::hexagon_circ_lduh ||
+      IntNo == Intrinsic::hexagon_circ_ldh ||
+      IntNo == Intrinsic::hexagon_circ_ldub ||
+      IntNo == Intrinsic::hexagon_circ_ldb) {
+    SDLoc dl(N);
+    SDValue Chain = N->getOperand(0);
+    SDValue Base = N->getOperand(2);
+    SDValue Load = N->getOperand(3);
+    SDValue ModifierExpr = N->getOperand(4);
+    SDValue Offset = N->getOperand(5);
+
+    // We need to add the return type for the load. This intrinsic has
+    // two return types, one for the load and one for the post-increment.
+    // Only the *_ld instructions push the extra return type, and bump the
+    // result node operand number correspondingly.
+ std::vector<EVT> ResTys; + unsigned opc; + unsigned memsize, align; + MVT MvtSize = MVT::i32; + + if (IntNo == Intrinsic::hexagon_circ_ldd) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i64); + opc = Hexagon::L2_loadrd_pci_pseudo; + memsize = 8; + align = 8; + } else if (IntNo == Intrinsic::hexagon_circ_ldw) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + opc = Hexagon::L2_loadri_pci_pseudo; + memsize = 4; + align = 4; + } else if (IntNo == Intrinsic::hexagon_circ_ldh) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + opc = Hexagon::L2_loadrh_pci_pseudo; + memsize = 2; + align = 2; + MvtSize = MVT::i16; + } else if (IntNo == Intrinsic::hexagon_circ_lduh) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + opc = Hexagon::L2_loadruh_pci_pseudo; + memsize = 2; + align = 2; + MvtSize = MVT::i16; + } else if (IntNo == Intrinsic::hexagon_circ_ldb) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + opc = Hexagon::L2_loadrb_pci_pseudo; + memsize = 1; + align = 1; + MvtSize = MVT::i8; + } else if (IntNo == Intrinsic::hexagon_circ_ldub) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + opc = Hexagon::L2_loadrub_pci_pseudo; + memsize = 1; + align = 1; + MvtSize = MVT::i8; + } else + llvm_unreachable("no opc"); + + ResTys.push_back(MVT::Other); + + // Copy over the arguments, which are the same mostly. + SmallVector<SDValue, 5> Ops; + Ops.push_back(Base); + Ops.push_back(Load); + Ops.push_back(ModifierExpr); + int32_t Val = cast<ConstantSDNode>(Offset.getNode())->getSExtValue(); + Ops.push_back(CurDAG->getTargetConstant(Val, dl, MVT::i32)); + Ops.push_back(Chain); + SDNode* Result = CurDAG->getMachineNode(opc, dl, ResTys, Ops); + + SDValue ST; + MachineMemOperand *Mem = + MF->getMachineMemOperand(MachinePointerInfo(), + MachineMemOperand::MOStore, memsize, align); + if (MvtSize != MVT::i32) + ST = CurDAG->getTruncStore(Chain, dl, SDValue(Result, 1), Load, + MvtSize, Mem); + else + ST = CurDAG->getStore(Chain, dl, SDValue(Result, 1), Load, Mem); + + SDNode* Store = SelectStore(ST.getNode()); + + const SDValue Froms[] = { SDValue(N, 0), + SDValue(N, 1) }; + const SDValue Tos[] = { SDValue(Result, 0), + SDValue(Store, 0) }; + ReplaceUses(Froms, Tos, 2); + return Result; + } + + if (IntNo == Intrinsic::hexagon_brev_ldd || + IntNo == Intrinsic::hexagon_brev_ldw || + IntNo == Intrinsic::hexagon_brev_ldh || + IntNo == Intrinsic::hexagon_brev_lduh || + IntNo == Intrinsic::hexagon_brev_ldb || + IntNo == Intrinsic::hexagon_brev_ldub) { + SDLoc dl(N); + SDValue Chain = N->getOperand(0); + SDValue Base = N->getOperand(2); + SDValue Load = N->getOperand(3); + SDValue ModifierExpr = N->getOperand(4); + + // We need to add the rerurn type for the load. This intrinsic has + // two return types, one for the load and one for the post-increment. 
+ std::vector<EVT> ResTys; + unsigned opc; + unsigned memsize, align; + MVT MvtSize = MVT::i32; + + if (IntNo == Intrinsic::hexagon_brev_ldd) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i64); + opc = Hexagon::L2_loadrd_pbr_pseudo; + memsize = 8; + align = 8; + } else if (IntNo == Intrinsic::hexagon_brev_ldw) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + opc = Hexagon::L2_loadri_pbr_pseudo; + memsize = 4; + align = 4; + } else if (IntNo == Intrinsic::hexagon_brev_ldh) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + opc = Hexagon::L2_loadrh_pbr_pseudo; + memsize = 2; + align = 2; + MvtSize = MVT::i16; + } else if (IntNo == Intrinsic::hexagon_brev_lduh) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + opc = Hexagon::L2_loadruh_pbr_pseudo; + memsize = 2; + align = 2; + MvtSize = MVT::i16; + } else if (IntNo == Intrinsic::hexagon_brev_ldb) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + opc = Hexagon::L2_loadrb_pbr_pseudo; + memsize = 1; + align = 1; + MvtSize = MVT::i8; + } else if (IntNo == Intrinsic::hexagon_brev_ldub) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + opc = Hexagon::L2_loadrub_pbr_pseudo; + memsize = 1; + align = 1; + MvtSize = MVT::i8; + } else + llvm_unreachable("no opc"); + + ResTys.push_back(MVT::Other); + + // Copy over the arguments, which are the same mostly. + SmallVector<SDValue, 4> Ops; + Ops.push_back(Base); + Ops.push_back(Load); + Ops.push_back(ModifierExpr); + Ops.push_back(Chain); + SDNode* Result = CurDAG->getMachineNode(opc, dl, ResTys, Ops); + SDValue ST; + MachineMemOperand *Mem = + MF->getMachineMemOperand(MachinePointerInfo(), + MachineMemOperand::MOStore, memsize, align); + if (MvtSize != MVT::i32) + ST = CurDAG->getTruncStore(Chain, dl, SDValue(Result, 1), Load, + MvtSize, Mem); + else + ST = CurDAG->getStore(Chain, dl, SDValue(Result, 1), Load, Mem); + + SDNode* Store = SelectStore(ST.getNode()); + + const SDValue Froms[] = { SDValue(N, 0), + SDValue(N, 1) }; + const SDValue Tos[] = { SDValue(Result, 0), + SDValue(Store, 0) }; + ReplaceUses(Froms, Tos, 2); + return Result; + } + + return SelectCode(N); +} + +// +// Checking for intrinsics which have predicate registers as operand(s) +// and lowering to the actual intrinsic. +// +SDNode *HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) { + unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); + unsigned Bits; + switch (IID) { + case Intrinsic::hexagon_S2_vsplatrb: + Bits = 8; + break; + case Intrinsic::hexagon_S2_vsplatrh: + Bits = 16; + break; + default: + return SelectCode(N); + } + + SDValue const &V = N->getOperand(1); + SDValue U; + if (isValueExtension(V, Bits, U)) { + SDValue R = CurDAG->getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), + N->getOperand(0), U); + return SelectCode(R.getNode()); + } + return SelectCode(N); +} + +// +// Map floating point constant values. 
+//
+SDNode *HexagonDAGToDAGISel::SelectConstantFP(SDNode *N) {
+  SDLoc dl(N);
+  ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N);
+  APFloat APF = CN->getValueAPF();
+  if (N->getValueType(0) == MVT::f32) {
+    return CurDAG->getMachineNode(Hexagon::TFRI_f, dl, MVT::f32,
+              CurDAG->getTargetConstantFP(APF.convertToFloat(), dl, MVT::f32));
+  }
+  else if (N->getValueType(0) == MVT::f64) {
+    return CurDAG->getMachineNode(Hexagon::CONST64_Float_Real, dl, MVT::f64,
+             CurDAG->getTargetConstantFP(APF.convertToDouble(), dl, MVT::f64));
+  }
+
+  return SelectCode(N);
+}
+
+//
+// Map predicate true (encoded as -1 in LLVM) to a XOR.
+//
+SDNode *HexagonDAGToDAGISel::SelectConstant(SDNode *N) {
+  SDLoc dl(N);
+  if (N->getValueType(0) == MVT::i1) {
+    SDNode* Result = 0;
+    int32_t Val = cast<ConstantSDNode>(N)->getSExtValue();
+    if (Val == -1) {
+      Result = CurDAG->getMachineNode(Hexagon::TFR_PdTrue, dl, MVT::i1);
+    } else if (Val == 0) {
+      Result = CurDAG->getMachineNode(Hexagon::TFR_PdFalse, dl, MVT::i1);
+    }
+    if (Result) {
+      ReplaceUses(N, Result);
+      return Result;
+    }
+  }
+
+  return SelectCode(N);
+}
+
+
+//
+// Map add followed by an asr -> asr +=.
+//
+SDNode *HexagonDAGToDAGISel::SelectAdd(SDNode *N) {
+  SDLoc dl(N);
+  if (N->getValueType(0) != MVT::i32) {
+    return SelectCode(N);
+  }
+  // Identify nodes of the form: add(asr(...)).
+  SDNode* Src1 = N->getOperand(0).getNode();
+  if (Src1->getOpcode() != ISD::SRA || !Src1->hasOneUse()
+      || Src1->getValueType(0) != MVT::i32) {
+    return SelectCode(N);
+  }
+
+  // Build Rd = Rd' + asr(Rs, Rt). The machine constraints will ensure that
+  // Rd and Rd' are assigned to the same register.
+  SDNode* Result = CurDAG->getMachineNode(Hexagon::S2_asr_r_r_acc, dl, MVT::i32,
+                                          N->getOperand(1),
+                                          Src1->getOperand(0),
+                                          Src1->getOperand(1));
+  ReplaceUses(N, Result);
+
+  return Result;
+}
+
+//
+// Map the following, where possible.
+// AND/FABS -> clrbit
+// OR -> setbit
+// XOR/FNEG -> toggle_bit.
+//
+SDNode *HexagonDAGToDAGISel::SelectBitOp(SDNode *N) {
+  SDLoc dl(N);
+  EVT ValueVT = N->getValueType(0);
+
+  // We handle only 32- and 64-bit bit ops.
+  if (!(ValueVT == MVT::i32 || ValueVT == MVT::i64 ||
+        ValueVT == MVT::f32 || ValueVT == MVT::f64))
+    return SelectCode(N);
+
+  // We handle only fabs and fneg for V5.
+  unsigned Opc = N->getOpcode();
+  if ((Opc == ISD::FABS || Opc == ISD::FNEG) && !HST->hasV5TOps())
+    return SelectCode(N);
+
+  int64_t Val = 0;
+  if (Opc != ISD::FABS && Opc != ISD::FNEG) {
+    if (N->getOperand(1).getOpcode() == ISD::Constant)
+      Val = cast<ConstantSDNode>((N)->getOperand(1))->getSExtValue();
+    else
+      return SelectCode(N);
+  }
+
+  if (Opc == ISD::AND) {
+    // Check if this is a bit-clearing AND; if not, select code the usual way.
+    if ((ValueVT == MVT::i32 && isPowerOf2_32(~Val)) ||
+        (ValueVT == MVT::i64 && isPowerOf2_64(~Val)))
+      Val = ~Val;
+    else
+      return SelectCode(N);
+  }
+
+  // If OR or AND is being fed by shl, srl or sra, don't do this change,
+  // because Hexagon provides |= and &= forms of shl, srl, and sra.
+  // Traverse the DAG to see if there is a shl, srl or sra.
+  if (Opc == ISD::OR || Opc == ISD::AND) {
+    switch (N->getOperand(0)->getOpcode()) {
+      default:
+        break;
+      case ISD::SRA:
+      case ISD::SRL:
+      case ISD::SHL:
+        return SelectCode(N);
+    }
+  }
+
+  // Make sure it's a power of 2.
+  unsigned BitPos = 0;
+  if (Opc != ISD::FABS && Opc != ISD::FNEG) {
+    if ((ValueVT == MVT::i32 && !isPowerOf2_32(Val)) ||
+        (ValueVT == MVT::i64 && !isPowerOf2_64(Val)))
+      return SelectCode(N);
+
+    // Get the bit position.
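+    // For example (operand invented), clearing bit 5 via (and %x, 0xffffffdf)
+    // reaches this point with Val already inverted to 0x20, so BitPos becomes
+    // 5 and the AND is selected as S2_clrbit_i(%x, 5) below.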
+ BitPos = countTrailingZeros(uint64_t(Val)); + } else { + // For fabs and fneg, it's always the 31st bit. + BitPos = 31; + } + + unsigned BitOpc = 0; + // Set the right opcode for bitwise operations. + switch (Opc) { + default: + llvm_unreachable("Only bit-wise/abs/neg operations are allowed."); + case ISD::AND: + case ISD::FABS: + BitOpc = Hexagon::S2_clrbit_i; + break; + case ISD::OR: + BitOpc = Hexagon::S2_setbit_i; + break; + case ISD::XOR: + case ISD::FNEG: + BitOpc = Hexagon::S2_togglebit_i; + break; + } + + SDNode *Result; + // Get the right SDVal for the opcode. + SDValue SDVal = CurDAG->getTargetConstant(BitPos, dl, MVT::i32); + + if (ValueVT == MVT::i32 || ValueVT == MVT::f32) { + Result = CurDAG->getMachineNode(BitOpc, dl, ValueVT, + N->getOperand(0), SDVal); + } else { + // 64-bit gymnastic to use REG_SEQUENCE. But it's worth it. + EVT SubValueVT; + if (ValueVT == MVT::i64) + SubValueVT = MVT::i32; + else + SubValueVT = MVT::f32; + + SDNode *Reg = N->getOperand(0).getNode(); + SDValue RegClass = CurDAG->getTargetConstant(Hexagon::DoubleRegsRegClassID, + dl, MVT::i64); + + SDValue SubregHiIdx = CurDAG->getTargetConstant(Hexagon::subreg_hireg, dl, + MVT::i32); + SDValue SubregLoIdx = CurDAG->getTargetConstant(Hexagon::subreg_loreg, dl, + MVT::i32); + + SDValue SubregHI = CurDAG->getTargetExtractSubreg(Hexagon::subreg_hireg, dl, + MVT::i32, SDValue(Reg, 0)); + + SDValue SubregLO = CurDAG->getTargetExtractSubreg(Hexagon::subreg_loreg, dl, + MVT::i32, SDValue(Reg, 0)); + + // Clear/set/toggle hi or lo registers depending on the bit position. + if (SubValueVT != MVT::f32 && BitPos < 32) { + SDNode *Result0 = CurDAG->getMachineNode(BitOpc, dl, SubValueVT, + SubregLO, SDVal); + const SDValue Ops[] = { RegClass, SubregHI, SubregHiIdx, + SDValue(Result0, 0), SubregLoIdx }; + Result = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, + dl, ValueVT, Ops); + } else { + if (Opc != ISD::FABS && Opc != ISD::FNEG) + SDVal = CurDAG->getTargetConstant(BitPos-32, dl, MVT::i32); + SDNode *Result0 = CurDAG->getMachineNode(BitOpc, dl, SubValueVT, + SubregHI, SDVal); + const SDValue Ops[] = { RegClass, SDValue(Result0, 0), SubregHiIdx, + SubregLO, SubregLoIdx }; + Result = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, + dl, ValueVT, Ops); + } + } + + ReplaceUses(N, Result); + return Result; +} + + +SDNode *HexagonDAGToDAGISel::SelectFrameIndex(SDNode *N) { + MachineFrameInfo *MFI = MF->getFrameInfo(); + const HexagonFrameLowering *HFI = HST->getFrameLowering(); + int FX = cast<FrameIndexSDNode>(N)->getIndex(); + unsigned StkA = HFI->getStackAlignment(); + unsigned MaxA = MFI->getMaxAlignment(); + SDValue FI = CurDAG->getTargetFrameIndex(FX, MVT::i32); + SDLoc DL(N); + SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); + SDNode *R = 0; + + // Use TFR_FI when: + // - the object is fixed, or + // - there are no objects with higher-than-default alignment, or + // - there are no dynamically allocated objects. + // Otherwise, use TFR_FIA. 
+ if (FX < 0 || MaxA <= StkA || !MFI->hasVarSizedObjects()) { + R = CurDAG->getMachineNode(Hexagon::TFR_FI, DL, MVT::i32, FI, Zero); + } else { + auto &HMFI = *MF->getInfo<HexagonMachineFunctionInfo>(); + unsigned AR = HMFI.getStackAlignBaseVReg(); + SDValue CH = CurDAG->getEntryNode(); + SDValue Ops[] = { CurDAG->getCopyFromReg(CH, DL, AR, MVT::i32), FI, Zero }; + R = CurDAG->getMachineNode(Hexagon::TFR_FIA, DL, MVT::i32, Ops); + } + + if (N->getHasDebugValue()) + CurDAG->TransferDbgValues(SDValue(N, 0), SDValue(R, 0)); + return R; +} + + +SDNode *HexagonDAGToDAGISel::Select(SDNode *N) { + if (N->isMachineOpcode()) { + N->setNodeId(-1); + return nullptr; // Already selected. + } + + switch (N->getOpcode()) { + case ISD::Constant: + return SelectConstant(N); + + case ISD::ConstantFP: + return SelectConstantFP(N); + + case ISD::FrameIndex: + return SelectFrameIndex(N); + + case ISD::ADD: + return SelectAdd(N); + + case ISD::SHL: + return SelectSHL(N); + + case ISD::LOAD: + return SelectLoad(N); + + case ISD::STORE: + return SelectStore(N); + + case ISD::MUL: + return SelectMul(N); + + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::FABS: + case ISD::FNEG: + return SelectBitOp(N); + + case ISD::ZERO_EXTEND: + return SelectZeroExtend(N); + + case ISD::INTRINSIC_W_CHAIN: + return SelectIntrinsicWChain(N); + + case ISD::INTRINSIC_WO_CHAIN: + return SelectIntrinsicWOChain(N); + } + + return SelectCode(N); +} + +bool HexagonDAGToDAGISel:: +SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, + std::vector<SDValue> &OutOps) { + SDValue Inp = Op, Res; + + switch (ConstraintID) { + default: + return true; + case InlineAsm::Constraint_i: + case InlineAsm::Constraint_o: // Offsetable. + case InlineAsm::Constraint_v: // Not offsetable. + case InlineAsm::Constraint_m: // Memory. + if (SelectAddrFI(Inp, Res)) + OutOps.push_back(Res); + else + OutOps.push_back(Inp); + break; + } + + OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32)); + return false; +} + + +void HexagonDAGToDAGISel::PreprocessISelDAG() { + SelectionDAG &DAG = *CurDAG; + std::vector<SDNode*> Nodes; + for (SDNode &Node : DAG.allnodes()) + Nodes.push_back(&Node); + + // Simplify: (or (select c x 0) z) -> (select c (or x z) z) + // (or (select c 0 y) z) -> (select c z (or y z)) + // This may not be the right thing for all targets, so do it here. + for (auto I: Nodes) { + if (I->getOpcode() != ISD::OR) + continue; + + auto IsZero = [] (const SDValue &V) -> bool { + if (ConstantSDNode *SC = dyn_cast<ConstantSDNode>(V.getNode())) + return SC->isNullValue(); + return false; + }; + auto IsSelect0 = [IsZero] (const SDValue &Op) -> bool { + if (Op.getOpcode() != ISD::SELECT) + return false; + return IsZero(Op.getOperand(1)) || IsZero(Op.getOperand(2)); + }; + + SDValue N0 = I->getOperand(0), N1 = I->getOperand(1); + EVT VT = I->getValueType(0); + bool SelN0 = IsSelect0(N0); + SDValue SOp = SelN0 ? N0 : N1; + SDValue VOp = SelN0 ? 
N1 : N0; + + if (SOp.getOpcode() == ISD::SELECT && SOp.getNode()->hasOneUse()) { + SDValue SC = SOp.getOperand(0); + SDValue SX = SOp.getOperand(1); + SDValue SY = SOp.getOperand(2); + SDLoc DLS = SOp; + if (IsZero(SY)) { + SDValue NewOr = DAG.getNode(ISD::OR, DLS, VT, SX, VOp); + SDValue NewSel = DAG.getNode(ISD::SELECT, DLS, VT, SC, NewOr, VOp); + DAG.ReplaceAllUsesWith(I, NewSel.getNode()); + } else if (IsZero(SX)) { + SDValue NewOr = DAG.getNode(ISD::OR, DLS, VT, SY, VOp); + SDValue NewSel = DAG.getNode(ISD::SELECT, DLS, VT, SC, VOp, NewOr); + DAG.ReplaceAllUsesWith(I, NewSel.getNode()); + } + } + } +} + +void HexagonDAGToDAGISel::EmitFunctionEntryCode() { + auto &HST = static_cast<const HexagonSubtarget&>(MF->getSubtarget()); + auto &HFI = *HST.getFrameLowering(); + if (!HFI.needsAligna(*MF)) + return; + + MachineFrameInfo *MFI = MF->getFrameInfo(); + MachineBasicBlock *EntryBB = &MF->front(); + unsigned AR = FuncInfo->CreateReg(MVT::i32); + unsigned MaxA = MFI->getMaxAlignment(); + BuildMI(EntryBB, DebugLoc(), HII->get(Hexagon::ALIGNA), AR) + .addImm(MaxA); + MF->getInfo<HexagonMachineFunctionInfo>()->setStackAlignBaseVReg(AR); +} + +// Match a frame index that can be used in an addressing mode. +bool HexagonDAGToDAGISel::SelectAddrFI(SDValue& N, SDValue &R) { + if (N.getOpcode() != ISD::FrameIndex) + return false; + auto &HFI = *HST->getFrameLowering(); + MachineFrameInfo *MFI = MF->getFrameInfo(); + int FX = cast<FrameIndexSDNode>(N)->getIndex(); + if (!MFI->isFixedObjectIndex(FX) && HFI.needsAligna(*MF)) + return false; + R = CurDAG->getTargetFrameIndex(FX, MVT::i32); + return true; +} + +inline bool HexagonDAGToDAGISel::SelectAddrGA(SDValue &N, SDValue &R) { + return SelectGlobalAddress(N, R, false); +} + +inline bool HexagonDAGToDAGISel::SelectAddrGP(SDValue &N, SDValue &R) { + return SelectGlobalAddress(N, R, true); +} + +bool HexagonDAGToDAGISel::SelectGlobalAddress(SDValue &N, SDValue &R, + bool UseGP) { + switch (N.getOpcode()) { + case ISD::ADD: { + SDValue N0 = N.getOperand(0); + SDValue N1 = N.getOperand(1); + unsigned GAOpc = N0.getOpcode(); + if (UseGP && GAOpc != HexagonISD::CONST32_GP) + return false; + if (!UseGP && GAOpc != HexagonISD::CONST32) + return false; + if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N1)) { + SDValue Addr = N0.getOperand(0); + if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Addr)) { + if (GA->getOpcode() == ISD::TargetGlobalAddress) { + uint64_t NewOff = GA->getOffset() + (uint64_t)Const->getSExtValue(); + R = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(Const), + N.getValueType(), NewOff); + return true; + } + } + } + break; + } + case HexagonISD::CONST32: + // The operand(0) of CONST32 is TargetGlobalAddress, which is what we + // want in the instruction. 
+ if (!UseGP) + R = N.getOperand(0); + return !UseGP; + case HexagonISD::CONST32_GP: + if (UseGP) + R = N.getOperand(0); + return UseGP; + default: + return false; + } + + return false; +} + +bool HexagonDAGToDAGISel::isValueExtension(const SDValue &Val, + unsigned FromBits, SDValue &Src) { + unsigned Opc = Val.getOpcode(); + switch (Opc) { + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: { + SDValue const &Op0 = Val.getOperand(0); + EVT T = Op0.getValueType(); + if (T.isInteger() && T.getSizeInBits() == FromBits) { + Src = Op0; + return true; + } + break; + } + case ISD::SIGN_EXTEND_INREG: + case ISD::AssertSext: + case ISD::AssertZext: + if (Val.getOperand(0).getValueType().isInteger()) { + VTSDNode *T = cast<VTSDNode>(Val.getOperand(1)); + if (T->getVT().getSizeInBits() == FromBits) { + Src = Val.getOperand(0); + return true; + } + } + break; + case ISD::AND: { + // Check if this is an AND with "FromBits" of lower bits set to 1. + uint64_t FromMask = (1 << FromBits) - 1; + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(0))) { + if (C->getZExtValue() == FromMask) { + Src = Val.getOperand(1); + return true; + } + } + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(1))) { + if (C->getZExtValue() == FromMask) { + Src = Val.getOperand(0); + return true; + } + } + break; + } + case ISD::OR: + case ISD::XOR: { + // OR/XOR with the lower "FromBits" bits set to 0. + uint64_t FromMask = (1 << FromBits) - 1; + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(0))) { + if ((C->getZExtValue() & FromMask) == 0) { + Src = Val.getOperand(1); + return true; + } + } + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(1))) { + if ((C->getZExtValue() & FromMask) == 0) { + Src = Val.getOperand(0); + return true; + } + } + } + default: + break; + } + return false; +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp new file mode 100644 index 0000000..0167090 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -0,0 +1,2894 @@ +//===-- HexagonISelLowering.cpp - Hexagon DAG Lowering Implementation -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the interfaces that Hexagon uses to lower LLVM code +// into a selection DAG. 
+// +//===----------------------------------------------------------------------===// + +#include "HexagonISelLowering.h" +#include "HexagonMachineFunctionInfo.h" +#include "HexagonSubtarget.h" +#include "HexagonTargetMachine.h" +#include "HexagonTargetObjectFile.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define DEBUG_TYPE "hexagon-lowering" + +static cl::opt<bool> EmitJumpTables("hexagon-emit-jump-tables", + cl::init(true), cl::Hidden, + cl::desc("Control jump table emission on Hexagon target")); + +static cl::opt<bool> EnableHexSDNodeSched("enable-hexagon-sdnode-sched", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Enable Hexagon SDNode scheduling")); + +static cl::opt<bool> EnableFastMath("ffast-math", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Enable Fast Math processing")); + +static cl::opt<int> MinimumJumpTables("minimum-jump-tables", + cl::Hidden, cl::ZeroOrMore, cl::init(5), + cl::desc("Set minimum jump tables")); + +static cl::opt<int> MaxStoresPerMemcpyCL("max-store-memcpy", + cl::Hidden, cl::ZeroOrMore, cl::init(6), + cl::desc("Max #stores to inline memcpy")); + +static cl::opt<int> MaxStoresPerMemcpyOptSizeCL("max-store-memcpy-Os", + cl::Hidden, cl::ZeroOrMore, cl::init(4), + cl::desc("Max #stores to inline memcpy")); + +static cl::opt<int> MaxStoresPerMemmoveCL("max-store-memmove", + cl::Hidden, cl::ZeroOrMore, cl::init(6), + cl::desc("Max #stores to inline memmove")); + +static cl::opt<int> MaxStoresPerMemmoveOptSizeCL("max-store-memmove-Os", + cl::Hidden, cl::ZeroOrMore, cl::init(4), + cl::desc("Max #stores to inline memmove")); + +static cl::opt<int> MaxStoresPerMemsetCL("max-store-memset", + cl::Hidden, cl::ZeroOrMore, cl::init(8), + cl::desc("Max #stores to inline memset")); + +static cl::opt<int> MaxStoresPerMemsetOptSizeCL("max-store-memset-Os", + cl::Hidden, cl::ZeroOrMore, cl::init(4), + cl::desc("Max #stores to inline memset")); + + +namespace { +class HexagonCCState : public CCState { + unsigned NumNamedVarArgParams; + +public: + HexagonCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF, + SmallVectorImpl<CCValAssign> &locs, LLVMContext &C, + int NumNamedVarArgParams) + : CCState(CC, isVarArg, MF, locs, C), + NumNamedVarArgParams(NumNamedVarArgParams) {} + + unsigned getNumNamedVarArgParams() const { return NumNamedVarArgParams; } +}; +} + +// Implement calling convention for Hexagon. 
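+// Each CC_* helper below follows the CCAssignFn convention: it returns false
+// once the value has been given a register or stack location, and true to let
+// the caller try the next rule. CC_Hexagon dispatches on the value type to the
+// 32-bit, 64-bit and HVX vector variants.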
+ +static bool IsHvxVectorType(MVT ty); + +static bool +CC_Hexagon(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State); + +static bool +CC_Hexagon32(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State); + +static bool +CC_Hexagon64(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State); + +static bool +CC_HexagonVector(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State); + +static bool +RetCC_Hexagon(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State); + +static bool +RetCC_Hexagon32(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State); + +static bool +RetCC_Hexagon64(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State); + +static bool +RetCC_HexagonVector(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State); + +static bool +CC_Hexagon_VarArg (unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + HexagonCCState &HState = static_cast<HexagonCCState &>(State); + + if (ValNo < HState.getNumNamedVarArgParams()) { + // Deal with named arguments. + return CC_Hexagon(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State); + } + + // Deal with un-named arguments. + unsigned ofst; + if (ArgFlags.isByVal()) { + // If pass-by-value, the size allocated on stack is decided + // by ArgFlags.getByValSize(), not by the size of LocVT. 
+ ofst = State.AllocateStack(ArgFlags.getByValSize(), + ArgFlags.getByValAlign()); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo)); + return false; + } + if (LocVT == MVT::i1 || LocVT == MVT::i8 || LocVT == MVT::i16) { + LocVT = MVT::i32; + ValVT = MVT::i32; + if (ArgFlags.isSExt()) + LocInfo = CCValAssign::SExt; + else if (ArgFlags.isZExt()) + LocInfo = CCValAssign::ZExt; + else + LocInfo = CCValAssign::AExt; + } + if (LocVT == MVT::i32 || LocVT == MVT::f32) { + ofst = State.AllocateStack(4, 4); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo)); + return false; + } + if (LocVT == MVT::i64 || LocVT == MVT::f64) { + ofst = State.AllocateStack(8, 8); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo)); + return false; + } + if (LocVT == MVT::v2i64 || LocVT == MVT::v4i32 || LocVT == MVT::v8i16 || + LocVT == MVT::v16i8) { + ofst = State.AllocateStack(16, 16); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo)); + return false; + } + if (LocVT == MVT::v4i64 || LocVT == MVT::v8i32 || LocVT == MVT::v16i16 || + LocVT == MVT::v32i8) { + ofst = State.AllocateStack(32, 32); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo)); + return false; + } + if (LocVT == MVT::v8i64 || LocVT == MVT::v16i32 || LocVT == MVT::v32i16 || + LocVT == MVT::v64i8 || LocVT == MVT::v512i1) { + ofst = State.AllocateStack(64, 64); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo)); + return false; + } + if (LocVT == MVT::v16i64 || LocVT == MVT::v32i32 || LocVT == MVT::v64i16 || + LocVT == MVT::v128i8 || LocVT == MVT::v1024i1) { + ofst = State.AllocateStack(128, 128); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo)); + return false; + } + if (LocVT == MVT::v32i64 || LocVT == MVT::v64i32 || LocVT == MVT::v128i16 || + LocVT == MVT::v256i8) { + ofst = State.AllocateStack(256, 256); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo)); + return false; + } + + llvm_unreachable(nullptr); +} + + +static bool CC_Hexagon (unsigned ValNo, MVT ValVT, MVT LocVT, + CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) { + if (ArgFlags.isByVal()) { + // Passed on stack. 
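+ // Reserve a stack slot using the size and alignment recorded in the byval
+ // attributes, and record the resulting memory location.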
+ unsigned Offset = State.AllocateStack(ArgFlags.getByValSize(), + ArgFlags.getByValAlign()); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return false; + } + + if (LocVT == MVT::i1 || LocVT == MVT::i8 || LocVT == MVT::i16) { + LocVT = MVT::i32; + ValVT = MVT::i32; + if (ArgFlags.isSExt()) + LocInfo = CCValAssign::SExt; + else if (ArgFlags.isZExt()) + LocInfo = CCValAssign::ZExt; + else + LocInfo = CCValAssign::AExt; + } else if (LocVT == MVT::v4i8 || LocVT == MVT::v2i16) { + LocVT = MVT::i32; + LocInfo = CCValAssign::BCvt; + } else if (LocVT == MVT::v8i8 || LocVT == MVT::v4i16 || LocVT == MVT::v2i32) { + LocVT = MVT::i64; + LocInfo = CCValAssign::BCvt; + } + + if (LocVT == MVT::i32 || LocVT == MVT::f32) { + if (!CC_Hexagon32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State)) + return false; + } + + if (LocVT == MVT::i64 || LocVT == MVT::f64) { + if (!CC_Hexagon64(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State)) + return false; + } + + if (LocVT == MVT::v8i32 || LocVT == MVT::v16i16 || LocVT == MVT::v32i8) { + unsigned Offset = State.AllocateStack(ArgFlags.getByValSize(), 32); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return false; + } + + if (IsHvxVectorType(LocVT)) { + if (!CC_HexagonVector(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State)) + return false; + } + + return true; // CC didn't match. +} + + +static bool CC_Hexagon32(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + + static const MCPhysReg RegList[] = { + Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4, + Hexagon::R5 + }; + if (unsigned Reg = State.AllocateReg(RegList)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + + unsigned Offset = State.AllocateStack(4, 4); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return false; +} + +static bool CC_Hexagon64(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + + if (unsigned Reg = State.AllocateReg(Hexagon::D0)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + + static const MCPhysReg RegList1[] = { + Hexagon::D1, Hexagon::D2 + }; + static const MCPhysReg RegList2[] = { + Hexagon::R1, Hexagon::R3 + }; + if (unsigned Reg = State.AllocateReg(RegList1, RegList2)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + + unsigned Offset = State.AllocateStack(8, 8, Hexagon::D2); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return false; +} + +static bool CC_HexagonVector(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + + static const MCPhysReg VecLstS[] = { Hexagon::V0, Hexagon::V1, + Hexagon::V2, Hexagon::V3, + Hexagon::V4, Hexagon::V5, + Hexagon::V6, Hexagon::V7, + Hexagon::V8, Hexagon::V9, + Hexagon::V10, Hexagon::V11, + Hexagon::V12, Hexagon::V13, + Hexagon::V14, Hexagon::V15}; + static const MCPhysReg VecLstD[] = { Hexagon::W0, Hexagon::W1, + Hexagon::W2, Hexagon::W3, + Hexagon::W4, Hexagon::W5, + Hexagon::W6, Hexagon::W7}; + auto &MF = State.getMachineFunction(); + auto &HST = MF.getSubtarget<HexagonSubtarget>(); + bool UseHVX = HST.useHVXOps(); + bool UseHVXDbl = HST.useHVXDblOps(); + + if ((UseHVX && !UseHVXDbl) && + (LocVT == MVT::v8i64 || LocVT == MVT::v16i32 || LocVT == MVT::v32i16 || + LocVT == MVT::v64i8 
|| LocVT == MVT::v512i1)) { + if (unsigned Reg = State.AllocateReg(VecLstS)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + unsigned Offset = State.AllocateStack(64, 64); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return false; + } + if ((UseHVX && !UseHVXDbl) && + (LocVT == MVT::v16i64 || LocVT == MVT::v32i32 || LocVT == MVT::v64i16 || + LocVT == MVT::v128i8)) { + if (unsigned Reg = State.AllocateReg(VecLstD)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + unsigned Offset = State.AllocateStack(128, 128); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return false; + } + // 128B Mode + if ((UseHVX && UseHVXDbl) && + (LocVT == MVT::v32i64 || LocVT == MVT::v64i32 || LocVT == MVT::v128i16 || + LocVT == MVT::v256i8)) { + if (unsigned Reg = State.AllocateReg(VecLstD)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + unsigned Offset = State.AllocateStack(256, 256); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return false; + } + if ((UseHVX && UseHVXDbl) && + (LocVT == MVT::v16i64 || LocVT == MVT::v32i32 || LocVT == MVT::v64i16 || + LocVT == MVT::v128i8 || LocVT == MVT::v1024i1)) { + if (unsigned Reg = State.AllocateReg(VecLstS)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + unsigned Offset = State.AllocateStack(128, 128); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return false; + } + return true; +} + +static bool RetCC_Hexagon(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + auto &MF = State.getMachineFunction(); + auto &HST = MF.getSubtarget<HexagonSubtarget>(); + bool UseHVX = HST.useHVXOps(); + bool UseHVXDbl = HST.useHVXDblOps(); + + if (LocVT == MVT::i1 || + LocVT == MVT::i8 || + LocVT == MVT::i16) { + LocVT = MVT::i32; + ValVT = MVT::i32; + if (ArgFlags.isSExt()) + LocInfo = CCValAssign::SExt; + else if (ArgFlags.isZExt()) + LocInfo = CCValAssign::ZExt; + else + LocInfo = CCValAssign::AExt; + } else if (LocVT == MVT::v4i8 || LocVT == MVT::v2i16) { + LocVT = MVT::i32; + LocInfo = CCValAssign::BCvt; + } else if (LocVT == MVT::v8i8 || LocVT == MVT::v4i16 || LocVT == MVT::v2i32) { + LocVT = MVT::i64; + LocInfo = CCValAssign::BCvt; + } else if (LocVT == MVT::v64i8 || LocVT == MVT::v32i16 || + LocVT == MVT::v16i32 || LocVT == MVT::v8i64 || + LocVT == MVT::v512i1) { + LocVT = MVT::v16i32; + ValVT = MVT::v16i32; + LocInfo = CCValAssign::Full; + } else if (LocVT == MVT::v128i8 || LocVT == MVT::v64i16 || + LocVT == MVT::v32i32 || LocVT == MVT::v16i64 || + (LocVT == MVT::v1024i1 && UseHVX && UseHVXDbl)) { + LocVT = MVT::v32i32; + ValVT = MVT::v32i32; + LocInfo = CCValAssign::Full; + } else if (LocVT == MVT::v256i8 || LocVT == MVT::v128i16 || + LocVT == MVT::v64i32 || LocVT == MVT::v32i64) { + LocVT = MVT::v64i32; + ValVT = MVT::v64i32; + LocInfo = CCValAssign::Full; + } + if (LocVT == MVT::i32 || LocVT == MVT::f32) { + if (!RetCC_Hexagon32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State)) + return false; + } + + if (LocVT == MVT::i64 || LocVT == MVT::f64) { + if (!RetCC_Hexagon64(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State)) + return false; + } + if (LocVT == MVT::v16i32 || LocVT == MVT::v32i32 || LocVT == MVT::v64i32) { + if (!RetCC_HexagonVector(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State)) + return false; + 
} + return true; // CC didn't match. +} + +static bool RetCC_Hexagon32(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + + if (LocVT == MVT::i32 || LocVT == MVT::f32) { + if (unsigned Reg = State.AllocateReg(Hexagon::R0)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + } + + unsigned Offset = State.AllocateStack(4, 4); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return false; +} + +static bool RetCC_Hexagon64(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + if (LocVT == MVT::i64 || LocVT == MVT::f64) { + if (unsigned Reg = State.AllocateReg(Hexagon::D0)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + } + + unsigned Offset = State.AllocateStack(8, 8); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return false; +} + +static bool RetCC_HexagonVector(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + auto &MF = State.getMachineFunction(); + auto &HST = MF.getSubtarget<HexagonSubtarget>(); + bool UseHVX = HST.useHVXOps(); + bool UseHVXDbl = HST.useHVXDblOps(); + + unsigned OffSiz = 64; + if (LocVT == MVT::v16i32) { + if (unsigned Reg = State.AllocateReg(Hexagon::V0)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + } else if (LocVT == MVT::v32i32) { + unsigned Req = (UseHVX && UseHVXDbl) ? Hexagon::V0 : Hexagon::W0; + if (unsigned Reg = State.AllocateReg(Req)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + OffSiz = 128; + } else if (LocVT == MVT::v64i32) { + if (unsigned Reg = State.AllocateReg(Hexagon::W0)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + OffSiz = 256; + } + + unsigned Offset = State.AllocateStack(OffSiz, OffSiz); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return false; +} + +void HexagonTargetLowering::promoteLdStType(EVT VT, EVT PromotedLdStVT) { + if (VT != PromotedLdStVT) { + setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote); + AddPromotedToType(ISD::LOAD, VT.getSimpleVT(), + PromotedLdStVT.getSimpleVT()); + + setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote); + AddPromotedToType(ISD::STORE, VT.getSimpleVT(), + PromotedLdStVT.getSimpleVT()); + } +} + +SDValue +HexagonTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) +const { + return SDValue(); +} + +/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified +/// by "Src" to address "Dst" of size "Size". Alignment information is +/// specified by the specific parameter attribute. The copy will be passed as +/// a byval function parameter. Sometimes what we are copying is the end of a +/// larger object, the part that does not fit in registers. 
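+/// The copy itself is emitted as an ordinary memcpy node on the given chain.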
+static SDValue +CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, + ISD::ArgFlagsTy Flags, SelectionDAG &DAG, + SDLoc dl) { + + SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32); + return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), + /*isVolatile=*/false, /*AlwaysInline=*/false, + /*isTailCall=*/false, + MachinePointerInfo(), MachinePointerInfo()); +} + +static bool IsHvxVectorType(MVT ty) { + return (ty == MVT::v8i64 || ty == MVT::v16i32 || ty == MVT::v32i16 || + ty == MVT::v64i8 || + ty == MVT::v16i64 || ty == MVT::v32i32 || ty == MVT::v64i16 || + ty == MVT::v128i8 || + ty == MVT::v32i64 || ty == MVT::v64i32 || ty == MVT::v128i16 || + ty == MVT::v256i8 || + ty == MVT::v512i1 || ty == MVT::v1024i1); +} + +// LowerReturn - Lower ISD::RET. If a struct is larger than 8 bytes and is +// passed by value, the function prototype is modified to return void and +// the value is stored in memory pointed by a pointer passed by caller. +SDValue +HexagonTargetLowering::LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + SDLoc dl, SelectionDAG &DAG) const { + + // CCValAssign - represent the assignment of the return value to locations. + SmallVector<CCValAssign, 16> RVLocs; + + // CCState - Info about the registers and stack slot. + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, + *DAG.getContext()); + + // Analyze return values of ISD::RET + CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon); + + SDValue Flag; + SmallVector<SDValue, 4> RetOps(1, Chain); + + // Copy the result values into the output registers. + for (unsigned i = 0; i != RVLocs.size(); ++i) { + CCValAssign &VA = RVLocs[i]; + + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Flag); + + // Guarantee that all emitted copies are stuck together with flags. + Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); + } + + RetOps[0] = Chain; // Update chain. + + // Add the flag if we have it. + if (Flag.getNode()) + RetOps.push_back(Flag); + + return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other, RetOps); +} + +bool HexagonTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { + // If either no tail call or told not to tail call at all, don't. + auto Attr = + CI->getParent()->getParent()->getFnAttribute("disable-tail-calls"); + if (!CI->isTailCall() || Attr.getValueAsString() == "true") + return false; + + return true; +} + +/// LowerCallResult - Lower the result values of an ISD::CALL into the +/// appropriate copies out of appropriate physical registers. This assumes that +/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call +/// being lowered. Returns a SDNode with the same number of values as the +/// ISD::CALL. +SDValue +HexagonTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, + CallingConv::ID CallConv, bool isVarArg, + const + SmallVectorImpl<ISD::InputArg> &Ins, + SDLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals, + const SmallVectorImpl<SDValue> &OutVals, + SDValue Callee) const { + + // Assign locations to each value returned by this call. + SmallVector<CCValAssign, 16> RVLocs; + + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, + *DAG.getContext()); + + CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon); + + // Copy all of the result registers out of their specified physreg. 
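+ // Each CopyFromReg is threaded onto the chain, and its glue output feeds the
+ // next copy so the copies stay glued to the call.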
+ for (unsigned i = 0; i != RVLocs.size(); ++i) { + Chain = DAG.getCopyFromReg(Chain, dl, + RVLocs[i].getLocReg(), + RVLocs[i].getValVT(), InFlag).getValue(1); + InFlag = Chain.getValue(2); + InVals.push_back(Chain.getValue(0)); + } + + return Chain; +} + +/// LowerCall - Functions arguments are copied from virtual regs to +/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted. +SDValue +HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const { + SelectionDAG &DAG = CLI.DAG; + SDLoc &dl = CLI.DL; + SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; + SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; + SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; + SDValue Chain = CLI.Chain; + SDValue Callee = CLI.Callee; + bool &isTailCall = CLI.IsTailCall; + CallingConv::ID CallConv = CLI.CallConv; + bool isVarArg = CLI.IsVarArg; + bool doesNotReturn = CLI.DoesNotReturn; + + bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); + MachineFunction &MF = DAG.getMachineFunction(); + auto PtrVT = getPointerTy(MF.getDataLayout()); + + // Check for varargs. + int NumNamedVarArgParams = -1; + if (GlobalAddressSDNode *GAN = dyn_cast<GlobalAddressSDNode>(Callee)) { + const GlobalValue *GV = GAN->getGlobal(); + Callee = DAG.getTargetGlobalAddress(GV, dl, MVT::i32); + if (const Function* F = dyn_cast<Function>(GV)) { + // If a function has zero args and is a vararg function, that's + // disallowed so it must be an undeclared function. Do not assume + // varargs if the callee is undefined. + if (F->isVarArg() && F->getFunctionType()->getNumParams() != 0) + NumNamedVarArgParams = F->getFunctionType()->getNumParams(); + } + } + + // Analyze operands of the call, assigning locations to each operand. + SmallVector<CCValAssign, 16> ArgLocs; + HexagonCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, + *DAG.getContext(), NumNamedVarArgParams); + + if (isVarArg) + CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon_VarArg); + else + CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon); + + auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls"); + if (Attr.getValueAsString() == "true") + isTailCall = false; + + if (isTailCall) { + bool StructAttrFlag = MF.getFunction()->hasStructRetAttr(); + isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, + isVarArg, IsStructRet, + StructAttrFlag, + Outs, OutVals, Ins, DAG); + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + if (VA.isMemLoc()) { + isTailCall = false; + break; + } + } + DEBUG(dbgs() << (isTailCall ? "Eligible for Tail Call\n" + : "Argument must be passed on stack. " + "Not eligible for Tail Call\n")); + } + // Get a count of how many bytes are to be pushed on the stack. + unsigned NumBytes = CCInfo.getNextStackOffset(); + SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass; + SmallVector<SDValue, 8> MemOpChains; + + auto &HRI = *Subtarget.getRegisterInfo(); + SDValue StackPtr = + DAG.getCopyFromReg(Chain, dl, HRI.getStackRegister(), PtrVT); + + bool NeedsArgAlign = false; + unsigned LargestAlignSeen = 0; + // Walk the register/memloc assignments, inserting copies/loads. + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + SDValue Arg = OutVals[i]; + ISD::ArgFlagsTy Flags = Outs[i].Flags; + // Record if we need > 8 byte alignment on an argument. + bool ArgAlign = IsHvxVectorType(VA.getValVT()); + NeedsArgAlign |= ArgAlign; + + // Promote the value if needed. 
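+ // Sub-word arguments are widened to the 32-bit location type using the
+ // extension kind chosen by the calling convention; Full and BCvt locations
+ // are passed through unchanged.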
+ switch (VA.getLocInfo()) {
+ default:
+ // Loc info must be one of Full, SExt, ZExt, or AExt.
+ llvm_unreachable("Unknown loc info!");
+ case CCValAssign::BCvt:
+ case CCValAssign::Full:
+ break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ if (VA.isMemLoc()) {
+ unsigned LocMemOffset = VA.getLocMemOffset();
+ SDValue MemAddr = DAG.getConstant(LocMemOffset, dl,
+ StackPtr.getValueType());
+ MemAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, MemAddr);
+ if (ArgAlign)
+ LargestAlignSeen = std::max(LargestAlignSeen,
+ VA.getLocVT().getStoreSizeInBits() >> 3);
+ if (Flags.isByVal()) {
+ // The argument is a struct passed by value. According to LLVM, "Arg"
+ // is a pointer.
+ MemOpChains.push_back(CreateCopyOfByValArgument(Arg, MemAddr, Chain,
+ Flags, DAG, dl));
+ } else {
+ MachinePointerInfo LocPI = MachinePointerInfo::getStack(
+ DAG.getMachineFunction(), LocMemOffset);
+ SDValue S = DAG.getStore(Chain, dl, Arg, MemAddr, LocPI, false,
+ false, 0);
+ MemOpChains.push_back(S);
+ }
+ continue;
+ }
+
+ // Arguments that can be passed in a register must be kept in the RegsToPass
+ // vector.
+ if (VA.isRegLoc())
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ }
+
+ if (NeedsArgAlign && Subtarget.hasV60TOps()) {
+ DEBUG(dbgs() << "Function needs byte stack align due to call args\n");
+ MachineFrameInfo* MFI = DAG.getMachineFunction().getFrameInfo();
+ // V6 vectors passed by value have 64 or 128 byte alignment depending
+ // on whether we are in 64-byte or 128-byte vector mode.
+ bool UseHVXDbl = Subtarget.useHVXDblOps();
+ assert(Subtarget.useHVXOps());
+ const unsigned ObjAlign = UseHVXDbl ? 128 : 64;
+ LargestAlignSeen = std::max(LargestAlignSeen, ObjAlign);
+ MFI->ensureMaxAlignment(LargestAlignSeen);
+ }
+ // Transform all store nodes into one single node because all store
+ // nodes are independent of each other.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
+
+ if (!isTailCall) {
+ SDValue C = DAG.getConstant(NumBytes, dl, PtrVT, true);
+ Chain = DAG.getCALLSEQ_START(Chain, C, dl);
+ }
+
+ // Build a sequence of copy-to-reg nodes chained together with token
+ // chain and flag operands which copy the outgoing args into registers.
+ // The InFlag is necessary since all emitted instructions must be
+ // stuck together.
+ SDValue InFlag;
+ if (!isTailCall) {
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+ } else {
+ // For tail calls lower the arguments to the 'real' stack slot.
+ //
+ // Force all the incoming stack arguments to be loaded from the stack
+ // before any new outgoing arguments are stored to the stack, because the
+ // outgoing stack slots may alias the incoming argument stack slots, and
+ // the alias isn't otherwise explicit. This is slightly more conservative
+ // than necessary, because it means that each store effectively depends
+ // on every argument instead of just those arguments it would clobber.
+ //
+ // Do not flag preceding copytoreg stuff together with the following stuff.
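+ // Start with no glue so the copies below form their own glue sequence.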
+ InFlag = SDValue(); + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, + RegsToPass[i].second, InFlag); + InFlag = Chain.getValue(1); + } + InFlag = SDValue(); + } + + // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every + // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol + // node so that legalize doesn't hack it. + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, PtrVT); + } else if (ExternalSymbolSDNode *S = + dyn_cast<ExternalSymbolSDNode>(Callee)) { + Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT); + } + + // Returns a chain & a flag for retval copy to use. + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + SmallVector<SDValue, 8> Ops; + Ops.push_back(Chain); + Ops.push_back(Callee); + + // Add argument registers to the end of the list so that they are + // known live into the call. + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Ops.push_back(DAG.getRegister(RegsToPass[i].first, + RegsToPass[i].second.getValueType())); + } + + if (InFlag.getNode()) + Ops.push_back(InFlag); + + if (isTailCall) { + MF.getFrameInfo()->setHasTailCall(); + return DAG.getNode(HexagonISD::TC_RETURN, dl, NodeTys, Ops); + } + + int OpCode = doesNotReturn ? HexagonISD::CALLv3nr : HexagonISD::CALLv3; + Chain = DAG.getNode(OpCode, dl, NodeTys, Ops); + InFlag = Chain.getValue(1); + + // Create the CALLSEQ_END node. + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), + DAG.getIntPtrConstant(0, dl, true), InFlag, dl); + InFlag = Chain.getValue(1); + + // Handle result values, copying them out of physregs into vregs that we + // return. + return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG, + InVals, OutVals, Callee); +} + +static bool getIndexedAddressParts(SDNode *Ptr, EVT VT, + bool isSEXTLoad, SDValue &Base, + SDValue &Offset, bool &isInc, + SelectionDAG &DAG) { + if (Ptr->getOpcode() != ISD::ADD) + return false; + + auto &HST = static_cast<const HexagonSubtarget&>(DAG.getSubtarget()); + bool UseHVX = HST.useHVXOps(); + bool UseHVXDbl = HST.useHVXDblOps(); + + bool ValidHVXDblType = + (UseHVX && UseHVXDbl) && (VT == MVT::v32i32 || VT == MVT::v16i64 || + VT == MVT::v64i16 || VT == MVT::v128i8); + bool ValidHVXType = + UseHVX && !UseHVXDbl && (VT == MVT::v16i32 || VT == MVT::v8i64 || + VT == MVT::v32i16 || VT == MVT::v64i8); + + if (ValidHVXDblType || ValidHVXType || + VT == MVT::i64 || VT == MVT::i32 || VT == MVT::i16 || VT == MVT::i8) { + isInc = (Ptr->getOpcode() == ISD::ADD); + Base = Ptr->getOperand(0); + Offset = Ptr->getOperand(1); + // Ensure that Offset is a constant. + return (isa<ConstantSDNode>(Offset)); + } + + return false; +} + +/// getPostIndexedAddressParts - returns true by value, base pointer and +/// offset pointer and addressing mode by reference if this node can be +/// combined with a load / store to form a post-indexed load / store. 
+bool HexagonTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, + SDValue &Base, + SDValue &Offset, + ISD::MemIndexedMode &AM, + SelectionDAG &DAG) const +{ + EVT VT; + SDValue Ptr; + bool isSEXTLoad = false; + + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { + VT = LD->getMemoryVT(); + isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; + } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { + VT = ST->getMemoryVT(); + if (ST->getValue().getValueType() == MVT::i64 && ST->isTruncatingStore()) { + return false; + } + } else { + return false; + } + + bool isInc = false; + bool isLegal = getIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, + isInc, DAG); + if (isLegal) { + auto &HII = *Subtarget.getInstrInfo(); + int32_t OffsetVal = cast<ConstantSDNode>(Offset.getNode())->getSExtValue(); + if (HII.isValidAutoIncImm(VT, OffsetVal)) { + AM = isInc ? ISD::POST_INC : ISD::POST_DEC; + return true; + } + } + + return false; +} + +SDValue +HexagonTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const { + SDNode *Node = Op.getNode(); + MachineFunction &MF = DAG.getMachineFunction(); + auto &FuncInfo = *MF.getInfo<HexagonMachineFunctionInfo>(); + switch (Node->getOpcode()) { + case ISD::INLINEASM: { + unsigned NumOps = Node->getNumOperands(); + if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue) + --NumOps; // Ignore the flag operand. + + for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { + if (FuncInfo.hasClobberLR()) + break; + unsigned Flags = + cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); + unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + ++i; // Skip the ID value. + + switch (InlineAsm::getKind(Flags)) { + default: llvm_unreachable("Bad flags!"); + case InlineAsm::Kind_RegDef: + case InlineAsm::Kind_RegUse: + case InlineAsm::Kind_Imm: + case InlineAsm::Kind_Clobber: + case InlineAsm::Kind_Mem: { + for (; NumVals; --NumVals, ++i) {} + break; + } + case InlineAsm::Kind_RegDefEarlyClobber: { + for (; NumVals; --NumVals, ++i) { + unsigned Reg = + cast<RegisterSDNode>(Node->getOperand(i))->getReg(); + + // Check it to be lr + const HexagonRegisterInfo *QRI = Subtarget.getRegisterInfo(); + if (Reg == QRI->getRARegister()) { + FuncInfo.setHasClobberLR(true); + break; + } + } + break; + } + } + } + } + } // Node->getOpcode + return Op; +} + +SDValue +HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, + SelectionDAG &DAG) const { + SDValue Chain = Op.getOperand(0); + SDValue Size = Op.getOperand(1); + SDValue Align = Op.getOperand(2); + SDLoc dl(Op); + + ConstantSDNode *AlignConst = dyn_cast<ConstantSDNode>(Align); + assert(AlignConst && "Non-constant Align in LowerDYNAMIC_STACKALLOC"); + + unsigned A = AlignConst->getSExtValue(); + auto &HFI = *Subtarget.getFrameLowering(); + // "Zero" means natural stack alignment. 
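+ // In that case use the default stack alignment from the frame lowering.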
+ if (A == 0)
+ A = HFI.getStackAlignment();
+
+ DEBUG({
+ dbgs () << LLVM_FUNCTION_NAME << " Align: " << A << " Size: ";
+ Size.getNode()->dump(&DAG);
+ dbgs() << "\n";
+ });
+
+ SDValue AC = DAG.getConstant(A, dl, MVT::i32);
+ SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
+ SDValue AA = DAG.getNode(HexagonISD::ALLOCA, dl, VTs, Chain, Size, AC);
+ if (Op.getNode()->getHasDebugValue())
+ DAG.TransferDbgValues(Op, AA);
+ return AA;
+}
+
+SDValue
+HexagonTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const
+ SmallVectorImpl<ISD::InputArg> &Ins,
+ SDLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+const {
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ auto &FuncInfo = *MF.getInfo<HexagonMachineFunctionInfo>();
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
+
+ CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon);
+
+ // For LLVM, in the case when returning a struct by value (>8byte),
+ // the first argument is a pointer that points to the location on caller's
+ // stack where the return value will be stored. For Hexagon, the location on
+ // caller's stack is passed only when the struct size is smaller than (or
+ // equal to) 8 bytes. If not, no address will be passed into the callee and
+ // the callee returns the result directly through R0/R1.
+
+ SmallVector<SDValue, 8> MemOps;
+ bool UseHVX = Subtarget.useHVXOps(), UseHVXDbl = Subtarget.useHVXDblOps();
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ ISD::ArgFlagsTy Flags = Ins[i].Flags;
+ unsigned ObjSize;
+ unsigned StackLocation;
+ int FI;
+
+ if ( (VA.isRegLoc() && !Flags.isByVal())
+ || (VA.isRegLoc() && Flags.isByVal() && Flags.getByValSize() > 8)) {
+ // Arguments passed in registers:
+ // 1. int, long long, ptr args that get allocated in a register.
+ // 2. Large struct that gets a register to put its address in.
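+ // Copy the incoming physical register into a fresh virtual register of the
+ // matching register class and use that copy as the argument value.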
+ EVT RegVT = VA.getLocVT();
+ if (RegVT == MVT::i8 || RegVT == MVT::i16 ||
+ RegVT == MVT::i32 || RegVT == MVT::f32) {
+ unsigned VReg =
+ RegInfo.createVirtualRegister(&Hexagon::IntRegsRegClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+ } else if (RegVT == MVT::i64 || RegVT == MVT::f64) {
+ unsigned VReg =
+ RegInfo.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+
+ // Single Vector
+ } else if ((RegVT == MVT::v8i64 || RegVT == MVT::v16i32 ||
+ RegVT == MVT::v32i16 || RegVT == MVT::v64i8)) {
+ unsigned VReg =
+ RegInfo.createVirtualRegister(&Hexagon::VectorRegsRegClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+ } else if (UseHVX && UseHVXDbl &&
+ ((RegVT == MVT::v16i64 || RegVT == MVT::v32i32 ||
+ RegVT == MVT::v64i16 || RegVT == MVT::v128i8))) {
+ unsigned VReg =
+ RegInfo.createVirtualRegister(&Hexagon::VectorRegs128BRegClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+
+ // Double Vector
+ } else if ((RegVT == MVT::v16i64 || RegVT == MVT::v32i32 ||
+ RegVT == MVT::v64i16 || RegVT == MVT::v128i8)) {
+ unsigned VReg =
+ RegInfo.createVirtualRegister(&Hexagon::VecDblRegsRegClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+ } else if (UseHVX && UseHVXDbl &&
+ ((RegVT == MVT::v32i64 || RegVT == MVT::v64i32 ||
+ RegVT == MVT::v128i16 || RegVT == MVT::v256i8))) {
+ unsigned VReg =
+ RegInfo.createVirtualRegister(&Hexagon::VecDblRegs128BRegClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+ } else if (RegVT == MVT::v512i1 || RegVT == MVT::v1024i1) {
+ assert(0 && "need to support VecPred regs");
+ unsigned VReg =
+ RegInfo.createVirtualRegister(&Hexagon::VecPredRegsRegClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+ } else {
+ assert (0);
+ }
+ } else if (VA.isRegLoc() && Flags.isByVal() && Flags.getByValSize() <= 8) {
+ assert (0 && "ByValSize must be bigger than 8 bytes");
+ } else {
+ // Sanity check.
+ assert(VA.isMemLoc());
+
+ if (Flags.isByVal()) {
+ // If it's a byval parameter, then we need to compute the
+ // "real" size, not the size of the pointer.
+ ObjSize = Flags.getByValSize();
+ } else {
+ ObjSize = VA.getLocVT().getStoreSizeInBits() >> 3;
+ }
+
+ StackLocation = HEXAGON_LRFP_SIZE + VA.getLocMemOffset();
+ // Create the frame index object for this incoming parameter...
+ FI = MFI->CreateFixedObject(ObjSize, StackLocation, true);
+
+ // Create the SelectionDAG nodes corresponding to a load
+ // from this parameter.
+ SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
+
+ if (Flags.isByVal()) {
+ // If it's a pass-by-value aggregate, then do not dereference the stack
+ // location. Instead, we should generate a reference to the stack
+ // location.
+ InVals.push_back(FIN);
+ } else {
+ InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
+ MachinePointerInfo(), false, false,
+ false, 0));
+ }
+ }
+ }
+
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
+
+ if (isVarArg) {
+ // This will point to the next argument passed via stack.
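+ // Record the fixed object so LowerVASTART can materialize its address.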
+ int FrameIndex = MFI->CreateFixedObject(Hexagon_PointerSize, + HEXAGON_LRFP_SIZE + + CCInfo.getNextStackOffset(), + true); + FuncInfo.setVarArgsFrameIndex(FrameIndex); + } + + return Chain; +} + +SDValue +HexagonTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { + // VASTART stores the address of the VarArgsFrameIndex slot into the + // memory location argument. + MachineFunction &MF = DAG.getMachineFunction(); + HexagonMachineFunctionInfo *QFI = MF.getInfo<HexagonMachineFunctionInfo>(); + SDValue Addr = DAG.getFrameIndex(QFI->getVarArgsFrameIndex(), MVT::i32); + const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); + return DAG.getStore(Op.getOperand(0), SDLoc(Op), Addr, + Op.getOperand(1), MachinePointerInfo(SV), false, + false, 0); +} + +// Creates a SPLAT instruction for a constant value VAL. +static SDValue createSplat(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue Val) { + if (VT.getSimpleVT() == MVT::v4i8) + return DAG.getNode(HexagonISD::VSPLATB, dl, VT, Val); + + if (VT.getSimpleVT() == MVT::v4i16) + return DAG.getNode(HexagonISD::VSPLATH, dl, VT, Val); + + return SDValue(); +} + +static bool isSExtFree(SDValue N) { + // A sign-extend of a truncate of a sign-extend is free. + if (N.getOpcode() == ISD::TRUNCATE && + N.getOperand(0).getOpcode() == ISD::AssertSext) + return true; + // We have sign-extended loads. + if (N.getOpcode() == ISD::LOAD) + return true; + return false; +} + +SDValue HexagonTargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const { + SDLoc dl(Op); + SDValue InpVal = Op.getOperand(0); + if (isa<ConstantSDNode>(InpVal)) { + uint64_t V = cast<ConstantSDNode>(InpVal)->getZExtValue(); + return DAG.getTargetConstant(countPopulation(V), dl, MVT::i64); + } + SDValue PopOut = DAG.getNode(HexagonISD::POPCOUNT, dl, MVT::i32, InpVal); + return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, PopOut); +} + +SDValue HexagonTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { + SDLoc dl(Op); + + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + SDValue Cmp = Op.getOperand(2); + ISD::CondCode CC = cast<CondCodeSDNode>(Cmp)->get(); + + EVT VT = Op.getValueType(); + EVT LHSVT = LHS.getValueType(); + EVT RHSVT = RHS.getValueType(); + + if (LHSVT == MVT::v2i16) { + assert(ISD::isSignedIntSetCC(CC) || ISD::isUnsignedIntSetCC(CC)); + unsigned ExtOpc = ISD::isSignedIntSetCC(CC) ? ISD::SIGN_EXTEND + : ISD::ZERO_EXTEND; + SDValue LX = DAG.getNode(ExtOpc, dl, MVT::v2i32, LHS); + SDValue RX = DAG.getNode(ExtOpc, dl, MVT::v2i32, RHS); + SDValue SC = DAG.getNode(ISD::SETCC, dl, MVT::v2i1, LX, RX, Cmp); + return SC; + } + + // Treat all other vector types as legal. + if (VT.isVector()) + return Op; + + // Equals and not equals should use sign-extend, not zero-extend, since + // we can represent small negative values in the compare instructions. + // The LLVM default is to use zero-extend arbitrarily in these cases. 
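+ // Only rewrite the compare when the constant RHS is negative or when a
+ // sign-extension of either operand is known to be free.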
+ if ((CC == ISD::SETEQ || CC == ISD::SETNE) && + (RHSVT == MVT::i8 || RHSVT == MVT::i16) && + (LHSVT == MVT::i8 || LHSVT == MVT::i16)) { + ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS); + if (C && C->getAPIntValue().isNegative()) { + LHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, LHS); + RHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, RHS); + return DAG.getNode(ISD::SETCC, dl, Op.getValueType(), + LHS, RHS, Op.getOperand(2)); + } + if (isSExtFree(LHS) || isSExtFree(RHS)) { + LHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, LHS); + RHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, RHS); + return DAG.getNode(ISD::SETCC, dl, Op.getValueType(), + LHS, RHS, Op.getOperand(2)); + } + } + return SDValue(); +} + +SDValue +HexagonTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const { + SDValue PredOp = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1), Op2 = Op.getOperand(2); + EVT OpVT = Op1.getValueType(); + SDLoc DL(Op); + + if (OpVT == MVT::v2i16) { + SDValue X1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v2i32, Op1); + SDValue X2 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v2i32, Op2); + SDValue SL = DAG.getNode(ISD::VSELECT, DL, MVT::v2i32, PredOp, X1, X2); + SDValue TR = DAG.getNode(ISD::TRUNCATE, DL, MVT::v2i16, SL); + return TR; + } + + return SDValue(); +} + +// Handle only specific vector loads. +SDValue HexagonTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + SDLoc DL(Op); + LoadSDNode *LoadNode = cast<LoadSDNode>(Op); + SDValue Chain = LoadNode->getChain(); + SDValue Ptr = Op.getOperand(1); + SDValue LoweredLoad; + SDValue Result; + SDValue Base = LoadNode->getBasePtr(); + ISD::LoadExtType Ext = LoadNode->getExtensionType(); + unsigned Alignment = LoadNode->getAlignment(); + SDValue LoadChain; + + if(Ext == ISD::NON_EXTLOAD) + Ext = ISD::ZEXTLOAD; + + if (VT == MVT::v4i16) { + if (Alignment == 2) { + SDValue Loads[4]; + // Base load. + Loads[0] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Base, + LoadNode->getPointerInfo(), MVT::i16, + LoadNode->isVolatile(), + LoadNode->isNonTemporal(), + LoadNode->isInvariant(), + Alignment); + // Base+2 load. + SDValue Increment = DAG.getConstant(2, DL, MVT::i32); + Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment); + Loads[1] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr, + LoadNode->getPointerInfo(), MVT::i16, + LoadNode->isVolatile(), + LoadNode->isNonTemporal(), + LoadNode->isInvariant(), + Alignment); + // SHL 16, then OR base and base+2. + SDValue ShiftAmount = DAG.getConstant(16, DL, MVT::i32); + SDValue Tmp1 = DAG.getNode(ISD::SHL, DL, MVT::i32, Loads[1], ShiftAmount); + SDValue Tmp2 = DAG.getNode(ISD::OR, DL, MVT::i32, Tmp1, Loads[0]); + // Base + 4. + Increment = DAG.getConstant(4, DL, MVT::i32); + Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment); + Loads[2] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr, + LoadNode->getPointerInfo(), MVT::i16, + LoadNode->isVolatile(), + LoadNode->isNonTemporal(), + LoadNode->isInvariant(), + Alignment); + // Base + 6. + Increment = DAG.getConstant(6, DL, MVT::i32); + Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment); + Loads[3] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr, + LoadNode->getPointerInfo(), MVT::i16, + LoadNode->isVolatile(), + LoadNode->isNonTemporal(), + LoadNode->isInvariant(), + Alignment); + // SHL 16, then OR base+4 and base+6. 
+ Tmp1 = DAG.getNode(ISD::SHL, DL, MVT::i32, Loads[3], ShiftAmount); + SDValue Tmp4 = DAG.getNode(ISD::OR, DL, MVT::i32, Tmp1, Loads[2]); + // Combine to i64. This could be optimised out later if we can + // affect reg allocation of this code. + Result = DAG.getNode(HexagonISD::COMBINE, DL, MVT::i64, Tmp4, Tmp2); + LoadChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, + Loads[0].getValue(1), Loads[1].getValue(1), + Loads[2].getValue(1), Loads[3].getValue(1)); + } else { + // Perform default type expansion. + Result = DAG.getLoad(MVT::i64, DL, Chain, Ptr, LoadNode->getPointerInfo(), + LoadNode->isVolatile(), LoadNode->isNonTemporal(), + LoadNode->isInvariant(), LoadNode->getAlignment()); + LoadChain = Result.getValue(1); + } + } else + llvm_unreachable("Custom lowering unsupported load"); + + Result = DAG.getNode(ISD::BITCAST, DL, VT, Result); + // Since we pretend to lower a load, we need the original chain + // info attached to the result. + SDValue Ops[] = { Result, LoadChain }; + + return DAG.getMergeValues(Ops, DL); +} + + +SDValue +HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { + EVT ValTy = Op.getValueType(); + ConstantPoolSDNode *CPN = cast<ConstantPoolSDNode>(Op); + unsigned Align = CPN->getAlignment(); + Reloc::Model RM = HTM.getRelocationModel(); + unsigned char TF = (RM == Reloc::PIC_) ? HexagonII::MO_PCREL : 0; + + SDValue T; + if (CPN->isMachineConstantPoolEntry()) + T = DAG.getTargetConstantPool(CPN->getMachineCPVal(), ValTy, Align, TF); + else + T = DAG.getTargetConstantPool(CPN->getConstVal(), ValTy, Align, TF); + if (RM == Reloc::PIC_) + return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), ValTy, T); + return DAG.getNode(HexagonISD::CP, SDLoc(Op), ValTy, T); +} + +SDValue +HexagonTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + int Idx = cast<JumpTableSDNode>(Op)->getIndex(); + Reloc::Model RM = HTM.getRelocationModel(); + if (RM == Reloc::PIC_) { + SDValue T = DAG.getTargetJumpTable(Idx, VT, HexagonII::MO_PCREL); + return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), VT, T); + } + + SDValue T = DAG.getTargetJumpTable(Idx, VT); + return DAG.getNode(HexagonISD::JT, SDLoc(Op), VT, T); +} + +SDValue +HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const { + const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo(); + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + MFI.setReturnAddressIsTaken(true); + + if (verifyReturnAddressArgumentIsConstant(Op, DAG)) + return SDValue(); + + EVT VT = Op.getValueType(); + SDLoc dl(Op); + unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + if (Depth) { + SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); + SDValue Offset = DAG.getConstant(4, dl, MVT::i32); + return DAG.getLoad(VT, dl, DAG.getEntryNode(), + DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset), + MachinePointerInfo(), false, false, false, 0); + } + + // Return LR, which contains the return address. Mark it an implicit live-in. 
+ unsigned Reg = MF.addLiveIn(HRI.getRARegister(), getRegClassFor(MVT::i32)); + return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT); +} + +SDValue +HexagonTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { + const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo(); + MachineFrameInfo &MFI = *DAG.getMachineFunction().getFrameInfo(); + MFI.setFrameAddressIsTaken(true); + + EVT VT = Op.getValueType(); + SDLoc dl(Op); + unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, + HRI.getFrameRegister(), VT); + while (Depth--) + FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, + MachinePointerInfo(), + false, false, false, 0); + return FrameAddr; +} + +SDValue +HexagonTargetLowering::LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const { + SDLoc dl(Op); + return DAG.getNode(HexagonISD::BARRIER, dl, MVT::Other, Op.getOperand(0)); +} + + +SDValue +HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const { + SDLoc dl(Op); + auto *GAN = cast<GlobalAddressSDNode>(Op); + auto PtrVT = getPointerTy(DAG.getDataLayout()); + auto *GV = GAN->getGlobal(); + int64_t Offset = GAN->getOffset(); + + auto &HLOF = *HTM.getObjFileLowering(); + Reloc::Model RM = HTM.getRelocationModel(); + + if (RM == Reloc::Static) { + SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset); + if (HLOF.IsGlobalInSmallSection(GV, HTM)) + return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, GA); + return DAG.getNode(HexagonISD::CONST32, dl, PtrVT, GA); + } + + bool UsePCRel = GV->hasInternalLinkage() || GV->hasHiddenVisibility() || + (GV->hasLocalLinkage() && !isa<Function>(GV)); + if (UsePCRel) { + SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset, + HexagonII::MO_PCREL); + return DAG.getNode(HexagonISD::AT_PCREL, dl, PtrVT, GA); + } + + // Use GOT index. + SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); + SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, HexagonII::MO_GOT); + SDValue Off = DAG.getConstant(Offset, dl, MVT::i32); + return DAG.getNode(HexagonISD::AT_GOT, dl, PtrVT, GOT, GA, Off); +} + +// Specifies that for loads and stores VT can be promoted to PromotedLdStVT. 
+SDValue +HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { + const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); + SDLoc dl(Op); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + + Reloc::Model RM = HTM.getRelocationModel(); + if (RM == Reloc::Static) { + SDValue A = DAG.getTargetBlockAddress(BA, PtrVT); + return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, A); + } + + SDValue A = DAG.getTargetBlockAddress(BA, PtrVT, 0, HexagonII::MO_PCREL); + return DAG.getNode(HexagonISD::AT_PCREL, dl, PtrVT, A); +} + +SDValue +HexagonTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) + const { + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + SDValue GOTSym = DAG.getTargetExternalSymbol(HEXAGON_GOT_SYM_NAME, PtrVT, + HexagonII::MO_PCREL); + return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), PtrVT, GOTSym); +} + +//===----------------------------------------------------------------------===// +// TargetLowering Implementation +//===----------------------------------------------------------------------===// + +HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, + const HexagonSubtarget &ST) + : TargetLowering(TM), HTM(static_cast<const HexagonTargetMachine&>(TM)), + Subtarget(ST) { + bool IsV4 = !Subtarget.hasV5TOps(); + auto &HRI = *Subtarget.getRegisterInfo(); + bool UseHVX = Subtarget.useHVXOps(); + bool UseHVXSgl = Subtarget.useHVXSglOps(); + bool UseHVXDbl = Subtarget.useHVXDblOps(); + + setPrefLoopAlignment(4); + setPrefFunctionAlignment(4); + setMinFunctionAlignment(2); + setInsertFencesForAtomic(false); + setStackPointerRegisterToSaveRestore(HRI.getStackRegister()); + + if (EnableHexSDNodeSched) + setSchedulingPreference(Sched::VLIW); + else + setSchedulingPreference(Sched::Source); + + // Limits for inline expansion of memcpy/memmove + MaxStoresPerMemcpy = MaxStoresPerMemcpyCL; + MaxStoresPerMemcpyOptSize = MaxStoresPerMemcpyOptSizeCL; + MaxStoresPerMemmove = MaxStoresPerMemmoveCL; + MaxStoresPerMemmoveOptSize = MaxStoresPerMemmoveOptSizeCL; + MaxStoresPerMemset = MaxStoresPerMemsetCL; + MaxStoresPerMemsetOptSize = MaxStoresPerMemsetOptSizeCL; + + // + // Set up register classes. 
+ // + + addRegisterClass(MVT::i1, &Hexagon::PredRegsRegClass); + addRegisterClass(MVT::v2i1, &Hexagon::PredRegsRegClass); // bbbbaaaa + addRegisterClass(MVT::v4i1, &Hexagon::PredRegsRegClass); // ddccbbaa + addRegisterClass(MVT::v8i1, &Hexagon::PredRegsRegClass); // hgfedcba + addRegisterClass(MVT::i32, &Hexagon::IntRegsRegClass); + addRegisterClass(MVT::v4i8, &Hexagon::IntRegsRegClass); + addRegisterClass(MVT::v2i16, &Hexagon::IntRegsRegClass); + addRegisterClass(MVT::i64, &Hexagon::DoubleRegsRegClass); + addRegisterClass(MVT::v8i8, &Hexagon::DoubleRegsRegClass); + addRegisterClass(MVT::v4i16, &Hexagon::DoubleRegsRegClass); + addRegisterClass(MVT::v2i32, &Hexagon::DoubleRegsRegClass); + + if (Subtarget.hasV5TOps()) { + addRegisterClass(MVT::f32, &Hexagon::IntRegsRegClass); + addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass); + } + + if (Subtarget.hasV60TOps()) { + if (Subtarget.useHVXSglOps()) { + addRegisterClass(MVT::v64i8, &Hexagon::VectorRegsRegClass); + addRegisterClass(MVT::v32i16, &Hexagon::VectorRegsRegClass); + addRegisterClass(MVT::v16i32, &Hexagon::VectorRegsRegClass); + addRegisterClass(MVT::v8i64, &Hexagon::VectorRegsRegClass); + addRegisterClass(MVT::v128i8, &Hexagon::VecDblRegsRegClass); + addRegisterClass(MVT::v64i16, &Hexagon::VecDblRegsRegClass); + addRegisterClass(MVT::v32i32, &Hexagon::VecDblRegsRegClass); + addRegisterClass(MVT::v16i64, &Hexagon::VecDblRegsRegClass); + addRegisterClass(MVT::v512i1, &Hexagon::VecPredRegsRegClass); + } else if (Subtarget.useHVXDblOps()) { + addRegisterClass(MVT::v128i8, &Hexagon::VectorRegs128BRegClass); + addRegisterClass(MVT::v64i16, &Hexagon::VectorRegs128BRegClass); + addRegisterClass(MVT::v32i32, &Hexagon::VectorRegs128BRegClass); + addRegisterClass(MVT::v16i64, &Hexagon::VectorRegs128BRegClass); + addRegisterClass(MVT::v256i8, &Hexagon::VecDblRegs128BRegClass); + addRegisterClass(MVT::v128i16, &Hexagon::VecDblRegs128BRegClass); + addRegisterClass(MVT::v64i32, &Hexagon::VecDblRegs128BRegClass); + addRegisterClass(MVT::v32i64, &Hexagon::VecDblRegs128BRegClass); + addRegisterClass(MVT::v1024i1, &Hexagon::VecPredRegs128BRegClass); + } + + } + + // + // Handling of scalar operations. + // + // All operations default to "legal", except: + // - indexed loads and stores (pre-/post-incremented), + // - ANY_EXTEND_VECTOR_INREG, ATOMIC_CMP_SWAP_WITH_SUCCESS, CONCAT_VECTORS, + // ConstantFP, DEBUGTRAP, FCEIL, FCOPYSIGN, FEXP, FEXP2, FFLOOR, FGETSIGN, + // FLOG, FLOG2, FLOG10, FMAXNUM, FMINNUM, FNEARBYINT, FRINT, FROUND, TRAP, + // FTRUNC, PREFETCH, SIGN_EXTEND_VECTOR_INREG, ZERO_EXTEND_VECTOR_INREG, + // which default to "expand" for at least one type. + + // Misc operations. + setOperationAction(ISD::ConstantFP, MVT::f32, Legal); // Default: expand + setOperationAction(ISD::ConstantFP, MVT::f64, Legal); // Default: expand + + setOperationAction(ISD::ConstantPool, MVT::i32, Custom); + setOperationAction(ISD::JumpTable, MVT::i32, Custom); + setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + setOperationAction(ISD::INLINEASM, MVT::Other, Custom); + setOperationAction(ISD::EH_RETURN, MVT::Other, Custom); + setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); + + // Custom legalize GlobalAddress nodes into CONST32. 
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalAddress, MVT::i8, Custom);
+ setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
+
+ // Hexagon needs to optimize cases with negative constants.
+ setOperationAction(ISD::SETCC, MVT::i8, Custom);
+ setOperationAction(ISD::SETCC, MVT::i16, Custom);
+
+ // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
+
+ if (EmitJumpTables)
+ setMinimumJumpTableEntries(2);
+ else
+ setMinimumJumpTableEntries(MinimumJumpTables);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+
+ // Hexagon has instructions for add/sub with carry. The problem with
+ // modeling these instructions is that they produce 2 results: Rdd and Px.
+ // To model the update of Px, we will have to use Defs[p0..p3] which will
+ // cause any predicate live range to spill. So, we pretend we don't have
+ // these instructions.
+ setOperationAction(ISD::ADDE, MVT::i8, Expand);
+ setOperationAction(ISD::ADDE, MVT::i16, Expand);
+ setOperationAction(ISD::ADDE, MVT::i32, Expand);
+ setOperationAction(ISD::ADDE, MVT::i64, Expand);
+ setOperationAction(ISD::SUBE, MVT::i8, Expand);
+ setOperationAction(ISD::SUBE, MVT::i16, Expand);
+ setOperationAction(ISD::SUBE, MVT::i32, Expand);
+ setOperationAction(ISD::SUBE, MVT::i64, Expand);
+ setOperationAction(ISD::ADDC, MVT::i8, Expand);
+ setOperationAction(ISD::ADDC, MVT::i16, Expand);
+ setOperationAction(ISD::ADDC, MVT::i32, Expand);
+ setOperationAction(ISD::ADDC, MVT::i64, Expand);
+ setOperationAction(ISD::SUBC, MVT::i8, Expand);
+ setOperationAction(ISD::SUBC, MVT::i16, Expand);
+ setOperationAction(ISD::SUBC, MVT::i32, Expand);
+ setOperationAction(ISD::SUBC, MVT::i64, Expand);
+
+ // The only add/sub operations that detect overflow are the saturating ones.
+ for (MVT VT : MVT::integer_valuetypes()) {
+ setOperationAction(ISD::UADDO, VT, Expand);
+ setOperationAction(ISD::SADDO, VT, Expand);
+ setOperationAction(ISD::USUBO, VT, Expand);
+ setOperationAction(ISD::SSUBO, VT, Expand);
+ }
+
+ setOperationAction(ISD::CTLZ, MVT::i8, Promote);
+ setOperationAction(ISD::CTLZ, MVT::i16, Promote);
+ setOperationAction(ISD::CTTZ, MVT::i8, Promote);
+ setOperationAction(ISD::CTTZ, MVT::i16, Promote);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8, Promote);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Promote);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i8, Promote);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Promote);
+
+ // In V5, popcount can count # of 1s in i64 but returns i32.
+ // On V4 it will be expanded (set later).
+ setOperationAction(ISD::CTPOP, MVT::i8, Promote);
+ setOperationAction(ISD::CTPOP, MVT::i16, Promote);
+ setOperationAction(ISD::CTPOP, MVT::i32, Promote);
+ setOperationAction(ISD::CTPOP, MVT::i64, Custom);
+
+ // We custom lower i64-to-i64 mul so that it is not considered a legal
+ // operation. There is a pattern that will match i64 mul and transform it
+ // to a series of instructions.
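+ // (Roughly: for a*b split into 32-bit halves, the result is
+ //  lo(a)*lo(b) + ((lo(a)*hi(b) + hi(a)*lo(b)) << 32), built from 32x32
+ //  multiply and accumulate steps.)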
+ setOperationAction(ISD::MUL, MVT::i64, Expand); + setOperationAction(ISD::MULHS, MVT::i64, Expand); + + for (unsigned IntExpOp : + { ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM, + ISD::SDIVREM, ISD::UDIVREM, ISD::ROTL, ISD::ROTR, + ISD::BSWAP, ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS, + ISD::SMUL_LOHI, ISD::UMUL_LOHI }) { + setOperationAction(IntExpOp, MVT::i32, Expand); + setOperationAction(IntExpOp, MVT::i64, Expand); + } + + for (unsigned FPExpOp : + {ISD::FDIV, ISD::FREM, ISD::FSQRT, ISD::FSIN, ISD::FCOS, ISD::FSINCOS, + ISD::FPOW, ISD::FCOPYSIGN}) { + setOperationAction(FPExpOp, MVT::f32, Expand); + setOperationAction(FPExpOp, MVT::f64, Expand); + } + + // No extending loads from i32. + for (MVT VT : MVT::integer_valuetypes()) { + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i32, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::i32, Expand); + } + // Turn FP truncstore into trunc + store. + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + // Turn FP extload into load/fextend. + for (MVT VT : MVT::fp_valuetypes()) + setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand); + + // Expand BR_CC and SELECT_CC for all integer and fp types. + for (MVT VT : MVT::integer_valuetypes()) { + setOperationAction(ISD::BR_CC, VT, Expand); + setOperationAction(ISD::SELECT_CC, VT, Expand); + } + for (MVT VT : MVT::fp_valuetypes()) { + setOperationAction(ISD::BR_CC, VT, Expand); + setOperationAction(ISD::SELECT_CC, VT, Expand); + } + setOperationAction(ISD::BR_CC, MVT::Other, Expand); + + // + // Handling of vector operations. + // + + // Custom lower v4i16 load only. Let v4i16 store to be + // promoted for now. + promoteLdStType(MVT::v4i8, MVT::i32); + promoteLdStType(MVT::v2i16, MVT::i32); + promoteLdStType(MVT::v8i8, MVT::i64); + promoteLdStType(MVT::v2i32, MVT::i64); + + setOperationAction(ISD::LOAD, MVT::v4i16, Custom); + setOperationAction(ISD::STORE, MVT::v4i16, Promote); + AddPromotedToType(ISD::LOAD, MVT::v4i16, MVT::i64); + AddPromotedToType(ISD::STORE, MVT::v4i16, MVT::i64); + + // Set the action for vector operations to "expand", then override it with + // either "custom" or "legal" for specific cases. 
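+ // (Every opcode in VectExpOps below starts out expanded for every vector
+ //  type; the "Types natively supported" loop further down then re-marks the
+ //  operations Hexagon handles directly in registers.)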
+ static const unsigned VectExpOps[] = { + // Integer arithmetic: + ISD::ADD, ISD::SUB, ISD::MUL, ISD::SDIV, ISD::UDIV, + ISD::SREM, ISD::UREM, ISD::SDIVREM, ISD::UDIVREM, ISD::ADDC, + ISD::SUBC, ISD::SADDO, ISD::UADDO, ISD::SSUBO, ISD::USUBO, + ISD::SMUL_LOHI, ISD::UMUL_LOHI, + // Logical/bit: + ISD::AND, ISD::OR, ISD::XOR, ISD::ROTL, ISD::ROTR, + ISD::CTPOP, ISD::CTLZ, ISD::CTTZ, ISD::CTLZ_ZERO_UNDEF, + ISD::CTTZ_ZERO_UNDEF, + // Floating point arithmetic/math functions: + ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FMA, ISD::FDIV, + ISD::FREM, ISD::FNEG, ISD::FABS, ISD::FSQRT, ISD::FSIN, + ISD::FCOS, ISD::FPOWI, ISD::FPOW, ISD::FLOG, ISD::FLOG2, + ISD::FLOG10, ISD::FEXP, ISD::FEXP2, ISD::FCEIL, ISD::FTRUNC, + ISD::FRINT, ISD::FNEARBYINT, ISD::FROUND, ISD::FFLOOR, + ISD::FMINNUM, ISD::FMAXNUM, ISD::FSINCOS, + // Misc: + ISD::SELECT, ISD::ConstantPool, + // Vector: + ISD::BUILD_VECTOR, ISD::SCALAR_TO_VECTOR, + ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT, + ISD::EXTRACT_SUBVECTOR, ISD::INSERT_SUBVECTOR, + ISD::CONCAT_VECTORS, ISD::VECTOR_SHUFFLE + }; + + for (MVT VT : MVT::vector_valuetypes()) { + for (unsigned VectExpOp : VectExpOps) + setOperationAction(VectExpOp, VT, Expand); + + // Expand all extended loads and truncating stores: + for (MVT TargetVT : MVT::vector_valuetypes()) { + setLoadExtAction(ISD::EXTLOAD, TargetVT, VT, Expand); + setTruncStoreAction(VT, TargetVT, Expand); + } + + setOperationAction(ISD::SRA, VT, Custom); + setOperationAction(ISD::SHL, VT, Custom); + setOperationAction(ISD::SRL, VT, Custom); + } + + // Types natively supported: + for (MVT NativeVT : {MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v32i1, MVT::v64i1, + MVT::v4i8, MVT::v8i8, MVT::v2i16, MVT::v4i16, MVT::v1i32, + MVT::v2i32, MVT::v1i64}) { + setOperationAction(ISD::BUILD_VECTOR, NativeVT, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, NativeVT, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, NativeVT, Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, NativeVT, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, NativeVT, Custom); + setOperationAction(ISD::CONCAT_VECTORS, NativeVT, Custom); + + setOperationAction(ISD::ADD, NativeVT, Legal); + setOperationAction(ISD::SUB, NativeVT, Legal); + setOperationAction(ISD::MUL, NativeVT, Legal); + setOperationAction(ISD::AND, NativeVT, Legal); + setOperationAction(ISD::OR, NativeVT, Legal); + setOperationAction(ISD::XOR, NativeVT, Legal); + } + + setOperationAction(ISD::SETCC, MVT::v2i16, Custom); + setOperationAction(ISD::VSELECT, MVT::v2i16, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom); + if (UseHVX) { + if (UseHVXSgl) { + setOperationAction(ISD::CONCAT_VECTORS, MVT::v128i8, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i16, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i32, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i64, Custom); + } else if (UseHVXDbl) { + setOperationAction(ISD::CONCAT_VECTORS, MVT::v256i8, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v128i16, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i32, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i64, Custom); + } else { + llvm_unreachable("Unrecognized HVX mode"); + } + } + // Subtarget-specific operation actions. 
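+ // (V5 adds hardware floating point; on V4 every FP operation is routed
+ //  through the __hexagon_* runtime calls registered further down.)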
+ // + if (Subtarget.hasV5TOps()) { + setOperationAction(ISD::FMA, MVT::f64, Expand); + setOperationAction(ISD::FADD, MVT::f64, Expand); + setOperationAction(ISD::FSUB, MVT::f64, Expand); + setOperationAction(ISD::FMUL, MVT::f64, Expand); + + setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote); + setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote); + setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote); + setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote); + setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote); + setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote); + setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote); + setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote); + setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote); + setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote); + setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote); + setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote); + + } else { // V4 + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand); + setOperationAction(ISD::SINT_TO_FP, MVT::i64, Expand); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); + setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); + setOperationAction(ISD::FP_TO_SINT, MVT::f64, Expand); + setOperationAction(ISD::FP_TO_SINT, MVT::f32, Expand); + setOperationAction(ISD::FP_EXTEND, MVT::f32, Expand); + setOperationAction(ISD::FP_ROUND, MVT::f64, Expand); + setCondCodeAction(ISD::SETUNE, MVT::f64, Expand); + + setOperationAction(ISD::CTPOP, MVT::i8, Expand); + setOperationAction(ISD::CTPOP, MVT::i16, Expand); + setOperationAction(ISD::CTPOP, MVT::i32, Expand); + setOperationAction(ISD::CTPOP, MVT::i64, Expand); + + // Expand these operations for both f32 and f64: + for (unsigned FPExpOpV4 : + {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FABS, ISD::FNEG, ISD::FMA}) { + setOperationAction(FPExpOpV4, MVT::f32, Expand); + setOperationAction(FPExpOpV4, MVT::f64, Expand); + } + + for (ISD::CondCode FPExpCCV4 : + {ISD::SETOEQ, ISD::SETOGT, ISD::SETOLT, ISD::SETOGE, ISD::SETOLE, + ISD::SETUO, ISD::SETO}) { + setCondCodeAction(FPExpCCV4, MVT::f32, Expand); + setCondCodeAction(FPExpCCV4, MVT::f64, Expand); + } + } + + // Handling of indexed loads/stores: default is "expand". 
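+ // (Only the post-increment forms are marked legal below; pre-increment
+ //  addressing stays expanded.)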
+ // + for (MVT LSXTy : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) { + setIndexedLoadAction(ISD::POST_INC, LSXTy, Legal); + setIndexedStoreAction(ISD::POST_INC, LSXTy, Legal); + } + + if (UseHVXDbl) { + for (MVT VT : {MVT::v128i8, MVT::v64i16, MVT::v32i32, MVT::v16i64}) { + setIndexedLoadAction(ISD::POST_INC, VT, Legal); + setIndexedStoreAction(ISD::POST_INC, VT, Legal); + } + } + + computeRegisterProperties(&HRI); + + // + // Library calls for unsupported operations + // + bool FastMath = EnableFastMath; + + setLibcallName(RTLIB::SDIV_I32, "__hexagon_divsi3"); + setLibcallName(RTLIB::SDIV_I64, "__hexagon_divdi3"); + setLibcallName(RTLIB::UDIV_I32, "__hexagon_udivsi3"); + setLibcallName(RTLIB::UDIV_I64, "__hexagon_udivdi3"); + setLibcallName(RTLIB::SREM_I32, "__hexagon_modsi3"); + setLibcallName(RTLIB::SREM_I64, "__hexagon_moddi3"); + setLibcallName(RTLIB::UREM_I32, "__hexagon_umodsi3"); + setLibcallName(RTLIB::UREM_I64, "__hexagon_umoddi3"); + + setLibcallName(RTLIB::SINTTOFP_I128_F64, "__hexagon_floattidf"); + setLibcallName(RTLIB::SINTTOFP_I128_F32, "__hexagon_floattisf"); + setLibcallName(RTLIB::FPTOUINT_F32_I128, "__hexagon_fixunssfti"); + setLibcallName(RTLIB::FPTOUINT_F64_I128, "__hexagon_fixunsdfti"); + setLibcallName(RTLIB::FPTOSINT_F32_I128, "__hexagon_fixsfti"); + setLibcallName(RTLIB::FPTOSINT_F64_I128, "__hexagon_fixdfti"); + + if (IsV4) { + // Handle single-precision floating point operations on V4. + if (FastMath) { + setLibcallName(RTLIB::ADD_F32, "__hexagon_fast_addsf3"); + setLibcallName(RTLIB::SUB_F32, "__hexagon_fast_subsf3"); + setLibcallName(RTLIB::MUL_F32, "__hexagon_fast_mulsf3"); + setLibcallName(RTLIB::OGT_F32, "__hexagon_fast_gtsf2"); + setLibcallName(RTLIB::OLT_F32, "__hexagon_fast_ltsf2"); + // Double-precision compares. + setLibcallName(RTLIB::OGT_F64, "__hexagon_fast_gtdf2"); + setLibcallName(RTLIB::OLT_F64, "__hexagon_fast_ltdf2"); + } else { + setLibcallName(RTLIB::ADD_F32, "__hexagon_addsf3"); + setLibcallName(RTLIB::SUB_F32, "__hexagon_subsf3"); + setLibcallName(RTLIB::MUL_F32, "__hexagon_mulsf3"); + setLibcallName(RTLIB::OGT_F32, "__hexagon_gtsf2"); + setLibcallName(RTLIB::OLT_F32, "__hexagon_ltsf2"); + // Double-precision compares. + setLibcallName(RTLIB::OGT_F64, "__hexagon_gtdf2"); + setLibcallName(RTLIB::OLT_F64, "__hexagon_ltdf2"); + } + } + + // This is the only fast library function for sqrtd. + if (FastMath) + setLibcallName(RTLIB::SQRT_F64, "__hexagon_fast2_sqrtdf2"); + + // Prefix is: nothing for "slow-math", + // "fast2_" for V4 fast-math and V5+ fast-math double-precision + // (actually, keep fast-math and fast-math2 separate for now) + if (FastMath) { + setLibcallName(RTLIB::ADD_F64, "__hexagon_fast_adddf3"); + setLibcallName(RTLIB::SUB_F64, "__hexagon_fast_subdf3"); + setLibcallName(RTLIB::MUL_F64, "__hexagon_fast_muldf3"); + setLibcallName(RTLIB::DIV_F64, "__hexagon_fast_divdf3"); + // Calling __hexagon_fast2_divsf3 with fast-math on V5 (ok). 
+ setLibcallName(RTLIB::DIV_F32, "__hexagon_fast_divsf3"); + } else { + setLibcallName(RTLIB::ADD_F64, "__hexagon_adddf3"); + setLibcallName(RTLIB::SUB_F64, "__hexagon_subdf3"); + setLibcallName(RTLIB::MUL_F64, "__hexagon_muldf3"); + setLibcallName(RTLIB::DIV_F64, "__hexagon_divdf3"); + setLibcallName(RTLIB::DIV_F32, "__hexagon_divsf3"); + } + + if (Subtarget.hasV5TOps()) { + if (FastMath) + setLibcallName(RTLIB::SQRT_F32, "__hexagon_fast2_sqrtf"); + else + setLibcallName(RTLIB::SQRT_F32, "__hexagon_sqrtf"); + } else { + // V4 + setLibcallName(RTLIB::SINTTOFP_I32_F32, "__hexagon_floatsisf"); + setLibcallName(RTLIB::SINTTOFP_I32_F64, "__hexagon_floatsidf"); + setLibcallName(RTLIB::SINTTOFP_I64_F32, "__hexagon_floatdisf"); + setLibcallName(RTLIB::SINTTOFP_I64_F64, "__hexagon_floatdidf"); + setLibcallName(RTLIB::UINTTOFP_I32_F32, "__hexagon_floatunsisf"); + setLibcallName(RTLIB::UINTTOFP_I32_F64, "__hexagon_floatunsidf"); + setLibcallName(RTLIB::UINTTOFP_I64_F32, "__hexagon_floatundisf"); + setLibcallName(RTLIB::UINTTOFP_I64_F64, "__hexagon_floatundidf"); + setLibcallName(RTLIB::FPTOUINT_F32_I32, "__hexagon_fixunssfsi"); + setLibcallName(RTLIB::FPTOUINT_F32_I64, "__hexagon_fixunssfdi"); + setLibcallName(RTLIB::FPTOUINT_F64_I32, "__hexagon_fixunsdfsi"); + setLibcallName(RTLIB::FPTOUINT_F64_I64, "__hexagon_fixunsdfdi"); + setLibcallName(RTLIB::FPTOSINT_F32_I32, "__hexagon_fixsfsi"); + setLibcallName(RTLIB::FPTOSINT_F32_I64, "__hexagon_fixsfdi"); + setLibcallName(RTLIB::FPTOSINT_F64_I32, "__hexagon_fixdfsi"); + setLibcallName(RTLIB::FPTOSINT_F64_I64, "__hexagon_fixdfdi"); + setLibcallName(RTLIB::FPEXT_F32_F64, "__hexagon_extendsfdf2"); + setLibcallName(RTLIB::FPROUND_F64_F32, "__hexagon_truncdfsf2"); + setLibcallName(RTLIB::OEQ_F32, "__hexagon_eqsf2"); + setLibcallName(RTLIB::OEQ_F64, "__hexagon_eqdf2"); + setLibcallName(RTLIB::OGE_F32, "__hexagon_gesf2"); + setLibcallName(RTLIB::OGE_F64, "__hexagon_gedf2"); + setLibcallName(RTLIB::OLE_F32, "__hexagon_lesf2"); + setLibcallName(RTLIB::OLE_F64, "__hexagon_ledf2"); + setLibcallName(RTLIB::UNE_F32, "__hexagon_nesf2"); + setLibcallName(RTLIB::UNE_F64, "__hexagon_nedf2"); + setLibcallName(RTLIB::UO_F32, "__hexagon_unordsf2"); + setLibcallName(RTLIB::UO_F64, "__hexagon_unorddf2"); + setLibcallName(RTLIB::O_F32, "__hexagon_unordsf2"); + setLibcallName(RTLIB::O_F64, "__hexagon_unorddf2"); + } + + // These cause problems when the shift amount is non-constant. 
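+ // (Setting the names to nullptr keeps the compiler from emitting calls to
+ //  the i128 shift helpers at all.)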
+ setLibcallName(RTLIB::SHL_I128, nullptr); + setLibcallName(RTLIB::SRL_I128, nullptr); + setLibcallName(RTLIB::SRA_I128, nullptr); +} + + +const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const { + switch ((HexagonISD::NodeType)Opcode) { + case HexagonISD::ALLOCA: return "HexagonISD::ALLOCA"; + case HexagonISD::ARGEXTEND: return "HexagonISD::ARGEXTEND"; + case HexagonISD::AT_GOT: return "HexagonISD::AT_GOT"; + case HexagonISD::AT_PCREL: return "HexagonISD::AT_PCREL"; + case HexagonISD::BARRIER: return "HexagonISD::BARRIER"; + case HexagonISD::CALLR: return "HexagonISD::CALLR"; + case HexagonISD::CALLv3nr: return "HexagonISD::CALLv3nr"; + case HexagonISD::CALLv3: return "HexagonISD::CALLv3"; + case HexagonISD::COMBINE: return "HexagonISD::COMBINE"; + case HexagonISD::CONST32_GP: return "HexagonISD::CONST32_GP"; + case HexagonISD::CONST32: return "HexagonISD::CONST32"; + case HexagonISD::CP: return "HexagonISD::CP"; + case HexagonISD::DCFETCH: return "HexagonISD::DCFETCH"; + case HexagonISD::EH_RETURN: return "HexagonISD::EH_RETURN"; + case HexagonISD::EXTRACTU: return "HexagonISD::EXTRACTU"; + case HexagonISD::EXTRACTURP: return "HexagonISD::EXTRACTURP"; + case HexagonISD::FCONST32: return "HexagonISD::FCONST32"; + case HexagonISD::INSERT: return "HexagonISD::INSERT"; + case HexagonISD::INSERTRP: return "HexagonISD::INSERTRP"; + case HexagonISD::JT: return "HexagonISD::JT"; + case HexagonISD::PACKHL: return "HexagonISD::PACKHL"; + case HexagonISD::POPCOUNT: return "HexagonISD::POPCOUNT"; + case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG"; + case HexagonISD::SHUFFEB: return "HexagonISD::SHUFFEB"; + case HexagonISD::SHUFFEH: return "HexagonISD::SHUFFEH"; + case HexagonISD::SHUFFOB: return "HexagonISD::SHUFFOB"; + case HexagonISD::SHUFFOH: return "HexagonISD::SHUFFOH"; + case HexagonISD::TC_RETURN: return "HexagonISD::TC_RETURN"; + case HexagonISD::VCMPBEQ: return "HexagonISD::VCMPBEQ"; + case HexagonISD::VCMPBGT: return "HexagonISD::VCMPBGT"; + case HexagonISD::VCMPBGTU: return "HexagonISD::VCMPBGTU"; + case HexagonISD::VCMPHEQ: return "HexagonISD::VCMPHEQ"; + case HexagonISD::VCMPHGT: return "HexagonISD::VCMPHGT"; + case HexagonISD::VCMPHGTU: return "HexagonISD::VCMPHGTU"; + case HexagonISD::VCMPWEQ: return "HexagonISD::VCMPWEQ"; + case HexagonISD::VCMPWGT: return "HexagonISD::VCMPWGT"; + case HexagonISD::VCMPWGTU: return "HexagonISD::VCMPWGTU"; + case HexagonISD::VCOMBINE: return "HexagonISD::VCOMBINE"; + case HexagonISD::VSHLH: return "HexagonISD::VSHLH"; + case HexagonISD::VSHLW: return "HexagonISD::VSHLW"; + case HexagonISD::VSPLATB: return "HexagonISD::VSPLTB"; + case HexagonISD::VSPLATH: return "HexagonISD::VSPLATH"; + case HexagonISD::VSRAH: return "HexagonISD::VSRAH"; + case HexagonISD::VSRAW: return "HexagonISD::VSRAW"; + case HexagonISD::VSRLH: return "HexagonISD::VSRLH"; + case HexagonISD::VSRLW: return "HexagonISD::VSRLW"; + case HexagonISD::VSXTBH: return "HexagonISD::VSXTBH"; + case HexagonISD::VSXTBW: return "HexagonISD::VSXTBW"; + case HexagonISD::OP_END: break; + } + return nullptr; +} + +bool HexagonTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { + EVT MTy1 = EVT::getEVT(Ty1); + EVT MTy2 = EVT::getEVT(Ty2); + if (!MTy1.isSimple() || !MTy2.isSimple()) + return false; + return (MTy1.getSimpleVT() == MVT::i64) && (MTy2.getSimpleVT() == MVT::i32); +} + +bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { + if (!VT1.isSimple() || !VT2.isSimple()) + return false; + return (VT1.getSimpleVT() == MVT::i64) && 
(VT2.getSimpleVT() == MVT::i32); +} + +// shouldExpandBuildVectorWithShuffles +// Should we expand the build vector with shuffles? +bool +HexagonTargetLowering::shouldExpandBuildVectorWithShuffles(EVT VT, + unsigned DefinedValues) const { + + // Hexagon vector shuffle operates on element sizes of bytes or halfwords + EVT EltVT = VT.getVectorElementType(); + int EltBits = EltVT.getSizeInBits(); + if ((EltBits != 8) && (EltBits != 16)) + return false; + + return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues); +} + +// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3). V1 and +// V2 are the two vectors to select data from, V3 is the permutation. +static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { + const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op); + SDValue V1 = Op.getOperand(0); + SDValue V2 = Op.getOperand(1); + SDLoc dl(Op); + EVT VT = Op.getValueType(); + + if (V2.getOpcode() == ISD::UNDEF) + V2 = V1; + + if (SVN->isSplat()) { + int Lane = SVN->getSplatIndex(); + if (Lane == -1) Lane = 0; + + // Test if V1 is a SCALAR_TO_VECTOR. + if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) + return createSplat(DAG, dl, VT, V1.getOperand(0)); + + // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR + // (and probably will turn into a SCALAR_TO_VECTOR once legalization + // reaches it). + if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR && + !isa<ConstantSDNode>(V1.getOperand(0))) { + bool IsScalarToVector = true; + for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i) + if (V1.getOperand(i).getOpcode() != ISD::UNDEF) { + IsScalarToVector = false; + break; + } + if (IsScalarToVector) + return createSplat(DAG, dl, VT, V1.getOperand(0)); + } + return createSplat(DAG, dl, VT, DAG.getConstant(Lane, dl, MVT::i32)); + } + + // FIXME: We need to support more general vector shuffles. See + // below the comment from the ARM backend that deals in the general + // case with the vector shuffles. For now, let expand handle these. + return SDValue(); + + // If the shuffle is not directly supported and it has 4 elements, use + // the PerfectShuffle-generated table to synthesize it from other shuffles. +} + +// If BUILD_VECTOR has same base element repeated several times, +// report true. +static bool isCommonSplatElement(BuildVectorSDNode *BVN) { + unsigned NElts = BVN->getNumOperands(); + SDValue V0 = BVN->getOperand(0); + + for (unsigned i = 1, e = NElts; i != e; ++i) { + if (BVN->getOperand(i) != V0) + return false; + } + return true; +} + +// LowerVECTOR_SHIFT - Lower a vector shift. Try to convert +// <VT> = SHL/SRA/SRL <VT> by <VT> to Hexagon specific +// <VT> = SHL/SRA/SRL <VT> by <IT/i32>. 
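+// For example, (v4i16 shl V, (build_vector 3,3,3,3)) becomes VSHLH V, 3; the
+// rewrite only applies when the shift-amount vector is a splat of one value.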
+static SDValue LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) { + BuildVectorSDNode *BVN = 0; + SDValue V1 = Op.getOperand(0); + SDValue V2 = Op.getOperand(1); + SDValue V3; + SDLoc dl(Op); + EVT VT = Op.getValueType(); + + if ((BVN = dyn_cast<BuildVectorSDNode>(V1.getNode())) && + isCommonSplatElement(BVN)) + V3 = V2; + else if ((BVN = dyn_cast<BuildVectorSDNode>(V2.getNode())) && + isCommonSplatElement(BVN)) + V3 = V1; + else + return SDValue(); + + SDValue CommonSplat = BVN->getOperand(0); + SDValue Result; + + if (VT.getSimpleVT() == MVT::v4i16) { + switch (Op.getOpcode()) { + case ISD::SRA: + Result = DAG.getNode(HexagonISD::VSRAH, dl, VT, V3, CommonSplat); + break; + case ISD::SHL: + Result = DAG.getNode(HexagonISD::VSHLH, dl, VT, V3, CommonSplat); + break; + case ISD::SRL: + Result = DAG.getNode(HexagonISD::VSRLH, dl, VT, V3, CommonSplat); + break; + default: + return SDValue(); + } + } else if (VT.getSimpleVT() == MVT::v2i32) { + switch (Op.getOpcode()) { + case ISD::SRA: + Result = DAG.getNode(HexagonISD::VSRAW, dl, VT, V3, CommonSplat); + break; + case ISD::SHL: + Result = DAG.getNode(HexagonISD::VSHLW, dl, VT, V3, CommonSplat); + break; + case ISD::SRL: + Result = DAG.getNode(HexagonISD::VSRLW, dl, VT, V3, CommonSplat); + break; + default: + return SDValue(); + } + } else { + return SDValue(); + } + + return DAG.getNode(ISD::BITCAST, dl, VT, Result); +} + +SDValue +HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { + BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode()); + SDLoc dl(Op); + EVT VT = Op.getValueType(); + + unsigned Size = VT.getSizeInBits(); + + // Only handle vectors of 64 bits or shorter. + if (Size > 64) + return SDValue(); + + APInt APSplatBits, APSplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + unsigned NElts = BVN->getNumOperands(); + + // Try to generate a SPLAT instruction. + if ((VT.getSimpleVT() == MVT::v4i8 || VT.getSimpleVT() == MVT::v4i16) && + (BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, + HasAnyUndefs, 0, true) && SplatBitSize <= 16)) { + unsigned SplatBits = APSplatBits.getZExtValue(); + int32_t SextVal = ((int32_t) (SplatBits << (32 - SplatBitSize)) >> + (32 - SplatBitSize)); + return createSplat(DAG, dl, VT, DAG.getConstant(SextVal, dl, MVT::i32)); + } + + // Try to generate COMBINE to build v2i32 vectors. + if (VT.getSimpleVT() == MVT::v2i32) { + SDValue V0 = BVN->getOperand(0); + SDValue V1 = BVN->getOperand(1); + + if (V0.getOpcode() == ISD::UNDEF) + V0 = DAG.getConstant(0, dl, MVT::i32); + if (V1.getOpcode() == ISD::UNDEF) + V1 = DAG.getConstant(0, dl, MVT::i32); + + ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(V0); + ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(V1); + // If the element isn't a constant, it is in a register: + // generate a COMBINE Register Register instruction. + if (!C0 || !C1) + return DAG.getNode(HexagonISD::COMBINE, dl, VT, V1, V0); + + // If one of the operands is an 8 bit integer constant, generate + // a COMBINE Immediate Immediate instruction. + if (isInt<8>(C0->getSExtValue()) || + isInt<8>(C1->getSExtValue())) + return DAG.getNode(HexagonISD::COMBINE, dl, VT, V1, V0); + } + + // Try to generate a S2_packhl to build v2i16 vectors. + if (VT.getSimpleVT() == MVT::v2i16) { + for (unsigned i = 0, e = NElts; i != e; ++i) { + if (BVN->getOperand(i).getOpcode() == ISD::UNDEF) + continue; + ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(BVN->getOperand(i)); + // If the element isn't a constant, it is in a register: + // generate a S2_packhl instruction. 
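+ // (The PACKHL result is v4i16; the low 32-bit subregister extracted below
+ //  holds the requested v2i16 value.)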
+ if (!Cst) { + SDValue pack = DAG.getNode(HexagonISD::PACKHL, dl, MVT::v4i16, + BVN->getOperand(1), BVN->getOperand(0)); + + return DAG.getTargetExtractSubreg(Hexagon::subreg_loreg, dl, MVT::v2i16, + pack); + } + } + } + + // In the general case, generate a CONST32 or a CONST64 for constant vectors, + // and insert_vector_elt for all the other cases. + uint64_t Res = 0; + unsigned EltSize = Size / NElts; + SDValue ConstVal; + uint64_t Mask = ~uint64_t(0ULL) >> (64 - EltSize); + bool HasNonConstantElements = false; + + for (unsigned i = 0, e = NElts; i != e; ++i) { + // LLVM's BUILD_VECTOR operands are in Little Endian mode, whereas Hexagon's + // combine, const64, etc. are Big Endian. + unsigned OpIdx = NElts - i - 1; + SDValue Operand = BVN->getOperand(OpIdx); + if (Operand.getOpcode() == ISD::UNDEF) + continue; + + int64_t Val = 0; + if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Operand)) + Val = Cst->getSExtValue(); + else + HasNonConstantElements = true; + + Val &= Mask; + Res = (Res << EltSize) | Val; + } + + if (Size == 64) + ConstVal = DAG.getConstant(Res, dl, MVT::i64); + else + ConstVal = DAG.getConstant(Res, dl, MVT::i32); + + // When there are non constant operands, add them with INSERT_VECTOR_ELT to + // ConstVal, the constant part of the vector. + if (HasNonConstantElements) { + EVT EltVT = VT.getVectorElementType(); + SDValue Width = DAG.getConstant(EltVT.getSizeInBits(), dl, MVT::i64); + SDValue Shifted = DAG.getNode(ISD::SHL, dl, MVT::i64, Width, + DAG.getConstant(32, dl, MVT::i64)); + + for (unsigned i = 0, e = NElts; i != e; ++i) { + // LLVM's BUILD_VECTOR operands are in Little Endian mode, whereas Hexagon + // is Big Endian. + unsigned OpIdx = NElts - i - 1; + SDValue Operand = BVN->getOperand(OpIdx); + if (isa<ConstantSDNode>(Operand)) + // This operand is already in ConstVal. + continue; + + if (VT.getSizeInBits() == 64 && + Operand.getValueType().getSizeInBits() == 32) { + SDValue C = DAG.getConstant(0, dl, MVT::i32); + Operand = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, Operand); + } + + SDValue Idx = DAG.getConstant(OpIdx, dl, MVT::i64); + SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i64, Idx, Width); + SDValue Combined = DAG.getNode(ISD::OR, dl, MVT::i64, Shifted, Offset); + const SDValue Ops[] = {ConstVal, Operand, Combined}; + + if (VT.getSizeInBits() == 32) + ConstVal = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i32, Ops); + else + ConstVal = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i64, Ops); + } + } + + return DAG.getNode(ISD::BITCAST, dl, VT, ConstVal); +} + +SDValue +HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + bool UseHVX = Subtarget.useHVXOps(); + EVT VT = Op.getValueType(); + unsigned NElts = Op.getNumOperands(); + SDValue Vec0 = Op.getOperand(0); + EVT VecVT = Vec0.getValueType(); + unsigned Width = VecVT.getSizeInBits(); + + if (NElts == 2) { + MVT ST = VecVT.getSimpleVT(); + // We are trying to concat two v2i16 to a single v4i16, or two v4i8 + // into a single v8i8. + if (ST == MVT::v2i16 || ST == MVT::v4i8) + return DAG.getNode(HexagonISD::COMBINE, dl, VT, Op.getOperand(1), Vec0); + + if (UseHVX) { + assert((Width == 64*8 && Subtarget.useHVXSglOps()) || + (Width == 128*8 && Subtarget.useHVXDblOps())); + SDValue Vec1 = Op.getOperand(1); + MVT OpTy = Subtarget.useHVXSglOps() ? MVT::v16i32 : MVT::v32i32; + MVT ReTy = Subtarget.useHVXSglOps() ? 
MVT::v32i32 : MVT::v64i32; + SDValue B0 = DAG.getNode(ISD::BITCAST, dl, OpTy, Vec0); + SDValue B1 = DAG.getNode(ISD::BITCAST, dl, OpTy, Vec1); + SDValue VC = DAG.getNode(HexagonISD::VCOMBINE, dl, ReTy, B1, B0); + return DAG.getNode(ISD::BITCAST, dl, VT, VC); + } + } + + if (VT.getSizeInBits() != 32 && VT.getSizeInBits() != 64) + return SDValue(); + + SDValue C0 = DAG.getConstant(0, dl, MVT::i64); + SDValue C32 = DAG.getConstant(32, dl, MVT::i64); + SDValue W = DAG.getConstant(Width, dl, MVT::i64); + // Create the "width" part of the argument to insert_rp/insertp_rp. + SDValue S = DAG.getNode(ISD::SHL, dl, MVT::i64, W, C32); + SDValue V = C0; + + for (unsigned i = 0, e = NElts; i != e; ++i) { + unsigned N = NElts-i-1; + SDValue OpN = Op.getOperand(N); + + if (VT.getSizeInBits() == 64 && OpN.getValueType().getSizeInBits() == 32) { + SDValue C = DAG.getConstant(0, dl, MVT::i32); + OpN = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, OpN); + } + SDValue Idx = DAG.getConstant(N, dl, MVT::i64); + SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i64, Idx, W); + SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, S, Offset); + if (VT.getSizeInBits() == 32) + V = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i32, {V, OpN, Or}); + else + V = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i64, {V, OpN, Or}); + } + + return DAG.getNode(ISD::BITCAST, dl, VT, V); +} + +SDValue +HexagonTargetLowering::LowerEXTRACT_VECTOR(SDValue Op, + SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + int VTN = VT.isVector() ? VT.getVectorNumElements() : 1; + SDLoc dl(Op); + SDValue Idx = Op.getOperand(1); + SDValue Vec = Op.getOperand(0); + EVT VecVT = Vec.getValueType(); + EVT EltVT = VecVT.getVectorElementType(); + int EltSize = EltVT.getSizeInBits(); + SDValue Width = DAG.getConstant(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT ? + EltSize : VTN * EltSize, dl, MVT::i64); + + // Constant element number. + if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Idx)) { + uint64_t X = CI->getZExtValue(); + SDValue Offset = DAG.getConstant(X * EltSize, dl, MVT::i32); + const SDValue Ops[] = {Vec, Width, Offset}; + + ConstantSDNode *CW = dyn_cast<ConstantSDNode>(Width); + assert(CW && "Non constant width in LowerEXTRACT_VECTOR"); + + SDValue N; + MVT SVT = VecVT.getSimpleVT(); + uint64_t W = CW->getZExtValue(); + + if (W == 32) { + // Translate this node into EXTRACT_SUBREG. + unsigned Subreg = (X == 0) ? Hexagon::subreg_loreg : 0; + + if (X == 0) + Subreg = Hexagon::subreg_loreg; + else if (SVT == MVT::v2i32 && X == 1) + Subreg = Hexagon::subreg_hireg; + else if (SVT == MVT::v4i16 && X == 2) + Subreg = Hexagon::subreg_hireg; + else if (SVT == MVT::v8i8 && X == 4) + Subreg = Hexagon::subreg_hireg; + else + llvm_unreachable("Bad offset"); + N = DAG.getTargetExtractSubreg(Subreg, dl, MVT::i32, Vec); + + } else if (VecVT.getSizeInBits() == 32) { + N = DAG.getNode(HexagonISD::EXTRACTU, dl, MVT::i32, Ops); + } else { + N = DAG.getNode(HexagonISD::EXTRACTU, dl, MVT::i64, Ops); + if (VT.getSizeInBits() == 32) + N = DAG.getTargetExtractSubreg(Hexagon::subreg_loreg, dl, MVT::i32, N); + } + + return DAG.getNode(ISD::BITCAST, dl, VT, N); + } + + // Variable element number. 
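+ // The EXTRACTURP operand packs the element width into the upper 32 bits and
+ // the bit offset (index * element size) into the lower 32 bits; e.g. for
+ // element 3 of a v4i16 this is (16 << 32) | 48 = 0x1000000030.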
+ SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i32, Idx,
+ DAG.getConstant(EltSize, dl, MVT::i32));
+ SDValue Shifted = DAG.getNode(ISD::SHL, dl, MVT::i64, Width,
+ DAG.getConstant(32, dl, MVT::i64));
+ SDValue Combined = DAG.getNode(ISD::OR, dl, MVT::i64, Shifted, Offset);
+
+ const SDValue Ops[] = {Vec, Combined};
+
+ SDValue N;
+ if (VecVT.getSizeInBits() == 32) {
+ N = DAG.getNode(HexagonISD::EXTRACTURP, dl, MVT::i32, Ops);
+ } else {
+ N = DAG.getNode(HexagonISD::EXTRACTURP, dl, MVT::i64, Ops);
+ if (VT.getSizeInBits() == 32)
+ N = DAG.getTargetExtractSubreg(Hexagon::subreg_loreg, dl, MVT::i32, N);
+ }
+ return DAG.getNode(ISD::BITCAST, dl, VT, N);
+}
+
+SDValue
+HexagonTargetLowering::LowerINSERT_VECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ int VTN = VT.isVector() ? VT.getVectorNumElements() : 1;
+ SDLoc dl(Op);
+ SDValue Vec = Op.getOperand(0);
+ SDValue Val = Op.getOperand(1);
+ SDValue Idx = Op.getOperand(2);
+ EVT VecVT = Vec.getValueType();
+ EVT EltVT = VecVT.getVectorElementType();
+ int EltSize = EltVT.getSizeInBits();
+ SDValue Width = DAG.getConstant(Op.getOpcode() == ISD::INSERT_VECTOR_ELT ?
+ EltSize : VTN * EltSize, dl, MVT::i64);
+
+ // Use dyn_cast here: a variable index must fall through to the code below
+ // (cast<> would assert on a non-constant index).
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Idx)) {
+ SDValue Offset = DAG.getConstant(C->getSExtValue() * EltSize, dl, MVT::i32);
+ const SDValue Ops[] = {Vec, Val, Width, Offset};
+
+ SDValue N;
+ if (VT.getSizeInBits() == 32)
+ N = DAG.getNode(HexagonISD::INSERT, dl, MVT::i32, Ops);
+ else
+ N = DAG.getNode(HexagonISD::INSERT, dl, MVT::i64, Ops);
+
+ return DAG.getNode(ISD::BITCAST, dl, VT, N);
+ }
+
+ // Variable element number.
+ SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i32, Idx,
+ DAG.getConstant(EltSize, dl, MVT::i32));
+ SDValue Shifted = DAG.getNode(ISD::SHL, dl, MVT::i64, Width,
+ DAG.getConstant(32, dl, MVT::i64));
+ SDValue Combined = DAG.getNode(ISD::OR, dl, MVT::i64, Shifted, Offset);
+
+ if (VT.getSizeInBits() == 64 &&
+ Val.getValueType().getSizeInBits() == 32) {
+ SDValue C = DAG.getConstant(0, dl, MVT::i32);
+ Val = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, Val);
+ }
+
+ const SDValue Ops[] = {Vec, Val, Combined};
+
+ SDValue N;
+ if (VT.getSizeInBits() == 32)
+ N = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i32, Ops);
+ else
+ N = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i64, Ops);
+
+ return DAG.getNode(ISD::BITCAST, dl, VT, N);
+}
+
+bool
+HexagonTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
+ // Assuming the caller does not have either a signext or zeroext modifier, and
+ // only one value is accepted, any reasonable truncation is allowed.
+ if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
+ return false;
+
+ // FIXME: in principle up to 64-bit could be made safe, but it would be very
+ // fragile at the moment: any support for multiple value returns would be
+ // liable to disallow tail calls involving i64 -> iN truncation in many cases.
+ return Ty1->getPrimitiveSizeInBits() <= 32;
+}
+
+SDValue
+HexagonTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Offset = Op.getOperand(1);
+ SDValue Handler = Op.getOperand(2);
+ SDLoc dl(Op);
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
+
+ // Mark function as containing a call to EH_RETURN.
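+ // (R30 is the frame pointer; the handler address overwrites the return
+ //  address that allocframe saved at FP+4, and the stack adjustment is
+ //  passed to the unwind path in R28.)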
+ HexagonMachineFunctionInfo *FuncInfo =
+ DAG.getMachineFunction().getInfo<HexagonMachineFunctionInfo>();
+ FuncInfo->setHasEHReturn();
+
+ unsigned OffsetReg = Hexagon::R28;
+
+ SDValue StoreAddr =
+ DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getRegister(Hexagon::R30, PtrVT),
+ DAG.getIntPtrConstant(4, dl));
+ Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(),
+ false, false, 0);
+ Chain = DAG.getCopyToReg(Chain, dl, OffsetReg, Offset);
+
+ // Not needed; we already use the offset register as an explicit input to
+ // EH_RETURN.
+ // MF.getRegInfo().addLiveOut(OffsetReg);
+
+ return DAG.getNode(HexagonISD::EH_RETURN, dl, MVT::Other, Chain);
+}
+
+SDValue
+HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ unsigned Opc = Op.getOpcode();
+ switch (Opc) {
+ default:
+#ifndef NDEBUG
+ Op.getNode()->dumpr(&DAG);
+ if (Opc > HexagonISD::OP_BEGIN && Opc < HexagonISD::OP_END)
+ errs() << "Check for a non-legal type in this operation\n";
+#endif
+ llvm_unreachable("Should not custom lower this!");
+ case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
+ case ISD::INSERT_SUBVECTOR: return LowerINSERT_VECTOR(Op, DAG);
+ case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR(Op, DAG);
+ case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_VECTOR(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR(Op, DAG);
+ case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
+ case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::SRA:
+ case ISD::SHL:
+ case ISD::SRL: return LowerVECTOR_SHIFT(Op, DAG);
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+ case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
+ // Frame & Return address. Currently unimplemented.
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG);
+ case ISD::GlobalAddress: return LowerGLOBALADDRESS(Op, DAG);
+ case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
+ case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG);
+ // Custom lower some vector loads.
+ case ISD::LOAD: return LowerLOAD(Op, DAG);
+ case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
+ case ISD::SETCC: return LowerSETCC(Op, DAG);
+ case ISD::VSELECT: return LowerVSELECT(Op, DAG);
+ case ISD::CTPOP: return LowerCTPOP(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::INLINEASM: return LowerINLINEASM(Op, DAG);
+ }
+}
+
+/// Returns relocation base for the given PIC jumptable.
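+/// (The table address is formed PC-relative via AT_PCREL, so PIC jump tables
+/// need no load-time relocation.)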
+SDValue +HexagonTargetLowering::getPICJumpTableRelocBase(SDValue Table, + SelectionDAG &DAG) const { + int Idx = cast<JumpTableSDNode>(Table)->getIndex(); + EVT VT = Table.getValueType(); + SDValue T = DAG.getTargetJumpTable(Idx, VT, HexagonII::MO_PCREL); + return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Table), VT, T); +} + +MachineBasicBlock * +HexagonTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *BB) + const { + switch (MI->getOpcode()) { + case Hexagon::ALLOCA: { + MachineFunction *MF = BB->getParent(); + auto *FuncInfo = MF->getInfo<HexagonMachineFunctionInfo>(); + FuncInfo->addAllocaAdjustInst(MI); + return BB; + } + default: llvm_unreachable("Unexpected instr type to insert"); + } // switch +} + +//===----------------------------------------------------------------------===// +// Inline Assembly Support +//===----------------------------------------------------------------------===// + +std::pair<unsigned, const TargetRegisterClass *> +HexagonTargetLowering::getRegForInlineAsmConstraint( + const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { + bool UseHVX = Subtarget.useHVXOps(), UseHVXDbl = Subtarget.useHVXDblOps(); + + if (Constraint.size() == 1) { + switch (Constraint[0]) { + case 'r': // R0-R31 + switch (VT.SimpleTy) { + default: + llvm_unreachable("getRegForInlineAsmConstraint Unhandled data type"); + case MVT::i32: + case MVT::i16: + case MVT::i8: + case MVT::f32: + return std::make_pair(0U, &Hexagon::IntRegsRegClass); + case MVT::i64: + case MVT::f64: + return std::make_pair(0U, &Hexagon::DoubleRegsRegClass); + } + case 'q': // q0-q3 + switch (VT.SimpleTy) { + default: + llvm_unreachable("getRegForInlineAsmConstraint Unhandled data type"); + case MVT::v1024i1: + case MVT::v512i1: + case MVT::v32i16: + case MVT::v16i32: + case MVT::v64i8: + case MVT::v8i64: + return std::make_pair(0U, &Hexagon::VecPredRegsRegClass); + } + case 'v': // V0-V31 + switch (VT.SimpleTy) { + default: + llvm_unreachable("getRegForInlineAsmConstraint Unhandled data type"); + case MVT::v16i32: + case MVT::v32i16: + case MVT::v64i8: + case MVT::v8i64: + return std::make_pair(0U, &Hexagon::VectorRegsRegClass); + case MVT::v32i32: + case MVT::v64i16: + case MVT::v16i64: + case MVT::v128i8: + if (Subtarget.hasV60TOps() && UseHVX && UseHVXDbl) + return std::make_pair(0U, &Hexagon::VectorRegs128BRegClass); + else + return std::make_pair(0U, &Hexagon::VecDblRegsRegClass); + case MVT::v256i8: + case MVT::v128i16: + case MVT::v64i32: + case MVT::v32i64: + return std::make_pair(0U, &Hexagon::VecDblRegs128BRegClass); + } + + default: + llvm_unreachable("Unknown asm register class"); + } + } + + return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); +} + +/// isFPImmLegal - Returns true if the target can instruction select the +/// specified FP immediate natively. If false, the legalizer will +/// materialize the FP immediate as a load from a constant pool. +bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { + return Subtarget.hasV5TOps(); +} + +/// isLegalAddressingMode - Return true if the addressing mode represented by +/// AM is legal for this target, for a load/store of the specified type. +bool HexagonTargetLowering::isLegalAddressingMode(const DataLayout &DL, + const AddrMode &AM, Type *Ty, + unsigned AS) const { + // Allows a signed-extended 11-bit immediate field. + if (AM.BaseOffs <= -(1LL << 13) || AM.BaseOffs >= (1LL << 13)-1) + return false; + + // No global is ever allowed as a base. 
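+ // For example, "r1 + #8" is accepted, while "&global + #8" or a scaled form
+ // such as "r1 + r2<<#2" is rejected.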
+ if (AM.BaseGV)
+ return false;
+
+ int Scale = AM.Scale;
+ if (Scale < 0) Scale = -Scale;
+ switch (Scale) {
+ case 0: // No scale reg, "r+i", "r", or just "i".
+ break;
+ default: // No scaled addressing mode.
+ return false;
+ }
+ return true;
+}
+
+/// Return true if folding a constant offset with the given GlobalAddress is
+/// legal. It is frequently not legal in PIC relocation models.
+bool HexagonTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA)
+ const {
+ return HTM.getRelocationModel() == Reloc::Static;
+}
+
+
+/// isLegalICmpImmediate - Return true if the specified immediate is a legal
+/// icmp immediate, that is, the target has icmp instructions which can compare
+/// a register against the immediate without having to materialize the
+/// immediate into a register.
+bool HexagonTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
+ return Imm >= -512 && Imm <= 511;
+}
+
+/// IsEligibleForTailCallOptimization - Check whether the call is eligible
+/// for tail call optimization. Targets which want to do tail call
+/// optimization should implement this function.
+bool HexagonTargetLowering::IsEligibleForTailCallOptimization(
+ SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ bool isCalleeStructRet,
+ bool isCallerStructRet,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const {
+ const Function *CallerF = DAG.getMachineFunction().getFunction();
+ CallingConv::ID CallerCC = CallerF->getCallingConv();
+ bool CCMatch = CallerCC == CalleeCC;
+
+ // ***************************************************************************
+ // Look for obvious safe cases to perform tail call optimization that do not
+ // require ABI changes.
+ // ***************************************************************************
+
+ // If this is a tail call via a function pointer, then don't do it!
+ if (!(isa<GlobalAddressSDNode>(Callee)) &&
+ !(isa<ExternalSymbolSDNode>(Callee))) {
+ return false;
+ }
+
+ // Do not optimize if the calling conventions do not match.
+ if (!CCMatch)
+ return false;
+
+ // Do not tail call optimize vararg calls.
+ if (isVarArg)
+ return false;
+
+ // Also avoid tail call optimization if either caller or callee uses struct
+ // return semantics.
+ if (isCalleeStructRet || isCallerStructRet)
+ return false;
+
+ // In addition to the cases above, tail call optimization should also be
+ // disabled when the calling convention requires at least one outgoing
+ // argument to be passed on the stack. We cannot check that here because at
+ // this point that information is not available.
+ return true;
+}
+
+// Return true when the given node fits in a positive half word.
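+// (That is, a constant in the range [1, 32767], or a sign_extend_inreg node.)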
+bool llvm::isPositiveHalfWord(SDNode *N) { + ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N); + if (CN && CN->getSExtValue() > 0 && isInt<16>(CN->getSExtValue())) + return true; + + switch (N->getOpcode()) { + default: + return false; + case ISD::SIGN_EXTEND_INREG: + return true; + } +} + +std::pair<const TargetRegisterClass*, uint8_t> +HexagonTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI, + MVT VT) const { + const TargetRegisterClass *RRC = nullptr; + + uint8_t Cost = 1; + switch (VT.SimpleTy) { + default: + return TargetLowering::findRepresentativeClass(TRI, VT); + case MVT::v64i8: + case MVT::v32i16: + case MVT::v16i32: + case MVT::v8i64: + RRC = &Hexagon::VectorRegsRegClass; + break; + case MVT::v128i8: + case MVT::v64i16: + case MVT::v32i32: + case MVT::v16i64: + if (Subtarget.hasV60TOps() && Subtarget.useHVXOps() && + Subtarget.useHVXDblOps()) + RRC = &Hexagon::VectorRegs128BRegClass; + else + RRC = &Hexagon::VecDblRegsRegClass; + break; + case MVT::v256i8: + case MVT::v128i16: + case MVT::v64i32: + case MVT::v32i64: + RRC = &Hexagon::VecDblRegs128BRegClass; + break; + } + return std::make_pair(RRC, Cost); +} + +Value *HexagonTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, + AtomicOrdering Ord) const { + BasicBlock *BB = Builder.GetInsertBlock(); + Module *M = BB->getParent()->getParent(); + Type *Ty = cast<PointerType>(Addr->getType())->getElementType(); + unsigned SZ = Ty->getPrimitiveSizeInBits(); + assert((SZ == 32 || SZ == 64) && "Only 32/64-bit atomic loads supported"); + Intrinsic::ID IntID = (SZ == 32) ? Intrinsic::hexagon_L2_loadw_locked + : Intrinsic::hexagon_L4_loadd_locked; + Value *Fn = Intrinsic::getDeclaration(M, IntID); + return Builder.CreateCall(Fn, Addr, "larx"); +} + +/// Perform a store-conditional operation to Addr. Return the status of the +/// store. This should be 0 if the store succeeded, non-zero otherwise. +Value *HexagonTargetLowering::emitStoreConditional(IRBuilder<> &Builder, + Value *Val, Value *Addr, AtomicOrdering Ord) const { + BasicBlock *BB = Builder.GetInsertBlock(); + Module *M = BB->getParent()->getParent(); + Type *Ty = Val->getType(); + unsigned SZ = Ty->getPrimitiveSizeInBits(); + assert((SZ == 32 || SZ == 64) && "Only 32/64-bit atomic stores supported"); + Intrinsic::ID IntID = (SZ == 32) ? Intrinsic::hexagon_S2_storew_locked + : Intrinsic::hexagon_S4_stored_locked; + Value *Fn = Intrinsic::getDeclaration(M, IntID); + Value *Call = Builder.CreateCall(Fn, {Addr, Val}, "stcx"); + Value *Cmp = Builder.CreateICmpEQ(Call, Builder.getInt32(0), ""); + Value *Ext = Builder.CreateZExt(Cmp, Type::getInt32Ty(M->getContext())); + return Ext; +} + +TargetLowering::AtomicExpansionKind +HexagonTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { + // Do not expand loads and stores that don't exceed 64 bits. + return LI->getType()->getPrimitiveSizeInBits() > 64 + ? AtomicExpansionKind::LLOnly + : AtomicExpansionKind::None; +} + +bool HexagonTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { + // Do not expand loads and stores that don't exceed 64 bits. 
+ return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() > 64; +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h new file mode 100644 index 0000000..bf378b9 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h @@ -0,0 +1,252 @@ +//===-- HexagonISelLowering.h - Hexagon DAG Lowering Interface --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interfaces that Hexagon uses to lower LLVM code into a +// selection DAG. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONISELLOWERING_H +#define LLVM_LIB_TARGET_HEXAGON_HEXAGONISELLOWERING_H + +#include "Hexagon.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/Target/TargetLowering.h" + +namespace llvm { + +// Return true when the given node fits in a positive half word. +bool isPositiveHalfWord(SDNode *N); + + namespace HexagonISD { + enum NodeType : unsigned { + OP_BEGIN = ISD::BUILTIN_OP_END, + + CONST32 = OP_BEGIN, + CONST32_GP, // For marking data present in GP. + FCONST32, + ALLOCA, + ARGEXTEND, + + AT_GOT, // Index in GOT. + AT_PCREL, // Offset relative to PC. + + CALLv3, // A V3+ call instruction. + CALLv3nr, // A V3+ call instruction that doesn't return. + CALLR, + + RET_FLAG, // Return with a flag operand. + BARRIER, // Memory barrier. + JT, // Jump table. + CP, // Constant pool. + + POPCOUNT, + COMBINE, + PACKHL, + VSPLATB, + VSPLATH, + SHUFFEB, + SHUFFEH, + SHUFFOB, + SHUFFOH, + VSXTBH, + VSXTBW, + VSRAW, + VSRAH, + VSRLW, + VSRLH, + VSHLW, + VSHLH, + VCMPBEQ, + VCMPBGT, + VCMPBGTU, + VCMPHEQ, + VCMPHGT, + VCMPHGTU, + VCMPWEQ, + VCMPWGT, + VCMPWGTU, + + INSERT, + INSERTRP, + EXTRACTU, + EXTRACTURP, + VCOMBINE, + TC_RETURN, + EH_RETURN, + DCFETCH, + + OP_END + }; + } + + class HexagonSubtarget; + + class HexagonTargetLowering : public TargetLowering { + int VarArgsFrameOffset; // Frame offset to start of varargs area. + + bool CanReturnSmallStruct(const Function* CalleeFn, unsigned& RetSize) + const; + void promoteLdStType(EVT VT, EVT PromotedLdStVT); + const HexagonTargetMachine &HTM; + const HexagonSubtarget &Subtarget; + + public: + explicit HexagonTargetLowering(const TargetMachine &TM, + const HexagonSubtarget &ST); + + /// IsEligibleForTailCallOptimization - Check whether the call is eligible + /// for tail call optimization. Targets which want to do tail call + /// optimization should implement this function. + bool IsEligibleForTailCallOptimization(SDValue Callee, + CallingConv::ID CalleeCC, bool isVarArg, bool isCalleeStructRet, + bool isCallerStructRet, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG& DAG) const; + + bool isTruncateFree(Type *Ty1, Type *Ty2) const override; + bool isTruncateFree(EVT VT1, EVT VT2) const override; + + bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override; + + // Should we expand the build vector with shuffles? 
+ bool shouldExpandBuildVectorWithShuffles(EVT VT, + unsigned DefinedValues) const override; + + SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; + const char *getTargetNodeName(unsigned Opcode) const override; + SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEXTRACT_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINSERT_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEH_LABEL(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, + bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl, + SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const override; + SDValue LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const override; + SDValue LowerCallResult(SDValue Chain, SDValue InFlag, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl, + SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, + const SmallVectorImpl<SDValue> &OutVals, SDValue Callee) const; + + SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const; + SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, + bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, SDLoc dl, + SelectionDAG &DAG) const override; + + bool mayBeEmittedAsTailCall(CallInst *CI) const override; + MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *BB) const override; + + /// If a physical register, this returns the register that receives the + /// exception address on entry to an EH pad. + unsigned + getExceptionPointerRegister(const Constant *PersonalityFn) const override { + return Hexagon::R0; + } + + /// If a physical register, this returns the register that receives the + /// exception typeid on entry to a landing pad. 
+ unsigned + getExceptionSelectorRegister(const Constant *PersonalityFn) const override { + return Hexagon::R1; + } + + SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; + EVT getSetCCResultType(const DataLayout &, LLVMContext &C, + EVT VT) const override { + if (!VT.isVector()) + return MVT::i1; + else + return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements()); + } + + bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, + SDValue &Base, SDValue &Offset, + ISD::MemIndexedMode &AM, + SelectionDAG &DAG) const override; + + std::pair<unsigned, const TargetRegisterClass *> + getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, + StringRef Constraint, MVT VT) const override; + + unsigned + getInlineAsmMemConstraint(StringRef ConstraintCode) const override { + if (ConstraintCode == "o") + return InlineAsm::Constraint_o; + else if (ConstraintCode == "v") + return InlineAsm::Constraint_v; + return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); + } + + // Intrinsics + SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; + /// isLegalAddressingMode - Return true if the addressing mode represented + /// by AM is legal for this target, for a load/store of the specified type. + /// The type may be VoidTy, in which case only return true if the addressing + /// mode is legal for a load/store of any legal type. + /// TODO: Handle pre/postinc as well. + bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, + Type *Ty, unsigned AS) const override; + /// Return true if folding a constant offset with the given GlobalAddress + /// is legal. It is frequently not legal in PIC relocation models. + bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; + + bool isFPImmLegal(const APFloat &Imm, EVT VT) const override; + + /// isLegalICmpImmediate - Return true if the specified immediate is legal + /// icmp immediate, that is the target has icmp instructions which can + /// compare a register against the immediate without having to materialize + /// the immediate into a register. + bool isLegalICmpImmediate(int64_t Imm) const override; + + /// Returns relocation base for the given PIC jumptable. + SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) + const override; + + // Handling of atomic RMW instructions. 
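+ // (RMW operations are expanded to LL/SC loops built from the load-locked /
+ //  store-conditional intrinsics produced by emitLoadLinked and
+ //  emitStoreConditional; see shouldExpandAtomicRMWInIR below.)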
+ Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr, + AtomicOrdering Ord) const override; + Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val, + Value *Addr, AtomicOrdering Ord) const override; + AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override; + bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override; + AtomicExpansionKind + shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override { + return AtomicExpansionKind::LLSC; + } + + protected: + std::pair<const TargetRegisterClass*, uint8_t> + findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) + const override; + }; +} // end namespace llvm + +#endif // Hexagon_ISELLOWERING_H diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrAlias.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrAlias.td new file mode 100644 index 0000000..5a1a69b --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrAlias.td @@ -0,0 +1,462 @@ +//==- HexagonInstrAlias.td - Hexagon Instruction Aliases ---*- tablegen -*--==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// Hexagon Instruction Mappings +//===----------------------------------------------------------------------===// + + +def : InstAlias<"memb({GP}+#$addr) = $Nt.new", + (S2_storerbnewgp u16_0Imm:$addr, IntRegs:$Nt)>; +def : InstAlias<"memh({GP}+#$addr) = $Nt.new", + (S2_storerhnewgp u16_1Imm:$addr, IntRegs:$Nt)>; +def : InstAlias<"memw({GP}+#$addr) = $Nt.new", + (S2_storerinewgp u16_2Imm:$addr, IntRegs:$Nt)>; +def : InstAlias<"memb({GP}+#$addr) = $Nt", + (S2_storerbgp u16_0Imm:$addr, IntRegs:$Nt)>; +def : InstAlias<"memh({GP}+#$addr) = $Nt", + (S2_storerhgp u16_1Imm:$addr, IntRegs:$Nt)>; +def : InstAlias<"memh({GP}+#$addr) = $Nt.h", + (S2_storerfgp u16_1Imm:$addr, IntRegs:$Nt)>; +def : InstAlias<"memw({GP}+#$addr) = $Nt", + (S2_storerigp u16_2Imm:$addr, IntRegs:$Nt)>; +def : InstAlias<"memd({GP}+#$addr) = $Nt", + (S2_storerdgp u16_3Imm:$addr, DoubleRegs:$Nt)>; + +def : InstAlias<"$Nt = memb({GP}+#$addr)", + (L2_loadrbgp IntRegs:$Nt, u16_0Imm:$addr)>; +def : InstAlias<"$Nt = memub({GP}+#$addr)", + (L2_loadrubgp IntRegs:$Nt, u16_0Imm:$addr)>; +def : InstAlias<"$Nt = memh({GP}+#$addr)", + (L2_loadrhgp IntRegs:$Nt, u16_1Imm:$addr)>; +def : InstAlias<"$Nt = memuh({GP}+#$addr)", + (L2_loadruhgp IntRegs:$Nt, u16_1Imm:$addr)>; +def : InstAlias<"$Nt = memw({GP}+#$addr)", + (L2_loadrigp IntRegs:$Nt, u16_2Imm:$addr)>; +def : InstAlias<"$Nt = memd({GP}+#$addr)", + (L2_loadrdgp DoubleRegs:$Nt, u16_3Imm:$addr)>; + +// Alias of: memXX($Rs+#XX) = $Rt to memXX($Rs) = $Rt +def : InstAlias<"memb($Rs) = $Rt", + (S2_storerb_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"memh($Rs) = $Rt", + (S2_storerh_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"memh($Rs) = $Rt.h", + (S2_storerf_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"memw($Rs) = $Rt", + (S2_storeri_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"memb($Rs) = $Rt.new", + (S2_storerbnew_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"memh($Rs) = $Rt.new", + (S2_storerhnew_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"memw($Rs) = $Rt.new", + (S2_storerinew_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"memb($Rs) = #$S8", + (S4_storeirb_io IntRegs:$Rs, 0, s8Ext:$S8), 0>; + +def : InstAlias<"memh($Rs) = #$S8", + (S4_storeirh_io 
IntRegs:$Rs, 0, s8Ext:$S8), 0>; + +def : InstAlias<"memw($Rs) = #$S8", + (S4_storeiri_io IntRegs:$Rs, 0, s8Ext:$S8), 0>; + +def : InstAlias<"memd($Rs) = $Rtt", + (S2_storerd_io IntRegs:$Rs, 0, DoubleRegs:$Rtt), 0>; + +def : InstAlias<"memb($Rs) = setbit(#$U5)", + (L4_ior_memopb_io IntRegs:$Rs, 0, u5Imm:$U5), 0>; + +def : InstAlias<"memh($Rs) = setbit(#$U5)", + (L4_ior_memoph_io IntRegs:$Rs, 0, u5Imm:$U5), 0>; + +def : InstAlias<"memw($Rs) = setbit(#$U5)", + (L4_ior_memopw_io IntRegs:$Rs, 0, u5Imm:$U5), 0>; + +def : InstAlias<"memb($Rs) = clrbit(#$U5)", + (L4_iand_memopb_io IntRegs:$Rs, 0, u5Imm:$U5), 0>; + +def : InstAlias<"memh($Rs) = clrbit(#$U5)", + (L4_iand_memoph_io IntRegs:$Rs, 0, u5Imm:$U5), 0>; + +def : InstAlias<"memw($Rs) = clrbit(#$U5)", + (L4_iand_memopw_io IntRegs:$Rs, 0, u5Imm:$U5), 0>; + +// Alias of: $Rd = memXX($Rs+#XX) to $Rd = memXX($Rs) +def : InstAlias<"$Rd = memb($Rs)", + (L2_loadrb_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rd = memub($Rs)", + (L2_loadrub_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rd = memh($Rs)", + (L2_loadrh_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rd = memuh($Rs)", + (L2_loadruh_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rd = memw($Rs)", + (L2_loadri_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rdd = memd($Rs)", + (L2_loadrd_io DoubleRegs:$Rdd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rd = memubh($Rs)", + (L2_loadbzw2_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rdd = memubh($Rs)", + (L2_loadbzw4_io DoubleRegs:$Rdd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rd = membh($Rs)", + (L2_loadbsw2_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rdd = membh($Rs)", + (L2_loadbsw4_io DoubleRegs:$Rdd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rdd = memb_fifo($Rs)", + (L2_loadalignb_io DoubleRegs:$Rdd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rdd = memh_fifo($Rs)", + (L2_loadalignh_io DoubleRegs:$Rdd, IntRegs:$Rs, 0), 0>; + +// Alias of: if ($Pt) $Rd = memXX($Rs + #$u6_X) +// to: if ($Pt) $Rd = memXX($Rs) +def : InstAlias<"if ($Pt) $Rd = memb($Rs)", + (L2_ploadrbt_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if ($Pt) $Rd = memub($Rs)", + (L2_ploadrubt_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if ($Pt) $Rd = memh($Rs)", + (L2_ploadrht_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if ($Pt) $Rd = memuh($Rs)", + (L2_ploadruht_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if ($Pt) $Rd = memw($Rs)", + (L2_ploadrit_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if ($Pt) $Rdd = memd($Rs)", + (L2_ploadrdt_io DoubleRegs:$Rdd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +// Alias of: if ($Pt) memXX($Rs + #$u6_X) = $Rt +// to: if ($Pt) memXX($Rs) = $Rt +def : InstAlias<"if ($Pt) memb($Rs) = $Rt", + (S2_pstorerbt_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pt) memh($Rs) = $Rt", + (S2_pstorerht_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pt) memh($Rs) = $Rt.h", + (S2_pstorerft_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pt) memw($Rs) = $Rt", + (S2_pstorerit_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pt) memd($Rs) = $Rtt", + (S2_pstorerdt_io PredRegs:$Pt, IntRegs:$Rs, 0, DoubleRegs:$Rtt), 0>; + +def : InstAlias<"if ($Pt) memb($Rs) = $Rt.new", + (S2_pstorerbnewt_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : 
InstAlias<"if ($Pt) memh($Rs) = $Rt.new", + (S2_pstorerhnewt_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pt) memw($Rs) = $Rt.new", + (S2_pstorerinewt_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pt.new) memb($Rs) = $Rt.new", + (S4_pstorerbnewtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pt.new) memh($Rs) = $Rt.new", + (S4_pstorerhnewtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pt.new) memw($Rs) = $Rt.new", + (S4_pstorerinewtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + + +// Alias of: if (!$Pt) $Rd = memXX($Rs + #$u6_X) +// to: if (!$Pt) $Rd = memXX($Rs) +def : InstAlias<"if (!$Pt) $Rd = memb($Rs)", + (L2_ploadrbf_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt) $Rd = memub($Rs)", + (L2_ploadrubf_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt) $Rd = memh($Rs)", + (L2_ploadrhf_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt) $Rd = memuh($Rs)", + (L2_ploadruhf_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt) $Rd = memw($Rs)", + (L2_ploadrif_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt) $Rdd = memd($Rs)", + (L2_ploadrdf_io DoubleRegs:$Rdd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +// Alias of: if (!$Pt) memXX($Rs + #$u6_X) = $Rt +// to: if (!$Pt) memXX($Rs) = $Rt +def : InstAlias<"if (!$Pt) memb($Rs) = $Rt", + (S2_pstorerbf_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pt) memh($Rs) = $Rt", + (S2_pstorerhf_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pt) memh($Rs) = $Rt.h", + (S2_pstorerff_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pt) memw($Rs) = $Rt", + (S2_pstorerif_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pt) memd($Rs) = $Rtt", + (S2_pstorerdf_io PredRegs:$Pt, IntRegs:$Rs, 0, DoubleRegs:$Rtt), 0>; + +def : InstAlias<"if (!$Pt) memb($Rs) = $Rt.new", + (S2_pstorerbnewf_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pt) memh($Rs) = $Rt.new", + (S2_pstorerhnewf_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pt) memw($Rs) = $Rt.new", + (S2_pstorerinewf_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pt.new) memb($Rs) = $Rt.new", + (S4_pstorerbnewfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pt.new) memh($Rs) = $Rt.new", + (S4_pstorerhnewfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pt.new) memw($Rs) = $Rt.new", + (S4_pstorerinewfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pt) memb($Rs) = #$S6", + (S4_storeirbt_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def : InstAlias<"if ($Pt) memh($Rs) = #$S6", + (S4_storeirht_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def : InstAlias<"if ($Pt) memw($Rs) = #$S6", + (S4_storeirit_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def : InstAlias<"if ($Pt.new) memb($Rs) = #$S6", + (S4_storeirbtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def : InstAlias<"if ($Pt.new) memh($Rs) = #$S6", + (S4_storeirhtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def : InstAlias<"if ($Pt.new) memw($Rs) = #$S6", + (S4_storeiritnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def : InstAlias<"if (!$Pt) 
memb($Rs) = #$S6", + (S4_storeirbf_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def : InstAlias<"if (!$Pt) memh($Rs) = #$S6", + (S4_storeirhf_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def : InstAlias<"if (!$Pt) memw($Rs) = #$S6", + (S4_storeirif_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def : InstAlias<"if (!$Pt.new) memb($Rs) = #$S6", + (S4_storeirbfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def : InstAlias<"if (!$Pt.new) memh($Rs) = #$S6", + (S4_storeirhfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def : InstAlias<"if (!$Pt.new) memw($Rs) = #$S6", + (S4_storeirifnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +// Alias of: memXX($Rs + $u6_X) |= $Rt, also &=, +=, -= +// to: memXX($Rs) |= $Rt +def : InstAlias<"memb($Rs) &= $Rt", + (L4_and_memopb_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memb($Rs) |= $Rt", + (L4_or_memopb_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memb($Rs) += $Rt", + (L4_add_memopb_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memb($Rs) -= $Rt", + (L4_sub_memopb_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memb($Rs) += #$U5", + (L4_iadd_memopb_io IntRegs:$Rs, 0, u5Imm:$U5), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memb($Rs) -= #$U5", + (L4_isub_memopb_io IntRegs:$Rs, 0, u5Imm:$U5), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memh($Rs) &= $Rt", + (L4_and_memoph_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memh($Rs) |= $Rt", + (L4_or_memoph_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memh($Rs) += $Rt", + (L4_add_memoph_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memh($Rs) -= $Rt", + (L4_sub_memoph_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memh($Rs) += #$U5", + (L4_iadd_memoph_io IntRegs:$Rs, 0, u5Imm:$U5), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memh($Rs) -= #$U5", + (L4_isub_memoph_io IntRegs:$Rs, 0, u5Imm:$U5), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memw($Rs) &= $Rt", + (L4_and_memopw_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memw($Rs) |= $Rt", + (L4_or_memopw_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memw($Rs) += $Rt", + (L4_add_memopw_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memw($Rs) -= $Rt", + (L4_sub_memopw_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memw($Rs) += #$U5", + (L4_iadd_memopw_io IntRegs:$Rs, 0, u5Imm:$U5), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memw($Rs) -= #$U5", + (L4_isub_memopw_io IntRegs:$Rs, 0, u5Imm:$U5), 0>, + Requires<[UseMEMOP]>; + +// +// Alias of: if ($Pv.new) memX($Rs) = $Rt +// to: if (p3.new) memX(r17 + #0) = $Rt +def : InstAlias<"if ($Pv.new) memb($Rs) = $Rt", + (S4_pstorerbtnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pv.new) memh($Rs) = $Rt", + (S4_pstorerhtnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pv.new) memh($Rs) = $Rt.h", + (S4_pstorerftnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pv.new) memw($Rs) = $Rt", + (S4_pstoreritnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pv.new) memd($Rs) = $Rtt", + (S4_pstorerdtnew_io + PredRegs:$Pv, IntRegs:$Rs, 0, 
DoubleRegs:$Rtt), 0>; + +def : InstAlias<"if (!$Pv.new) memb($Rs) = $Rt", + (S4_pstorerbfnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pv.new) memh($Rs) = $Rt", + (S4_pstorerhfnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pv.new) memh($Rs) = $Rt.h", + (S4_pstorerffnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pv.new) memw($Rs) = $Rt", + (S4_pstorerifnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pv.new) memd($Rs) = $Rtt", + (S4_pstorerdfnew_io + PredRegs:$Pv, IntRegs:$Rs, 0, DoubleRegs:$Rtt), 0>; + +// +// Alias of: if ($Pt.new) $Rd = memub($Rs) -- And if (!$Pt.new) ... +// to: if ($Pt.new) $Rd = memub($Rs + #$u6_0) +def : InstAlias<"if ($Pt.new) $Rd = memub($Rs)", + (L2_ploadrubtnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if ($Pt.new) $Rd = memb($Rs)", + (L2_ploadrbtnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if ($Pt.new) $Rd = memh($Rs)", + (L2_ploadrhtnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if ($Pt.new) $Rd = memuh($Rs)", + (L2_ploadruhtnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if ($Pt.new) $Rd = memw($Rs)", + (L2_ploadritnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if ($Pt.new) $Rdd = memd($Rs)", + (L2_ploadrdtnew_io DoubleRegs:$Rdd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt.new) $Rd = memub($Rs)", + (L2_ploadrubfnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt.new) $Rd = memb($Rs)", + (L2_ploadrbfnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt.new) $Rd = memh($Rs)", + (L2_ploadrhfnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt.new) $Rd = memuh($Rs)", + (L2_ploadruhfnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt.new) $Rd = memw($Rs)", + (L2_ploadrifnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt.new) $Rdd = memd($Rs)", + (L2_ploadrdfnew_io DoubleRegs:$Rdd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"dcfetch($Rs)", + (Y2_dcfetchbo IntRegs:$Rs, 0), 0>; + +// Alias of some insn mappings, others must be handled by the parser +def : InstAlias<"$Pd=cmp.lt($Rs, $Rt)", + (C2_cmpgt PredRegs:$Pd, IntRegs:$Rt, IntRegs:$Rs), 0>; +def : InstAlias<"$Pd=cmp.ltu($Rs, $Rt)", + (C2_cmpgtu PredRegs:$Pd, IntRegs:$Rt, IntRegs:$Rs), 0>; + +// Rd=neg(Rs) is aliased to Rd=sub(#0,Rs) +def : InstAlias<"$Rd = neg($Rs)", + (A2_subri IntRegs:$Rd, 0, IntRegs:$Rs), 0>; + +def : InstAlias<"m0 = $Rs", (A2_tfrrcr C6, IntRegs:$Rs)>; +def : InstAlias<"$Rd = m0", (A2_tfrcrr IntRegs:$Rd, C6)>; +def : InstAlias<"m1 = $Rs", (A2_tfrrcr C7, IntRegs:$Rs)>; +def : InstAlias<"$Rd = m1", (A2_tfrcrr IntRegs:$Rd, C7)>; + +def : InstAlias<"$Pd = $Ps", + (C2_or PredRegs:$Pd, PredRegs:$Ps, PredRegs:$Ps), 0>; + +def : InstAlias<"$Rdd = vaddb($Rss, $Rtt)", + (A2_vaddub DoubleRegs:$Rdd, DoubleRegs:$Rss, DoubleRegs:$Rtt), 1>; + +def : InstAlias<"$Rdd = vsubb($Rss,$Rtt)", + (A2_vsubub DoubleRegs:$Rdd, DoubleRegs:$Rss, DoubleRegs:$Rtt), 0>; + +def : InstAlias<"$Rd = mpyui($Rs,$Rt)", + (M2_mpyi IntRegs:$Rd, IntRegs:$Rs, IntRegs:$Rt), 0>; + +// Assembler mapped insns: cmp.lt(a,b) -> cmp.gt(b,a) +def : InstAlias<"$Pd=cmp.lt($Rs, $Rt)", + (C2_cmpgt PredRegs:$Pd, IntRegs:$Rt, IntRegs:$Rs), 0>; +def : InstAlias<"$Pd=cmp.ltu($Rs, $Rt)", + 
(C2_cmpgtu PredRegs:$Pd, IntRegs:$Rt, IntRegs:$Rs), 0>; + diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrEnc.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrEnc.td new file mode 100644 index 0000000..280832f --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrEnc.td @@ -0,0 +1,1019 @@ +class Enc_COPROC_VX_3op_v<bits<15> opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + bits<5> src2; + + let Inst{31-16} = { opc{14-4}, src2}; + let Inst{13-0} = { opc{3}, src1, opc{2-0}, dst}; +} + +class V6_vtmpyb_enc : Enc_COPROC_VX_3op_v<0b000110010000000>; +class V6_vtmpybus_enc : Enc_COPROC_VX_3op_v<0b000110010000001>; +class V6_vdmpyhb_enc : Enc_COPROC_VX_3op_v<0b000110010000010>; +class V6_vrmpyub_enc : Enc_COPROC_VX_3op_v<0b000110010000011>; +class V6_vrmpybus_enc : Enc_COPROC_VX_3op_v<0b000110010000100>; +class V6_vdsaduh_enc : Enc_COPROC_VX_3op_v<0b000110010000101>; +class V6_vdmpybus_enc : Enc_COPROC_VX_3op_v<0b000110010000110>; +class V6_vdmpybus_dv_enc : Enc_COPROC_VX_3op_v<0b000110010000111>; +class V6_vtmpyb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001000>; +class V6_vtmpybus_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001001>; +class V6_vtmpyhb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001010>; +class V6_vdmpyhb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001011>; +class V6_vrmpyub_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001100>; +class V6_vrmpybus_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001101>; +class V6_vdmpybus_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001110>; +class V6_vdmpybus_dv_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001111>; +class V6_vdmpyhsusat_enc : Enc_COPROC_VX_3op_v<0b000110010010000>; +class V6_vdmpyhsuisat_enc : Enc_COPROC_VX_3op_v<0b000110010010001>; +class V6_vdmpyhsat_enc : Enc_COPROC_VX_3op_v<0b000110010010010>; +class V6_vdmpyhisat_enc : Enc_COPROC_VX_3op_v<0b000110010010011>; +class V6_vdmpyhb_dv_enc : Enc_COPROC_VX_3op_v<0b000110010010100>; +class V6_vmpybus_enc : Enc_COPROC_VX_3op_v<0b000110010010101>; +class V6_vmpabus_enc : Enc_COPROC_VX_3op_v<0b000110010010110>; +class V6_vmpahb_enc : Enc_COPROC_VX_3op_v<0b000110010010111>; +class V6_vdmpyhsusat_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011000>; +class V6_vdmpyhsuisat_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011001>; +class V6_vdmpyhisat_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011010>; +class V6_vdmpyhsat_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011011>; +class V6_vdmpyhb_dv_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011100>; +class V6_vmpybus_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011101>; +class V6_vmpabus_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011110>; +class V6_vmpahb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011111>; +class V6_vmpyh_enc : Enc_COPROC_VX_3op_v<0b000110010100000>; +class V6_vmpyhss_enc : Enc_COPROC_VX_3op_v<0b000110010100001>; +class V6_vmpyhsrs_enc : Enc_COPROC_VX_3op_v<0b000110010100010>; +class V6_vmpyuh_enc : Enc_COPROC_VX_3op_v<0b000110010100011>; +class V6_vmpyhsat_acc_enc : Enc_COPROC_VX_3op_v<0b000110010101000>; +class V6_vmpyuh_acc_enc : Enc_COPROC_VX_3op_v<0b000110010101001>; +class V6_vmpyiwb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010101010>; +class V6_vmpyiwh_acc_enc : Enc_COPROC_VX_3op_v<0b000110010101011>; +class V6_vmpyihb_enc : Enc_COPROC_VX_3op_v<0b000110010110000>; +class V6_vror_enc : Enc_COPROC_VX_3op_v<0b000110010110001>; +class V6_vasrw_enc : Enc_COPROC_VX_3op_v<0b000110010110101>; +class V6_vasrh_enc : Enc_COPROC_VX_3op_v<0b000110010110110>; +class V6_vaslw_enc : Enc_COPROC_VX_3op_v<0b000110010110111>; +class V6_vdsaduh_acc_enc : 
Enc_COPROC_VX_3op_v<0b000110010111000>; +class V6_vmpyihb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010111001>; +class V6_vaslw_acc_enc : Enc_COPROC_VX_3op_v<0b000110010111010>; +class V6_vasrw_acc_enc : Enc_COPROC_VX_3op_v<0b000110010111101>; +class V6_vaslh_enc : Enc_COPROC_VX_3op_v<0b000110011000000>; +class V6_vlsrw_enc : Enc_COPROC_VX_3op_v<0b000110011000001>; +class V6_vlsrh_enc : Enc_COPROC_VX_3op_v<0b000110011000010>; +class V6_vmpyiwh_enc : Enc_COPROC_VX_3op_v<0b000110011000111>; +class V6_vmpyub_acc_enc : Enc_COPROC_VX_3op_v<0b000110011001000>; +class V6_vmpyiwb_enc : Enc_COPROC_VX_3op_v<0b000110011010000>; +class V6_vtmpyhb_enc : Enc_COPROC_VX_3op_v<0b000110011010100>; +class V6_vmpyub_enc : Enc_COPROC_VX_3op_v<0b000110011100000>; +class V6_vrmpyubv_enc : Enc_COPROC_VX_3op_v<0b000111000000000>; +class V6_vrmpybv_enc : Enc_COPROC_VX_3op_v<0b000111000000001>; +class V6_vrmpybusv_enc : Enc_COPROC_VX_3op_v<0b000111000000010>; +class V6_vdmpyhvsat_enc : Enc_COPROC_VX_3op_v<0b000111000000011>; +class V6_vmpybv_enc : Enc_COPROC_VX_3op_v<0b000111000000100>; +class V6_vmpyubv_enc : Enc_COPROC_VX_3op_v<0b000111000000101>; +class V6_vmpybusv_enc : Enc_COPROC_VX_3op_v<0b000111000000110>; +class V6_vmpyhv_enc : Enc_COPROC_VX_3op_v<0b000111000000111>; +class V6_vrmpyubv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001000>; +class V6_vrmpybv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001001>; +class V6_vrmpybusv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001010>; +class V6_vdmpyhvsat_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001011>; +class V6_vmpybv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001100>; +class V6_vmpyubv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001101>; +class V6_vmpybusv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001110>; +class V6_vmpyhv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001111>; +class V6_vmpyuhv_enc : Enc_COPROC_VX_3op_v<0b000111000010000>; +class V6_vmpyhvsrs_enc : Enc_COPROC_VX_3op_v<0b000111000010001>; +class V6_vmpyhus_enc : Enc_COPROC_VX_3op_v<0b000111000010010>; +class V6_vmpabusv_enc : Enc_COPROC_VX_3op_v<0b000111000010011>; +class V6_vmpyih_enc : Enc_COPROC_VX_3op_v<0b000111000010100>; +class V6_vand_enc : Enc_COPROC_VX_3op_v<0b000111000010101>; +class V6_vor_enc : Enc_COPROC_VX_3op_v<0b000111000010110>; +class V6_vxor_enc : Enc_COPROC_VX_3op_v<0b000111000010111>; +class V6_vmpyuhv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000011000>; +class V6_vmpyhus_acc_enc : Enc_COPROC_VX_3op_v<0b000111000011001>; +class V6_vmpyih_acc_enc : Enc_COPROC_VX_3op_v<0b000111000011100>; +class V6_vmpyiewuh_acc_enc : Enc_COPROC_VX_3op_v<0b000111000011101>; +class V6_vmpyowh_sacc_enc : Enc_COPROC_VX_3op_v<0b000111000011110>; +class V6_vmpyowh_rnd_sacc_enc : Enc_COPROC_VX_3op_v<0b000111000011111>; +class V6_vaddw_enc : Enc_COPROC_VX_3op_v<0b000111000100000>; +class V6_vaddubsat_enc : Enc_COPROC_VX_3op_v<0b000111000100001>; +class V6_vadduhsat_enc : Enc_COPROC_VX_3op_v<0b000111000100010>; +class V6_vaddhsat_enc : Enc_COPROC_VX_3op_v<0b000111000100011>; +class V6_vaddwsat_enc : Enc_COPROC_VX_3op_v<0b000111000100100>; +class V6_vsubb_enc : Enc_COPROC_VX_3op_v<0b000111000100101>; +class V6_vsubh_enc : Enc_COPROC_VX_3op_v<0b000111000100110>; +class V6_vsubw_enc : Enc_COPROC_VX_3op_v<0b000111000100111>; +class V6_vmpyiewh_acc_enc : Enc_COPROC_VX_3op_v<0b000111000101000>; +class V6_vsububsat_enc : Enc_COPROC_VX_3op_v<0b000111000110000>; +class V6_vsubuhsat_enc : Enc_COPROC_VX_3op_v<0b000111000110001>; +class V6_vsubhsat_enc : Enc_COPROC_VX_3op_v<0b000111000110010>; +class V6_vsubwsat_enc : 
Enc_COPROC_VX_3op_v<0b000111000110011>; +class V6_vaddb_dv_enc : Enc_COPROC_VX_3op_v<0b000111000110100>; +class V6_vaddh_dv_enc : Enc_COPROC_VX_3op_v<0b000111000110101>; +class V6_vaddw_dv_enc : Enc_COPROC_VX_3op_v<0b000111000110110>; +class V6_vaddubsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111000110111>; +class V6_vadduhsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000000>; +class V6_vaddhsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000001>; +class V6_vaddwsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000010>; +class V6_vsubb_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000011>; +class V6_vsubh_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000100>; +class V6_vsubw_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000101>; +class V6_vsububsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000110>; +class V6_vsubuhsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000111>; +class V6_vsubhsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001010000>; +class V6_vsubwsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001010001>; +class V6_vaddubh_enc : Enc_COPROC_VX_3op_v<0b000111001010010>; +class V6_vadduhw_enc : Enc_COPROC_VX_3op_v<0b000111001010011>; +class V6_vaddhw_enc : Enc_COPROC_VX_3op_v<0b000111001010100>; +class V6_vsububh_enc : Enc_COPROC_VX_3op_v<0b000111001010101>; +class V6_vsubuhw_enc : Enc_COPROC_VX_3op_v<0b000111001010110>; +class V6_vsubhw_enc : Enc_COPROC_VX_3op_v<0b000111001010111>; +class V6_vabsdiffub_enc : Enc_COPROC_VX_3op_v<0b000111001100000>; +class V6_vabsdiffh_enc : Enc_COPROC_VX_3op_v<0b000111001100001>; +class V6_vabsdiffuh_enc : Enc_COPROC_VX_3op_v<0b000111001100010>; +class V6_vabsdiffw_enc : Enc_COPROC_VX_3op_v<0b000111001100011>; +class V6_vavgub_enc : Enc_COPROC_VX_3op_v<0b000111001100100>; +class V6_vavguh_enc : Enc_COPROC_VX_3op_v<0b000111001100101>; +class V6_vavgh_enc : Enc_COPROC_VX_3op_v<0b000111001100110>; +class V6_vavgw_enc : Enc_COPROC_VX_3op_v<0b000111001100111>; +class V6_vnavgub_enc : Enc_COPROC_VX_3op_v<0b000111001110000>; +class V6_vnavgh_enc : Enc_COPROC_VX_3op_v<0b000111001110001>; +class V6_vnavgw_enc : Enc_COPROC_VX_3op_v<0b000111001110010>; +class V6_vavgubrnd_enc : Enc_COPROC_VX_3op_v<0b000111001110011>; +class V6_vavguhrnd_enc : Enc_COPROC_VX_3op_v<0b000111001110100>; +class V6_vavghrnd_enc : Enc_COPROC_VX_3op_v<0b000111001110101>; +class V6_vavgwrnd_enc : Enc_COPROC_VX_3op_v<0b000111001110110>; +class V6_vmpabuuv_enc : Enc_COPROC_VX_3op_v<0b000111001110111>; +class V6_vminub_enc : Enc_COPROC_VX_3op_v<0b000111110000001>; +class V6_vminuh_enc : Enc_COPROC_VX_3op_v<0b000111110000010>; +class V6_vminh_enc : Enc_COPROC_VX_3op_v<0b000111110000011>; +class V6_vminw_enc : Enc_COPROC_VX_3op_v<0b000111110000100>; +class V6_vmaxub_enc : Enc_COPROC_VX_3op_v<0b000111110000101>; +class V6_vmaxuh_enc : Enc_COPROC_VX_3op_v<0b000111110000110>; +class V6_vmaxh_enc : Enc_COPROC_VX_3op_v<0b000111110000111>; +class V6_vmaxw_enc : Enc_COPROC_VX_3op_v<0b000111110010000>; +class V6_vdelta_enc : Enc_COPROC_VX_3op_v<0b000111110010001>; +class V6_vrdelta_enc : Enc_COPROC_VX_3op_v<0b000111110010011>; +class V6_vdealb4w_enc : Enc_COPROC_VX_3op_v<0b000111110010111>; +class V6_vmpyowh_rnd_enc : Enc_COPROC_VX_3op_v<0b000111110100000>; +class V6_vshuffeb_enc : Enc_COPROC_VX_3op_v<0b000111110100001>; +class V6_vshuffob_enc : Enc_COPROC_VX_3op_v<0b000111110100010>; +class V6_vshufeh_enc : Enc_COPROC_VX_3op_v<0b000111110100011>; +class V6_vshufoh_enc : Enc_COPROC_VX_3op_v<0b000111110100100>; +class V6_vshufoeh_enc : Enc_COPROC_VX_3op_v<0b000111110100101>; +class V6_vshufoeb_enc : Enc_COPROC_VX_3op_v<0b000111110100110>; +class 
V6_vcombine_enc : Enc_COPROC_VX_3op_v<0b000111110100111>; +class V6_vmpyieoh_enc : Enc_COPROC_VX_3op_v<0b000111110110000>; +class V6_vsathub_enc : Enc_COPROC_VX_3op_v<0b000111110110010>; +class V6_vsatwh_enc : Enc_COPROC_VX_3op_v<0b000111110110011>; +class V6_vroundwh_enc : Enc_COPROC_VX_3op_v<0b000111110110100>; +class V6_vroundwuh_enc : Enc_COPROC_VX_3op_v<0b000111110110101>; +class V6_vroundhb_enc : Enc_COPROC_VX_3op_v<0b000111110110110>; +class V6_vroundhub_enc : Enc_COPROC_VX_3op_v<0b000111110110111>; +class V6_vasrwv_enc : Enc_COPROC_VX_3op_v<0b000111111010000>; +class V6_vlsrwv_enc : Enc_COPROC_VX_3op_v<0b000111111010001>; +class V6_vlsrhv_enc : Enc_COPROC_VX_3op_v<0b000111111010010>; +class V6_vasrhv_enc : Enc_COPROC_VX_3op_v<0b000111111010011>; +class V6_vaslwv_enc : Enc_COPROC_VX_3op_v<0b000111111010100>; +class V6_vaslhv_enc : Enc_COPROC_VX_3op_v<0b000111111010101>; +class V6_vaddb_enc : Enc_COPROC_VX_3op_v<0b000111111010110>; +class V6_vaddh_enc : Enc_COPROC_VX_3op_v<0b000111111010111>; +class V6_vmpyiewuh_enc : Enc_COPROC_VX_3op_v<0b000111111100000>; +class V6_vmpyiowh_enc : Enc_COPROC_VX_3op_v<0b000111111100001>; +class V6_vpackeb_enc : Enc_COPROC_VX_3op_v<0b000111111100010>; +class V6_vpackeh_enc : Enc_COPROC_VX_3op_v<0b000111111100011>; +class V6_vpackhub_sat_enc : Enc_COPROC_VX_3op_v<0b000111111100101>; +class V6_vpackhb_sat_enc : Enc_COPROC_VX_3op_v<0b000111111100110>; +class V6_vpackwuh_sat_enc : Enc_COPROC_VX_3op_v<0b000111111100111>; +class V6_vpackwh_sat_enc : Enc_COPROC_VX_3op_v<0b000111111110000>; +class V6_vpackob_enc : Enc_COPROC_VX_3op_v<0b000111111110001>; +class V6_vpackoh_enc : Enc_COPROC_VX_3op_v<0b000111111110010>; +class V6_vmpyewuh_enc : Enc_COPROC_VX_3op_v<0b000111111110101>; +class V6_vmpyowh_enc : Enc_COPROC_VX_3op_v<0b000111111110111>; +class V6_extractw_enc : Enc_COPROC_VX_3op_v<0b100100100000001>; +class M6_vabsdiffub_enc : Enc_COPROC_VX_3op_v<0b111010001010000>; +class M6_vabsdiffb_enc : Enc_COPROC_VX_3op_v<0b111010001110000>; + +class Enc_COPROC_VX_cmp<bits<13> opc> : OpcodeHexagon { + bits<2> dst; + bits<5> src1; + bits<5> src2; + + let Inst{31-16} = { 0b00011, opc{12-7}, src2{4-0} }; + let Inst{13-0} = { opc{6}, src1{4-0}, opc{5-0}, dst{1-0} }; +} + +class V6_vandvrt_acc_enc : Enc_COPROC_VX_cmp<0b0010111100000>; +class V6_vandvrt_enc : Enc_COPROC_VX_cmp<0b0011010010010>; +class V6_veqb_and_enc : Enc_COPROC_VX_cmp<0b1001001000000>; +class V6_veqh_and_enc : Enc_COPROC_VX_cmp<0b1001001000001>; +class V6_veqw_and_enc : Enc_COPROC_VX_cmp<0b1001001000010>; +class V6_vgtb_and_enc : Enc_COPROC_VX_cmp<0b1001001000100>; +class V6_vgth_and_enc : Enc_COPROC_VX_cmp<0b1001001000101>; +class V6_vgtw_and_enc : Enc_COPROC_VX_cmp<0b1001001000110>; +class V6_vgtub_and_enc : Enc_COPROC_VX_cmp<0b1001001001000>; +class V6_vgtuh_and_enc : Enc_COPROC_VX_cmp<0b1001001001001>; +class V6_vgtuw_and_enc : Enc_COPROC_VX_cmp<0b1001001001010>; +class V6_veqb_or_enc : Enc_COPROC_VX_cmp<0b1001001010000>; +class V6_veqh_or_enc : Enc_COPROC_VX_cmp<0b1001001010001>; +class V6_veqw_or_enc : Enc_COPROC_VX_cmp<0b1001001010010>; +class V6_vgtb_or_enc : Enc_COPROC_VX_cmp<0b1001001010100>; +class V6_vgth_or_enc : Enc_COPROC_VX_cmp<0b1001001010101>; +class V6_vgtw_or_enc : Enc_COPROC_VX_cmp<0b1001001010110>; +class V6_vgtub_or_enc : Enc_COPROC_VX_cmp<0b1001001011000>; +class V6_vgtuh_or_enc : Enc_COPROC_VX_cmp<0b1001001011001>; +class V6_vgtuw_or_enc : Enc_COPROC_VX_cmp<0b1001001011010>; +class V6_veqb_xor_enc : Enc_COPROC_VX_cmp<0b1001001100000>; +class V6_veqh_xor_enc : 
Enc_COPROC_VX_cmp<0b1001001100001>; +class V6_veqw_xor_enc : Enc_COPROC_VX_cmp<0b1001001100010>; +class V6_vgtb_xor_enc : Enc_COPROC_VX_cmp<0b1001001100100>; +class V6_vgth_xor_enc : Enc_COPROC_VX_cmp<0b1001001100101>; +class V6_vgtw_xor_enc : Enc_COPROC_VX_cmp<0b1001001100110>; +class V6_vgtub_xor_enc : Enc_COPROC_VX_cmp<0b1001001101000>; +class V6_vgtuh_xor_enc : Enc_COPROC_VX_cmp<0b1001001101001>; +class V6_vgtuw_xor_enc : Enc_COPROC_VX_cmp<0b1001001101010>; +class V6_veqb_enc : Enc_COPROC_VX_cmp<0b1111000000000>; +class V6_veqh_enc : Enc_COPROC_VX_cmp<0b1111000000001>; +class V6_veqw_enc : Enc_COPROC_VX_cmp<0b1111000000010>; +class V6_vgtb_enc : Enc_COPROC_VX_cmp<0b1111000000100>; +class V6_vgth_enc : Enc_COPROC_VX_cmp<0b1111000000101>; +class V6_vgtw_enc : Enc_COPROC_VX_cmp<0b1111000000110>; +class V6_vgtub_enc : Enc_COPROC_VX_cmp<0b1111000001000>; +class V6_vgtuh_enc : Enc_COPROC_VX_cmp<0b1111000001001>; +class V6_vgtuw_enc : Enc_COPROC_VX_cmp<0b1111000001010>; + +class Enc_COPROC_VX_p2op<bits<5> opc> : OpcodeHexagon { + bits<2> src1; + bits<5> dst; + bits<5> src2; + + let Inst{31-16} = { 0b00011110, src1{1-0}, 0b0000, opc{4-3} }; + let Inst{13-0} = { 1, src2{4-0}, opc{2-0}, dst{4-0} }; +} + +class V6_vaddbq_enc : Enc_COPROC_VX_p2op<0b01000>; +class V6_vaddhq_enc : Enc_COPROC_VX_p2op<0b01001>; +class V6_vaddwq_enc : Enc_COPROC_VX_p2op<0b01010>; +class V6_vaddbnq_enc : Enc_COPROC_VX_p2op<0b01011>; +class V6_vaddhnq_enc : Enc_COPROC_VX_p2op<0b01100>; +class V6_vaddwnq_enc : Enc_COPROC_VX_p2op<0b01101>; +class V6_vsubbq_enc : Enc_COPROC_VX_p2op<0b01110>; +class V6_vsubhq_enc : Enc_COPROC_VX_p2op<0b01111>; +class V6_vsubwq_enc : Enc_COPROC_VX_p2op<0b10000>; +class V6_vsubbnq_enc : Enc_COPROC_VX_p2op<0b10001>; +class V6_vsubhnq_enc : Enc_COPROC_VX_p2op<0b10010>; +class V6_vsubwnq_enc : Enc_COPROC_VX_p2op<0b10011>; + +class Enc_COPROC_VX_2op<bits<6> opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + + let Inst{31-16} = { 0b00011110000000, opc{5-4} }; + let Inst{13-0} = { opc{3}, src1{4-0}, opc{2-0}, dst{4-0} }; +} + +class V6_vabsh_enc : Enc_COPROC_VX_2op<0b000000>; +class V6_vabsh_sat_enc : Enc_COPROC_VX_2op<0b000001>; +class V6_vabsw_enc : Enc_COPROC_VX_2op<0b000010>; +class V6_vabsw_sat_enc : Enc_COPROC_VX_2op<0b000011>; +class V6_vnot_enc : Enc_COPROC_VX_2op<0b000100>; +class V6_vdealh_enc : Enc_COPROC_VX_2op<0b000110>; +class V6_vdealb_enc : Enc_COPROC_VX_2op<0b000111>; +class V6_vunpackob_enc : Enc_COPROC_VX_2op<0b001000>; +class V6_vunpackoh_enc : Enc_COPROC_VX_2op<0b001001>; +class V6_vunpackub_enc : Enc_COPROC_VX_2op<0b010000>; +class V6_vunpackuh_enc : Enc_COPROC_VX_2op<0b010001>; +class V6_vunpackb_enc : Enc_COPROC_VX_2op<0b010010>; +class V6_vunpackh_enc : Enc_COPROC_VX_2op<0b010011>; +class V6_vshuffh_enc : Enc_COPROC_VX_2op<0b010111>; +class V6_vshuffb_enc : Enc_COPROC_VX_2op<0b100000>; +class V6_vzb_enc : Enc_COPROC_VX_2op<0b100001>; +class V6_vzh_enc : Enc_COPROC_VX_2op<0b100010>; +class V6_vsb_enc : Enc_COPROC_VX_2op<0b100011>; +class V6_vsh_enc : Enc_COPROC_VX_2op<0b100100>; +class V6_vcl0w_enc : Enc_COPROC_VX_2op<0b100101>; +class V6_vpopcounth_enc : Enc_COPROC_VX_2op<0b100110>; +class V6_vcl0h_enc : Enc_COPROC_VX_2op<0b100111>; +class V6_vnormamtw_enc : Enc_COPROC_VX_2op<0b110100>; +class V6_vnormamth_enc : Enc_COPROC_VX_2op<0b110101>; +class V6_vassign_enc : Enc_COPROC_VX_2op<0b111111>; + +class Enc_COPROC_VMEM_vL32_b_ai<bits<4> opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + bits<10> src2; + bits<4> src2_vector; + + let src2_vector = src2{9-6}; + let 
Inst{31-16} = { 0b001010000, opc{3}, 0, src1{4-0} }; + let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, opc{2-0}, dst{4-0} }; +} + +class V6_vL32b_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b0000>; +class V6_vL32b_cur_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b0001>; +class V6_vL32b_tmp_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b0010>; +class V6_vL32Ub_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b0111>; +class V6_vL32b_nt_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b1000>; +class V6_vL32b_nt_cur_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b1001>; +class V6_vL32b_nt_tmp_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b1010>; + +class Enc_COPROC_VMEM_vL32_b_ai_128B<bits<4> opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + bits<11> src2; + bits<4> src2_vector; + + let src2_vector = src2{10-7}; + let Inst{31-16} = { 0b001010000, opc{3}, 0, src1{4-0} }; + let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, opc{2-0}, dst{4-0} }; +} + +class V6_vL32b_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b0000>; +class V6_vL32b_cur_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b0001>; +class V6_vL32b_tmp_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b0010>; +class V6_vL32Ub_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b0111>; +class V6_vL32b_nt_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b1000>; +class V6_vL32b_nt_cur_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b1001>; +class V6_vL32b_nt_tmp_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b1010>; + +class Enc_COPROC_VMEM_vS32_b_ai_64B<bits<4> opc> : OpcodeHexagon { + bits<5> src1; + bits<10> src2; + bits<4> src2_vector; + bits<5> src3; + + let src2_vector = src2{9-6}; + let Inst{31-16} = { 0b001010000, opc{3}, 1, src1{4-0} }; + let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, opc{2-0}, src3{4-0} }; +} + +class Enc_COPROC_VMEM_vS32_b_ai_128B<bits<4> opc> : OpcodeHexagon { + bits<5> src1; + bits<11> src2; + bits<4> src2_vector; + bits<5> src3; + + let src2_vector = src2{10-7}; + let Inst{31-16} = { 0b001010000, opc{3}, 1, src1{4-0} }; + let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, opc{2-0}, src3{4-0} }; +} + +class V6_vS32b_ai_enc : Enc_COPROC_VMEM_vS32_b_ai_64B<0b0000>; +class V6_vS32Ub_ai_enc : Enc_COPROC_VMEM_vS32_b_ai_64B<0b0111>; +class V6_vS32b_nt_ai_enc : Enc_COPROC_VMEM_vS32_b_ai_64B<0b1000>; + +class V6_vS32b_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_ai_128B<0b0000>; +class V6_vS32Ub_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_ai_128B<0b0111>; +class V6_vS32b_nt_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_ai_128B<0b1000>; + +class Enc_COPROC_VMEM_vS32b_n_ew_ai_64B<bits<1> opc> : OpcodeHexagon { + bits<5> src1; + bits<10> src2; + bits<4> src2_vector; + bits<3> src3; + + let src2_vector = src2{9-6}; + let Inst{31-16} = { 0b001010000, opc{0}, 1, src1{4-0} }; + let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, 0b00100, src3{2-0} }; +} + +class V6_vS32b_new_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_ai_64B<0>; +class V6_vS32b_nt_new_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_ai_64B<1>; + +class Enc_COPROC_VMEM_vS32b_n_ew_ai_128B<bits<1> opc> : OpcodeHexagon { + bits<5> src1; + bits<11> src2; + bits<4> src2_vector; + bits<3> src3; + + let src2_vector = src2{10-7}; + let Inst{31-16} = { 0b001010000, opc{0}, 1, src1{4-0} }; + let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, 0b00100, src3{2-0} }; +} + +class V6_vS32b_new_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_ai_128B<0>; +class V6_vS32b_nt_new_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_ai_128B<1>; + +class Enc_COPROC_VMEM_vS32_b_pred_ai<bits<5> opc> : OpcodeHexagon { + bits<2> src1; + bits<5> src2; + bits<10> src3; + bits<4> 
src3_vector; + bits<5> src4; + + let src3_vector = src3{9-6}; + let Inst{31-16} = { 0b001010001, opc{4-3}, src2{4-0} }; + let Inst{13-0} = { src3_vector{3}, src1{1-0}, src3_vector{2-0}, opc{2-0}, src4{4-0} }; +} + +class Enc_COPROC_VMEM_vS32_b_pred_ai_128B<bits<5> opc> : OpcodeHexagon { + bits<2> src1; + bits<5> src2; + bits<11> src3; + bits<4> src3_vector; + bits<5> src4; + + let src3_vector = src3{10-7}; + let Inst{31-16} = { 0b001010001, opc{4-3}, src2{4-0} }; + let Inst{13-0} = { src3_vector{3}, src1{1-0}, src3_vector{2-0}, opc{2-0}, src4{4-0} }; +} + +class V6_vS32b_qpred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b00000>; +class V6_vS32b_nqpred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b00001>; +class V6_vS32b_pred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b01000>; +class V6_vS32b_npred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b01001>; +class V6_vS32Ub_pred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b01110>; +class V6_vS32Ub_npred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b01111>; +class V6_vS32b_nt_qpred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b10000>; +class V6_vS32b_nt_nqpred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b10001>; +class V6_vS32b_nt_pred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b11000>; +class V6_vS32b_nt_npred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b11001>; + +class V6_vS32b_qpred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b00000>; +class V6_vS32b_nqpred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b00001>; +class V6_vS32b_pred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b01000>; +class V6_vS32b_npred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b01001>; +class V6_vS32Ub_pred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b01110>; +class V6_vS32Ub_npred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b01111>; +class V6_vS32b_nt_qpred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b10000>; +class V6_vS32b_nt_nqpred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b10001>; +class V6_vS32b_nt_pred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b11000>; +class V6_vS32b_nt_npred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b11001>; + +class Enc_COPROC_VMEM_vS32b_n_ew_pred_ai<bits<4> opc> : OpcodeHexagon { + bits<2> src1; + bits<5> src2; + bits<10> src3; + bits<4> src3_vector; + bits<3> src4; + + let src3_vector = src3{9-6}; + let Inst{31-16} = { 0b001010001, opc{3}, 1, src2{4-0} }; + let Inst{13-0} = { src3_vector{3}, src1{1-0}, src3_vector{2-0}, 0b01, opc{2-0}, src4{2-0} }; +} + +class V6_vS32b_new_pred_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai<0b0000>; +class V6_vS32b_new_npred_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai<0b0101>; +class V6_vS32b_nt_new_pred_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai<0b1010>; +class V6_vS32b_nt_new_npred_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai<0b1111>; + +class Enc_COPROC_VMEM_vS32b_n_ew_pred_ai_128B<bits<4> opc> : OpcodeHexagon { + bits<2> src1; + bits<5> src2; + bits<11> src3; + bits<4> src3_vector; + bits<3> src4; + + let src3_vector = src3{10-7}; + let Inst{31-16} = { 0b001010001, opc{3}, 1, src2{4-0} }; + let Inst{13-0} = { src3_vector{3}, src1{1-0}, src3_vector{2-0}, 0b01, opc{2-0}, src4{2-0} }; +} + +class V6_vS32b_new_pred_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai_128B<0b0000>; +class V6_vS32b_new_npred_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai_128B<0b0101>; +class V6_vS32b_nt_new_pred_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai_128B<0b1010>; +class V6_vS32b_nt_new_npred_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai_128B<0b1111>; + +// TODO: Change script to 
generate dst, src1, src2 instead of +// dst, dst2, src1. +class Enc_COPROC_VMEM_vL32_b_pi<bits<4> opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + bits<9> src2; + bits<3> src2_vector; + + let src2_vector = src2{8-6}; + let Inst{31-16} = { 0b001010010, opc{3}, 0, src1{4-0} }; + let Inst{13-0} = { 0b000, src2_vector{2-0}, opc{2-0}, dst{4-0} }; +} + +class V6_vL32b_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b0000>; +class V6_vL32b_cur_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b0001>; +class V6_vL32b_tmp_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b0010>; +class V6_vL32Ub_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b0111>; +class V6_vL32b_nt_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b1000>; +class V6_vL32b_nt_cur_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b1001>; +class V6_vL32b_nt_tmp_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b1010>; + +class Enc_COPROC_VMEM_vL32_b_pi_128B<bits<4> opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + bits<10> src2; + bits<3> src2_vector; + + let src2_vector = src2{9-7}; + let Inst{31-16} = { 0b001010010, opc{3}, 0, src1{4-0} }; + let Inst{13-0} = { 0b000, src2_vector{2-0}, opc{2-0}, dst{4-0} }; +} + +class V6_vL32b_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b0000>; +class V6_vL32b_cur_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b0001>; +class V6_vL32b_tmp_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b0010>; +class V6_vL32Ub_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b0111>; +class V6_vL32b_nt_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b1000>; +class V6_vL32b_nt_cur_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b1001>; +class V6_vL32b_nt_tmp_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b1010>; + + +// TODO: Change script to generate src1, src2 and src3 instead of +// dst, src1, src2. +class Enc_COPROC_VMEM_vS32_b_pi<bits<4> opc> : OpcodeHexagon { + bits<5> src1; + bits<9> src2; + bits<3> src2_vector; + bits<5> src3; + + let src2_vector = src2{8-6}; + let Inst{31-16} = { 0b001010010, opc{3}, 1, src1{4-0} }; + let Inst{10-0} = {src2_vector{2-0}, opc{2-0}, src3{4-0} }; +} + +class V6_vS32b_pi_enc : Enc_COPROC_VMEM_vS32_b_pi<0b0000>; +class V6_vS32Ub_pi_enc : Enc_COPROC_VMEM_vS32_b_pi<0b0111>; +class V6_vS32b_nt_pi_enc : Enc_COPROC_VMEM_vS32_b_pi<0b1000>; + +class Enc_COPROC_VMEM_vS32_b_pi_128B<bits<4> opc> : OpcodeHexagon { + bits<5> src1; + bits<10> src2; + bits<3> src2_vector; + bits<5> src3; + + let src2_vector = src2{9-7}; + let Inst{31-16} = { 0b001010010, opc{3}, 1, src1{4-0} }; + let Inst{10-0} = {src2_vector{2-0}, opc{2-0}, src3{4-0} }; +} + +class V6_vS32b_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pi_128B<0b0000>; +class V6_vS32Ub_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pi_128B<0b0111>; +class V6_vS32b_nt_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pi_128B<0b1000>; + +// TODO: Change script to generate src1, src2 and src3 instead of +// dst, src1, src2. 
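A concrete reading of the 64-byte post-increment store encoding above (Enc_COPROC_VMEM_vS32_b_pi, used by V6_vS32b_pi_enc): only bits 8-6 of the 9-bit offset field are encoded, so the byte increment is presumably a multiple of the 64-byte vector length, and it lands in Inst{10-8} next to the low opc bits and the source register. The sketch below simply packs the fields the way the TableGen lets describe; the function name is made up, and bits the class leaves unset are filled with zero here.

    #include <cstdint>

    // Illustrative encoder for V6_vS32b_pi_enc (opc = 0b0000), following
    // Enc_COPROC_VMEM_vS32_b_pi above. ByteOffset must be a multiple of 64.
    uint32_t encode_vS32b_pi(unsigned RxBase, int ByteOffset, unsigned VsSrc) {
      uint32_t OffVec = (uint32_t(ByteOffset) >> 6) & 0x7; // src2{8-6}: offset in vectors
      uint32_t Insn = 0;
      Insn |= 0x052u << 23;           // Inst{31-23} = 0b001010010
      Insn |= 0u << 22;               // opc{3} = 0
      Insn |= 1u << 21;               // fixed 1 in the store forms
      Insn |= (RxBase & 0x1F) << 16;  // src1{4-0}: base/post-increment register
      Insn |= OffVec << 8;            // src2_vector{2-0} -> Inst{10-8}
      Insn |= 0u << 5;                // opc{2-0} = 0b000 -> Inst{7-5}
      Insn |= VsSrc & 0x1F;           // src3{4-0}: vector register being stored
      return Insn;
    }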
+class Enc_COPROC_VMEM_vS32b_n_ew_pi<bits<1> opc> : OpcodeHexagon { + bits<5> src1; + bits<9> src2; + bits<3> src2_vector; + bits<3> src3; + + let src2_vector = src2{8-6}; + let Inst{31-16} = { 0b001010010, opc{0}, 1, src1{4-0} }; + let Inst{13-0} = { 0b000, src2_vector{2-0}, 0b00100, src3{2-0} }; +} + +class V6_vS32b_new_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pi<0>; +class V6_vS32b_nt_new_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pi<1>; + +class Enc_COPROC_VMEM_vS32b_n_ew_pi_128B<bits<1> opc> : OpcodeHexagon { + bits<5> src1; + bits<10> src2; + bits<3> src2_vector; + bits<3> src3; + + let src2_vector = src2{9-7}; + let Inst{31-16} = { 0b001010010, opc{0}, 1, src1{4-0} }; + let Inst{13-0} = { 0b000, src2_vector{2-0}, 0b00100, src3{2-0} }; +} + +class V6_vS32b_new_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pi_128B<0>; +class V6_vS32b_nt_new_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pi_128B<1>; + +// TODO: Change script to generate src1, src2,src3 and src4 instead of +// dst, src1, src2, src3. +class Enc_COPROC_VMEM_vS32_b_pred_pi<bits<5> opc> : OpcodeHexagon { + bits<2> src1; + bits<5> src2; + bits<9> src3; + bits<3> src3_vector; + bits<5> src4; + + let src3_vector = src3{8-6}; + let Inst{31-16} = { 0b001010011, opc{4-3}, src2{4-0} }; + let Inst{13-0} = { 0, src1{1-0}, src3_vector{2-0}, opc{2-0}, src4{4-0} }; +} + +class V6_vS32b_qpred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b00000>; +class V6_vS32b_nqpred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b00001>; +class V6_vS32b_pred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b01000>; +class V6_vS32b_npred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b01001>; +class V6_vS32Ub_pred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b01110>; +class V6_vS32Ub_npred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b01111>; +class V6_vS32b_nt_qpred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b10000>; +class V6_vS32b_nt_nqpred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b10001>; +class V6_vS32b_nt_pred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b11000>; +class V6_vS32b_nt_npred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b11001>; + +// TODO: Change script to generate src1, src2,src3 and src4 instead of +// dst, src1, src2, src3. 
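In the same spirit, the predicated post-increment store class just defined (Enc_COPROC_VMEM_vS32_b_pred_pi) adds a two-bit predicate register at Inst{12-11} and spreads its five opc bits across Inst{22-21} and Inst{7-5} to select the qpred/npred, unaligned, and non-temporal variants. Below is a sketch for the plain "if (Pv)" form, V6_vS32b_pred_pi_enc (opc = 0b01000); the function name is made up and unset bits are left zero.

    #include <cstdint>

    // Illustrative encoder for V6_vS32b_pred_pi_enc, following
    // Enc_COPROC_VMEM_vS32_b_pred_pi above. ByteOffset must be a multiple of 64.
    uint32_t encode_vS32b_pred_pi(unsigned Pv, unsigned RxBase, int ByteOffset,
                                  unsigned VsSrc) {
      const uint32_t Opc = 0x08;                            // 0b01000: "if (Pv) vS32b"
      uint32_t OffVec = (uint32_t(ByteOffset) >> 6) & 0x7;  // src3{8-6}: offset in vectors
      uint32_t Insn = 0;
      Insn |= 0x053u << 23;             // Inst{31-23} = 0b001010011
      Insn |= ((Opc >> 3) & 0x3) << 21; // opc{4-3} -> Inst{22-21}
      Insn |= (RxBase & 0x1F) << 16;    // src2{4-0}: base/post-increment register
      Insn |= (Pv & 0x3) << 11;         // src1{1-0}: predicate register -> Inst{12-11}
      Insn |= OffVec << 8;              // src3_vector{2-0} -> Inst{10-8}
      Insn |= (Opc & 0x7) << 5;         // opc{2-0} -> Inst{7-5}
      Insn |= VsSrc & 0x1F;             // src4{4-0}: vector register being stored
      return Insn;
    }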
+class Enc_COPROC_VMEM_vS32_b_pred_pi_128B<bits<5> opc> : OpcodeHexagon { + bits<2> src1; + bits<5> src2; + bits<10> src3; + bits<3> src3_vector; + bits<5> src4; + + let src3_vector = src3{9-7}; + let Inst{31-16} = { 0b001010011, opc{4-3}, src2{4-0} }; + let Inst{13-0} = { 0, src1{1-0}, src3_vector{2-0}, opc{2-0}, src4{4-0} }; +} + +class V6_vS32b_qpred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b00000>; +class V6_vS32b_nqpred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b00001>; +class V6_vS32b_pred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b01000>; +class V6_vS32b_npred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b01001>; +class V6_vS32Ub_pred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b01110>; +class V6_vS32Ub_npred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b01111>; +class V6_vS32b_nt_qpred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b10000>; +class V6_vS32b_nt_nqpred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b10001>; +class V6_vS32b_nt_pred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b11000>; +class V6_vS32b_nt_npred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b11001>; + +class Enc_COPROC_VMEM_vS32b_n_ew_pred_pi<bits<4> opc> : OpcodeHexagon { + bits<2> src1; + bits<5> src2; + bits<9> src3; + bits<3> src3_vector; + bits<3> src4; + + let src3_vector = src3{8-6}; + let Inst{31-16} = { 0b001010011, opc{3}, 1, src2{4-0} }; + let Inst{13-0} = { 0, src1{1-0}, src3_vector{2-0}, 0b01, opc{2-0}, src4{2-0} }; +} + +class V6_vS32b_new_pred_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi<0b0000>; +class V6_vS32b_new_npred_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi<0b0101>; +class V6_vS32b_nt_new_pred_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi<0b1010>; +class V6_vS32b_nt_new_npred_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi<0b1111>; + +class Enc_COPROC_VMEM_vS32b_n_ew_pred_pi_128B<bits<4> opc> : OpcodeHexagon { + bits<2> src1; + bits<5> src2; + bits<10> src3; + bits<3> src3_vector; + bits<3> src4; + + let src3_vector = src3{9-7}; + let Inst{31-16} = { 0b001010011, opc{3}, 1, src2{4-0} }; + let Inst{13-0} = { 0, src1{1-0}, src3_vector{2-0}, 0b01, opc{2-0}, src4{2-0} }; +} + +class V6_vS32b_new_pred_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi_128B<0b0000>; +class V6_vS32b_new_npred_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi_128B<0b0101>; +class V6_vS32b_nt_new_pred_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi_128B<0b1010>; +class V6_vS32b_nt_new_npred_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi_128B<0b1111>; + +class Enc_LD_load_m<bits<13> opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + bits<1> src2; + + let Inst{31-16} = { opc{12}, 0, opc{11-10}, 1, opc{9-4}, src1{4-0} }; + let Inst{13-0} = { src2{0}, 0b000, opc{3}, 0, opc{2-0}, dst{4-0} }; +} + +class V6_vL32b_ppu_enc : Enc_LD_load_m<0b0100110000000>; +class V6_vL32b_cur_ppu_enc : Enc_LD_load_m<0b0100110000001>; +class V6_vL32b_tmp_ppu_enc : Enc_LD_load_m<0b0100110000010>; +class V6_vL32Ub_ppu_enc : Enc_LD_load_m<0b0100110000111>; +class V6_vL32b_nt_ppu_enc : Enc_LD_load_m<0b0100110100000>; +class V6_vL32b_nt_cur_ppu_enc : Enc_LD_load_m<0b0100110100001>; +class V6_vL32b_nt_tmp_ppu_enc : Enc_LD_load_m<0b0100110100010>; + +class Enc_COPROC_VMEM_vS32_b_ppu<bits<4> opc> : OpcodeHexagon { + bits<5> src1; + bits<1> src2; + bits<5> src3; + + let Inst{31-16} = { 0b001010110, opc{3}, 1, src1{4-0} }; + let Inst{13-0} = { src2{0}, 0b00000, opc{2-0}, src3{4-0} }; +} + +class V6_vS32b_ppu_enc : Enc_COPROC_VMEM_vS32_b_ppu<0b0000>; +class V6_vS32Ub_ppu_enc : 
Enc_COPROC_VMEM_vS32_b_ppu<0b0111>; +class V6_vS32b_nt_ppu_enc : Enc_COPROC_VMEM_vS32_b_ppu<0b1000>; + +class Enc_COPROC_VMEM_vS32b_new_ppu<bits<1> opc> : OpcodeHexagon { + bits<5> src1; + bits<1> src2; + bits<3> src3; + + let Inst{31-16} = { 0b001010110, opc{0}, 1, src1{4-0} }; + let Inst{13-0} = { src2{0}, 0b0000000100, src3{2-0} }; +} + +class V6_vS32b_new_ppu_enc : Enc_COPROC_VMEM_vS32b_new_ppu<0>; +class V6_vS32b_nt_new_ppu_enc : Enc_COPROC_VMEM_vS32b_new_ppu<1>; + +class Enc_COPROC_VMEM_vS32_b_pred_ppu<bits<5> opc> : OpcodeHexagon { + bits<2> src1; + bits<5> src2; + bits<1> src3; + bits<5> src4; + + let Inst{31-16} = { 0b001010111, opc{4-3}, src2{4-0} }; + let Inst{13-0} = { src3{0}, src1{1-0}, 0b000, opc{2-0}, src4{4-0} }; +} + +class V6_vS32b_qpred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b00000>; +class V6_vS32b_nqpred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b00001>; +class V6_vS32b_pred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b01000>; +class V6_vS32b_npred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b01001>; +class V6_vS32Ub_pred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b01110>; +class V6_vS32Ub_npred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b01111>; +class V6_vS32b_nt_qpred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b10000>; +class V6_vS32b_nt_nqpred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b10001>; +class V6_vS32b_nt_pred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b11000>; +class V6_vS32b_nt_npred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b11001>; + +class Enc_COPROC_VMEM_vS32b_n_ew_pred_ppu<bits<4> opc> : OpcodeHexagon { + bits<2> src1; + bits<5> src2; + bits<1> src3; + bits<3> src4; + + let Inst{31-16} = { 0b001010111, opc{3}, 1, src2{4-0} }; + let Inst{13-0} = { src3{0}, src1{1-0}, 0b00001, opc{2-0}, src4{2-0} }; +} + +class V6_vS32b_new_pred_ppu_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ppu<0b0000>; +class V6_vS32b_new_npred_ppu_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ppu<0b0101>; +class V6_vS32b_nt_new_pred_ppu_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ppu<0b1010>; +class V6_vS32b_nt_new_npred_ppu_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ppu<0b1111>; + + +class Enc_COPROC_VX_4op_i<bits<5> opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + bits<5> src2; + bits<1> src3; + + let Inst{31-16} = { 0b00011001, opc{4-2}, src2{4-0} }; + let Inst{13-0} = { opc{1}, src1{4-0}, 1, opc{0}, src3{0}, dst{4-0} }; +} + +class V6_vrmpybusi_enc : Enc_COPROC_VX_4op_i<0b01000>; +class V6_vrsadubi_enc : Enc_COPROC_VX_4op_i<0b01001>; +class V6_vrmpybusi_acc_enc : Enc_COPROC_VX_4op_i<0b01010>; +class V6_vrsadubi_acc_enc : Enc_COPROC_VX_4op_i<0b01011>; +class V6_vrmpyubi_acc_enc : Enc_COPROC_VX_4op_i<0b01111>; +class V6_vrmpyubi_enc : Enc_COPROC_VX_4op_i<0b10101>; + +class Enc_COPROC_VX_vandqrt<bits<5> opc> : OpcodeHexagon { + bits<5> dst; + bits<2> src1; + bits<5> src2; + + let Inst{31-16} = { 0b00011001, opc{4-3}, 1, src2{4-0} }; + let Inst{13-0} = { opc{2}, 0b000, src1{1-0}, opc{1-0}, 1, dst{4-0} }; +} + +class V6_vandqrt_acc_enc : Enc_COPROC_VX_vandqrt<0b01101>; +class V6_vandqrt_enc : Enc_COPROC_VX_vandqrt<0b10010>; + +class Enc_COPROC_VX_cards<bits<2> opc> : OpcodeHexagon { + bits<5> src1; + bits<5> src2; + bits<5> src3; + + let Inst{31-16} = { 0b00011001111, src3{4-0} }; + let Inst{13-0} = { 1, src1{4-0}, 0, opc{1-0}, src2{4-0} }; +} + +class V6_vshuff_enc : Enc_COPROC_VX_cards<0b01>; +class V6_vdeal_enc : Enc_COPROC_VX_cards<0b10>; + + +class Enc_COPROC_VX_v_cmov<bits<1> opc> : OpcodeHexagon { + bits<2> src1; + bits<5> dst; + bits<5> src2; + + let Inst{31-16} = { 0b0001101000, opc{0}, 
0b00000 }; + let Inst{13-0} = { 0, src2{4-0}, 0, src1{1-0}, dst{4-0} }; +} + +class V6_vcmov_enc : Enc_COPROC_VX_v_cmov<0>; +class V6_vncmov_enc : Enc_COPROC_VX_v_cmov<1>; + +class Enc_X_p3op<bits<8> opc> : OpcodeHexagon { + bits<2> src1; + bits<5> dst; + bits<5> src2; + bits<5> src3; + + let Inst{31-16} = { opc{7-5}, 0b1101, opc{4}, 0, opc{3-2}, src3{4-0} }; + let Inst{13-0} = { opc{1}, src2{4-0}, opc{0}, src1{1-0}, dst{4-0} }; +} + +class V6_vnccombine_enc : Enc_X_p3op<0b00001000>; +class V6_vccombine_enc : Enc_X_p3op<0b00001100>; + +class Enc_COPROC_VX_4op_r<bits<4> opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + bits<5> src2; + bits<3> src3; + + let Inst{31-16} = { 0b00011011, src2{4-0}, src3{2-0} }; + let Inst{13-0} = { opc{3}, src1{4-0}, opc{2-0}, dst{4-0} }; +} + +class V6_valignb_enc : Enc_COPROC_VX_4op_r<0b0000>; +class V6_vlalignb_enc : Enc_COPROC_VX_4op_r<0b0001>; +class V6_vasrwh_enc : Enc_COPROC_VX_4op_r<0b0010>; +class V6_vasrwhsat_enc : Enc_COPROC_VX_4op_r<0b0011>; +class V6_vasrwhrndsat_enc : Enc_COPROC_VX_4op_r<0b0100>; +class V6_vasrwuhsat_enc : Enc_COPROC_VX_4op_r<0b0101>; +class V6_vasrhubsat_enc : Enc_COPROC_VX_4op_r<0b0110>; +class V6_vasrhubrndsat_enc : Enc_COPROC_VX_4op_r<0b0111>; +class V6_vasrhbrndsat_enc : Enc_COPROC_VX_4op_r<0b1000>; +class V6_vlutvvb_enc : Enc_COPROC_VX_4op_r<0b1001>; +class V6_vshuffvdd_enc : Enc_COPROC_VX_4op_r<0b1011>; +class V6_vdealvdd_enc : Enc_COPROC_VX_4op_r<0b1100>; +class V6_vlutvvb_oracc_enc : Enc_COPROC_VX_4op_r<0b1101>; +class V6_vlutvwh_enc : Enc_COPROC_VX_4op_r<0b1110>; +class V6_vlutvwh_oracc_enc : Enc_COPROC_VX_4op_r<0b1111>; + +class Enc_S_3op_valign_i<bits<9> opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + bits<5> src2; + bits<3> src3; + + let Inst{31-16} = { opc{8-7}, 0, opc{6-3}, 0b00, opc{2-1}, src2{4-0} }; + let Inst{13-0} = { opc{0}, src1{4-0}, src3{2-0}, dst{4-0} }; +} + +class V6_vlutb_enc : Enc_S_3op_valign_i<0b001100000>; +class V6_vlutb_dv_enc : Enc_S_3op_valign_i<0b001100010>; +class V6_vlutb_acc_enc : Enc_S_3op_valign_i<0b001100100>; +class V6_vlutb_dv_acc_enc : Enc_S_3op_valign_i<0b001100110>; +class V6_valignbi_enc : Enc_S_3op_valign_i<0b001111011>; +class V6_vlalignbi_enc : Enc_S_3op_valign_i<0b001111111>; +class S2_valignib_enc : Enc_S_3op_valign_i<0b110000000>; +class S2_addasl_rrri_enc : Enc_S_3op_valign_i<0b110010000>; + +class Enc_COPROC_VX_3op_q<bits<3> opc> : OpcodeHexagon { + bits<2> dst; + bits<2> src1; + bits<2> src2; + + let Inst{31-16} = { 0b00011110, src2{1-0}, 0b000011 }; + let Inst{13-0} = { 0b0000, src1{1-0}, 0b000, opc{2-0}, dst{1-0} }; +} + +class V6_pred_and_enc : Enc_COPROC_VX_3op_q<0b000>; +class V6_pred_or_enc : Enc_COPROC_VX_3op_q<0b001>; +class V6_pred_xor_enc : Enc_COPROC_VX_3op_q<0b011>; +class V6_pred_or_n_enc : Enc_COPROC_VX_3op_q<0b100>; +class V6_pred_and_n_enc : Enc_COPROC_VX_3op_q<0b101>; + +class V6_pred_not_enc : OpcodeHexagon { + bits<2> dst; + bits<2> src1; + + let Inst{31-16} = { 0b0001111000000011 }; + let Inst{13-0} = { 0b0000, src1{1-0}, 0b000010, dst{1-0} }; +} + +class Enc_COPROC_VX_4op_q<bits<1> opc> : OpcodeHexagon { + bits<5> dst; + bits<2> src1; + bits<5> src2; + bits<5> src3; + + let Inst{31-16} = { 0b000111101, opc{0}, 1, src3{4-0} }; + let Inst{13-0} = { 1, src2{4-0}, 0, src1{1-0}, dst{4-0} }; +} + +class V6_vswap_enc : Enc_COPROC_VX_4op_q<0>; +class V6_vmux_enc : Enc_COPROC_VX_4op_q<1>; + +class Enc_X_2op<bits<16> opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + + let Inst{31-16} = { opc{15-5}, src1{4-0} }; + let Inst{13-0} = { opc{4-3}, 
0b0000, opc{2-0}, dst{4-0} }; +} + +class V6_lvsplatw_enc : Enc_X_2op<0b0001100110100001>; +class V6_vinsertwr_enc : Enc_X_2op<0b0001100110110001>; +class S6_vsplatrbp_enc : Enc_X_2op<0b1000010001000100>; + + +class Enc_CR_2op_r<bits<12> opc> : OpcodeHexagon { + bits<2> dst; + bits<5> src1; + + let Inst{31-16} = { opc{11}, 0, opc{10-7}, 0, opc{6-3}, src1{4-0} }; + let Inst{13-0} = { opc{2}, 0b000000, opc{1}, 0b000, opc{0}, dst{1-0} }; +} + +class V6_pred_scalar2_enc : Enc_CR_2op_r<0b001101101011>; +class Y5_l2locka_enc : Enc_CR_2op_r<0b110000111100>; + +class Enc_S_3op_i6<bits<9> opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + bits<6> src2; + + let Inst{31-16} = { 0b1000, opc{8-6}, 0, opc{5-3}, src1{4-0} }; + let Inst{13-0} = { src2{5-0}, opc{2-0}, dst{4-0} }; +} + +class S6_rol_i_p_enc : Enc_S_3op_i6<0b000000011>; +class S6_rol_i_p_nac_enc : Enc_S_3op_i6<0b001000011>; +class S6_rol_i_p_acc_enc : Enc_S_3op_i6<0b001000111>; +class S6_rol_i_p_and_enc : Enc_S_3op_i6<0b001010011>; +class S6_rol_i_p_or_enc : Enc_S_3op_i6<0b001010111>; +class S6_rol_i_p_xacc_enc : Enc_S_3op_i6<0b001100011>; + +class Enc_X_3op_r<bits<15> opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + bits<5> src2; + + let Inst{31-16} = { opc{14-4}, src1{4-0} }; + let Inst{13-0} = { opc{3}, src2{4-0}, opc{2-0}, dst{4-0} }; +} + +class S6_rol_i_r_enc : Enc_X_3op_r<0b100011000000011>; +class S6_rol_i_r_nac_enc : Enc_X_3op_r<0b100011100000011>; +class S6_rol_i_r_acc_enc : Enc_X_3op_r<0b100011100000111>; +class S6_rol_i_r_and_enc : Enc_X_3op_r<0b100011100100011>; +class S6_rol_i_r_or_enc : Enc_X_3op_r<0b100011100100111>; +class S6_rol_i_r_xacc_enc : Enc_X_3op_r<0b100011101000011>; +class S6_vtrunehb_ppp_enc : Enc_X_3op_r<0b110000011000011>; +class S6_vtrunohb_ppp_enc : Enc_X_3op_r<0b110000011000101>; + +class Enc_no_operands<bits<25> opc> : OpcodeHexagon { + + let Inst{31-16} = { opc{24-10}, 0 }; + let Inst{13-0} = { opc{9-7}, 0b000, opc{6-0}, 0 }; +} + +class Y5_l2gunlock_enc : Enc_no_operands<0b1010100000100000010000000>; +class Y5_l2gclean_enc : Enc_no_operands<0b1010100000100000100000000>; +class Y5_l2gcleaninv_enc : Enc_no_operands<0b1010100000100000110000000>; +class V6_vhist_enc : Enc_no_operands<0b0001111000000001001000000>; + +class Enc_J_jumpr<bits<13> opc> : OpcodeHexagon { + bits<5> src1; + + let Inst{31-16} = { opc{12-6}, 0, opc{5-3}, src1{4-0} }; + let Inst{13-0} = { 0b00, opc{2}, 0b0000, opc{1-0}, 0b00000 }; +} + +class Y5_l2unlocka_enc : Enc_J_jumpr<0b1010011011000>; +class Y2_l2cleaninvidx_enc : Enc_J_jumpr<0b1010100011000>; + +class Enc_ST_l2gclean_pa<bits<2> opc> : OpcodeHexagon { + bits<5> src1; + + let Inst{31-16} = { 0b101001101, opc{1-0}, 0b00000 }; + let Inst{13-0} = { 0, src1{4-0}, 0b00000000 }; +} + +class Y6_l2gcleanpa_enc : Enc_ST_l2gclean_pa<0b01>; +class Y6_l2gcleaninvpa_enc : Enc_ST_l2gclean_pa<0b10>; + +class A5_ACS_enc : OpcodeHexagon { + bits<5> dst1; + bits<2> dst2; + bits<5> src1; + bits<5> src2; + + let Inst{31-16} = { 0b11101010101, src1{4-0} }; + let Inst{13-0} = { 0, src2{4-0}, 0, dst2{1-0}, dst1{4-0} }; +} + +class Enc_X_4op_r<bits<8> opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + bits<5> src2; + bits<2> src3; + + let Inst{31-16} = { 0b11, opc{7}, 0, opc{6-5}, 1, opc{4-1}, src1{4-0} }; + let Inst{13-0} = { 0, src2{4-0}, opc{0}, src3{1-0}, dst{4-0} }; +} + +class S2_vsplicerb_enc : Enc_X_4op_r<0b00001000>; +class S2_cabacencbin_enc : Enc_X_4op_r<0b00001010>; +class F2_sffma_sc_enc : Enc_X_4op_r<0b11110111>; + +class V6_vhistq_enc : OpcodeHexagon { + bits<2> src1; + + 
let Inst{31-16} = { 0b00011110, src1{1-0}, 0b000010 }; + let Inst{13-0} = { 0b10000010000000 }; +} + +// TODO: Change script to generate dst1 instead of dst. +class A6_vminub_RdP_enc : OpcodeHexagon { + bits<5> dst1; + bits<2> dst2; + bits<5> src1; + bits<5> src2; + + let Inst{31-16} = { 0b11101010111, src2{4-0} }; + let Inst{13-0} = { 0, src1{4-0}, 0, dst2{1-0}, dst1{4-0} }; +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td new file mode 100644 index 0000000..3c5ec17 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td @@ -0,0 +1,448 @@ +//==- HexagonInstrFormats.td - Hexagon Instruction Formats --*- tablegen -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Hexagon Instruction Flags + +// +// *** Must match HexagonBaseInfo.h *** +//===----------------------------------------------------------------------===// + +class IType<bits<5> t> { + bits<5> Value = t; +} +def TypePSEUDO : IType<0>; +def TypeALU32 : IType<1>; +def TypeCR : IType<2>; +def TypeJR : IType<3>; +def TypeJ : IType<4>; +def TypeLD : IType<5>; +def TypeST : IType<6>; +def TypeSYSTEM : IType<7>; +def TypeXTYPE : IType<8>; +def TypeENDLOOP: IType<31>; + +// Maintain list of valid subtargets for each instruction. +class SubTarget<bits<6> value> { + bits<6> Value = value; +} + +def HasAnySubT : SubTarget<0x3f>; // 111111 +def HasV5SubT : SubTarget<0x3e>; // 111110 +def HasV55SubT : SubTarget<0x3c>; // 111100 +def HasV60SubT : SubTarget<0x38>; // 111000 + +// Addressing modes for load/store instructions +class AddrModeType<bits<3> value> { + bits<3> Value = value; +} + +def NoAddrMode : AddrModeType<0>; // No addressing mode +def Absolute : AddrModeType<1>; // Absolute addressing mode +def AbsoluteSet : AddrModeType<2>; // Absolute set addressing mode +def BaseImmOffset : AddrModeType<3>; // Indirect with offset +def BaseLongOffset : AddrModeType<4>; // Indirect with long offset +def BaseRegOffset : AddrModeType<5>; // Indirect with register offset +def PostInc : AddrModeType<6>; // Post increment addressing mode + +class MemAccessSize<bits<4> value> { + bits<4> Value = value; +} + +def NoMemAccess : MemAccessSize<0>;// Not a memory acces instruction. +def ByteAccess : MemAccessSize<1>;// Byte access instruction (memb). +def HalfWordAccess : MemAccessSize<2>;// Half word access instruction (memh). +def WordAccess : MemAccessSize<3>;// Word access instruction (memw). +def DoubleWordAccess : MemAccessSize<4>;// Double word access instruction (memd) +def Vector64Access : MemAccessSize<7>;// Vector access instruction (memv) +def Vector128Access : MemAccessSize<8>;// Vector access instruction (memv) + + +//===----------------------------------------------------------------------===// +// Instruction Class Declaration + +//===----------------------------------------------------------------------===// + +class OpcodeHexagon { + field bits<32> Inst = ?; // Default to an invalid insn. 
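+  // Every encoded Hexagon instruction carries its 4-bit instruction class
+  // (ICLASS) in the top nibble, Inst{31-28}; the IClass field below is wired
+  // into those bits.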
+ bits<4> IClass = 0; // ICLASS + + let Inst{31-28} = IClass; + + bits<1> zero = 0; +} + +class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern, + string cstr, InstrItinClass itin, IType type> + : Instruction { + let Namespace = "Hexagon"; + + dag OutOperandList = outs; + dag InOperandList = ins; + let AsmString = asmstr; + let Pattern = pattern; + let Constraints = cstr; + let Itinerary = itin; + let Size = 4; + + // SoftFail is a field the disassembler can use to provide a way for + // instructions to not match without killing the whole decode process. It is + // mainly used for ARM, but Tablegen expects this field to exist or it fails + // to build the decode table. + field bits<32> SoftFail = 0; + + // *** Must match MCTargetDesc/HexagonBaseInfo.h *** + + // Instruction type according to the ISA. + IType Type = type; + let TSFlags{4-0} = Type.Value; + + // Solo instructions, i.e., those that cannot be in a packet with others. + bits<1> isSolo = 0; + let TSFlags{5} = isSolo; + // Packed only with A or X-type instructions. + bits<1> isSoloAX = 0; + let TSFlags{6} = isSoloAX; + // Only A-type instruction in first slot or nothing. + bits<1> isSoloAin1 = 0; + let TSFlags{7} = isSoloAin1; + + // Predicated instructions. + bits<1> isPredicated = 0; + let TSFlags{8} = isPredicated; + bits<1> isPredicatedFalse = 0; + let TSFlags{9} = isPredicatedFalse; + bits<1> isPredicatedNew = 0; + let TSFlags{10} = isPredicatedNew; + bits<1> isPredicateLate = 0; + let TSFlags{11} = isPredicateLate; // Late predicate producer insn. + + // New-value insn helper fields. + bits<1> isNewValue = 0; + let TSFlags{12} = isNewValue; // New-value consumer insn. + bits<1> hasNewValue = 0; + let TSFlags{13} = hasNewValue; // New-value producer insn. + bits<3> opNewValue = 0; + let TSFlags{16-14} = opNewValue; // New-value produced operand. + bits<1> isNVStorable = 0; + let TSFlags{17} = isNVStorable; // Store that can become new-value store. + bits<1> isNVStore = 0; + let TSFlags{18} = isNVStore; // New-value store insn. + bits<1> isCVLoadable = 0; + let TSFlags{19} = isCVLoadable; // Load that can become cur-value load. + bits<1> isCVLoad = 0; + let TSFlags{20} = isCVLoad; // Cur-value load insn. + + // Immediate extender helper fields. + bits<1> isExtendable = 0; + let TSFlags{21} = isExtendable; // Insn may be extended. + bits<1> isExtended = 0; + let TSFlags{22} = isExtended; // Insn must be extended. + bits<3> opExtendable = 0; + let TSFlags{25-23} = opExtendable; // Which operand may be extended. + bits<1> isExtentSigned = 0; + let TSFlags{26} = isExtentSigned; // Signed or unsigned range. + bits<5> opExtentBits = 0; + let TSFlags{31-27} = opExtentBits; //Number of bits of range before extending. + bits<2> opExtentAlign = 0; + let TSFlags{33-32} = opExtentAlign; // Alignment exponent before extending. + + // If an instruction is valid on a subtarget, set the corresponding + // bit from validSubTargets. + // By default, instruction is valid on all subtargets. + SubTarget validSubTargets = HasAnySubT; + let TSFlags{39-34} = validSubTargets.Value; + + // Addressing mode for load/store instructions. + AddrModeType addrMode = NoAddrMode; + let TSFlags{42-40} = addrMode.Value; + + // Memory access size for mem access instructions (load/store) + MemAccessSize accessSize = NoMemAccess; + let TSFlags{46-43} = accessSize.Value; + + bits<1> isTaken = 0; + let TSFlags {47} = isTaken; // Branch prediction. + + bits<1> isFP = 0; + let TSFlags {48} = isFP; // Floating-point. 
+ + bits<1> hasNewValue2 = 0; + let TSFlags{50} = hasNewValue2; // Second New-value producer insn. + bits<3> opNewValue2 = 0; + let TSFlags{53-51} = opNewValue2; // Second New-value produced operand. + + bits<1> isAccumulator = 0; + let TSFlags{54} = isAccumulator; + + // Fields used for relation models. + bit isNonTemporal = 0; + string isNT = ""; // set to "true" for non-temporal vector stores. + string BaseOpcode = ""; + string CextOpcode = ""; + string PredSense = ""; + string PNewValue = ""; + string NValueST = ""; // Set to "true" for new-value stores. + string InputType = ""; // Input is "imm" or "reg" type. + string isFloat = "false"; // Set to "true" for the floating-point load/store. + string isBrTaken = !if(isTaken, "true", "false"); // Set to "true"/"false" for jump instructions + + let PredSense = !if(isPredicated, !if(isPredicatedFalse, "false", "true"), + ""); + let PNewValue = !if(isPredicatedNew, "new", ""); + let NValueST = !if(isNVStore, "true", "false"); + let isNT = !if(isNonTemporal, "true", "false"); + + // *** Must match MCTargetDesc/HexagonBaseInfo.h *** +} + +//===----------------------------------------------------------------------===// +// Instruction Classes Definitions + +//===----------------------------------------------------------------------===// + +// LD Instruction Class in V2/V3/V4. +// Definition of the instruction class NOT CHANGED. +let mayLoad = 1 in +class LDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = LD_tc_ld_SLOT01> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>, OpcodeHexagon; + +let mayLoad = 1 in +class LDInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : LDInst<outs, ins, asmstr, pattern, cstr>; + +class CONSTLDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : LDInst<outs, ins, asmstr, pattern, cstr>; + +// LD Instruction Class in V2/V3/V4. +// Definition of the instruction class NOT CHANGED. +class LDInstPost<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : LDInst<outs, ins, asmstr, pattern, cstr>; + +let mayLoad = 1 in +class LD0Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin=LD_tc_ld_SLOT0> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>, OpcodeHexagon; + +let mayLoad = 1 in +class LD1Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin=LD_tc_ld_SLOT0> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>; + +// ST Instruction Class in V2/V3 can take SLOT0 only. +// ST Instruction Class in V4 can take SLOT0 & SLOT1. +// Definition of the instruction class CHANGED from V2/V3 to V4. +let mayStore = 1 in +class STInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = ST_tc_st_SLOT01> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeST>, OpcodeHexagon; + +class STInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : STInst<outs, ins, asmstr, pattern, cstr>; + +let mayStore = 1 in +class ST0Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = ST_tc_ld_SLOT0> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeST>, OpcodeHexagon; + +// Same as ST0Inst but doesn't derive from OpcodeHexagon. 
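+// (Presumably so that an Enc_* encoding class, which already derives from
+// OpcodeHexagon, can be mixed in later without defining Inst twice.)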
+let mayStore = 1 in +class ST1Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = ST_tc_st_SLOT0> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeST>; + +// ST Instruction Class in V2/V3 can take SLOT0 only. +// ST Instruction Class in V4 can take SLOT0 & SLOT1. +// Definition of the instruction class CHANGED from V2/V3 to V4. +class STInstPost<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = ST_tc_st_SLOT01> + : STInst<outs, ins, asmstr, pattern, cstr, itin>; + +// SYSTEM Instruction Class in V4 can take SLOT0 only +// In V2/V3 we used ST for this but in v4 ST can take SLOT0 or SLOT1. +class SYSInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = ST_tc_3stall_SLOT0> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeSYSTEM>, + OpcodeHexagon; + +// ALU32 Instruction Class in V2/V3/V4. +// Definition of the instruction class NOT CHANGED. +class ALU32Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = ALU32_2op_tc_1_SLOT0123> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeALU32>, OpcodeHexagon; + +// ALU64 Instruction Class in V2/V3. +// XTYPE Instruction Class in V4. +// Definition of the instruction class NOT CHANGED. +// Name of the Instruction Class changed from ALU64 to XTYPE from V2/V3 to V4. +class ALU64Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = ALU64_tc_2_SLOT23> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>, + OpcodeHexagon; + +class ALU64_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = ALU64_tc_2_SLOT23> + : ALU64Inst<outs, ins, asmstr, pattern, cstr, itin>; + + +// M Instruction Class in V2/V3. +// XTYPE Instruction Class in V4. +// Definition of the instruction class NOT CHANGED. +// Name of the Instruction Class changed from M to XTYPE from V2/V3 to V4. +class MInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = M_tc_3x_SLOT23> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>, + OpcodeHexagon; + +// Same as above but doesn't derive from OpcodeHexagon +class MInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = M_tc_3x_SLOT23> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>; + +// M Instruction Class in V2/V3. +// XTYPE Instruction Class in V4. +// Definition of the instruction class NOT CHANGED. +// Name of the Instruction Class changed from M to XTYPE from V2/V3 to V4. +class MInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = M_tc_2_SLOT23> + : MInst<outs, ins, asmstr, pattern, cstr, itin>; + +// S Instruction Class in V2/V3. +// XTYPE Instruction Class in V4. +// Definition of the instruction class NOT CHANGED. +// Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4. 
+class SInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = S_2op_tc_1_SLOT23> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>, + OpcodeHexagon; + +class SInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = S_2op_tc_1_SLOT23> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>; + +// S Instruction Class in V2/V3. +// XTYPE Instruction Class in V4. +// Definition of the instruction class NOT CHANGED. +// Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4. +class SInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = S_3op_tc_1_SLOT23> + : SInst<outs, ins, asmstr, pattern, cstr, itin>; + +// J Instruction Class in V2/V3/V4. +// Definition of the instruction class NOT CHANGED. +class JInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = J_tc_2early_SLOT23> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeJ>, OpcodeHexagon; + +// JR Instruction Class in V2/V3/V4. +// Definition of the instruction class NOT CHANGED. +class JRInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = J_tc_2early_SLOT2> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeJR>, OpcodeHexagon; + +// CR Instruction Class in V2/V3/V4. +// Definition of the instruction class NOT CHANGED. +class CRInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = CR_tc_2early_SLOT3> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCR>, OpcodeHexagon; + +let isCodeGenOnly = 1, isPseudo = 1 in +class Endloop<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = J_tc_2early_SLOT0123> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeENDLOOP>, + OpcodeHexagon; + +let isCodeGenOnly = 1, isPseudo = 1 in +class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : InstHexagon<outs, ins, asmstr, pattern, cstr, PSEUDO, TypePSEUDO>, + OpcodeHexagon; + +let isCodeGenOnly = 1, isPseudo = 1 in +class PseudoM<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr=""> + : InstHexagon<outs, ins, asmstr, pattern, cstr, PSEUDOM, TypePSEUDO>, + OpcodeHexagon; + +//===----------------------------------------------------------------------===// +// Instruction Classes Definitions - +//===----------------------------------------------------------------------===// + + +// +// ALU32 patterns +//. +class ALU32_rr<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = ALU32_2op_tc_1_SLOT0123> + : ALU32Inst<outs, ins, asmstr, pattern, cstr, itin>; + +class ALU32_ir<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = ALU32_2op_tc_1_SLOT0123> + : ALU32Inst<outs, ins, asmstr, pattern, cstr, itin>; + +class ALU32_ri<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = ALU32_2op_tc_1_SLOT0123> + : ALU32Inst<outs, ins, asmstr, pattern, cstr, itin>; + +class ALU32_ii<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = ALU32_2op_tc_1_SLOT0123> + : ALU32Inst<outs, ins, asmstr, pattern, cstr, itin>; + +// +// ALU64 patterns. 
+// +class ALU64_rr<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = ALU64_tc_1_SLOT23> + : ALU64Inst<outs, ins, asmstr, pattern, cstr, itin>; + +class ALU64_ri<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = ALU64_tc_1_SLOT23> + : ALU64Inst<outs, ins, asmstr, pattern, cstr, itin>; + +// Post increment ST Instruction. +class STInstPI<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : STInst<outs, ins, asmstr, pattern, cstr>; + +let mayStore = 1 in +class STInst2PI<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : STInst<outs, ins, asmstr, pattern, cstr>; + +// Post increment LD Instruction. +class LDInstPI<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : LDInst<outs, ins, asmstr, pattern, cstr>; + +let mayLoad = 1 in +class LDInst2PI<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : LDInst<outs, ins, asmstr, pattern, cstr>; + +//===----------------------------------------------------------------------===// +// V4 Instruction Format Definitions + +//===----------------------------------------------------------------------===// + +include "HexagonInstrFormatsV4.td" + +//===----------------------------------------------------------------------===// +// V4 Instruction Format Definitions + +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// V60 Instruction Format Definitions + +//===----------------------------------------------------------------------===// + +include "HexagonInstrFormatsV60.td" + +//===----------------------------------------------------------------------===// +// V60 Instruction Format Definitions + +//===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td new file mode 100644 index 0000000..2d1dea5 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td @@ -0,0 +1,155 @@ +//==- HexagonInstrFormats.td - Hexagon Instruction Formats --*- tablegen -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Hexagon V4 instruction classes in TableGen format. +// +//===----------------------------------------------------------------------===// + +//----------------------------------------------------------------------------// +// Hexagon Instruction Flags +// +// *** Must match BaseInfo.h *** +//----------------------------------------------------------------------------// + +def TypeMEMOP : IType<9>; +def TypeNV : IType<10>; +def TypeDUPLEX : IType<11>; +def TypeCOMPOUND : IType<12>; +def TypePREFIX : IType<30>; + +// Duplex Instruction Class Declaration +//===----------------------------------------------------------------------===// + +class OpcodeDuplex { + field bits<32> Inst = ?; // Default to an invalid insn. 
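+  // A duplex packs two 13-bit sub-instructions into one 32-bit word; the let
+  // statements below place IClass{0} in Inst{13} and IClass{3-1} in
+  // Inst{31-29}, with the sub-instructions in Inst{28-16} and Inst{12-0}.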
+ bits<4> IClass = 0; // ICLASS + bits<13> ISubHi = 0; // Low sub-insn + bits<13> ISubLo = 0; // High sub-insn + + let Inst{31-29} = IClass{3-1}; + let Inst{13} = IClass{0}; + let Inst{15-14} = 0; + let Inst{28-16} = ISubHi; + let Inst{12-0} = ISubLo; +} + +class InstDuplex<bits<4> iClass, list<dag> pattern = [], + string cstr = ""> + : Instruction, OpcodeDuplex { + let Namespace = "Hexagon"; + IType Type = TypeDUPLEX; // uses slot 0,1 + let isCodeGenOnly = 1; + let hasSideEffects = 0; + dag OutOperandList = (outs); + dag InOperandList = (ins); + let IClass = iClass; + let Constraints = cstr; + let Itinerary = DUPLEX; + let Size = 4; + + // SoftFail is a field the disassembler can use to provide a way for + // instructions to not match without killing the whole decode process. It is + // mainly used for ARM, but Tablegen expects this field to exist or it fails + // to build the decode table. + field bits<32> SoftFail = 0; + + // *** Must match MCTargetDesc/HexagonBaseInfo.h *** + + let TSFlags{4-0} = Type.Value; + + // Predicated instructions. + bits<1> isPredicated = 0; + let TSFlags{6} = isPredicated; + bits<1> isPredicatedFalse = 0; + let TSFlags{7} = isPredicatedFalse; + bits<1> isPredicatedNew = 0; + let TSFlags{8} = isPredicatedNew; + + // New-value insn helper fields. + bits<1> isNewValue = 0; + let TSFlags{9} = isNewValue; // New-value consumer insn. + bits<1> hasNewValue = 0; + let TSFlags{10} = hasNewValue; // New-value producer insn. + bits<3> opNewValue = 0; + let TSFlags{13-11} = opNewValue; // New-value produced operand. + bits<1> isNVStorable = 0; + let TSFlags{14} = isNVStorable; // Store that can become new-value store. + bits<1> isNVStore = 0; + let TSFlags{15} = isNVStore; // New-value store insn. + + // Immediate extender helper fields. + bits<1> isExtendable = 0; + let TSFlags{16} = isExtendable; // Insn may be extended. + bits<1> isExtended = 0; + let TSFlags{17} = isExtended; // Insn must be extended. + bits<3> opExtendable = 0; + let TSFlags{20-18} = opExtendable; // Which operand may be extended. + bits<1> isExtentSigned = 0; + let TSFlags{21} = isExtentSigned; // Signed or unsigned range. + bits<5> opExtentBits = 0; + let TSFlags{26-22} = opExtentBits; //Number of bits of range before extending. + bits<2> opExtentAlign = 0; + let TSFlags{28-27} = opExtentAlign; // Alignment exponent before extending. +} + +//----------------------------------------------------------------------------// +// Instruction Classes Definitions +//----------------------------------------------------------------------------// + +// +// NV type instructions. +// +class NVInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = NCJ_tc_3or4stall_SLOT0> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeNV>, OpcodeHexagon; + +class NVInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = NCJ_tc_3or4stall_SLOT0> + : NVInst<outs, ins, asmstr, pattern, cstr, itin>; + +// Definition of Post increment new value store. +class NVInstPost_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = ST_tc_st_SLOT0> + : NVInst<outs, ins, asmstr, pattern, cstr, itin>; + +// Post increment ST Instruction. +let mayStore = 1 in +class NVInstPI_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = ST_tc_st_SLOT0> + : NVInst<outs, ins, asmstr, pattern, cstr, itin>; + +// New-value conditional branch. 
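+// (Illustrative Hexagon syntax: "if (cmp.eq(r0.new, r1)) jump:t .Ltarget",
+// where the compare consumes a value produced earlier in the same packet.)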
+class NCJInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : NVInst<outs, ins, asmstr, pattern, cstr>; + +let mayLoad = 1, mayStore = 1 in +class MEMInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = V4LDST_tc_st_SLOT0> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeMEMOP>, + OpcodeHexagon; + +class MEMInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = V4LDST_tc_st_SLOT0> + : MEMInst<outs, ins, asmstr, pattern, cstr, itin>; + +let isCodeGenOnly = 1 in +class EXTENDERInst<dag outs, dag ins, string asmstr, list<dag> pattern = []> + : InstHexagon<outs, ins, asmstr, pattern, "", EXTENDER_tc_1_SLOT0123, + TypePREFIX>, OpcodeHexagon; + +class SUBInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : InstHexagon<outs, ins, asmstr, pattern, "", PREFIX, TypeDUPLEX>, + OpcodeHexagon; + +class CJInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : InstHexagon<outs, ins, asmstr, pattern, cstr, COMPOUND, TypeCOMPOUND>, + OpcodeHexagon; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV60.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV60.td new file mode 100644 index 0000000..f3d43de --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV60.td @@ -0,0 +1,238 @@ +//==- HexagonInstrFormatsV60.td - Hexagon Instruction Formats -*- tablegen -==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Hexagon V60 instruction classes in TableGen format. 
+// +//===----------------------------------------------------------------------===// + +//----------------------------------------------------------------------------// +// Hexagon Intruction Flags + +// +// *** Must match BaseInfo.h *** +//----------------------------------------------------------------------------// + +def TypeCVI_VA : IType<13>; +def TypeCVI_VA_DV : IType<14>; +def TypeCVI_VX : IType<15>; +def TypeCVI_VX_DV : IType<16>; +def TypeCVI_VP : IType<17>; +def TypeCVI_VP_VS : IType<18>; +def TypeCVI_VS : IType<19>; +def TypeCVI_VINLANESAT : IType<20>; +def TypeCVI_VM_LD : IType<21>; +def TypeCVI_VM_TMP_LD : IType<22>; +def TypeCVI_VM_CUR_LD : IType<23>; +def TypeCVI_VM_VP_LDU : IType<24>; +def TypeCVI_VM_ST : IType<25>; +def TypeCVI_VM_NEW_ST : IType<26>; +def TypeCVI_VM_STU : IType<27>; +def TypeCVI_HIST : IType<28>; +//----------------------------------------------------------------------------// +// Intruction Classes Definitions + +//----------------------------------------------------------------------------// + +let validSubTargets = HasV60SubT in +{ +class CVI_VA_Resource<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VA> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VA>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VA_DV_Resource<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VA_DV> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VA_DV>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VX_Resource_long<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VX_LONG> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VX_Resource_late<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VX_LATE> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX>, + Requires<[HasV60T, UseHVX]>; + +class CVI_VX_Resource<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VX> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VX_DV_Resource<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VX_DV> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX_DV>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VX_DV_Slot2_Resource<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VX_DV_SLOT2> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX_DV>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VX_DV_Resource_long<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VX_DV_LONG> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX_DV>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VP_Resource_long<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VP_LONG> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VP>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VP_VS_Resource_early<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = 
CVI_VP_VS_EARLY> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VP_VS>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VP_VS_Resource_long<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VP_VS_LONG> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VP_VS>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VP_VS_Resource_long_early<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VP_VS_LONG_EARLY> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VP_VS>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VS_Resource<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VS> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VS>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VINLANESAT_Resource<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VINLANESAT> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VINLANESAT>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VS_Resource_long<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VS> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VS>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VM_LD_Resource<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VM_LD> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_LD>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VM_LD_Resource_long<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VM_LD> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_LD>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VM_TMP_LD_Resource<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VM_TMP_LD> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_TMP_LD>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VM_TMP_LD_Resource_long<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VM_TMP_LD> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_TMP_LD>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VM_CUR_LD_Resource<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VM_CUR_LD> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_CUR_LD>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VM_VP_LDU_Resource<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VM_VP_LDU> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_VP_LDU>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VM_VP_LDU_Resource_long<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VM_VP_LDU> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_VP_LDU>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VM_ST_Resource<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VM_ST> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, 
TypeCVI_VM_ST>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VM_ST_Resource_long<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VM_ST> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_ST>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VM_NEW_ST_Resource<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VM_NEW_ST> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_NEW_ST>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VM_NEW_ST_Resource_long<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VM_NEW_ST> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_NEW_ST>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VM_STU_Resource<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VM_STU> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_STU>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VM_STU_Resource_long<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VM_STU> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_STU>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_HIST_Resource<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_HIST> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_HIST>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; +} + +let validSubTargets = HasV60SubT in +{ +class CVI_VA_Resource1<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VA> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VA>, + Requires<[HasV60T, UseHVX]>; + +class CVI_VX_DV_Resource1<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VX_DV> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX_DV>, + Requires<[HasV60T, UseHVX]>; + +class CVI_HIST_Resource1<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_HIST> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_HIST>, + Requires<[HasV60T, UseHVX]>; +} + + diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp new file mode 100644 index 0000000..eb3590c --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -0,0 +1,3828 @@ +//===-- HexagonInstrInfo.cpp - Hexagon Instruction Information ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Hexagon implementation of the TargetInstrInfo class. 
+// +//===----------------------------------------------------------------------===// + +#include "HexagonInstrInfo.h" +#include "Hexagon.h" +#include "HexagonRegisterInfo.h" +#include "HexagonSubtarget.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/DFAPacketizer.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include <cctype> + +using namespace llvm; + +#define DEBUG_TYPE "hexagon-instrinfo" + +#define GET_INSTRINFO_CTOR_DTOR +#define GET_INSTRMAP_INFO +#include "HexagonGenInstrInfo.inc" +#include "HexagonGenDFAPacketizer.inc" + +using namespace llvm; + +cl::opt<bool> ScheduleInlineAsm("hexagon-sched-inline-asm", cl::Hidden, + cl::init(false), cl::desc("Do not consider inline-asm a scheduling/" + "packetization boundary.")); + +static cl::opt<bool> EnableBranchPrediction("hexagon-enable-branch-prediction", + cl::Hidden, cl::init(true), cl::desc("Enable branch prediction")); + +static cl::opt<bool> DisableNVSchedule("disable-hexagon-nv-schedule", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Disable schedule adjustment for new value stores.")); + +static cl::opt<bool> EnableTimingClassLatency( + "enable-timing-class-latency", cl::Hidden, cl::init(false), + cl::desc("Enable timing class latency")); + +static cl::opt<bool> EnableALUForwarding( + "enable-alu-forwarding", cl::Hidden, cl::init(true), + cl::desc("Enable vec alu forwarding")); + +static cl::opt<bool> EnableACCForwarding( + "enable-acc-forwarding", cl::Hidden, cl::init(true), + cl::desc("Enable vec acc forwarding")); + +static cl::opt<bool> BranchRelaxAsmLarge("branch-relax-asm-large", + cl::init(true), cl::Hidden, cl::ZeroOrMore, cl::desc("branch relax asm")); + +/// +/// Constants for Hexagon instructions. +/// +const int Hexagon_MEMV_OFFSET_MAX_128B = 2047; // #s7 +const int Hexagon_MEMV_OFFSET_MIN_128B = -2048; // #s7 +const int Hexagon_MEMV_OFFSET_MAX = 1023; // #s6 +const int Hexagon_MEMV_OFFSET_MIN = -1024; // #s6 +const int Hexagon_MEMW_OFFSET_MAX = 4095; +const int Hexagon_MEMW_OFFSET_MIN = -4096; +const int Hexagon_MEMD_OFFSET_MAX = 8191; +const int Hexagon_MEMD_OFFSET_MIN = -8192; +const int Hexagon_MEMH_OFFSET_MAX = 2047; +const int Hexagon_MEMH_OFFSET_MIN = -2048; +const int Hexagon_MEMB_OFFSET_MAX = 1023; +const int Hexagon_MEMB_OFFSET_MIN = -1024; +const int Hexagon_ADDI_OFFSET_MAX = 32767; +const int Hexagon_ADDI_OFFSET_MIN = -32768; +const int Hexagon_MEMD_AUTOINC_MAX = 56; +const int Hexagon_MEMD_AUTOINC_MIN = -64; +const int Hexagon_MEMW_AUTOINC_MAX = 28; +const int Hexagon_MEMW_AUTOINC_MIN = -32; +const int Hexagon_MEMH_AUTOINC_MAX = 14; +const int Hexagon_MEMH_AUTOINC_MIN = -16; +const int Hexagon_MEMB_AUTOINC_MAX = 7; +const int Hexagon_MEMB_AUTOINC_MIN = -8; +const int Hexagon_MEMV_AUTOINC_MAX = 192; +const int Hexagon_MEMV_AUTOINC_MIN = -256; +const int Hexagon_MEMV_AUTOINC_MAX_128B = 384; +const int Hexagon_MEMV_AUTOINC_MIN_128B = -512; + +// Pin the vtable to this file. 
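+// (Defining one out-of-line virtual member function gives the compiler a
+// single translation unit in which to emit the class's vtable.)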
+void HexagonInstrInfo::anchor() {} + +HexagonInstrInfo::HexagonInstrInfo(HexagonSubtarget &ST) + : HexagonGenInstrInfo(Hexagon::ADJCALLSTACKDOWN, Hexagon::ADJCALLSTACKUP), + RI() {} + + +static bool isIntRegForSubInst(unsigned Reg) { + return (Reg >= Hexagon::R0 && Reg <= Hexagon::R7) || + (Reg >= Hexagon::R16 && Reg <= Hexagon::R23); +} + + +static bool isDblRegForSubInst(unsigned Reg, const HexagonRegisterInfo &HRI) { + return isIntRegForSubInst(HRI.getSubReg(Reg, Hexagon::subreg_loreg)) && + isIntRegForSubInst(HRI.getSubReg(Reg, Hexagon::subreg_hireg)); +} + + +/// Calculate number of instructions excluding the debug instructions. +static unsigned nonDbgMICount(MachineBasicBlock::const_instr_iterator MIB, + MachineBasicBlock::const_instr_iterator MIE) { + unsigned Count = 0; + for (; MIB != MIE; ++MIB) { + if (!MIB->isDebugValue()) + ++Count; + } + return Count; +} + + +/// Find the hardware loop instruction used to set-up the specified loop. +/// On Hexagon, we have two instructions used to set-up the hardware loop +/// (LOOP0, LOOP1) with corresponding endloop (ENDLOOP0, ENDLOOP1) instructions +/// to indicate the end of a loop. +static MachineInstr *findLoopInstr(MachineBasicBlock *BB, int EndLoopOp, + SmallPtrSet<MachineBasicBlock *, 8> &Visited) { + int LOOPi; + int LOOPr; + if (EndLoopOp == Hexagon::ENDLOOP0) { + LOOPi = Hexagon::J2_loop0i; + LOOPr = Hexagon::J2_loop0r; + } else { // EndLoopOp == Hexagon::EndLOOP1 + LOOPi = Hexagon::J2_loop1i; + LOOPr = Hexagon::J2_loop1r; + } + + // The loop set-up instruction will be in a predecessor block + for (MachineBasicBlock::pred_iterator PB = BB->pred_begin(), + PE = BB->pred_end(); PB != PE; ++PB) { + // If this has been visited, already skip it. + if (!Visited.insert(*PB).second) + continue; + if (*PB == BB) + continue; + for (MachineBasicBlock::reverse_instr_iterator I = (*PB)->instr_rbegin(), + E = (*PB)->instr_rend(); I != E; ++I) { + int Opc = I->getOpcode(); + if (Opc == LOOPi || Opc == LOOPr) + return &*I; + // We've reached a different loop, which means the loop0 has been removed. + if (Opc == EndLoopOp) + return 0; + } + // Check the predecessors for the LOOP instruction. + MachineInstr *loop = findLoopInstr(*PB, EndLoopOp, Visited); + if (loop) + return loop; + } + return 0; +} + + +/// Gather register def/uses from MI. +/// This treats possible (predicated) defs as actually happening ones +/// (conservatively). +static inline void parseOperands(const MachineInstr *MI, + SmallVector<unsigned, 4> &Defs, SmallVector<unsigned, 8> &Uses) { + Defs.clear(); + Uses.clear(); + + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + + if (!MO.isReg()) + continue; + + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + + if (MO.isUse()) + Uses.push_back(MO.getReg()); + + if (MO.isDef()) + Defs.push_back(MO.getReg()); + } +} + + +// Position dependent, so check twice for swap. 
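+// E.g. with the table below, isDuplexPairMatch(HSIG_L2, HSIG_A) is true but
+// isDuplexPairMatch(HSIG_A, HSIG_L2) is false, so callers holding an
+// unordered pair of groups should test both orders.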
+static bool isDuplexPairMatch(unsigned Ga, unsigned Gb) { + switch (Ga) { + case HexagonII::HSIG_None: + default: + return false; + case HexagonII::HSIG_L1: + return (Gb == HexagonII::HSIG_L1 || Gb == HexagonII::HSIG_A); + case HexagonII::HSIG_L2: + return (Gb == HexagonII::HSIG_L1 || Gb == HexagonII::HSIG_L2 || + Gb == HexagonII::HSIG_A); + case HexagonII::HSIG_S1: + return (Gb == HexagonII::HSIG_L1 || Gb == HexagonII::HSIG_L2 || + Gb == HexagonII::HSIG_S1 || Gb == HexagonII::HSIG_A); + case HexagonII::HSIG_S2: + return (Gb == HexagonII::HSIG_L1 || Gb == HexagonII::HSIG_L2 || + Gb == HexagonII::HSIG_S1 || Gb == HexagonII::HSIG_S2 || + Gb == HexagonII::HSIG_A); + case HexagonII::HSIG_A: + return (Gb == HexagonII::HSIG_A); + case HexagonII::HSIG_Compound: + return (Gb == HexagonII::HSIG_Compound); + } + return false; +} + + + +/// isLoadFromStackSlot - If the specified machine instruction is a direct +/// load from a stack slot, return the virtual or physical register number of +/// the destination along with the FrameIndex of the loaded stack slot. If +/// not, return 0. This predicate must return 0 if the instruction has +/// any side effects other than loading from the stack slot. +unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + switch (MI->getOpcode()) { + default: break; + case Hexagon::L2_loadri_io: + case Hexagon::L2_loadrd_io: + case Hexagon::L2_loadrh_io: + case Hexagon::L2_loadrb_io: + case Hexagon::L2_loadrub_io: + if (MI->getOperand(2).isFI() && + MI->getOperand(1).isImm() && (MI->getOperand(1).getImm() == 0)) { + FrameIndex = MI->getOperand(2).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + } + return 0; +} + + +/// isStoreToStackSlot - If the specified machine instruction is a direct +/// store to a stack slot, return the virtual or physical register number of +/// the source reg along with the FrameIndex of the loaded stack slot. If +/// not, return 0. This predicate must return 0 if the instruction has +/// any side effects other than storing to the stack slot. +unsigned HexagonInstrInfo::isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + switch (MI->getOpcode()) { + default: break; + case Hexagon::S2_storeri_io: + case Hexagon::S2_storerd_io: + case Hexagon::S2_storerh_io: + case Hexagon::S2_storerb_io: + if (MI->getOperand(2).isFI() && + MI->getOperand(1).isImm() && (MI->getOperand(1).getImm() == 0)) { + FrameIndex = MI->getOperand(0).getIndex(); + return MI->getOperand(2).getReg(); + } + break; + } + return 0; +} + + +/// This function can analyze one/two way branching only and should (mostly) be +/// called by target independent side. +/// First entry is always the opcode of the branching instruction, except when +/// the Cond vector is supposed to be empty, e.g., when AnalyzeBranch fails, a +/// BB with only unconditional jump. Subsequent entries depend upon the opcode, +/// e.g. Jump_c p will have +/// Cond[0] = Jump_c +/// Cond[1] = p +/// HW-loop ENDLOOP: +/// Cond[0] = ENDLOOP +/// Cond[1] = MBB +/// New value jump: +/// Cond[0] = Hexagon::CMPEQri_f_Jumpnv_t_V4 -- specific opcode +/// Cond[1] = R +/// Cond[2] = Imm +/// +bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, + MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify) const { + TBB = nullptr; + FBB = nullptr; + Cond.clear(); + + // If the block has no terminators, it just falls into the block after it. 
+ MachineBasicBlock::instr_iterator I = MBB.instr_end(); + if (I == MBB.instr_begin()) + return false; + + // A basic block may looks like this: + // + // [ insn + // EH_LABEL + // insn + // insn + // insn + // EH_LABEL + // insn ] + // + // It has two succs but does not have a terminator + // Don't know how to handle it. + do { + --I; + if (I->isEHLabel()) + // Don't analyze EH branches. + return true; + } while (I != MBB.instr_begin()); + + I = MBB.instr_end(); + --I; + + while (I->isDebugValue()) { + if (I == MBB.instr_begin()) + return false; + --I; + } + + bool JumpToBlock = I->getOpcode() == Hexagon::J2_jump && + I->getOperand(0).isMBB(); + // Delete the J2_jump if it's equivalent to a fall-through. + if (AllowModify && JumpToBlock && + MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) { + DEBUG(dbgs()<< "\nErasing the jump to successor block\n";); + I->eraseFromParent(); + I = MBB.instr_end(); + if (I == MBB.instr_begin()) + return false; + --I; + } + if (!isUnpredicatedTerminator(&*I)) + return false; + + // Get the last instruction in the block. + MachineInstr *LastInst = &*I; + MachineInstr *SecondLastInst = nullptr; + // Find one more terminator if present. + for (;;) { + if (&*I != LastInst && !I->isBundle() && isUnpredicatedTerminator(&*I)) { + if (!SecondLastInst) + SecondLastInst = &*I; + else + // This is a third branch. + return true; + } + if (I == MBB.instr_begin()) + break; + --I; + } + + int LastOpcode = LastInst->getOpcode(); + int SecLastOpcode = SecondLastInst ? SecondLastInst->getOpcode() : 0; + // If the branch target is not a basic block, it could be a tail call. + // (It is, if the target is a function.) + if (LastOpcode == Hexagon::J2_jump && !LastInst->getOperand(0).isMBB()) + return true; + if (SecLastOpcode == Hexagon::J2_jump && + !SecondLastInst->getOperand(0).isMBB()) + return true; + + bool LastOpcodeHasJMP_c = PredOpcodeHasJMP_c(LastOpcode); + bool LastOpcodeHasNVJump = isNewValueJump(LastInst); + + // If there is only one terminator instruction, process it. + if (LastInst && !SecondLastInst) { + if (LastOpcode == Hexagon::J2_jump) { + TBB = LastInst->getOperand(0).getMBB(); + return false; + } + if (isEndLoopN(LastOpcode)) { + TBB = LastInst->getOperand(0).getMBB(); + Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode())); + Cond.push_back(LastInst->getOperand(0)); + return false; + } + if (LastOpcodeHasJMP_c) { + TBB = LastInst->getOperand(1).getMBB(); + Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode())); + Cond.push_back(LastInst->getOperand(0)); + return false; + } + // Only supporting rr/ri versions of new-value jumps. + if (LastOpcodeHasNVJump && (LastInst->getNumExplicitOperands() == 3)) { + TBB = LastInst->getOperand(2).getMBB(); + Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode())); + Cond.push_back(LastInst->getOperand(0)); + Cond.push_back(LastInst->getOperand(1)); + return false; + } + DEBUG(dbgs() << "\nCant analyze BB#" << MBB.getNumber() + << " with one jump\n";); + // Otherwise, don't know what this is. 
+ return true; + } + + bool SecLastOpcodeHasJMP_c = PredOpcodeHasJMP_c(SecLastOpcode); + bool SecLastOpcodeHasNVJump = isNewValueJump(SecondLastInst); + if (SecLastOpcodeHasJMP_c && (LastOpcode == Hexagon::J2_jump)) { + TBB = SecondLastInst->getOperand(1).getMBB(); + Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode())); + Cond.push_back(SecondLastInst->getOperand(0)); + FBB = LastInst->getOperand(0).getMBB(); + return false; + } + + // Only supporting rr/ri versions of new-value jumps. + if (SecLastOpcodeHasNVJump && + (SecondLastInst->getNumExplicitOperands() == 3) && + (LastOpcode == Hexagon::J2_jump)) { + TBB = SecondLastInst->getOperand(2).getMBB(); + Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode())); + Cond.push_back(SecondLastInst->getOperand(0)); + Cond.push_back(SecondLastInst->getOperand(1)); + FBB = LastInst->getOperand(0).getMBB(); + return false; + } + + // If the block ends with two Hexagon:JMPs, handle it. The second one is not + // executed, so remove it. + if (SecLastOpcode == Hexagon::J2_jump && LastOpcode == Hexagon::J2_jump) { + TBB = SecondLastInst->getOperand(0).getMBB(); + I = LastInst->getIterator(); + if (AllowModify) + I->eraseFromParent(); + return false; + } + + // If the block ends with an ENDLOOP, and J2_jump, handle it. + if (isEndLoopN(SecLastOpcode) && LastOpcode == Hexagon::J2_jump) { + TBB = SecondLastInst->getOperand(0).getMBB(); + Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode())); + Cond.push_back(SecondLastInst->getOperand(0)); + FBB = LastInst->getOperand(0).getMBB(); + return false; + } + DEBUG(dbgs() << "\nCant analyze BB#" << MBB.getNumber() + << " with two jumps";); + // Otherwise, can't handle this. + return true; +} + + +unsigned HexagonInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { + DEBUG(dbgs() << "\nRemoving branches out of BB#" << MBB.getNumber()); + MachineBasicBlock::iterator I = MBB.end(); + unsigned Count = 0; + while (I != MBB.begin()) { + --I; + if (I->isDebugValue()) + continue; + // Only removing branches from end of MBB. + if (!I->isBranch()) + return Count; + if (Count && (I->getOpcode() == Hexagon::J2_jump)) + llvm_unreachable("Malformed basic block: unconditional branch not last"); + MBB.erase(&MBB.back()); + I = MBB.end(); + ++Count; + } + return Count; +} + + +unsigned HexagonInstrInfo::InsertBranch(MachineBasicBlock &MBB, + MachineBasicBlock *TBB, MachineBasicBlock *FBB, + ArrayRef<MachineOperand> Cond, DebugLoc DL) const { + unsigned BOpc = Hexagon::J2_jump; + unsigned BccOpc = Hexagon::J2_jumpt; + assert(validateBranchCond(Cond) && "Invalid branching condition"); + assert(TBB && "InsertBranch must not be told to insert a fallthrough"); + + // Check if ReverseBranchCondition has asked to reverse this branch + // If we want to reverse the branch an odd number of times, we want + // J2_jumpf. + if (!Cond.empty() && Cond[0].isImm()) + BccOpc = Cond[0].getImm(); + + if (!FBB) { + if (Cond.empty()) { + // Due to a bug in TailMerging/CFG Optimization, we need to add a + // special case handling of a predicated jump followed by an + // unconditional jump. If not, Tail Merging and CFG Optimization go + // into an infinite loop. 
+ MachineBasicBlock *NewTBB, *NewFBB; + SmallVector<MachineOperand, 4> Cond; + MachineInstr *Term = MBB.getFirstTerminator(); + if (Term != MBB.end() && isPredicated(Term) && + !AnalyzeBranch(MBB, NewTBB, NewFBB, Cond, false)) { + MachineBasicBlock *NextBB = &*++MBB.getIterator(); + if (NewTBB == NextBB) { + ReverseBranchCondition(Cond); + RemoveBranch(MBB); + return InsertBranch(MBB, TBB, nullptr, Cond, DL); + } + } + BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB); + } else if (isEndLoopN(Cond[0].getImm())) { + int EndLoopOp = Cond[0].getImm(); + assert(Cond[1].isMBB()); + // Since we're adding an ENDLOOP, there better be a LOOP instruction. + // Check for it, and change the BB target if needed. + SmallPtrSet<MachineBasicBlock *, 8> VisitedBBs; + MachineInstr *Loop = findLoopInstr(TBB, EndLoopOp, VisitedBBs); + assert(Loop != 0 && "Inserting an ENDLOOP without a LOOP"); + Loop->getOperand(0).setMBB(TBB); + // Add the ENDLOOP after the finding the LOOP0. + BuildMI(&MBB, DL, get(EndLoopOp)).addMBB(TBB); + } else if (isNewValueJump(Cond[0].getImm())) { + assert((Cond.size() == 3) && "Only supporting rr/ri version of nvjump"); + // New value jump + // (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset) + // (ins IntRegs:$src1, u5Imm:$src2, brtarget:$offset) + unsigned Flags1 = getUndefRegState(Cond[1].isUndef()); + DEBUG(dbgs() << "\nInserting NVJump for BB#" << MBB.getNumber();); + if (Cond[2].isReg()) { + unsigned Flags2 = getUndefRegState(Cond[2].isUndef()); + BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[1].getReg(), Flags1). + addReg(Cond[2].getReg(), Flags2).addMBB(TBB); + } else if(Cond[2].isImm()) { + BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[1].getReg(), Flags1). + addImm(Cond[2].getImm()).addMBB(TBB); + } else + llvm_unreachable("Invalid condition for branching"); + } else { + assert((Cond.size() == 2) && "Malformed cond vector"); + const MachineOperand &RO = Cond[1]; + unsigned Flags = getUndefRegState(RO.isUndef()); + BuildMI(&MBB, DL, get(BccOpc)).addReg(RO.getReg(), Flags).addMBB(TBB); + } + return 1; + } + assert((!Cond.empty()) && + "Cond. cannot be empty when multiple branchings are required"); + assert((!isNewValueJump(Cond[0].getImm())) && + "NV-jump cannot be inserted with another branch"); + // Special case for hardware loops. The condition is a basic block. + if (isEndLoopN(Cond[0].getImm())) { + int EndLoopOp = Cond[0].getImm(); + assert(Cond[1].isMBB()); + // Since we're adding an ENDLOOP, there better be a LOOP instruction. + // Check for it, and change the BB target if needed. + SmallPtrSet<MachineBasicBlock *, 8> VisitedBBs; + MachineInstr *Loop = findLoopInstr(TBB, EndLoopOp, VisitedBBs); + assert(Loop != 0 && "Inserting an ENDLOOP without a LOOP"); + Loop->getOperand(0).setMBB(TBB); + // Add the ENDLOOP after the finding the LOOP0. 
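+    // (Both the LOOPn set-up retargeted above and the new ENDLOOP now refer
+    // to TBB.)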
+ BuildMI(&MBB, DL, get(EndLoopOp)).addMBB(TBB); + } else { + const MachineOperand &RO = Cond[1]; + unsigned Flags = getUndefRegState(RO.isUndef()); + BuildMI(&MBB, DL, get(BccOpc)).addReg(RO.getReg(), Flags).addMBB(TBB); + } + BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB); + + return 2; +} + + +bool HexagonInstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB, + unsigned NumCycles, unsigned ExtraPredCycles, + BranchProbability Probability) const { + return nonDbgBBSize(&MBB) <= 3; +} + + +bool HexagonInstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB, + unsigned NumTCycles, unsigned ExtraTCycles, MachineBasicBlock &FMBB, + unsigned NumFCycles, unsigned ExtraFCycles, BranchProbability Probability) + const { + return nonDbgBBSize(&TMBB) <= 3 && nonDbgBBSize(&FMBB) <= 3; +} + + +bool HexagonInstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB, + unsigned NumInstrs, BranchProbability Probability) const { + return NumInstrs <= 4; +} + + +void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, + unsigned SrcReg, bool KillSrc) const { + auto &HRI = getRegisterInfo(); + if (Hexagon::IntRegsRegClass.contains(SrcReg, DestReg)) { + BuildMI(MBB, I, DL, get(Hexagon::A2_tfr), DestReg).addReg(SrcReg); + return; + } + if (Hexagon::DoubleRegsRegClass.contains(SrcReg, DestReg)) { + BuildMI(MBB, I, DL, get(Hexagon::A2_tfrp), DestReg).addReg(SrcReg); + return; + } + if (Hexagon::PredRegsRegClass.contains(SrcReg, DestReg)) { + // Map Pd = Ps to Pd = or(Ps, Ps). + BuildMI(MBB, I, DL, get(Hexagon::C2_or), + DestReg).addReg(SrcReg).addReg(SrcReg); + return; + } + if (Hexagon::DoubleRegsRegClass.contains(DestReg) && + Hexagon::IntRegsRegClass.contains(SrcReg)) { + // We can have an overlap between single and double reg: r1:0 = r0. + if(SrcReg == RI.getSubReg(DestReg, Hexagon::subreg_loreg)) { + // r1:0 = r0 + BuildMI(MBB, I, DL, get(Hexagon::A2_tfrsi), (RI.getSubReg(DestReg, + Hexagon::subreg_hireg))).addImm(0); + } else { + // r1:0 = r1 or no overlap. + BuildMI(MBB, I, DL, get(Hexagon::A2_tfr), (RI.getSubReg(DestReg, + Hexagon::subreg_loreg))).addReg(SrcReg); + BuildMI(MBB, I, DL, get(Hexagon::A2_tfrsi), (RI.getSubReg(DestReg, + Hexagon::subreg_hireg))).addImm(0); + } + return; + } + if (Hexagon::CtrRegsRegClass.contains(DestReg) && + Hexagon::IntRegsRegClass.contains(SrcReg)) { + BuildMI(MBB, I, DL, get(Hexagon::A2_tfrrcr), DestReg).addReg(SrcReg); + return; + } + if (Hexagon::PredRegsRegClass.contains(SrcReg) && + Hexagon::IntRegsRegClass.contains(DestReg)) { + BuildMI(MBB, I, DL, get(Hexagon::C2_tfrpr), DestReg). + addReg(SrcReg, getKillRegState(KillSrc)); + return; + } + if (Hexagon::IntRegsRegClass.contains(SrcReg) && + Hexagon::PredRegsRegClass.contains(DestReg)) { + BuildMI(MBB, I, DL, get(Hexagon::C2_tfrrp), DestReg). + addReg(SrcReg, getKillRegState(KillSrc)); + return; + } + if (Hexagon::PredRegsRegClass.contains(SrcReg) && + Hexagon::IntRegsRegClass.contains(DestReg)) { + BuildMI(MBB, I, DL, get(Hexagon::C2_tfrpr), DestReg). + addReg(SrcReg, getKillRegState(KillSrc)); + return; + } + if (Hexagon::VectorRegsRegClass.contains(SrcReg, DestReg)) { + BuildMI(MBB, I, DL, get(Hexagon::V6_vassign), DestReg). + addReg(SrcReg, getKillRegState(KillSrc)); + return; + } + if (Hexagon::VecDblRegsRegClass.contains(SrcReg, DestReg)) { + BuildMI(MBB, I, DL, get(Hexagon::V6_vcombine), DestReg). + addReg(HRI.getSubReg(SrcReg, Hexagon::subreg_hireg), + getKillRegState(KillSrc)). 
+ addReg(HRI.getSubReg(SrcReg, Hexagon::subreg_loreg), + getKillRegState(KillSrc)); + return; + } + if (Hexagon::VecPredRegsRegClass.contains(SrcReg, DestReg)) { + BuildMI(MBB, I, DL, get(Hexagon::V6_pred_and), DestReg). + addReg(SrcReg). + addReg(SrcReg, getKillRegState(KillSrc)); + return; + } + if (Hexagon::VecPredRegsRegClass.contains(SrcReg) && + Hexagon::VectorRegsRegClass.contains(DestReg)) { + llvm_unreachable("Unimplemented pred to vec"); + return; + } + if (Hexagon::VecPredRegsRegClass.contains(DestReg) && + Hexagon::VectorRegsRegClass.contains(SrcReg)) { + llvm_unreachable("Unimplemented vec to pred"); + return; + } + if (Hexagon::VecPredRegs128BRegClass.contains(SrcReg, DestReg)) { + BuildMI(MBB, I, DL, get(Hexagon::V6_pred_and), + HRI.getSubReg(DestReg, Hexagon::subreg_hireg)). + addReg(HRI.getSubReg(SrcReg, Hexagon::subreg_hireg), + getKillRegState(KillSrc)); + BuildMI(MBB, I, DL, get(Hexagon::V6_pred_and), + HRI.getSubReg(DestReg, Hexagon::subreg_loreg)). + addReg(HRI.getSubReg(SrcReg, Hexagon::subreg_loreg), + getKillRegState(KillSrc)); + return; + } + +#ifndef NDEBUG + // Show the invalid registers to ease debugging. + dbgs() << "Invalid registers for copy in BB#" << MBB.getNumber() + << ": " << PrintReg(DestReg, &HRI) + << " = " << PrintReg(SrcReg, &HRI) << '\n'; +#endif + llvm_unreachable("Unimplemented"); +} + + +void HexagonInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI, + const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { + DebugLoc DL = MBB.findDebugLoc(I); + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + unsigned Align = MFI.getObjectAlignment(FI); + + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore, + MFI.getObjectSize(FI), Align); + + if (Hexagon::IntRegsRegClass.hasSubClassEq(RC)) { + BuildMI(MBB, I, DL, get(Hexagon::S2_storeri_io)) + .addFrameIndex(FI).addImm(0) + .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO); + } else if (Hexagon::DoubleRegsRegClass.hasSubClassEq(RC)) { + BuildMI(MBB, I, DL, get(Hexagon::S2_storerd_io)) + .addFrameIndex(FI).addImm(0) + .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO); + } else if (Hexagon::PredRegsRegClass.hasSubClassEq(RC)) { + BuildMI(MBB, I, DL, get(Hexagon::STriw_pred)) + .addFrameIndex(FI).addImm(0) + .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO); + } else { + llvm_unreachable("Unimplemented"); + } +} + + +void HexagonInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, unsigned DestReg, int FI, + const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { + DebugLoc DL = MBB.findDebugLoc(I); + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + unsigned Align = MFI.getObjectAlignment(FI); + + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad, + MFI.getObjectSize(FI), Align); + if (RC == &Hexagon::IntRegsRegClass) { + BuildMI(MBB, I, DL, get(Hexagon::L2_loadri_io), DestReg) + .addFrameIndex(FI).addImm(0).addMemOperand(MMO); + } else if (RC == &Hexagon::DoubleRegsRegClass) { + BuildMI(MBB, I, DL, get(Hexagon::L2_loadrd_io), DestReg) + .addFrameIndex(FI).addImm(0).addMemOperand(MMO); + } else if (RC == &Hexagon::PredRegsRegClass) { + BuildMI(MBB, I, DL, get(Hexagon::LDriw_pred), DestReg) + 
.addFrameIndex(FI).addImm(0).addMemOperand(MMO); + } else { + llvm_unreachable("Can't store this register to stack slot"); + } +} + + +/// expandPostRAPseudo - This function is called for all pseudo instructions +/// that remain after register allocation. Many pseudo instructions are +/// created to help register allocation. This is the place to convert them +/// into real instructions. The target can edit MI in place, or it can insert +/// new instructions and erase MI. The function should return true if +/// anything was changed. +bool HexagonInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) + const { + const HexagonRegisterInfo &HRI = getRegisterInfo(); + MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); + MachineBasicBlock &MBB = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + unsigned Opc = MI->getOpcode(); + const unsigned VecOffset = 1; + bool Is128B = false; + + switch (Opc) { + case Hexagon::ALIGNA: + BuildMI(MBB, MI, DL, get(Hexagon::A2_andir), MI->getOperand(0).getReg()) + .addReg(HRI.getFrameRegister()) + .addImm(-MI->getOperand(1).getImm()); + MBB.erase(MI); + return true; + case Hexagon::HEXAGON_V6_vassignp_128B: + case Hexagon::HEXAGON_V6_vassignp: { + unsigned SrcReg = MI->getOperand(1).getReg(); + unsigned DstReg = MI->getOperand(0).getReg(); + if (SrcReg != DstReg) + copyPhysReg(MBB, MI, DL, DstReg, SrcReg, MI->getOperand(1).isKill()); + MBB.erase(MI); + return true; + } + case Hexagon::HEXAGON_V6_lo_128B: + case Hexagon::HEXAGON_V6_lo: { + unsigned SrcReg = MI->getOperand(1).getReg(); + unsigned DstReg = MI->getOperand(0).getReg(); + unsigned SrcSubLo = HRI.getSubReg(SrcReg, Hexagon::subreg_loreg); + copyPhysReg(MBB, MI, DL, DstReg, SrcSubLo, MI->getOperand(1).isKill()); + MBB.erase(MI); + MRI.clearKillFlags(SrcSubLo); + return true; + } + case Hexagon::HEXAGON_V6_hi_128B: + case Hexagon::HEXAGON_V6_hi: { + unsigned SrcReg = MI->getOperand(1).getReg(); + unsigned DstReg = MI->getOperand(0).getReg(); + unsigned SrcSubHi = HRI.getSubReg(SrcReg, Hexagon::subreg_hireg); + copyPhysReg(MBB, MI, DL, DstReg, SrcSubHi, MI->getOperand(1).isKill()); + MBB.erase(MI); + MRI.clearKillFlags(SrcSubHi); + return true; + } + case Hexagon::STrivv_indexed_128B: + Is128B = true; + case Hexagon::STrivv_indexed: { + unsigned SrcReg = MI->getOperand(2).getReg(); + unsigned SrcSubHi = HRI.getSubReg(SrcReg, Hexagon::subreg_hireg); + unsigned SrcSubLo = HRI.getSubReg(SrcReg, Hexagon::subreg_loreg); + unsigned NewOpcd = Is128B ? Hexagon::V6_vS32b_ai_128B + : Hexagon::V6_vS32b_ai; + unsigned Offset = Is128B ? VecOffset << 7 : VecOffset << 6; + MachineInstr *MI1New = BuildMI(MBB, MI, DL, get(NewOpcd)) + .addOperand(MI->getOperand(0)) + .addImm(MI->getOperand(1).getImm()) + .addReg(SrcSubLo) + .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + MI1New->getOperand(0).setIsKill(false); + BuildMI(MBB, MI, DL, get(NewOpcd)) + .addOperand(MI->getOperand(0)) + // The Vectors are indexed in multiples of vector size. + .addImm(MI->getOperand(1).getImm()+Offset) + .addReg(SrcSubHi) + .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + MBB.erase(MI); + return true; + } + case Hexagon::LDrivv_pseudo_V6_128B: + case Hexagon::LDrivv_indexed_128B: + Is128B = true; + case Hexagon::LDrivv_pseudo_V6: + case Hexagon::LDrivv_indexed: { + unsigned NewOpcd = Is128B ? Hexagon::V6_vL32b_ai_128B + : Hexagon::V6_vL32b_ai; + unsigned DstReg = MI->getOperand(0).getReg(); + unsigned Offset = Is128B ? 
VecOffset << 7 : VecOffset << 6; + MachineInstr *MI1New = + BuildMI(MBB, MI, DL, get(NewOpcd), + HRI.getSubReg(DstReg, Hexagon::subreg_loreg)) + .addOperand(MI->getOperand(1)) + .addImm(MI->getOperand(2).getImm()); + MI1New->getOperand(1).setIsKill(false); + BuildMI(MBB, MI, DL, get(NewOpcd), + HRI.getSubReg(DstReg, Hexagon::subreg_hireg)) + .addOperand(MI->getOperand(1)) + // The Vectors are indexed in multiples of vector size. + .addImm(MI->getOperand(2).getImm() + Offset) + .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + MBB.erase(MI); + return true; + } + case Hexagon::LDriv_pseudo_V6_128B: + Is128B = true; + case Hexagon::LDriv_pseudo_V6: { + unsigned DstReg = MI->getOperand(0).getReg(); + unsigned NewOpc = Is128B ? Hexagon::V6_vL32b_ai_128B + : Hexagon::V6_vL32b_ai; + int32_t Off = MI->getOperand(2).getImm(); + int32_t Idx = Off; + BuildMI(MBB, MI, DL, get(NewOpc), DstReg) + .addOperand(MI->getOperand(1)) + .addImm(Idx) + .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + MBB.erase(MI); + return true; + } + case Hexagon::STriv_pseudo_V6_128B: + Is128B = true; + case Hexagon::STriv_pseudo_V6: { + unsigned NewOpc = Is128B ? Hexagon::V6_vS32b_ai_128B + : Hexagon::V6_vS32b_ai; + int32_t Off = MI->getOperand(1).getImm(); + int32_t Idx = Is128B ? (Off >> 7) : (Off >> 6); + BuildMI(MBB, MI, DL, get(NewOpc)) + .addOperand(MI->getOperand(0)) + .addImm(Idx) + .addOperand(MI->getOperand(2)) + .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + MBB.erase(MI); + return true; + } + case Hexagon::TFR_PdTrue: { + unsigned Reg = MI->getOperand(0).getReg(); + BuildMI(MBB, MI, DL, get(Hexagon::C2_orn), Reg) + .addReg(Reg, RegState::Undef) + .addReg(Reg, RegState::Undef); + MBB.erase(MI); + return true; + } + case Hexagon::TFR_PdFalse: { + unsigned Reg = MI->getOperand(0).getReg(); + BuildMI(MBB, MI, DL, get(Hexagon::C2_andn), Reg) + .addReg(Reg, RegState::Undef) + .addReg(Reg, RegState::Undef); + MBB.erase(MI); + return true; + } + case Hexagon::VMULW: { + // Expand a 64-bit vector multiply into 2 32-bit scalar multiplies. + unsigned DstReg = MI->getOperand(0).getReg(); + unsigned Src1Reg = MI->getOperand(1).getReg(); + unsigned Src2Reg = MI->getOperand(2).getReg(); + unsigned Src1SubHi = HRI.getSubReg(Src1Reg, Hexagon::subreg_hireg); + unsigned Src1SubLo = HRI.getSubReg(Src1Reg, Hexagon::subreg_loreg); + unsigned Src2SubHi = HRI.getSubReg(Src2Reg, Hexagon::subreg_hireg); + unsigned Src2SubLo = HRI.getSubReg(Src2Reg, Hexagon::subreg_loreg); + BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_mpyi), + HRI.getSubReg(DstReg, Hexagon::subreg_hireg)).addReg(Src1SubHi) + .addReg(Src2SubHi); + BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_mpyi), + HRI.getSubReg(DstReg, Hexagon::subreg_loreg)).addReg(Src1SubLo) + .addReg(Src2SubLo); + MBB.erase(MI); + MRI.clearKillFlags(Src1SubHi); + MRI.clearKillFlags(Src1SubLo); + MRI.clearKillFlags(Src2SubHi); + MRI.clearKillFlags(Src2SubLo); + return true; + } + case Hexagon::VMULW_ACC: { + // Expand 64-bit vector multiply with addition into 2 scalar multiplies. 
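In scalar terms, the VMULW expansion above and the VMULW_ACC expansion below each work on the two independent 32-bit lanes of a 64-bit register pair; roughly, per lane (variable names are hypothetical):

  uint32_t mul_lane = a * b;       // VMULW lane:     one M2_mpyi per half
  uint32_t mac_lane = acc + a * b; // VMULW_ACC lane: one M2_maci per half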
+ unsigned DstReg = MI->getOperand(0).getReg(); + unsigned Src1Reg = MI->getOperand(1).getReg(); + unsigned Src2Reg = MI->getOperand(2).getReg(); + unsigned Src3Reg = MI->getOperand(3).getReg(); + unsigned Src1SubHi = HRI.getSubReg(Src1Reg, Hexagon::subreg_hireg); + unsigned Src1SubLo = HRI.getSubReg(Src1Reg, Hexagon::subreg_loreg); + unsigned Src2SubHi = HRI.getSubReg(Src2Reg, Hexagon::subreg_hireg); + unsigned Src2SubLo = HRI.getSubReg(Src2Reg, Hexagon::subreg_loreg); + unsigned Src3SubHi = HRI.getSubReg(Src3Reg, Hexagon::subreg_hireg); + unsigned Src3SubLo = HRI.getSubReg(Src3Reg, Hexagon::subreg_loreg); + BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_maci), + HRI.getSubReg(DstReg, Hexagon::subreg_hireg)).addReg(Src1SubHi) + .addReg(Src2SubHi).addReg(Src3SubHi); + BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_maci), + HRI.getSubReg(DstReg, Hexagon::subreg_loreg)).addReg(Src1SubLo) + .addReg(Src2SubLo).addReg(Src3SubLo); + MBB.erase(MI); + MRI.clearKillFlags(Src1SubHi); + MRI.clearKillFlags(Src1SubLo); + MRI.clearKillFlags(Src2SubHi); + MRI.clearKillFlags(Src2SubLo); + MRI.clearKillFlags(Src3SubHi); + MRI.clearKillFlags(Src3SubLo); + return true; + } + case Hexagon::MUX64_rr: { + const MachineOperand &Op0 = MI->getOperand(0); + const MachineOperand &Op1 = MI->getOperand(1); + const MachineOperand &Op2 = MI->getOperand(2); + const MachineOperand &Op3 = MI->getOperand(3); + unsigned Rd = Op0.getReg(); + unsigned Pu = Op1.getReg(); + unsigned Rs = Op2.getReg(); + unsigned Rt = Op3.getReg(); + DebugLoc DL = MI->getDebugLoc(); + unsigned K1 = getKillRegState(Op1.isKill()); + unsigned K2 = getKillRegState(Op2.isKill()); + unsigned K3 = getKillRegState(Op3.isKill()); + if (Rd != Rs) + BuildMI(MBB, MI, DL, get(Hexagon::A2_tfrpt), Rd) + .addReg(Pu, (Rd == Rt) ? K1 : 0) + .addReg(Rs, K2); + if (Rd != Rt) + BuildMI(MBB, MI, DL, get(Hexagon::A2_tfrpf), Rd) + .addReg(Pu, K1) + .addReg(Rt, K3); + MBB.erase(MI); + return true; + } + case Hexagon::TCRETURNi: + MI->setDesc(get(Hexagon::J2_jump)); + return true; + case Hexagon::TCRETURNr: + MI->setDesc(get(Hexagon::J2_jumpr)); + return true; + case Hexagon::TFRI_f: + case Hexagon::TFRI_cPt_f: + case Hexagon::TFRI_cNotPt_f: { + unsigned Opx = (Opc == Hexagon::TFRI_f) ? 1 : 2; + APFloat FVal = MI->getOperand(Opx).getFPImm()->getValueAPF(); + APInt IVal = FVal.bitcastToAPInt(); + MI->RemoveOperand(Opx); + unsigned NewOpc = (Opc == Hexagon::TFRI_f) ? Hexagon::A2_tfrsi : + (Opc == Hexagon::TFRI_cPt_f) ? Hexagon::C2_cmoveit : + Hexagon::C2_cmoveif; + MI->setDesc(get(NewOpc)); + MI->addOperand(MachineOperand::CreateImm(IVal.getZExtValue())); + return true; + } + } + + return false; +} + + +// We indicate that we want to reverse the branch by +// inserting the reversed branching opcode. +bool HexagonInstrInfo::ReverseBranchCondition( + SmallVectorImpl<MachineOperand> &Cond) const { + if (Cond.empty()) + return true; + assert(Cond[0].isImm() && "First entry in the cond vector not imm-val"); + unsigned opcode = Cond[0].getImm(); + //unsigned temp; + assert(get(opcode).isBranch() && "Should be a branching condition."); + if (isEndLoopN(opcode)) + return true; + unsigned NewOpcode = getInvertedPredicatedOpcode(opcode); + Cond[0].setImm(NewOpcode); + return false; +} + + +void HexagonInstrInfo::insertNoop(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const { + DebugLoc DL; + BuildMI(MBB, MI, DL, get(Hexagon::A2_nop)); +} + + +// Returns true if an instruction is predicated irrespective of the predicate +// sense. 
For example, all of the following will return true. +// if (p0) R1 = add(R2, R3) +// if (!p0) R1 = add(R2, R3) +// if (p0.new) R1 = add(R2, R3) +// if (!p0.new) R1 = add(R2, R3) +// Note: New-value stores are not included here as in the current +// implementation, we don't need to check their predicate sense. +bool HexagonInstrInfo::isPredicated(const MachineInstr *MI) const { + const uint64_t F = MI->getDesc().TSFlags; + return (F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask; +} + + +bool HexagonInstrInfo::PredicateInstruction(MachineInstr *MI, + ArrayRef<MachineOperand> Cond) const { + if (Cond.empty() || isNewValueJump(Cond[0].getImm()) || + isEndLoopN(Cond[0].getImm())) { + DEBUG(dbgs() << "\nCannot predicate:"; MI->dump();); + return false; + } + int Opc = MI->getOpcode(); + assert (isPredicable(MI) && "Expected predicable instruction"); + bool invertJump = predOpcodeHasNot(Cond); + + // We have to predicate MI "in place", i.e. after this function returns, + // MI will need to be transformed into a predicated form. To avoid com- + // plicated manipulations with the operands (handling tied operands, + // etc.), build a new temporary instruction, then overwrite MI with it. + + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + unsigned PredOpc = getCondOpcode(Opc, invertJump); + MachineInstrBuilder T = BuildMI(B, MI, DL, get(PredOpc)); + unsigned NOp = 0, NumOps = MI->getNumOperands(); + while (NOp < NumOps) { + MachineOperand &Op = MI->getOperand(NOp); + if (!Op.isReg() || !Op.isDef() || Op.isImplicit()) + break; + T.addOperand(Op); + NOp++; + } + + unsigned PredReg, PredRegPos, PredRegFlags; + bool GotPredReg = getPredReg(Cond, PredReg, PredRegPos, PredRegFlags); + (void)GotPredReg; + assert(GotPredReg); + T.addReg(PredReg, PredRegFlags); + while (NOp < NumOps) + T.addOperand(MI->getOperand(NOp++)); + + MI->setDesc(get(PredOpc)); + while (unsigned n = MI->getNumOperands()) + MI->RemoveOperand(n-1); + for (unsigned i = 0, n = T->getNumOperands(); i < n; ++i) + MI->addOperand(T->getOperand(i)); + + MachineBasicBlock::instr_iterator TI = T->getIterator(); + B.erase(TI); + + MachineRegisterInfo &MRI = B.getParent()->getRegInfo(); + MRI.clearKillFlags(PredReg); + return true; +} + + +bool HexagonInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1, + ArrayRef<MachineOperand> Pred2) const { + // TODO: Fix this + return false; +} + + +bool HexagonInstrInfo::DefinesPredicate(MachineInstr *MI, + std::vector<MachineOperand> &Pred) const { + auto &HRI = getRegisterInfo(); + for (unsigned oper = 0; oper < MI->getNumOperands(); ++oper) { + MachineOperand MO = MI->getOperand(oper); + if (MO.isReg() && MO.isDef()) { + const TargetRegisterClass* RC = HRI.getMinimalPhysRegClass(MO.getReg()); + if (RC == &Hexagon::PredRegsRegClass) { + Pred.push_back(MO); + return true; + } + } + } + return false; +} + +bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const { + bool isPred = MI->getDesc().isPredicable(); + + if (!isPred) + return false; + + const int Opc = MI->getOpcode(); + int NumOperands = MI->getNumOperands(); + + // Keep a flag for upto 4 operands in the instructions, to indicate if + // that operand has been constant extended. 
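The per-opcode immediate checks in the switch that follows appear to exist because the predicated form of each instruction has a smaller immediate field than the unpredicated form, so the instruction is only reported predicable when the value already fits. Two illustrative entries (instruction syntax is approximate):

  // S2_storeri_io  memw(Rs+#s11:2) = Rt   predicated form has #u6:2,
  //                hence isShiftedUInt<6,2>: 0 <= imm <= 252 and imm % 4 == 0
  // A2_addi        Rd = add(Rs,#s16)      predicated add has #s8,
  //                hence isInt<8>: -128 <= imm <= 127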
+ bool OpCExtended[4]; + if (NumOperands > 4) + NumOperands = 4; + + for (int i = 0; i < NumOperands; i++) + OpCExtended[i] = (isOperandExtended(MI, i) && isConstExtended(MI)); + + switch(Opc) { + case Hexagon::A2_tfrsi: + return (isOperandExtended(MI, 1) && isConstExtended(MI)) || + isInt<12>(MI->getOperand(1).getImm()); + + case Hexagon::S2_storerd_io: + return isShiftedUInt<6,3>(MI->getOperand(1).getImm()); + + case Hexagon::S2_storeri_io: + case Hexagon::S2_storerinew_io: + return isShiftedUInt<6,2>(MI->getOperand(1).getImm()); + + case Hexagon::S2_storerh_io: + case Hexagon::S2_storerhnew_io: + return isShiftedUInt<6,1>(MI->getOperand(1).getImm()); + + case Hexagon::S2_storerb_io: + case Hexagon::S2_storerbnew_io: + return isUInt<6>(MI->getOperand(1).getImm()); + + case Hexagon::L2_loadrd_io: + return isShiftedUInt<6,3>(MI->getOperand(2).getImm()); + + case Hexagon::L2_loadri_io: + return isShiftedUInt<6,2>(MI->getOperand(2).getImm()); + + case Hexagon::L2_loadrh_io: + case Hexagon::L2_loadruh_io: + return isShiftedUInt<6,1>(MI->getOperand(2).getImm()); + + case Hexagon::L2_loadrb_io: + case Hexagon::L2_loadrub_io: + return isUInt<6>(MI->getOperand(2).getImm()); + + case Hexagon::L2_loadrd_pi: + return isShiftedInt<4,3>(MI->getOperand(3).getImm()); + + case Hexagon::L2_loadri_pi: + return isShiftedInt<4,2>(MI->getOperand(3).getImm()); + + case Hexagon::L2_loadrh_pi: + case Hexagon::L2_loadruh_pi: + return isShiftedInt<4,1>(MI->getOperand(3).getImm()); + + case Hexagon::L2_loadrb_pi: + case Hexagon::L2_loadrub_pi: + return isInt<4>(MI->getOperand(3).getImm()); + + case Hexagon::S4_storeirb_io: + case Hexagon::S4_storeirh_io: + case Hexagon::S4_storeiri_io: + return (OpCExtended[1] || isUInt<6>(MI->getOperand(1).getImm())) && + (OpCExtended[2] || isInt<6>(MI->getOperand(2).getImm())); + + case Hexagon::A2_addi: + return isInt<8>(MI->getOperand(2).getImm()); + + case Hexagon::A2_aslh: + case Hexagon::A2_asrh: + case Hexagon::A2_sxtb: + case Hexagon::A2_sxth: + case Hexagon::A2_zxtb: + case Hexagon::A2_zxth: + return true; + } + + return true; +} + + +bool HexagonInstrInfo::isSchedulingBoundary(const MachineInstr *MI, + const MachineBasicBlock *MBB, const MachineFunction &MF) const { + // Debug info is never a scheduling boundary. It's necessary to be explicit + // due to the special treatment of IT instructions below, otherwise a + // dbg_value followed by an IT will result in the IT instruction being + // considered a scheduling hazard, which is wrong. It should be the actual + // instruction preceding the dbg_value instruction(s), just like it is + // when debug info is not present. + if (MI->isDebugValue()) + return false; + + // Throwing call is a boundary. + if (MI->isCall()) { + // If any of the block's successors is a landing pad, this could be a + // throwing call. + for (auto I : MBB->successors()) + if (I->isEHPad()) + return true; + } + + // Don't mess around with no return calls. + if (MI->getOpcode() == Hexagon::CALLv3nr) + return true; + + // Terminators and labels can't be scheduled around. + if (MI->getDesc().isTerminator() || MI->isPosition()) + return true; + + if (MI->isInlineAsm() && !ScheduleInlineAsm) + return true; + + return false; +} + + +/// Measure the specified inline asm to determine an approximation of its +/// length. +/// Comments (which run till the next SeparatorString or newline) do not +/// count as an instruction. +/// Any other non-whitespace text is considered an instruction, with +/// multiple instructions separated by SeparatorString or newlines. 
+/// Variable-length instructions are not handled here; this function +/// may be overloaded in the target code to do that. +/// Hexagon counts the number of ##'s and adjust for that many +/// constant exenders. +unsigned HexagonInstrInfo::getInlineAsmLength(const char *Str, + const MCAsmInfo &MAI) const { + StringRef AStr(Str); + // Count the number of instructions in the asm. + bool atInsnStart = true; + unsigned Length = 0; + for (; *Str; ++Str) { + if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(), + strlen(MAI.getSeparatorString())) == 0) + atInsnStart = true; + if (atInsnStart && !std::isspace(static_cast<unsigned char>(*Str))) { + Length += MAI.getMaxInstLength(); + atInsnStart = false; + } + if (atInsnStart && strncmp(Str, MAI.getCommentString(), + strlen(MAI.getCommentString())) == 0) + atInsnStart = false; + } + + // Add to size number of constant extenders seen * 4. + StringRef Occ("##"); + Length += AStr.count(Occ)*4; + return Length; +} + + +ScheduleHazardRecognizer* +HexagonInstrInfo::CreateTargetPostRAHazardRecognizer( + const InstrItineraryData *II, const ScheduleDAG *DAG) const { + return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG); +} + + +/// \brief For a comparison instruction, return the source registers in +/// \p SrcReg and \p SrcReg2 if having two register operands, and the value it +/// compares against in CmpValue. Return true if the comparison instruction +/// can be analyzed. +bool HexagonInstrInfo::analyzeCompare(const MachineInstr *MI, + unsigned &SrcReg, unsigned &SrcReg2, int &Mask, int &Value) const { + unsigned Opc = MI->getOpcode(); + + // Set mask and the first source register. + switch (Opc) { + case Hexagon::C2_cmpeq: + case Hexagon::C2_cmpeqp: + case Hexagon::C2_cmpgt: + case Hexagon::C2_cmpgtp: + case Hexagon::C2_cmpgtu: + case Hexagon::C2_cmpgtup: + case Hexagon::C4_cmpneq: + case Hexagon::C4_cmplte: + case Hexagon::C4_cmplteu: + case Hexagon::C2_cmpeqi: + case Hexagon::C2_cmpgti: + case Hexagon::C2_cmpgtui: + case Hexagon::C4_cmpneqi: + case Hexagon::C4_cmplteui: + case Hexagon::C4_cmpltei: + SrcReg = MI->getOperand(1).getReg(); + Mask = ~0; + break; + case Hexagon::A4_cmpbeq: + case Hexagon::A4_cmpbgt: + case Hexagon::A4_cmpbgtu: + case Hexagon::A4_cmpbeqi: + case Hexagon::A4_cmpbgti: + case Hexagon::A4_cmpbgtui: + SrcReg = MI->getOperand(1).getReg(); + Mask = 0xFF; + break; + case Hexagon::A4_cmpheq: + case Hexagon::A4_cmphgt: + case Hexagon::A4_cmphgtu: + case Hexagon::A4_cmpheqi: + case Hexagon::A4_cmphgti: + case Hexagon::A4_cmphgtui: + SrcReg = MI->getOperand(1).getReg(); + Mask = 0xFFFF; + break; + } + + // Set the value/second source register. 
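Before the second switch fills in the immediate or the second register, a sketch of how a caller typically consumes these outputs (CmpMI and TII are hypothetical handles, not defined in this file):

  unsigned SrcReg = 0, SrcReg2 = 0;
  int Mask = 0, Value = 0;
  if (TII->analyzeCompare(CmpMI, SrcReg, SrcReg2, Mask, Value)) {
    // Mask == ~0     : full-width compare (C2_cmp*)
    // Mask == 0xFF   : byte compare       (A4_cmpb*)
    // Mask == 0xFFFF : halfword compare   (A4_cmph*)
    // SrcReg2 == 0   : immediate form; the constant is in Value.
  }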
+ switch (Opc) {
+ case Hexagon::C2_cmpeq:
+ case Hexagon::C2_cmpeqp:
+ case Hexagon::C2_cmpgt:
+ case Hexagon::C2_cmpgtp:
+ case Hexagon::C2_cmpgtu:
+ case Hexagon::C2_cmpgtup:
+ case Hexagon::A4_cmpbeq:
+ case Hexagon::A4_cmpbgt:
+ case Hexagon::A4_cmpbgtu:
+ case Hexagon::A4_cmpheq:
+ case Hexagon::A4_cmphgt:
+ case Hexagon::A4_cmphgtu:
+ case Hexagon::C4_cmpneq:
+ case Hexagon::C4_cmplte:
+ case Hexagon::C4_cmplteu:
+ SrcReg2 = MI->getOperand(2).getReg();
+ return true;
+
+ case Hexagon::C2_cmpeqi:
+ case Hexagon::C2_cmpgtui:
+ case Hexagon::C2_cmpgti:
+ case Hexagon::C4_cmpneqi:
+ case Hexagon::C4_cmplteui:
+ case Hexagon::C4_cmpltei:
+ case Hexagon::A4_cmpbeqi:
+ case Hexagon::A4_cmpbgti:
+ case Hexagon::A4_cmpbgtui:
+ case Hexagon::A4_cmpheqi:
+ case Hexagon::A4_cmphgti:
+ case Hexagon::A4_cmphgtui:
+ SrcReg2 = 0;
+ Value = MI->getOperand(2).getImm();
+ return true;
+ }
+
+ return false;
+}
+
+
+unsigned HexagonInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI, unsigned *PredCost) const {
+ return getInstrTimingClassLatency(ItinData, MI);
+}
+
+
+DFAPacketizer *HexagonInstrInfo::CreateTargetScheduleState(
+ const TargetSubtargetInfo &STI) const {
+ const InstrItineraryData *II = STI.getInstrItineraryData();
+ return static_cast<const HexagonSubtarget&>(STI).createDFAPacketizer(II);
+}
+
+
+// Inspired by this pair:
+// %R13<def> = L2_loadri_io %R29, 136; mem:LD4[FixedStack0]
+// S2_storeri_io %R29, 132, %R1<kill>; flags: mem:ST4[FixedStack1]
+// Currently AA considers the addresses in these instructions to be aliasing.
+bool HexagonInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa,
+ MachineInstr *MIb, AliasAnalysis *AA) const {
+ int OffsetA = 0, OffsetB = 0;
+ unsigned SizeA = 0, SizeB = 0;
+
+ if (MIa->hasUnmodeledSideEffects() || MIb->hasUnmodeledSideEffects() ||
+ MIa->hasOrderedMemoryRef() || MIb->hasOrderedMemoryRef())
+ return false;
+
+ // Instructions that are pure loads, not loads and stores like memops, are
+ // not dependent.
+ if (MIa->mayLoad() && !isMemOp(MIa) && MIb->mayLoad() && !isMemOp(MIb))
+ return true;
+
+ // Get base, offset, and access size in MIa.
+ unsigned BaseRegA = getBaseAndOffset(MIa, OffsetA, SizeA);
+ if (!BaseRegA || !SizeA)
+ return false;
+
+ // Get base, offset, and access size in MIb.
+ unsigned BaseRegB = getBaseAndOffset(MIb, OffsetB, SizeB);
+ if (!BaseRegB || !SizeB)
+ return false;
+
+ if (BaseRegA != BaseRegB)
+ return false;
+
+ // This is a mem access with the same base register and known offsets from it.
+ // Reason about it.
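A worked instance of the check that follows, using the load/store pair quoted in the comment above (both accesses are 4 bytes, based on %R29):

  // OffsetA = 136, SizeA = 4;  OffsetB = 132, SizeB = 4;  same base register.
  // OffsetA > OffsetB and SizeB (4) <= OffsetA - OffsetB (4), so the byte
  // ranges [132, 136) and [136, 140) cannot overlap: the accesses are disjoint.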
+ if (OffsetA > OffsetB) { + uint64_t offDiff = (uint64_t)((int64_t)OffsetA - (int64_t)OffsetB); + return (SizeB <= offDiff); + } else if (OffsetA < OffsetB) { + uint64_t offDiff = (uint64_t)((int64_t)OffsetB - (int64_t)OffsetA); + return (SizeA <= offDiff); + } + + return false; +} + + +unsigned HexagonInstrInfo::createVR(MachineFunction* MF, MVT VT) const { + MachineRegisterInfo &MRI = MF->getRegInfo(); + const TargetRegisterClass *TRC; + if (VT == MVT::i1) { + TRC = &Hexagon::PredRegsRegClass; + } else if (VT == MVT::i32 || VT == MVT::f32) { + TRC = &Hexagon::IntRegsRegClass; + } else if (VT == MVT::i64 || VT == MVT::f64) { + TRC = &Hexagon::DoubleRegsRegClass; + } else { + llvm_unreachable("Cannot handle this register class"); + } + + unsigned NewReg = MRI.createVirtualRegister(TRC); + return NewReg; +} + + +bool HexagonInstrInfo::isAbsoluteSet(const MachineInstr* MI) const { + return (getAddrMode(MI) == HexagonII::AbsoluteSet); +} + + +bool HexagonInstrInfo::isAccumulator(const MachineInstr *MI) const { + const uint64_t F = MI->getDesc().TSFlags; + return((F >> HexagonII::AccumulatorPos) & HexagonII::AccumulatorMask); +} + + +bool HexagonInstrInfo::isComplex(const MachineInstr *MI) const { + const MachineFunction *MF = MI->getParent()->getParent(); + const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); + const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; + + if (!(isTC1(MI)) + && !(QII->isTC2Early(MI)) + && !(MI->getDesc().mayLoad()) + && !(MI->getDesc().mayStore()) + && (MI->getDesc().getOpcode() != Hexagon::S2_allocframe) + && (MI->getDesc().getOpcode() != Hexagon::L2_deallocframe) + && !(QII->isMemOp(MI)) + && !(MI->isBranch()) + && !(MI->isReturn()) + && !MI->isCall()) + return true; + + return false; +} + + +// Return true if the instruction is a compund branch instruction. +bool HexagonInstrInfo::isCompoundBranchInstr(const MachineInstr *MI) const { + return (getType(MI) == HexagonII::TypeCOMPOUND && MI->isBranch()); +} + + +bool HexagonInstrInfo::isCondInst(const MachineInstr *MI) const { + return (MI->isBranch() && isPredicated(MI)) || + isConditionalTransfer(MI) || + isConditionalALU32(MI) || + isConditionalLoad(MI) || + // Predicated stores which don't have a .new on any operands. 
+ (MI->mayStore() && isPredicated(MI) && !isNewValueStore(MI) && + !isPredicatedNew(MI)); +} + + +bool HexagonInstrInfo::isConditionalALU32(const MachineInstr* MI) const { + switch (MI->getOpcode()) { + case Hexagon::A2_paddf: + case Hexagon::A2_paddfnew: + case Hexagon::A2_paddif: + case Hexagon::A2_paddifnew: + case Hexagon::A2_paddit: + case Hexagon::A2_padditnew: + case Hexagon::A2_paddt: + case Hexagon::A2_paddtnew: + case Hexagon::A2_pandf: + case Hexagon::A2_pandfnew: + case Hexagon::A2_pandt: + case Hexagon::A2_pandtnew: + case Hexagon::A2_porf: + case Hexagon::A2_porfnew: + case Hexagon::A2_port: + case Hexagon::A2_portnew: + case Hexagon::A2_psubf: + case Hexagon::A2_psubfnew: + case Hexagon::A2_psubt: + case Hexagon::A2_psubtnew: + case Hexagon::A2_pxorf: + case Hexagon::A2_pxorfnew: + case Hexagon::A2_pxort: + case Hexagon::A2_pxortnew: + case Hexagon::A4_paslhf: + case Hexagon::A4_paslhfnew: + case Hexagon::A4_paslht: + case Hexagon::A4_paslhtnew: + case Hexagon::A4_pasrhf: + case Hexagon::A4_pasrhfnew: + case Hexagon::A4_pasrht: + case Hexagon::A4_pasrhtnew: + case Hexagon::A4_psxtbf: + case Hexagon::A4_psxtbfnew: + case Hexagon::A4_psxtbt: + case Hexagon::A4_psxtbtnew: + case Hexagon::A4_psxthf: + case Hexagon::A4_psxthfnew: + case Hexagon::A4_psxtht: + case Hexagon::A4_psxthtnew: + case Hexagon::A4_pzxtbf: + case Hexagon::A4_pzxtbfnew: + case Hexagon::A4_pzxtbt: + case Hexagon::A4_pzxtbtnew: + case Hexagon::A4_pzxthf: + case Hexagon::A4_pzxthfnew: + case Hexagon::A4_pzxtht: + case Hexagon::A4_pzxthtnew: + case Hexagon::C2_ccombinewf: + case Hexagon::C2_ccombinewt: + return true; + } + return false; +} + + +// FIXME - Function name and it's functionality don't match. +// It should be renamed to hasPredNewOpcode() +bool HexagonInstrInfo::isConditionalLoad(const MachineInstr* MI) const { + if (!MI->getDesc().mayLoad() || !isPredicated(MI)) + return false; + + int PNewOpcode = Hexagon::getPredNewOpcode(MI->getOpcode()); + // Instruction with valid predicated-new opcode can be promoted to .new. + return PNewOpcode >= 0; +} + + +// Returns true if an instruction is a conditional store. +// +// Note: It doesn't include conditional new-value stores as they can't be +// converted to .new predicate. +bool HexagonInstrInfo::isConditionalStore(const MachineInstr* MI) const { + switch (MI->getOpcode()) { + default: return false; + case Hexagon::S4_storeirbt_io: + case Hexagon::S4_storeirbf_io: + case Hexagon::S4_pstorerbt_rr: + case Hexagon::S4_pstorerbf_rr: + case Hexagon::S2_pstorerbt_io: + case Hexagon::S2_pstorerbf_io: + case Hexagon::S2_pstorerbt_pi: + case Hexagon::S2_pstorerbf_pi: + case Hexagon::S2_pstorerdt_io: + case Hexagon::S2_pstorerdf_io: + case Hexagon::S4_pstorerdt_rr: + case Hexagon::S4_pstorerdf_rr: + case Hexagon::S2_pstorerdt_pi: + case Hexagon::S2_pstorerdf_pi: + case Hexagon::S2_pstorerht_io: + case Hexagon::S2_pstorerhf_io: + case Hexagon::S4_storeirht_io: + case Hexagon::S4_storeirhf_io: + case Hexagon::S4_pstorerht_rr: + case Hexagon::S4_pstorerhf_rr: + case Hexagon::S2_pstorerht_pi: + case Hexagon::S2_pstorerhf_pi: + case Hexagon::S2_pstorerit_io: + case Hexagon::S2_pstorerif_io: + case Hexagon::S4_storeirit_io: + case Hexagon::S4_storeirif_io: + case Hexagon::S4_pstorerit_rr: + case Hexagon::S4_pstorerif_rr: + case Hexagon::S2_pstorerit_pi: + case Hexagon::S2_pstorerif_pi: + + // V4 global address store before promoting to dot new. 
+ case Hexagon::S4_pstorerdt_abs: + case Hexagon::S4_pstorerdf_abs: + case Hexagon::S4_pstorerbt_abs: + case Hexagon::S4_pstorerbf_abs: + case Hexagon::S4_pstorerht_abs: + case Hexagon::S4_pstorerhf_abs: + case Hexagon::S4_pstorerit_abs: + case Hexagon::S4_pstorerif_abs: + return true; + + // Predicated new value stores (i.e. if (p0) memw(..)=r0.new) are excluded + // from the "Conditional Store" list. Because a predicated new value store + // would NOT be promoted to a double dot new store. + // This function returns yes for those stores that are predicated but not + // yet promoted to predicate dot new instructions. + } +} + + +bool HexagonInstrInfo::isConditionalTransfer(const MachineInstr *MI) const { + switch (MI->getOpcode()) { + case Hexagon::A2_tfrt: + case Hexagon::A2_tfrf: + case Hexagon::C2_cmoveit: + case Hexagon::C2_cmoveif: + case Hexagon::A2_tfrtnew: + case Hexagon::A2_tfrfnew: + case Hexagon::C2_cmovenewit: + case Hexagon::C2_cmovenewif: + case Hexagon::A2_tfrpt: + case Hexagon::A2_tfrpf: + return true; + + default: + return false; + } + return false; +} + + +// TODO: In order to have isExtendable for fpimm/f32Ext, we need to handle +// isFPImm and later getFPImm as well. +bool HexagonInstrInfo::isConstExtended(const MachineInstr *MI) const { + const uint64_t F = MI->getDesc().TSFlags; + unsigned isExtended = (F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask; + if (isExtended) // Instruction must be extended. + return true; + + unsigned isExtendable = + (F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask; + if (!isExtendable) + return false; + + if (MI->isCall()) + return false; + + short ExtOpNum = getCExtOpNum(MI); + const MachineOperand &MO = MI->getOperand(ExtOpNum); + // Use MO operand flags to determine if MO + // has the HMOTF_ConstExtended flag set. + if (MO.getTargetFlags() && HexagonII::HMOTF_ConstExtended) + return true; + // If this is a Machine BB address we are talking about, and it is + // not marked as extended, say so. + if (MO.isMBB()) + return false; + + // We could be using an instruction with an extendable immediate and shoehorn + // a global address into it. If it is a global address it will be constant + // extended. We do this for COMBINE. + // We currently only handle isGlobal() because it is the only kind of + // object we are going to end up with here for now. + // In the future we probably should add isSymbol(), etc. + if (MO.isGlobal() || MO.isSymbol() || MO.isBlockAddress() || + MO.isJTI() || MO.isCPI()) + return true; + + // If the extendable operand is not 'Immediate' type, the instruction should + // have 'isExtended' flag set. + assert(MO.isImm() && "Extendable operand must be Immediate type"); + + int MinValue = getMinValue(MI); + int MaxValue = getMaxValue(MI); + int ImmValue = MO.getImm(); + + return (ImmValue < MinValue || ImmValue > MaxValue); +} + + +bool HexagonInstrInfo::isDeallocRet(const MachineInstr *MI) const { + switch (MI->getOpcode()) { + case Hexagon::L4_return : + case Hexagon::L4_return_t : + case Hexagon::L4_return_f : + case Hexagon::L4_return_tnew_pnt : + case Hexagon::L4_return_fnew_pnt : + case Hexagon::L4_return_tnew_pt : + case Hexagon::L4_return_fnew_pt : + return true; + } + return false; +} + + +// Return true when ConsMI uses a register defined by ProdMI. 
+bool HexagonInstrInfo::isDependent(const MachineInstr *ProdMI, + const MachineInstr *ConsMI) const { + const MCInstrDesc &ProdMCID = ProdMI->getDesc(); + if (!ProdMCID.getNumDefs()) + return false; + + auto &HRI = getRegisterInfo(); + + SmallVector<unsigned, 4> DefsA; + SmallVector<unsigned, 4> DefsB; + SmallVector<unsigned, 8> UsesA; + SmallVector<unsigned, 8> UsesB; + + parseOperands(ProdMI, DefsA, UsesA); + parseOperands(ConsMI, DefsB, UsesB); + + for (auto &RegA : DefsA) + for (auto &RegB : UsesB) { + // True data dependency. + if (RegA == RegB) + return true; + + if (Hexagon::DoubleRegsRegClass.contains(RegA)) + for (MCSubRegIterator SubRegs(RegA, &HRI); SubRegs.isValid(); ++SubRegs) + if (RegB == *SubRegs) + return true; + + if (Hexagon::DoubleRegsRegClass.contains(RegB)) + for (MCSubRegIterator SubRegs(RegB, &HRI); SubRegs.isValid(); ++SubRegs) + if (RegA == *SubRegs) + return true; + } + + return false; +} + + +// Returns true if the instruction is alread a .cur. +bool HexagonInstrInfo::isDotCurInst(const MachineInstr* MI) const { + switch (MI->getOpcode()) { + case Hexagon::V6_vL32b_cur_pi: + case Hexagon::V6_vL32b_cur_ai: + case Hexagon::V6_vL32b_cur_pi_128B: + case Hexagon::V6_vL32b_cur_ai_128B: + return true; + } + return false; +} + + +// Returns true, if any one of the operands is a dot new +// insn, whether it is predicated dot new or register dot new. +bool HexagonInstrInfo::isDotNewInst(const MachineInstr* MI) const { + if (isNewValueInst(MI) || + (isPredicated(MI) && isPredicatedNew(MI))) + return true; + + return false; +} + + +/// Symmetrical. See if these two instructions are fit for duplex pair. +bool HexagonInstrInfo::isDuplexPair(const MachineInstr *MIa, + const MachineInstr *MIb) const { + HexagonII::SubInstructionGroup MIaG = getDuplexCandidateGroup(MIa); + HexagonII::SubInstructionGroup MIbG = getDuplexCandidateGroup(MIb); + return (isDuplexPairMatch(MIaG, MIbG) || isDuplexPairMatch(MIbG, MIaG)); +} + + +bool HexagonInstrInfo::isEarlySourceInstr(const MachineInstr *MI) const { + if (!MI) + return false; + + if (MI->mayLoad() || MI->mayStore() || MI->isCompare()) + return true; + + // Multiply + unsigned SchedClass = MI->getDesc().getSchedClass(); + if (SchedClass == Hexagon::Sched::M_tc_3or4x_SLOT23) + return true; + return false; +} + + +bool HexagonInstrInfo::isEndLoopN(unsigned Opcode) const { + return (Opcode == Hexagon::ENDLOOP0 || + Opcode == Hexagon::ENDLOOP1); +} + + +bool HexagonInstrInfo::isExpr(unsigned OpType) const { + switch(OpType) { + case MachineOperand::MO_MachineBasicBlock: + case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_ExternalSymbol: + case MachineOperand::MO_JumpTableIndex: + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_BlockAddress: + return true; + default: + return false; + } +} + + +bool HexagonInstrInfo::isExtendable(const MachineInstr *MI) const { + const MCInstrDesc &MID = MI->getDesc(); + const uint64_t F = MID.TSFlags; + if ((F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask) + return true; + + // TODO: This is largely obsolete now. Will need to be removed + // in consecutive patches. + switch(MI->getOpcode()) { + // TFR_FI Remains a special case. + case Hexagon::TFR_FI: + return true; + default: + return false; + } + return false; +} + + +// This returns true in two cases: +// - The OP code itself indicates that this is an extended instruction. +// - One of MOs has been marked with HMOTF_ConstExtended flag. 
+bool HexagonInstrInfo::isExtended(const MachineInstr *MI) const {
+ // First check if this is permanently extended op code.
+ const uint64_t F = MI->getDesc().TSFlags;
+ if ((F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask)
+ return true;
+ // Use MO operand flags to determine if one of MI's operands
+ // has the HMOTF_ConstExtended flag set.
+ for (MachineInstr::const_mop_iterator I = MI->operands_begin(),
+ E = MI->operands_end(); I != E; ++I) {
+ if (I->getTargetFlags() & HexagonII::HMOTF_ConstExtended)
+ return true;
+ }
+ return false;
+}
+
+
+bool HexagonInstrInfo::isFloat(const MachineInstr *MI) const {
+ unsigned Opcode = MI->getOpcode();
+ const uint64_t F = get(Opcode).TSFlags;
+ return (F >> HexagonII::FPPos) & HexagonII::FPMask;
+}
+
+
+// No V60 HVX VMEM with A_INDIRECT.
+bool HexagonInstrInfo::isHVXMemWithAIndirect(const MachineInstr *I,
+ const MachineInstr *J) const {
+ if (!isV60VectorInstruction(I))
+ return false;
+ if (!I->mayLoad() && !I->mayStore())
+ return false;
+ return J->isIndirectBranch() || isIndirectCall(J) || isIndirectL4Return(J);
+}
+
+
+bool HexagonInstrInfo::isIndirectCall(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case Hexagon::J2_callr :
+ case Hexagon::J2_callrf :
+ case Hexagon::J2_callrt :
+ return true;
+ }
+ return false;
+}
+
+
+bool HexagonInstrInfo::isIndirectL4Return(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case Hexagon::L4_return :
+ case Hexagon::L4_return_t :
+ case Hexagon::L4_return_f :
+ case Hexagon::L4_return_fnew_pnt :
+ case Hexagon::L4_return_fnew_pt :
+ case Hexagon::L4_return_tnew_pnt :
+ case Hexagon::L4_return_tnew_pt :
+ return true;
+ }
+ return false;
+}
+
+
+bool HexagonInstrInfo::isJumpR(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case Hexagon::J2_jumpr :
+ case Hexagon::J2_jumprt :
+ case Hexagon::J2_jumprf :
+ case Hexagon::J2_jumprtnewpt :
+ case Hexagon::J2_jumprfnewpt :
+ case Hexagon::J2_jumprtnew :
+ case Hexagon::J2_jumprfnew :
+ return true;
+ }
+ return false;
+}
+
+
+// Return true if a given MI can accommodate the given offset.
+// Use an absolute estimate as opposed to the exact number.
+// TODO: This will need to be changed to use MC level
+// definition of instruction extendable field size.
+bool HexagonInstrInfo::isJumpWithinBranchRange(const MachineInstr *MI,
+ unsigned offset) const {
+ // This selection of jump instructions matches what AnalyzeBranch can
+ // parse, plus NVJ.
+ if (isNewValueJump(MI)) // r9:2
+ return isInt<11>(offset);
+
+ switch (MI->getOpcode()) {
+ // Still missing Jump to address condition on register value.
+ default:
+ return false;
+ case Hexagon::J2_jump: // bits<24> dst; // r22:2
+ case Hexagon::J2_call:
+ case Hexagon::CALLv3nr:
+ return isInt<24>(offset);
+ case Hexagon::J2_jumpt: //bits<17> dst; // r15:2
+ case Hexagon::J2_jumpf:
+ case Hexagon::J2_jumptnew:
+ case Hexagon::J2_jumptnewpt:
+ case Hexagon::J2_jumpfnew:
+ case Hexagon::J2_jumpfnewpt:
+ case Hexagon::J2_callt:
+ case Hexagon::J2_callf:
+ return isInt<17>(offset);
+ case Hexagon::J2_loop0i:
+ case Hexagon::J2_loop0iext:
+ case Hexagon::J2_loop0r:
+ case Hexagon::J2_loop0rext:
+ case Hexagon::J2_loop1i:
+ case Hexagon::J2_loop1iext:
+ case Hexagon::J2_loop1r:
+ case Hexagon::J2_loop1rext:
+ return isInt<9>(offset);
+ // TODO: Add all the compound branches here. Can we do this in Relation model?
+ case Hexagon::J4_cmpeqi_tp0_jump_nt: + case Hexagon::J4_cmpeqi_tp1_jump_nt: + return isInt<11>(offset); + } +} + + +bool HexagonInstrInfo::isLateInstrFeedsEarlyInstr(const MachineInstr *LRMI, + const MachineInstr *ESMI) const { + if (!LRMI || !ESMI) + return false; + + bool isLate = isLateResultInstr(LRMI); + bool isEarly = isEarlySourceInstr(ESMI); + + DEBUG(dbgs() << "V60" << (isLate ? "-LR " : " -- ")); + DEBUG(LRMI->dump()); + DEBUG(dbgs() << "V60" << (isEarly ? "-ES " : " -- ")); + DEBUG(ESMI->dump()); + + if (isLate && isEarly) { + DEBUG(dbgs() << "++Is Late Result feeding Early Source\n"); + return true; + } + + return false; +} + + +bool HexagonInstrInfo::isLateResultInstr(const MachineInstr *MI) const { + if (!MI) + return false; + + switch (MI->getOpcode()) { + case TargetOpcode::EXTRACT_SUBREG: + case TargetOpcode::INSERT_SUBREG: + case TargetOpcode::SUBREG_TO_REG: + case TargetOpcode::REG_SEQUENCE: + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::COPY: + case TargetOpcode::INLINEASM: + case TargetOpcode::PHI: + return false; + default: + break; + } + + unsigned SchedClass = MI->getDesc().getSchedClass(); + + switch (SchedClass) { + case Hexagon::Sched::ALU32_2op_tc_1_SLOT0123: + case Hexagon::Sched::ALU32_3op_tc_1_SLOT0123: + case Hexagon::Sched::ALU32_ADDI_tc_1_SLOT0123: + case Hexagon::Sched::ALU64_tc_1_SLOT23: + case Hexagon::Sched::EXTENDER_tc_1_SLOT0123: + case Hexagon::Sched::S_2op_tc_1_SLOT23: + case Hexagon::Sched::S_3op_tc_1_SLOT23: + case Hexagon::Sched::V2LDST_tc_ld_SLOT01: + case Hexagon::Sched::V2LDST_tc_st_SLOT0: + case Hexagon::Sched::V2LDST_tc_st_SLOT01: + case Hexagon::Sched::V4LDST_tc_ld_SLOT01: + case Hexagon::Sched::V4LDST_tc_st_SLOT0: + case Hexagon::Sched::V4LDST_tc_st_SLOT01: + return false; + } + return true; +} + + +bool HexagonInstrInfo::isLateSourceInstr(const MachineInstr *MI) const { + if (!MI) + return false; + + // Instructions with iclass A_CVI_VX and attribute A_CVI_LATE uses a multiply + // resource, but all operands can be received late like an ALU instruction. 
+ return MI->getDesc().getSchedClass() == Hexagon::Sched::CVI_VX_LATE; +} + + +bool HexagonInstrInfo::isLoopN(const MachineInstr *MI) const { + unsigned Opcode = MI->getOpcode(); + return Opcode == Hexagon::J2_loop0i || + Opcode == Hexagon::J2_loop0r || + Opcode == Hexagon::J2_loop0iext || + Opcode == Hexagon::J2_loop0rext || + Opcode == Hexagon::J2_loop1i || + Opcode == Hexagon::J2_loop1r || + Opcode == Hexagon::J2_loop1iext || + Opcode == Hexagon::J2_loop1rext; +} + + +bool HexagonInstrInfo::isMemOp(const MachineInstr *MI) const { + switch (MI->getOpcode()) { + default: return false; + case Hexagon::L4_iadd_memopw_io : + case Hexagon::L4_isub_memopw_io : + case Hexagon::L4_add_memopw_io : + case Hexagon::L4_sub_memopw_io : + case Hexagon::L4_and_memopw_io : + case Hexagon::L4_or_memopw_io : + case Hexagon::L4_iadd_memoph_io : + case Hexagon::L4_isub_memoph_io : + case Hexagon::L4_add_memoph_io : + case Hexagon::L4_sub_memoph_io : + case Hexagon::L4_and_memoph_io : + case Hexagon::L4_or_memoph_io : + case Hexagon::L4_iadd_memopb_io : + case Hexagon::L4_isub_memopb_io : + case Hexagon::L4_add_memopb_io : + case Hexagon::L4_sub_memopb_io : + case Hexagon::L4_and_memopb_io : + case Hexagon::L4_or_memopb_io : + case Hexagon::L4_ior_memopb_io: + case Hexagon::L4_ior_memoph_io: + case Hexagon::L4_ior_memopw_io: + case Hexagon::L4_iand_memopb_io: + case Hexagon::L4_iand_memoph_io: + case Hexagon::L4_iand_memopw_io: + return true; + } + return false; +} + + +bool HexagonInstrInfo::isNewValue(const MachineInstr* MI) const { + const uint64_t F = MI->getDesc().TSFlags; + return (F >> HexagonII::NewValuePos) & HexagonII::NewValueMask; +} + + +bool HexagonInstrInfo::isNewValue(unsigned Opcode) const { + const uint64_t F = get(Opcode).TSFlags; + return (F >> HexagonII::NewValuePos) & HexagonII::NewValueMask; +} + + +bool HexagonInstrInfo::isNewValueInst(const MachineInstr *MI) const { + return isNewValueJump(MI) || isNewValueStore(MI); +} + + +bool HexagonInstrInfo::isNewValueJump(const MachineInstr *MI) const { + return isNewValue(MI) && MI->isBranch(); +} + + +bool HexagonInstrInfo::isNewValueJump(unsigned Opcode) const { + return isNewValue(Opcode) && get(Opcode).isBranch() && isPredicated(Opcode); +} + + +bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const { + const uint64_t F = MI->getDesc().TSFlags; + return (F >> HexagonII::NVStorePos) & HexagonII::NVStoreMask; +} + + +bool HexagonInstrInfo::isNewValueStore(unsigned Opcode) const { + const uint64_t F = get(Opcode).TSFlags; + return (F >> HexagonII::NVStorePos) & HexagonII::NVStoreMask; +} + + +// Returns true if a particular operand is extendable for an instruction. 
+bool HexagonInstrInfo::isOperandExtended(const MachineInstr *MI, + unsigned OperandNum) const { + const uint64_t F = MI->getDesc().TSFlags; + return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask) + == OperandNum; +} + + +bool HexagonInstrInfo::isPostIncrement(const MachineInstr* MI) const { + return getAddrMode(MI) == HexagonII::PostInc; +} + + +bool HexagonInstrInfo::isPredicatedNew(const MachineInstr *MI) const { + const uint64_t F = MI->getDesc().TSFlags; + assert(isPredicated(MI)); + return (F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask; +} + + +bool HexagonInstrInfo::isPredicatedNew(unsigned Opcode) const { + const uint64_t F = get(Opcode).TSFlags; + assert(isPredicated(Opcode)); + return (F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask; +} + + +bool HexagonInstrInfo::isPredicatedTrue(const MachineInstr *MI) const { + const uint64_t F = MI->getDesc().TSFlags; + return !((F >> HexagonII::PredicatedFalsePos) & + HexagonII::PredicatedFalseMask); +} + + +bool HexagonInstrInfo::isPredicatedTrue(unsigned Opcode) const { + const uint64_t F = get(Opcode).TSFlags; + // Make sure that the instruction is predicated. + assert((F>> HexagonII::PredicatedPos) & HexagonII::PredicatedMask); + return !((F >> HexagonII::PredicatedFalsePos) & + HexagonII::PredicatedFalseMask); +} + + +bool HexagonInstrInfo::isPredicated(unsigned Opcode) const { + const uint64_t F = get(Opcode).TSFlags; + return (F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask; +} + + +bool HexagonInstrInfo::isPredicateLate(unsigned Opcode) const { + const uint64_t F = get(Opcode).TSFlags; + return ~(F >> HexagonII::PredicateLatePos) & HexagonII::PredicateLateMask; +} + + +bool HexagonInstrInfo::isPredictedTaken(unsigned Opcode) const { + const uint64_t F = get(Opcode).TSFlags; + assert(get(Opcode).isBranch() && + (isPredicatedNew(Opcode) || isNewValue(Opcode))); + return (F >> HexagonII::TakenPos) & HexagonII::TakenMask; +} + + +bool HexagonInstrInfo::isSaveCalleeSavedRegsCall(const MachineInstr *MI) const { + return MI->getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4 || + MI->getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4_EXT; +} + + +bool HexagonInstrInfo::isSolo(const MachineInstr* MI) const { + const uint64_t F = MI->getDesc().TSFlags; + return (F >> HexagonII::SoloPos) & HexagonII::SoloMask; +} + + +bool HexagonInstrInfo::isSpillPredRegOp(const MachineInstr *MI) const { + switch (MI->getOpcode()) { + case Hexagon::STriw_pred : + case Hexagon::LDriw_pred : + return true; + default: + return false; + } +} + + +// Returns true when SU has a timing class TC1. 
+bool HexagonInstrInfo::isTC1(const MachineInstr *MI) const { + unsigned SchedClass = MI->getDesc().getSchedClass(); + switch (SchedClass) { + case Hexagon::Sched::ALU32_2op_tc_1_SLOT0123: + case Hexagon::Sched::ALU32_3op_tc_1_SLOT0123: + case Hexagon::Sched::ALU32_ADDI_tc_1_SLOT0123: + case Hexagon::Sched::ALU64_tc_1_SLOT23: + case Hexagon::Sched::EXTENDER_tc_1_SLOT0123: + //case Hexagon::Sched::M_tc_1_SLOT23: + case Hexagon::Sched::S_2op_tc_1_SLOT23: + case Hexagon::Sched::S_3op_tc_1_SLOT23: + return true; + + default: + return false; + } +} + + +bool HexagonInstrInfo::isTC2(const MachineInstr *MI) const { + unsigned SchedClass = MI->getDesc().getSchedClass(); + switch (SchedClass) { + case Hexagon::Sched::ALU32_3op_tc_2_SLOT0123: + case Hexagon::Sched::ALU64_tc_2_SLOT23: + case Hexagon::Sched::CR_tc_2_SLOT3: + case Hexagon::Sched::M_tc_2_SLOT23: + case Hexagon::Sched::S_2op_tc_2_SLOT23: + case Hexagon::Sched::S_3op_tc_2_SLOT23: + return true; + + default: + return false; + } +} + + +bool HexagonInstrInfo::isTC2Early(const MachineInstr *MI) const { + unsigned SchedClass = MI->getDesc().getSchedClass(); + switch (SchedClass) { + case Hexagon::Sched::ALU32_2op_tc_2early_SLOT0123: + case Hexagon::Sched::ALU32_3op_tc_2early_SLOT0123: + case Hexagon::Sched::ALU64_tc_2early_SLOT23: + case Hexagon::Sched::CR_tc_2early_SLOT23: + case Hexagon::Sched::CR_tc_2early_SLOT3: + case Hexagon::Sched::J_tc_2early_SLOT0123: + case Hexagon::Sched::J_tc_2early_SLOT2: + case Hexagon::Sched::J_tc_2early_SLOT23: + case Hexagon::Sched::S_2op_tc_2early_SLOT23: + case Hexagon::Sched::S_3op_tc_2early_SLOT23: + return true; + + default: + return false; + } +} + + +bool HexagonInstrInfo::isTC4x(const MachineInstr *MI) const { + if (!MI) + return false; + + unsigned SchedClass = MI->getDesc().getSchedClass(); + return SchedClass == Hexagon::Sched::M_tc_3or4x_SLOT23; +} + + +bool HexagonInstrInfo::isV60VectorInstruction(const MachineInstr *MI) const { + if (!MI) + return false; + + const uint64_t V = getType(MI); + return HexagonII::TypeCVI_FIRST <= V && V <= HexagonII::TypeCVI_LAST; +} + + +// Check if the Offset is a valid auto-inc imm by Load/Store Type. +// +bool HexagonInstrInfo::isValidAutoIncImm(const EVT VT, const int Offset) const { + if (VT == MVT::v16i32 || VT == MVT::v8i64 || + VT == MVT::v32i16 || VT == MVT::v64i8) { + return (Offset >= Hexagon_MEMV_AUTOINC_MIN && + Offset <= Hexagon_MEMV_AUTOINC_MAX && + (Offset & 0x3f) == 0); + } + // 128B + if (VT == MVT::v32i32 || VT == MVT::v16i64 || + VT == MVT::v64i16 || VT == MVT::v128i8) { + return (Offset >= Hexagon_MEMV_AUTOINC_MIN_128B && + Offset <= Hexagon_MEMV_AUTOINC_MAX_128B && + (Offset & 0x7f) == 0); + } + if (VT == MVT::i64) { + return (Offset >= Hexagon_MEMD_AUTOINC_MIN && + Offset <= Hexagon_MEMD_AUTOINC_MAX && + (Offset & 0x7) == 0); + } + if (VT == MVT::i32) { + return (Offset >= Hexagon_MEMW_AUTOINC_MIN && + Offset <= Hexagon_MEMW_AUTOINC_MAX && + (Offset & 0x3) == 0); + } + if (VT == MVT::i16) { + return (Offset >= Hexagon_MEMH_AUTOINC_MIN && + Offset <= Hexagon_MEMH_AUTOINC_MAX && + (Offset & 0x1) == 0); + } + if (VT == MVT::i8) { + return (Offset >= Hexagon_MEMB_AUTOINC_MIN && + Offset <= Hexagon_MEMB_AUTOINC_MAX); + } + llvm_unreachable("Not an auto-inc opc!"); +} + + +bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset, + bool Extend) const { + // This function is to check whether the "Offset" is in the correct range of + // the given "Opcode". 
If "Offset" is not in the correct range, "A2_addi" is + // inserted to calculate the final address. Due to this reason, the function + // assumes that the "Offset" has correct alignment. + // We used to assert if the offset was not properly aligned, however, + // there are cases where a misaligned pointer recast can cause this + // problem, and we need to allow for it. The front end warns of such + // misaligns with respect to load size. + + switch (Opcode) { + case Hexagon::STriq_pred_V6: + case Hexagon::STriq_pred_vec_V6: + case Hexagon::STriv_pseudo_V6: + case Hexagon::STrivv_pseudo_V6: + case Hexagon::LDriq_pred_V6: + case Hexagon::LDriq_pred_vec_V6: + case Hexagon::LDriv_pseudo_V6: + case Hexagon::LDrivv_pseudo_V6: + case Hexagon::LDrivv_indexed: + case Hexagon::STrivv_indexed: + case Hexagon::V6_vL32b_ai: + case Hexagon::V6_vS32b_ai: + case Hexagon::V6_vL32Ub_ai: + case Hexagon::V6_vS32Ub_ai: + return (Offset >= Hexagon_MEMV_OFFSET_MIN) && + (Offset <= Hexagon_MEMV_OFFSET_MAX); + + case Hexagon::STriq_pred_V6_128B: + case Hexagon::STriq_pred_vec_V6_128B: + case Hexagon::STriv_pseudo_V6_128B: + case Hexagon::STrivv_pseudo_V6_128B: + case Hexagon::LDriq_pred_V6_128B: + case Hexagon::LDriq_pred_vec_V6_128B: + case Hexagon::LDriv_pseudo_V6_128B: + case Hexagon::LDrivv_pseudo_V6_128B: + case Hexagon::LDrivv_indexed_128B: + case Hexagon::STrivv_indexed_128B: + case Hexagon::V6_vL32b_ai_128B: + case Hexagon::V6_vS32b_ai_128B: + case Hexagon::V6_vL32Ub_ai_128B: + case Hexagon::V6_vS32Ub_ai_128B: + return (Offset >= Hexagon_MEMV_OFFSET_MIN_128B) && + (Offset <= Hexagon_MEMV_OFFSET_MAX_128B); + + case Hexagon::J2_loop0i: + case Hexagon::J2_loop1i: + return isUInt<10>(Offset); + } + + if (Extend) + return true; + + switch (Opcode) { + case Hexagon::L2_loadri_io: + case Hexagon::S2_storeri_io: + return (Offset >= Hexagon_MEMW_OFFSET_MIN) && + (Offset <= Hexagon_MEMW_OFFSET_MAX); + + case Hexagon::L2_loadrd_io: + case Hexagon::S2_storerd_io: + return (Offset >= Hexagon_MEMD_OFFSET_MIN) && + (Offset <= Hexagon_MEMD_OFFSET_MAX); + + case Hexagon::L2_loadrh_io: + case Hexagon::L2_loadruh_io: + case Hexagon::S2_storerh_io: + return (Offset >= Hexagon_MEMH_OFFSET_MIN) && + (Offset <= Hexagon_MEMH_OFFSET_MAX); + + case Hexagon::L2_loadrb_io: + case Hexagon::L2_loadrub_io: + case Hexagon::S2_storerb_io: + return (Offset >= Hexagon_MEMB_OFFSET_MIN) && + (Offset <= Hexagon_MEMB_OFFSET_MAX); + + case Hexagon::A2_addi: + return (Offset >= Hexagon_ADDI_OFFSET_MIN) && + (Offset <= Hexagon_ADDI_OFFSET_MAX); + + case Hexagon::L4_iadd_memopw_io : + case Hexagon::L4_isub_memopw_io : + case Hexagon::L4_add_memopw_io : + case Hexagon::L4_sub_memopw_io : + case Hexagon::L4_and_memopw_io : + case Hexagon::L4_or_memopw_io : + return (0 <= Offset && Offset <= 255); + + case Hexagon::L4_iadd_memoph_io : + case Hexagon::L4_isub_memoph_io : + case Hexagon::L4_add_memoph_io : + case Hexagon::L4_sub_memoph_io : + case Hexagon::L4_and_memoph_io : + case Hexagon::L4_or_memoph_io : + return (0 <= Offset && Offset <= 127); + + case Hexagon::L4_iadd_memopb_io : + case Hexagon::L4_isub_memopb_io : + case Hexagon::L4_add_memopb_io : + case Hexagon::L4_sub_memopb_io : + case Hexagon::L4_and_memopb_io : + case Hexagon::L4_or_memopb_io : + return (0 <= Offset && Offset <= 63); + + // LDri_pred and STriw_pred are pseudo operations, so it has to take offset of + // any size. Later pass knows how to handle it. 
+ case Hexagon::STriw_pred: + case Hexagon::LDriw_pred: + return true; + + case Hexagon::TFR_FI: + case Hexagon::TFR_FIA: + case Hexagon::INLINEASM: + return true; + + case Hexagon::L2_ploadrbt_io: + case Hexagon::L2_ploadrbf_io: + case Hexagon::L2_ploadrubt_io: + case Hexagon::L2_ploadrubf_io: + case Hexagon::S2_pstorerbt_io: + case Hexagon::S2_pstorerbf_io: + case Hexagon::S4_storeirb_io: + case Hexagon::S4_storeirbt_io: + case Hexagon::S4_storeirbf_io: + return isUInt<6>(Offset); + + case Hexagon::L2_ploadrht_io: + case Hexagon::L2_ploadrhf_io: + case Hexagon::L2_ploadruht_io: + case Hexagon::L2_ploadruhf_io: + case Hexagon::S2_pstorerht_io: + case Hexagon::S2_pstorerhf_io: + case Hexagon::S4_storeirh_io: + case Hexagon::S4_storeirht_io: + case Hexagon::S4_storeirhf_io: + return isShiftedUInt<6,1>(Offset); + + case Hexagon::L2_ploadrit_io: + case Hexagon::L2_ploadrif_io: + case Hexagon::S2_pstorerit_io: + case Hexagon::S2_pstorerif_io: + case Hexagon::S4_storeiri_io: + case Hexagon::S4_storeirit_io: + case Hexagon::S4_storeirif_io: + return isShiftedUInt<6,2>(Offset); + + case Hexagon::L2_ploadrdt_io: + case Hexagon::L2_ploadrdf_io: + case Hexagon::S2_pstorerdt_io: + case Hexagon::S2_pstorerdf_io: + return isShiftedUInt<6,3>(Offset); + } // switch + + llvm_unreachable("No offset range is defined for this opcode. " + "Please define it in the above switch statement!"); +} + + +bool HexagonInstrInfo::isVecAcc(const MachineInstr *MI) const { + return MI && isV60VectorInstruction(MI) && isAccumulator(MI); +} + + +bool HexagonInstrInfo::isVecALU(const MachineInstr *MI) const { + if (!MI) + return false; + const uint64_t F = get(MI->getOpcode()).TSFlags; + const uint64_t V = ((F >> HexagonII::TypePos) & HexagonII::TypeMask); + return + V == HexagonII::TypeCVI_VA || + V == HexagonII::TypeCVI_VA_DV; +} + + +bool HexagonInstrInfo::isVecUsableNextPacket(const MachineInstr *ProdMI, + const MachineInstr *ConsMI) const { + if (EnableACCForwarding && isVecAcc(ProdMI) && isVecAcc(ConsMI)) + return true; + + if (EnableALUForwarding && (isVecALU(ConsMI) || isLateSourceInstr(ConsMI))) + return true; + + if (mayBeNewStore(ConsMI)) + return true; + + return false; +} + + +/// \brief Can these instructions execute at the same time in a bundle. +bool HexagonInstrInfo::canExecuteInBundle(const MachineInstr *First, + const MachineInstr *Second) const { + if (DisableNVSchedule) + return false; + if (mayBeNewStore(Second)) { + // Make sure the definition of the first instruction is the value being + // stored. + const MachineOperand &Stored = + Second->getOperand(Second->getNumOperands() - 1); + if (!Stored.isReg()) + return false; + for (unsigned i = 0, e = First->getNumOperands(); i < e; ++i) { + const MachineOperand &Op = First->getOperand(i); + if (Op.isReg() && Op.isDef() && Op.getReg() == Stored.getReg()) + return true; + } + } + return false; +} + + +bool HexagonInstrInfo::hasEHLabel(const MachineBasicBlock *B) const { + for (auto &I : *B) + if (I.isEHLabel()) + return true; + return false; +} + + +// Returns true if an instruction can be converted into a non-extended +// equivalent instruction. +bool HexagonInstrInfo::hasNonExtEquivalent(const MachineInstr *MI) const { + short NonExtOpcode; + // Check if the instruction has a register form that uses register in place + // of the extended operand, if so return that as the non-extended form. 
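The addressing-mode conversions selected by the switch below can be pictured on a concrete pair; this is only an illustration of the intent, since the actual rewrite is performed by a later pass:

  // Extended absolute form (constant extender on this instruction): r0 = memw(##some_global)
  // Non-extended equivalent, once the address is in a register:     r0 = memw(r1+#0)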
+ if (Hexagon::getRegForm(MI->getOpcode()) >= 0) + return true; + + if (MI->getDesc().mayLoad() || MI->getDesc().mayStore()) { + // Check addressing mode and retrieve non-ext equivalent instruction. + + switch (getAddrMode(MI)) { + case HexagonII::Absolute : + // Load/store with absolute addressing mode can be converted into + // base+offset mode. + NonExtOpcode = Hexagon::getBaseWithImmOffset(MI->getOpcode()); + break; + case HexagonII::BaseImmOffset : + // Load/store with base+offset addressing mode can be converted into + // base+register offset addressing mode. However left shift operand should + // be set to 0. + NonExtOpcode = Hexagon::getBaseWithRegOffset(MI->getOpcode()); + break; + case HexagonII::BaseLongOffset: + NonExtOpcode = Hexagon::getRegShlForm(MI->getOpcode()); + break; + default: + return false; + } + if (NonExtOpcode < 0) + return false; + return true; + } + return false; +} + + +bool HexagonInstrInfo::hasPseudoInstrPair(const MachineInstr *MI) const { + return Hexagon::getRealHWInstr(MI->getOpcode(), + Hexagon::InstrType_Pseudo) >= 0; +} + + +bool HexagonInstrInfo::hasUncondBranch(const MachineBasicBlock *B) + const { + MachineBasicBlock::const_iterator I = B->getFirstTerminator(), E = B->end(); + while (I != E) { + if (I->isBarrier()) + return true; + ++I; + } + return false; +} + + +// Returns true, if a LD insn can be promoted to a cur load. +bool HexagonInstrInfo::mayBeCurLoad(const MachineInstr *MI) const { + auto &HST = MI->getParent()->getParent()->getSubtarget<HexagonSubtarget>(); + const uint64_t F = MI->getDesc().TSFlags; + return ((F >> HexagonII::mayCVLoadPos) & HexagonII::mayCVLoadMask) && + HST.hasV60TOps(); +} + + +// Returns true, if a ST insn can be promoted to a new-value store. +bool HexagonInstrInfo::mayBeNewStore(const MachineInstr *MI) const { + const uint64_t F = MI->getDesc().TSFlags; + return (F >> HexagonII::mayNVStorePos) & HexagonII::mayNVStoreMask; +} + + +bool HexagonInstrInfo::producesStall(const MachineInstr *ProdMI, + const MachineInstr *ConsMI) const { + // There is no stall when ProdMI is not a V60 vector. + if (!isV60VectorInstruction(ProdMI)) + return false; + + // There is no stall when ProdMI and ConsMI are not dependent. + if (!isDependent(ProdMI, ConsMI)) + return false; + + // When Forward Scheduling is enabled, there is no stall if ProdMI and ConsMI + // are scheduled in consecutive packets. + if (isVecUsableNextPacket(ProdMI, ConsMI)) + return false; + + return true; +} + + +bool HexagonInstrInfo::producesStall(const MachineInstr *MI, + MachineBasicBlock::const_instr_iterator BII) const { + // There is no stall when I is not a V60 vector. + if (!isV60VectorInstruction(MI)) + return false; + + MachineBasicBlock::const_instr_iterator MII = BII; + MachineBasicBlock::const_instr_iterator MIE = MII->getParent()->instr_end(); + + if (!(*MII).isBundle()) { + const MachineInstr *J = &*MII; + if (!isV60VectorInstruction(J)) + return false; + else if (isVecUsableNextPacket(J, MI)) + return false; + return true; + } + + for (++MII; MII != MIE && MII->isInsideBundle(); ++MII) { + const MachineInstr *J = &*MII; + if (producesStall(J, MI)) + return true; + } + return false; +} + + +bool HexagonInstrInfo::predCanBeUsedAsDotNew(const MachineInstr *MI, + unsigned PredReg) const { + for (unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++) { + const MachineOperand &MO = MI->getOperand(opNum); + if (MO.isReg() && MO.isDef() && MO.isImplicit() && (MO.getReg() == PredReg)) + return false; // Predicate register must be explicitly defined. 
+ } + + // Hexagon Programmer's Reference says that decbin, memw_locked, and + // memd_locked cannot be used as .new as well, + // but we don't seem to have these instructions defined. + return MI->getOpcode() != Hexagon::A4_tlbmatch; +} + + +bool HexagonInstrInfo::PredOpcodeHasJMP_c(unsigned Opcode) const { + return (Opcode == Hexagon::J2_jumpt) || + (Opcode == Hexagon::J2_jumpf) || + (Opcode == Hexagon::J2_jumptnew) || + (Opcode == Hexagon::J2_jumpfnew) || + (Opcode == Hexagon::J2_jumptnewpt) || + (Opcode == Hexagon::J2_jumpfnewpt); +} + + +bool HexagonInstrInfo::predOpcodeHasNot(ArrayRef<MachineOperand> Cond) const { + if (Cond.empty() || !isPredicated(Cond[0].getImm())) + return false; + return !isPredicatedTrue(Cond[0].getImm()); +} + + +unsigned HexagonInstrInfo::getAddrMode(const MachineInstr* MI) const { + const uint64_t F = MI->getDesc().TSFlags; + return (F >> HexagonII::AddrModePos) & HexagonII::AddrModeMask; +} + + +// Returns the base register in a memory access (load/store). The offset is +// returned in Offset and the access size is returned in AccessSize. +unsigned HexagonInstrInfo::getBaseAndOffset(const MachineInstr *MI, + int &Offset, unsigned &AccessSize) const { + // Return if it is not a base+offset type instruction or a MemOp. + if (getAddrMode(MI) != HexagonII::BaseImmOffset && + getAddrMode(MI) != HexagonII::BaseLongOffset && + !isMemOp(MI) && !isPostIncrement(MI)) + return 0; + + // Since it is a memory access instruction, getMemAccessSize() should never + // return 0. + assert (getMemAccessSize(MI) && + "BaseImmOffset or BaseLongOffset or MemOp without accessSize"); + + // Return Values of getMemAccessSize() are + // 0 - Checked in the assert above. + // 1, 2, 3, 4 & 7, 8 - The statement below is correct for all these. + // MemAccessSize is represented as 1+log2(N) where N is size in bits. + AccessSize = (1U << (getMemAccessSize(MI) - 1)); + + unsigned basePos = 0, offsetPos = 0; + if (!getBaseAndOffsetPosition(MI, basePos, offsetPos)) + return 0; + + // Post increment updates its EA after the mem access, + // so we need to treat its offset as zero. + if (isPostIncrement(MI)) + Offset = 0; + else { + Offset = MI->getOperand(offsetPos).getImm(); + } + + return MI->getOperand(basePos).getReg(); +} + + +/// Return the position of the base and offset operands for this instruction. +bool HexagonInstrInfo::getBaseAndOffsetPosition(const MachineInstr *MI, + unsigned &BasePos, unsigned &OffsetPos) const { + // Deal with memops first. + if (isMemOp(MI)) { + assert (MI->getOperand(0).isReg() && MI->getOperand(1).isImm() && + "Bad Memop."); + BasePos = 0; + OffsetPos = 1; + } else if (MI->mayStore()) { + BasePos = 0; + OffsetPos = 1; + } else if (MI->mayLoad()) { + BasePos = 1; + OffsetPos = 2; + } else + return false; + + if (isPredicated(MI)) { + BasePos++; + OffsetPos++; + } + if (isPostIncrement(MI)) { + BasePos++; + OffsetPos++; + } + + if (!MI->getOperand(BasePos).isReg() || !MI->getOperand(OffsetPos).isImm()) + return false; + + return true; +} + + +// Inserts branching instructions in reverse order of their occurence. +// e.g. jump_t t1 (i1) +// jump t2 (i2) +// Jumpers = {i2, i1} +SmallVector<MachineInstr*, 2> HexagonInstrInfo::getBranchingInstrs( + MachineBasicBlock& MBB) const { + SmallVector<MachineInstr*, 2> Jumpers; + // If the block has no terminators, it just falls into the block after it. 
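+  // In that case (and for an empty block) the returned list stays empty.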
+ MachineBasicBlock::instr_iterator I = MBB.instr_end(); + if (I == MBB.instr_begin()) + return Jumpers; + + // A basic block may looks like this: + // + // [ insn + // EH_LABEL + // insn + // insn + // insn + // EH_LABEL + // insn ] + // + // It has two succs but does not have a terminator + // Don't know how to handle it. + do { + --I; + if (I->isEHLabel()) + return Jumpers; + } while (I != MBB.instr_begin()); + + I = MBB.instr_end(); + --I; + + while (I->isDebugValue()) { + if (I == MBB.instr_begin()) + return Jumpers; + --I; + } + if (!isUnpredicatedTerminator(&*I)) + return Jumpers; + + // Get the last instruction in the block. + MachineInstr *LastInst = &*I; + Jumpers.push_back(LastInst); + MachineInstr *SecondLastInst = nullptr; + // Find one more terminator if present. + do { + if (&*I != LastInst && !I->isBundle() && isUnpredicatedTerminator(&*I)) { + if (!SecondLastInst) { + SecondLastInst = &*I; + Jumpers.push_back(SecondLastInst); + } else // This is a third branch. + return Jumpers; + } + if (I == MBB.instr_begin()) + break; + --I; + } while (true); + return Jumpers; +} + + +// Returns Operand Index for the constant extended instruction. +unsigned HexagonInstrInfo::getCExtOpNum(const MachineInstr *MI) const { + const uint64_t F = MI->getDesc().TSFlags; + return (F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask; +} + +// See if instruction could potentially be a duplex candidate. +// If so, return its group. Zero otherwise. +HexagonII::CompoundGroup HexagonInstrInfo::getCompoundCandidateGroup( + const MachineInstr *MI) const { + unsigned DstReg, SrcReg, Src1Reg, Src2Reg; + + switch (MI->getOpcode()) { + default: + return HexagonII::HCG_None; + // + // Compound pairs. + // "p0=cmp.eq(Rs16,Rt16); if (p0.new) jump:nt #r9:2" + // "Rd16=#U6 ; jump #r9:2" + // "Rd16=Rs16 ; jump #r9:2" + // + case Hexagon::C2_cmpeq: + case Hexagon::C2_cmpgt: + case Hexagon::C2_cmpgtu: + DstReg = MI->getOperand(0).getReg(); + Src1Reg = MI->getOperand(1).getReg(); + Src2Reg = MI->getOperand(2).getReg(); + if (Hexagon::PredRegsRegClass.contains(DstReg) && + (Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) && + isIntRegForSubInst(Src1Reg) && isIntRegForSubInst(Src2Reg)) + return HexagonII::HCG_A; + break; + case Hexagon::C2_cmpeqi: + case Hexagon::C2_cmpgti: + case Hexagon::C2_cmpgtui: + // P0 = cmp.eq(Rs,#u2) + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (Hexagon::PredRegsRegClass.contains(DstReg) && + (Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) && + isIntRegForSubInst(SrcReg) && MI->getOperand(2).isImm() && + ((isUInt<5>(MI->getOperand(2).getImm())) || + (MI->getOperand(2).getImm() == -1))) + return HexagonII::HCG_A; + break; + case Hexagon::A2_tfr: + // Rd = Rs + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg)) + return HexagonII::HCG_A; + break; + case Hexagon::A2_tfrsi: + // Rd = #u6 + // Do not test for #u6 size since the const is getting extended + // regardless and compound could be formed. 
+ DstReg = MI->getOperand(0).getReg(); + if (isIntRegForSubInst(DstReg)) + return HexagonII::HCG_A; + break; + case Hexagon::S2_tstbit_i: + DstReg = MI->getOperand(0).getReg(); + Src1Reg = MI->getOperand(1).getReg(); + if (Hexagon::PredRegsRegClass.contains(DstReg) && + (Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) && + MI->getOperand(2).isImm() && + isIntRegForSubInst(Src1Reg) && (MI->getOperand(2).getImm() == 0)) + return HexagonII::HCG_A; + break; + // The fact that .new form is used pretty much guarantees + // that predicate register will match. Nevertheless, + // there could be some false positives without additional + // checking. + case Hexagon::J2_jumptnew: + case Hexagon::J2_jumpfnew: + case Hexagon::J2_jumptnewpt: + case Hexagon::J2_jumpfnewpt: + Src1Reg = MI->getOperand(0).getReg(); + if (Hexagon::PredRegsRegClass.contains(Src1Reg) && + (Hexagon::P0 == Src1Reg || Hexagon::P1 == Src1Reg)) + return HexagonII::HCG_B; + break; + // Transfer and jump: + // Rd=#U6 ; jump #r9:2 + // Rd=Rs ; jump #r9:2 + // Do not test for jump range here. + case Hexagon::J2_jump: + case Hexagon::RESTORE_DEALLOC_RET_JMP_V4: + return HexagonII::HCG_C; + break; + } + + return HexagonII::HCG_None; +} + + +// Returns -1 when there is no opcode found. +unsigned HexagonInstrInfo::getCompoundOpcode(const MachineInstr *GA, + const MachineInstr *GB) const { + assert(getCompoundCandidateGroup(GA) == HexagonII::HCG_A); + assert(getCompoundCandidateGroup(GB) == HexagonII::HCG_B); + if ((GA->getOpcode() != Hexagon::C2_cmpeqi) || + (GB->getOpcode() != Hexagon::J2_jumptnew)) + return -1; + unsigned DestReg = GA->getOperand(0).getReg(); + if (!GB->readsRegister(DestReg)) + return -1; + if (DestReg == Hexagon::P0) + return Hexagon::J4_cmpeqi_tp0_jump_nt; + if (DestReg == Hexagon::P1) + return Hexagon::J4_cmpeqi_tp1_jump_nt; + return -1; +} + + +int HexagonInstrInfo::getCondOpcode(int Opc, bool invertPredicate) const { + enum Hexagon::PredSense inPredSense; + inPredSense = invertPredicate ? Hexagon::PredSense_false : + Hexagon::PredSense_true; + int CondOpcode = Hexagon::getPredOpcode(Opc, inPredSense); + if (CondOpcode >= 0) // Valid Conditional opcode/instruction + return CondOpcode; + + // This switch case will be removed once all the instructions have been + // modified to use relation maps. + switch(Opc) { + case Hexagon::TFRI_f: + return !invertPredicate ? Hexagon::TFRI_cPt_f : + Hexagon::TFRI_cNotPt_f; + } + + llvm_unreachable("Unexpected predicable instruction"); +} + + +// Return the cur value instruction for a given store. +int HexagonInstrInfo::getDotCurOp(const MachineInstr* MI) const { + switch (MI->getOpcode()) { + default: llvm_unreachable("Unknown .cur type"); + case Hexagon::V6_vL32b_pi: + return Hexagon::V6_vL32b_cur_pi; + case Hexagon::V6_vL32b_ai: + return Hexagon::V6_vL32b_cur_ai; + //128B + case Hexagon::V6_vL32b_pi_128B: + return Hexagon::V6_vL32b_cur_pi_128B; + case Hexagon::V6_vL32b_ai_128B: + return Hexagon::V6_vL32b_cur_ai_128B; + } + return 0; +} + + + +// The diagram below shows the steps involved in the conversion of a predicated +// store instruction to its .new predicated new-value form. +// +// p.new NV store [ if(p0.new)memw(R0+#0)=R2.new ] +// ^ ^ +// / \ (not OK. it will cause new-value store to be +// / X conditional on p0.new while R2 producer is +// / \ on p0) +// / \. 
+// p.new store p.old NV store +// [if(p0.new)memw(R0+#0)=R2] [if(p0)memw(R0+#0)=R2.new] +// ^ ^ +// \ / +// \ / +// \ / +// p.old store +// [if (p0)memw(R0+#0)=R2] +// +// +// The following set of instructions further explains the scenario where +// conditional new-value store becomes invalid when promoted to .new predicate +// form. +// +// { 1) if (p0) r0 = add(r1, r2) +// 2) p0 = cmp.eq(r3, #0) } +// +// 3) if (p0) memb(r1+#0) = r0 --> this instruction can't be grouped with +// the first two instructions because in instr 1, r0 is conditional on old value +// of p0 but its use in instr 3 is conditional on p0 modified by instr 2 which +// is not valid for new-value stores. +// Predicated new value stores (i.e. if (p0) memw(..)=r0.new) are excluded +// from the "Conditional Store" list. Because a predicated new value store +// would NOT be promoted to a double dot new store. See diagram below: +// This function returns yes for those stores that are predicated but not +// yet promoted to predicate dot new instructions. +// +// +---------------------+ +// /-----| if (p0) memw(..)=r0 |---------\~ +// || +---------------------+ || +// promote || /\ /\ || promote +// || /||\ /||\ || +// \||/ demote || \||/ +// \/ || || \/ +// +-------------------------+ || +-------------------------+ +// | if (p0.new) memw(..)=r0 | || | if (p0) memw(..)=r0.new | +// +-------------------------+ || +-------------------------+ +// || || || +// || demote \||/ +// promote || \/ NOT possible +// || || /\~ +// \||/ || /||\~ +// \/ || || +// +-----------------------------+ +// | if (p0.new) memw(..)=r0.new | +// +-----------------------------+ +// Double Dot New Store +// +// Returns the most basic instruction for the .new predicated instructions and +// new-value stores. +// For example, all of the following instructions will be converted back to the +// same instruction: +// 1) if (p0.new) memw(R0+#0) = R1.new ---> +// 2) if (p0) memw(R0+#0)= R1.new -------> if (p0) memw(R0+#0) = R1 +// 3) if (p0.new) memw(R0+#0) = R1 ---> +// +// To understand the translation of instruction 1 to its original form, consider +// a packet with 3 instructions. +// { p0 = cmp.eq(R0,R1) +// if (p0.new) R2 = add(R3, R4) +// R5 = add (R3, R1) +// } +// if (p0) memw(R5+#0) = R2 <--- trying to include it in the previous packet +// +// This instruction can be part of the previous packet only if both p0 and R2 +// are promoted to .new values. This promotion happens in steps, first +// predicate register is promoted to .new and in the next iteration R2 is +// promoted. Therefore, in case of dependence check failure (due to R5) during +// next iteration, it should be converted back to its most basic form. + + +// Return the new value instruction for a given store. +int HexagonInstrInfo::getDotNewOp(const MachineInstr* MI) const { + int NVOpcode = Hexagon::getNewValueOpcode(MI->getOpcode()); + if (NVOpcode >= 0) // Valid new-value store instruction. 
+ return NVOpcode; + + switch (MI->getOpcode()) { + default: llvm_unreachable("Unknown .new type"); + case Hexagon::S4_storerb_ur: + return Hexagon::S4_storerbnew_ur; + + case Hexagon::S2_storerb_pci: + return Hexagon::S2_storerb_pci; + + case Hexagon::S2_storeri_pci: + return Hexagon::S2_storeri_pci; + + case Hexagon::S2_storerh_pci: + return Hexagon::S2_storerh_pci; + + case Hexagon::S2_storerd_pci: + return Hexagon::S2_storerd_pci; + + case Hexagon::S2_storerf_pci: + return Hexagon::S2_storerf_pci; + + case Hexagon::V6_vS32b_ai: + return Hexagon::V6_vS32b_new_ai; + + case Hexagon::V6_vS32b_pi: + return Hexagon::V6_vS32b_new_pi; + + // 128B + case Hexagon::V6_vS32b_ai_128B: + return Hexagon::V6_vS32b_new_ai_128B; + + case Hexagon::V6_vS32b_pi_128B: + return Hexagon::V6_vS32b_new_pi_128B; + } + return 0; +} + +// Returns the opcode to use when converting MI, which is a conditional jump, +// into a conditional instruction which uses the .new value of the predicate. +// We also use branch probabilities to add a hint to the jump. +int HexagonInstrInfo::getDotNewPredJumpOp(const MachineInstr *MI, + const MachineBranchProbabilityInfo *MBPI) const { + // We assume that block can have at most two successors. + bool taken = false; + const MachineBasicBlock *Src = MI->getParent(); + const MachineOperand *BrTarget = &MI->getOperand(1); + const MachineBasicBlock *Dst = BrTarget->getMBB(); + + const BranchProbability Prediction = MBPI->getEdgeProbability(Src, Dst); + if (Prediction >= BranchProbability(1,2)) + taken = true; + + switch (MI->getOpcode()) { + case Hexagon::J2_jumpt: + return taken ? Hexagon::J2_jumptnewpt : Hexagon::J2_jumptnew; + case Hexagon::J2_jumpf: + return taken ? Hexagon::J2_jumpfnewpt : Hexagon::J2_jumpfnew; + + default: + llvm_unreachable("Unexpected jump instruction."); + } +} + + +// Return .new predicate version for an instruction. +int HexagonInstrInfo::getDotNewPredOp(const MachineInstr *MI, + const MachineBranchProbabilityInfo *MBPI) const { + int NewOpcode = Hexagon::getPredNewOpcode(MI->getOpcode()); + if (NewOpcode >= 0) // Valid predicate new instruction + return NewOpcode; + + switch (MI->getOpcode()) { + // Condtional Jumps + case Hexagon::J2_jumpt: + case Hexagon::J2_jumpf: + return getDotNewPredJumpOp(MI, MBPI); + + default: + assert(0 && "Unknown .new type"); + } + return 0; +} + + +int HexagonInstrInfo::getDotOldOp(const int opc) const { + int NewOp = opc; + if (isPredicated(NewOp) && isPredicatedNew(NewOp)) { // Get predicate old form + NewOp = Hexagon::getPredOldOpcode(NewOp); + assert(NewOp >= 0 && + "Couldn't change predicate new instruction to its old form."); + } + + if (isNewValueStore(NewOp)) { // Convert into non-new-value format + NewOp = Hexagon::getNonNVStore(NewOp); + assert(NewOp >= 0 && "Couldn't change new-value store to its old form."); + } + return NewOp; +} + + +// See if instruction could potentially be a duplex candidate. +// If so, return its group. Zero otherwise. +HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( + const MachineInstr *MI) const { + unsigned DstReg, SrcReg, Src1Reg, Src2Reg; + auto &HRI = getRegisterInfo(); + + switch (MI->getOpcode()) { + default: + return HexagonII::HSIG_None; + // + // Group L1: + // + // Rd = memw(Rs+#u4:2) + // Rd = memub(Rs+#u4:0) + case Hexagon::L2_loadri_io: + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + // Special case this one from Group L2. 
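+    // Word loads relative to the stack pointer get the wider #u5:2 offset and
+    // therefore belong to group L2 rather than L1.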
+ // Rd = memw(r29+#u5:2) + if (isIntRegForSubInst(DstReg)) { + if (Hexagon::IntRegsRegClass.contains(SrcReg) && + HRI.getStackRegister() == SrcReg && + MI->getOperand(2).isImm() && + isShiftedUInt<5,2>(MI->getOperand(2).getImm())) + return HexagonII::HSIG_L2; + // Rd = memw(Rs+#u4:2) + if (isIntRegForSubInst(SrcReg) && + (MI->getOperand(2).isImm() && + isShiftedUInt<4,2>(MI->getOperand(2).getImm()))) + return HexagonII::HSIG_L1; + } + break; + case Hexagon::L2_loadrub_io: + // Rd = memub(Rs+#u4:0) + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg) && + MI->getOperand(2).isImm() && isUInt<4>(MI->getOperand(2).getImm())) + return HexagonII::HSIG_L1; + break; + // + // Group L2: + // + // Rd = memh/memuh(Rs+#u3:1) + // Rd = memb(Rs+#u3:0) + // Rd = memw(r29+#u5:2) - Handled above. + // Rdd = memd(r29+#u5:3) + // deallocframe + // [if ([!]p0[.new])] dealloc_return + // [if ([!]p0[.new])] jumpr r31 + case Hexagon::L2_loadrh_io: + case Hexagon::L2_loadruh_io: + // Rd = memh/memuh(Rs+#u3:1) + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg) && + MI->getOperand(2).isImm() && + isShiftedUInt<3,1>(MI->getOperand(2).getImm())) + return HexagonII::HSIG_L2; + break; + case Hexagon::L2_loadrb_io: + // Rd = memb(Rs+#u3:0) + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg) && + MI->getOperand(2).isImm() && + isUInt<3>(MI->getOperand(2).getImm())) + return HexagonII::HSIG_L2; + break; + case Hexagon::L2_loadrd_io: + // Rdd = memd(r29+#u5:3) + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (isDblRegForSubInst(DstReg, HRI) && + Hexagon::IntRegsRegClass.contains(SrcReg) && + HRI.getStackRegister() == SrcReg && + MI->getOperand(2).isImm() && + isShiftedUInt<5,3>(MI->getOperand(2).getImm())) + return HexagonII::HSIG_L2; + break; + // dealloc_return is not documented in Hexagon Manual, but marked + // with A_SUBINSN attribute in iset_v4classic.py. + case Hexagon::RESTORE_DEALLOC_RET_JMP_V4: + case Hexagon::L4_return: + case Hexagon::L2_deallocframe: + return HexagonII::HSIG_L2; + case Hexagon::EH_RETURN_JMPR: + case Hexagon::JMPret : + // jumpr r31 + // Actual form JMPR %PC<imp-def>, %R31<imp-use>, %R0<imp-use,internal>. 
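+    // Only a plain return through r31 qualifies as an L2 sub-instruction.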
+ DstReg = MI->getOperand(0).getReg(); + if (Hexagon::IntRegsRegClass.contains(DstReg) && (Hexagon::R31 == DstReg)) + return HexagonII::HSIG_L2; + break; + case Hexagon::JMPrett: + case Hexagon::JMPretf: + case Hexagon::JMPrettnewpt: + case Hexagon::JMPretfnewpt : + case Hexagon::JMPrettnew : + case Hexagon::JMPretfnew : + DstReg = MI->getOperand(1).getReg(); + SrcReg = MI->getOperand(0).getReg(); + // [if ([!]p0[.new])] jumpr r31 + if ((Hexagon::PredRegsRegClass.contains(SrcReg) && + (Hexagon::P0 == SrcReg)) && + (Hexagon::IntRegsRegClass.contains(DstReg) && (Hexagon::R31 == DstReg))) + return HexagonII::HSIG_L2; + break; + case Hexagon::L4_return_t : + case Hexagon::L4_return_f : + case Hexagon::L4_return_tnew_pnt : + case Hexagon::L4_return_fnew_pnt : + case Hexagon::L4_return_tnew_pt : + case Hexagon::L4_return_fnew_pt : + // [if ([!]p0[.new])] dealloc_return + SrcReg = MI->getOperand(0).getReg(); + if (Hexagon::PredRegsRegClass.contains(SrcReg) && (Hexagon::P0 == SrcReg)) + return HexagonII::HSIG_L2; + break; + // + // Group S1: + // + // memw(Rs+#u4:2) = Rt + // memb(Rs+#u4:0) = Rt + case Hexagon::S2_storeri_io: + // Special case this one from Group S2. + // memw(r29+#u5:2) = Rt + Src1Reg = MI->getOperand(0).getReg(); + Src2Reg = MI->getOperand(2).getReg(); + if (Hexagon::IntRegsRegClass.contains(Src1Reg) && + isIntRegForSubInst(Src2Reg) && + HRI.getStackRegister() == Src1Reg && MI->getOperand(1).isImm() && + isShiftedUInt<5,2>(MI->getOperand(1).getImm())) + return HexagonII::HSIG_S2; + // memw(Rs+#u4:2) = Rt + if (isIntRegForSubInst(Src1Reg) && isIntRegForSubInst(Src2Reg) && + MI->getOperand(1).isImm() && + isShiftedUInt<4,2>(MI->getOperand(1).getImm())) + return HexagonII::HSIG_S1; + break; + case Hexagon::S2_storerb_io: + // memb(Rs+#u4:0) = Rt + Src1Reg = MI->getOperand(0).getReg(); + Src2Reg = MI->getOperand(2).getReg(); + if (isIntRegForSubInst(Src1Reg) && isIntRegForSubInst(Src2Reg) && + MI->getOperand(1).isImm() && isUInt<4>(MI->getOperand(1).getImm())) + return HexagonII::HSIG_S1; + break; + // + // Group S2: + // + // memh(Rs+#u3:1) = Rt + // memw(r29+#u5:2) = Rt + // memd(r29+#s6:3) = Rtt + // memw(Rs+#u4:2) = #U1 + // memb(Rs+#u4) = #U1 + // allocframe(#u5:3) + case Hexagon::S2_storerh_io: + // memh(Rs+#u3:1) = Rt + Src1Reg = MI->getOperand(0).getReg(); + Src2Reg = MI->getOperand(2).getReg(); + if (isIntRegForSubInst(Src1Reg) && isIntRegForSubInst(Src2Reg) && + MI->getOperand(1).isImm() && + isShiftedUInt<3,1>(MI->getOperand(1).getImm())) + return HexagonII::HSIG_S1; + break; + case Hexagon::S2_storerd_io: + // memd(r29+#s6:3) = Rtt + Src1Reg = MI->getOperand(0).getReg(); + Src2Reg = MI->getOperand(2).getReg(); + if (isDblRegForSubInst(Src2Reg, HRI) && + Hexagon::IntRegsRegClass.contains(Src1Reg) && + HRI.getStackRegister() == Src1Reg && MI->getOperand(1).isImm() && + isShiftedInt<6,3>(MI->getOperand(1).getImm())) + return HexagonII::HSIG_S2; + break; + case Hexagon::S4_storeiri_io: + // memw(Rs+#u4:2) = #U1 + Src1Reg = MI->getOperand(0).getReg(); + if (isIntRegForSubInst(Src1Reg) && MI->getOperand(1).isImm() && + isShiftedUInt<4,2>(MI->getOperand(1).getImm()) && + MI->getOperand(2).isImm() && isUInt<1>(MI->getOperand(2).getImm())) + return HexagonII::HSIG_S2; + break; + case Hexagon::S4_storeirb_io: + // memb(Rs+#u4) = #U1 + Src1Reg = MI->getOperand(0).getReg(); + if (isIntRegForSubInst(Src1Reg) && MI->getOperand(1).isImm() && + isUInt<4>(MI->getOperand(1).getImm()) && MI->getOperand(2).isImm() && + MI->getOperand(2).isImm() && isUInt<1>(MI->getOperand(2).getImm())) + 
return HexagonII::HSIG_S2; + break; + case Hexagon::S2_allocframe: + if (MI->getOperand(0).isImm() && + isShiftedUInt<5,3>(MI->getOperand(0).getImm())) + return HexagonII::HSIG_S1; + break; + // + // Group A: + // + // Rx = add(Rx,#s7) + // Rd = Rs + // Rd = #u6 + // Rd = #-1 + // if ([!]P0[.new]) Rd = #0 + // Rd = add(r29,#u6:2) + // Rx = add(Rx,Rs) + // P0 = cmp.eq(Rs,#u2) + // Rdd = combine(#0,Rs) + // Rdd = combine(Rs,#0) + // Rdd = combine(#u2,#U2) + // Rd = add(Rs,#1) + // Rd = add(Rs,#-1) + // Rd = sxth/sxtb/zxtb/zxth(Rs) + // Rd = and(Rs,#1) + case Hexagon::A2_addi: + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (isIntRegForSubInst(DstReg)) { + // Rd = add(r29,#u6:2) + if (Hexagon::IntRegsRegClass.contains(SrcReg) && + HRI.getStackRegister() == SrcReg && MI->getOperand(2).isImm() && + isShiftedUInt<6,2>(MI->getOperand(2).getImm())) + return HexagonII::HSIG_A; + // Rx = add(Rx,#s7) + if ((DstReg == SrcReg) && MI->getOperand(2).isImm() && + isInt<7>(MI->getOperand(2).getImm())) + return HexagonII::HSIG_A; + // Rd = add(Rs,#1) + // Rd = add(Rs,#-1) + if (isIntRegForSubInst(SrcReg) && MI->getOperand(2).isImm() && + ((MI->getOperand(2).getImm() == 1) || + (MI->getOperand(2).getImm() == -1))) + return HexagonII::HSIG_A; + } + break; + case Hexagon::A2_add: + // Rx = add(Rx,Rs) + DstReg = MI->getOperand(0).getReg(); + Src1Reg = MI->getOperand(1).getReg(); + Src2Reg = MI->getOperand(2).getReg(); + if (isIntRegForSubInst(DstReg) && (DstReg == Src1Reg) && + isIntRegForSubInst(Src2Reg)) + return HexagonII::HSIG_A; + break; + case Hexagon::A2_andir: + // Same as zxtb. + // Rd16=and(Rs16,#255) + // Rd16=and(Rs16,#1) + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg) && + MI->getOperand(2).isImm() && + ((MI->getOperand(2).getImm() == 1) || + (MI->getOperand(2).getImm() == 255))) + return HexagonII::HSIG_A; + break; + case Hexagon::A2_tfr: + // Rd = Rs + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg)) + return HexagonII::HSIG_A; + break; + case Hexagon::A2_tfrsi: + // Rd = #u6 + // Do not test for #u6 size since the const is getting extended + // regardless and compound could be formed. 
+ // Rd = #-1 + DstReg = MI->getOperand(0).getReg(); + if (isIntRegForSubInst(DstReg)) + return HexagonII::HSIG_A; + break; + case Hexagon::C2_cmoveit: + case Hexagon::C2_cmovenewit: + case Hexagon::C2_cmoveif: + case Hexagon::C2_cmovenewif: + // if ([!]P0[.new]) Rd = #0 + // Actual form: + // %R16<def> = C2_cmovenewit %P0<internal>, 0, %R16<imp-use,undef>; + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (isIntRegForSubInst(DstReg) && + Hexagon::PredRegsRegClass.contains(SrcReg) && Hexagon::P0 == SrcReg && + MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) + return HexagonII::HSIG_A; + break; + case Hexagon::C2_cmpeqi: + // P0 = cmp.eq(Rs,#u2) + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (Hexagon::PredRegsRegClass.contains(DstReg) && + Hexagon::P0 == DstReg && isIntRegForSubInst(SrcReg) && + MI->getOperand(2).isImm() && isUInt<2>(MI->getOperand(2).getImm())) + return HexagonII::HSIG_A; + break; + case Hexagon::A2_combineii: + case Hexagon::A4_combineii: + // Rdd = combine(#u2,#U2) + DstReg = MI->getOperand(0).getReg(); + if (isDblRegForSubInst(DstReg, HRI) && + ((MI->getOperand(1).isImm() && isUInt<2>(MI->getOperand(1).getImm())) || + (MI->getOperand(1).isGlobal() && + isUInt<2>(MI->getOperand(1).getOffset()))) && + ((MI->getOperand(2).isImm() && isUInt<2>(MI->getOperand(2).getImm())) || + (MI->getOperand(2).isGlobal() && + isUInt<2>(MI->getOperand(2).getOffset())))) + return HexagonII::HSIG_A; + break; + case Hexagon::A4_combineri: + // Rdd = combine(Rs,#0) + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (isDblRegForSubInst(DstReg, HRI) && isIntRegForSubInst(SrcReg) && + ((MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) || + (MI->getOperand(2).isGlobal() && MI->getOperand(2).getOffset() == 0))) + return HexagonII::HSIG_A; + break; + case Hexagon::A4_combineir: + // Rdd = combine(#0,Rs) + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(2).getReg(); + if (isDblRegForSubInst(DstReg, HRI) && isIntRegForSubInst(SrcReg) && + ((MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0) || + (MI->getOperand(1).isGlobal() && MI->getOperand(1).getOffset() == 0))) + return HexagonII::HSIG_A; + break; + case Hexagon::A2_sxtb: + case Hexagon::A2_sxth: + case Hexagon::A2_zxtb: + case Hexagon::A2_zxth: + // Rd = sxth/sxtb/zxtb/zxth(Rs) + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg)) + return HexagonII::HSIG_A; + break; + } + + return HexagonII::HSIG_None; +} + + +short HexagonInstrInfo::getEquivalentHWInstr(const MachineInstr *MI) const { + return Hexagon::getRealHWInstr(MI->getOpcode(), Hexagon::InstrType_Real); +} + + +// Return first non-debug instruction in the basic block. +MachineInstr *HexagonInstrInfo::getFirstNonDbgInst(MachineBasicBlock *BB) + const { + for (auto MII = BB->instr_begin(), End = BB->instr_end(); MII != End; MII++) { + MachineInstr *MI = &*MII; + if (MI->isDebugValue()) + continue; + return MI; + } + return nullptr; +} + + +unsigned HexagonInstrInfo::getInstrTimingClassLatency( + const InstrItineraryData *ItinData, const MachineInstr *MI) const { + // Default to one cycle for no itinerary. However, an "empty" itinerary may + // still have a MinLatency property, which getStageLatency checks. + if (!ItinData) + return getInstrLatency(ItinData, MI); + + // Get the latency embedded in the itinerary. 
If we're not using timing class
+  // latencies or if we are using BSB scheduling, then restrict the maximum
+  // latency to 1 (that is, either 0 or 1).
+  if (MI->isTransient())
+    return 0;
+  unsigned Latency = ItinData->getStageLatency(MI->getDesc().getSchedClass());
+  if (!EnableTimingClassLatency ||
+      MI->getParent()->getParent()->getSubtarget<HexagonSubtarget>().
+      useBSBScheduling())
+    if (Latency > 1)
+      Latency = 1;
+  return Latency;
+}
+
+
+// Inverts the predication logic.
+// p -> NotP
+// NotP -> P
+bool HexagonInstrInfo::getInvertedPredSense(
+      SmallVectorImpl<MachineOperand> &Cond) const {
+  if (Cond.empty())
+    return false;
+  unsigned Opc = getInvertedPredicatedOpcode(Cond[0].getImm());
+  Cond[0].setImm(Opc);
+  return true;
+}
+
+
+unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const {
+  int InvPredOpcode;
+  InvPredOpcode = isPredicatedTrue(Opc) ? Hexagon::getFalsePredOpcode(Opc)
+                                        : Hexagon::getTruePredOpcode(Opc);
+  if (InvPredOpcode >= 0) // Valid instruction with the inverted predicate.
+    return InvPredOpcode;
+
+  llvm_unreachable("Unexpected predicated instruction");
+}
+
+
+// Returns the max value that doesn't need to be extended.
+int HexagonInstrInfo::getMaxValue(const MachineInstr *MI) const {
+  const uint64_t F = MI->getDesc().TSFlags;
+  unsigned isSigned = (F >> HexagonII::ExtentSignedPos)
+                    & HexagonII::ExtentSignedMask;
+  unsigned bits =  (F >> HexagonII::ExtentBitsPos)
+                    & HexagonII::ExtentBitsMask;
+
+  if (isSigned) // if value is signed
+    return ~(-1U << (bits - 1));
+  else
+    return ~(-1U << bits);
+}
+
+
+unsigned HexagonInstrInfo::getMemAccessSize(const MachineInstr* MI) const {
+  const uint64_t F = MI->getDesc().TSFlags;
+  return (F >> HexagonII::MemAccessSizePos) & HexagonII::MemAccesSizeMask;
+}
+
+
+// Returns the min value that doesn't need to be extended.
+int HexagonInstrInfo::getMinValue(const MachineInstr *MI) const {
+  const uint64_t F = MI->getDesc().TSFlags;
+  unsigned isSigned = (F >> HexagonII::ExtentSignedPos)
+                    & HexagonII::ExtentSignedMask;
+  unsigned bits =  (F >> HexagonII::ExtentBitsPos)
+                    & HexagonII::ExtentBitsMask;
+
+  if (isSigned) // if value is signed
+    return -1U << (bits - 1);
+  else
+    return 0;
+}
+
+
+// Returns opcode of the non-extended equivalent instruction.
+short HexagonInstrInfo::getNonExtOpcode(const MachineInstr *MI) const {
+  // Check if the instruction has a register form that uses register in place
+  // of the extended operand, if so return that as the non-extended form.
+  short NonExtOpcode = Hexagon::getRegForm(MI->getOpcode());
+  if (NonExtOpcode >= 0)
+    return NonExtOpcode;
+
+  if (MI->getDesc().mayLoad() || MI->getDesc().mayStore()) {
+    // Check addressing mode and retrieve non-ext equivalent instruction.
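+    // (Same address-mode cases as hasNonExtEquivalent above.)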
+ switch (getAddrMode(MI)) { + case HexagonII::Absolute : + return Hexagon::getBaseWithImmOffset(MI->getOpcode()); + case HexagonII::BaseImmOffset : + return Hexagon::getBaseWithRegOffset(MI->getOpcode()); + case HexagonII::BaseLongOffset: + return Hexagon::getRegShlForm(MI->getOpcode()); + + default: + return -1; + } + } + return -1; +} + + +bool HexagonInstrInfo::getPredReg(ArrayRef<MachineOperand> Cond, + unsigned &PredReg, unsigned &PredRegPos, unsigned &PredRegFlags) const { + if (Cond.empty()) + return false; + assert(Cond.size() == 2); + if (isNewValueJump(Cond[0].getImm()) || Cond[1].isMBB()) { + DEBUG(dbgs() << "No predregs for new-value jumps/endloop"); + return false; + } + PredReg = Cond[1].getReg(); + PredRegPos = 1; + // See IfConversion.cpp why we add RegState::Implicit | RegState::Undef + PredRegFlags = 0; + if (Cond[1].isImplicit()) + PredRegFlags = RegState::Implicit; + if (Cond[1].isUndef()) + PredRegFlags |= RegState::Undef; + return true; +} + + +short HexagonInstrInfo::getPseudoInstrPair(const MachineInstr *MI) const { + return Hexagon::getRealHWInstr(MI->getOpcode(), Hexagon::InstrType_Pseudo); +} + + +short HexagonInstrInfo::getRegForm(const MachineInstr *MI) const { + return Hexagon::getRegForm(MI->getOpcode()); +} + + +// Return the number of bytes required to encode the instruction. +// Hexagon instructions are fixed length, 4 bytes, unless they +// use a constant extender, which requires another 4 bytes. +// For debug instructions and prolog labels, return 0. +unsigned HexagonInstrInfo::getSize(const MachineInstr *MI) const { + if (MI->isDebugValue() || MI->isPosition()) + return 0; + + unsigned Size = MI->getDesc().getSize(); + if (!Size) + // Assume the default insn size in case it cannot be determined + // for whatever reason. + Size = HEXAGON_INSTR_SIZE; + + if (isConstExtended(MI) || isExtended(MI)) + Size += HEXAGON_INSTR_SIZE; + + // Try and compute number of instructions in asm. + if (BranchRelaxAsmLarge && MI->getOpcode() == Hexagon::INLINEASM) { + const MachineBasicBlock &MBB = *MI->getParent(); + const MachineFunction *MF = MBB.getParent(); + const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo(); + + // Count the number of register definitions to find the asm string. + unsigned NumDefs = 0; + for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef(); + ++NumDefs) + assert(NumDefs != MI->getNumOperands()-2 && "No asm string?"); + + assert(MI->getOperand(NumDefs).isSymbol() && "No asm string?"); + // Disassemble the AsmStr and approximate number of instructions. + const char *AsmStr = MI->getOperand(NumDefs).getSymbolName(); + Size = getInlineAsmLength(AsmStr, *MAI); + } + + return Size; +} + + +uint64_t HexagonInstrInfo::getType(const MachineInstr* MI) const { + const uint64_t F = MI->getDesc().TSFlags; + return (F >> HexagonII::TypePos) & HexagonII::TypeMask; +} + + +unsigned HexagonInstrInfo::getUnits(const MachineInstr* MI) const { + const TargetSubtargetInfo &ST = MI->getParent()->getParent()->getSubtarget(); + const InstrItineraryData &II = *ST.getInstrItineraryData(); + const InstrStage &IS = *II.beginStage(MI->getDesc().getSchedClass()); + + return IS.getUnits(); +} + + +unsigned HexagonInstrInfo::getValidSubTargets(const unsigned Opcode) const { + const uint64_t F = get(Opcode).TSFlags; + return (F >> HexagonII::validSubTargetPos) & HexagonII::validSubTargetMask; +} + + +// Calculate size of the basic block without debug instructions. 
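+// Debug values occupy no encoding space (see getSize above), so they are
+// excluded from these counts.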
+unsigned HexagonInstrInfo::nonDbgBBSize(const MachineBasicBlock *BB) const { + return nonDbgMICount(BB->instr_begin(), BB->instr_end()); +} + + +unsigned HexagonInstrInfo::nonDbgBundleSize( + MachineBasicBlock::const_iterator BundleHead) const { + assert(BundleHead->isBundle() && "Not a bundle header"); + auto MII = BundleHead.getInstrIterator(); + // Skip the bundle header. + return nonDbgMICount(++MII, getBundleEnd(BundleHead)); +} + + +/// immediateExtend - Changes the instruction in place to one using an immediate +/// extender. +void HexagonInstrInfo::immediateExtend(MachineInstr *MI) const { + assert((isExtendable(MI)||isConstExtended(MI)) && + "Instruction must be extendable"); + // Find which operand is extendable. + short ExtOpNum = getCExtOpNum(MI); + MachineOperand &MO = MI->getOperand(ExtOpNum); + // This needs to be something we understand. + assert((MO.isMBB() || MO.isImm()) && + "Branch with unknown extendable field type"); + // Mark given operand as extended. + MO.addTargetFlag(HexagonII::HMOTF_ConstExtended); +} + + +bool HexagonInstrInfo::invertAndChangeJumpTarget( + MachineInstr* MI, MachineBasicBlock* NewTarget) const { + DEBUG(dbgs() << "\n[invertAndChangeJumpTarget] to BB#" + << NewTarget->getNumber(); MI->dump();); + assert(MI->isBranch()); + unsigned NewOpcode = getInvertedPredicatedOpcode(MI->getOpcode()); + int TargetPos = MI->getNumOperands() - 1; + // In general branch target is the last operand, + // but some implicit defs added at the end might change it. + while ((TargetPos > -1) && !MI->getOperand(TargetPos).isMBB()) + --TargetPos; + assert((TargetPos >= 0) && MI->getOperand(TargetPos).isMBB()); + MI->getOperand(TargetPos).setMBB(NewTarget); + if (EnableBranchPrediction && isPredicatedNew(MI)) { + NewOpcode = reversePrediction(NewOpcode); + } + MI->setDesc(get(NewOpcode)); + return true; +} + + +void HexagonInstrInfo::genAllInsnTimingClasses(MachineFunction &MF) const { + /* +++ The code below is used to generate complete set of Hexagon Insn +++ */ + MachineFunction::iterator A = MF.begin(); + MachineBasicBlock &B = *A; + MachineBasicBlock::iterator I = B.begin(); + MachineInstr *MI = &*I; + DebugLoc DL = MI->getDebugLoc(); + MachineInstr *NewMI; + + for (unsigned insn = TargetOpcode::GENERIC_OP_END+1; + insn < Hexagon::INSTRUCTION_LIST_END; ++insn) { + NewMI = BuildMI(B, MI, DL, get(insn)); + DEBUG(dbgs() << "\n" << getName(NewMI->getOpcode()) << + " Class: " << NewMI->getDesc().getSchedClass()); + NewMI->eraseFromParent(); + } + /* --- The code above is used to generate complete set of Hexagon Insn --- */ +} + + +// inverts the predication logic. +// p -> NotP +// NotP -> P +bool HexagonInstrInfo::reversePredSense(MachineInstr* MI) const { + DEBUG(dbgs() << "\nTrying to reverse pred. sense of:"; MI->dump()); + MI->setDesc(get(getInvertedPredicatedOpcode(MI->getOpcode()))); + return true; +} + + +// Reverse the branch prediction. +unsigned HexagonInstrInfo::reversePrediction(unsigned Opcode) const { + int PredRevOpcode = -1; + if (isPredictedTaken(Opcode)) + PredRevOpcode = Hexagon::notTakenBranchPrediction(Opcode); + else + PredRevOpcode = Hexagon::takenBranchPrediction(Opcode); + assert(PredRevOpcode > 0); + return PredRevOpcode; +} + + +// TODO: Add more rigorous validation. 
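+// For now, accept an empty condition or one whose first operand is the opcode
+// immediate accompanied by at least one more operand.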
+bool HexagonInstrInfo::validateBranchCond(const ArrayRef<MachineOperand> &Cond) + const { + return Cond.empty() || (Cond[0].isImm() && (Cond.size() != 1)); +} + diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h new file mode 100644 index 0000000..9530d9f --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h @@ -0,0 +1,402 @@ +//===- HexagonInstrInfo.h - Hexagon Instruction Information -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Hexagon implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONINSTRINFO_H +#define LLVM_LIB_TARGET_HEXAGON_HEXAGONINSTRINFO_H + +#include "HexagonRegisterInfo.h" +#include "MCTargetDesc/HexagonBaseInfo.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetInstrInfo.h" + +#define GET_INSTRINFO_HEADER +#include "HexagonGenInstrInfo.inc" + +namespace llvm { + +struct EVT; +class HexagonSubtarget; + +class HexagonInstrInfo : public HexagonGenInstrInfo { + virtual void anchor(); + const HexagonRegisterInfo RI; + +public: + explicit HexagonInstrInfo(HexagonSubtarget &ST); + + /// TargetInstrInfo overrides. + /// + + /// If the specified machine instruction is a direct + /// load from a stack slot, return the virtual or physical register number of + /// the destination along with the FrameIndex of the loaded stack slot. If + /// not, return 0. This predicate must return 0 if the instruction has + /// any side effects other than loading from the stack slot. + unsigned isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const override; + + /// If the specified machine instruction is a direct + /// store to a stack slot, return the virtual or physical register number of + /// the source reg along with the FrameIndex of the loaded stack slot. If + /// not, return 0. This predicate must return 0 if the instruction has + /// any side effects other than storing to the stack slot. + unsigned isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const override; + + /// Analyze the branching code at the end of MBB, returning + /// true if it cannot be understood (e.g. it's a switch dispatch or isn't + /// implemented for a target). Upon success, this returns false and returns + /// with the following information in various cases: + /// + /// 1. If this block ends with no branches (it just falls through to its succ) + /// just return false, leaving TBB/FBB null. + /// 2. If this block ends with only an unconditional branch, it sets TBB to be + /// the destination block. + /// 3. If this block ends with a conditional branch and it falls through to a + /// successor block, it sets TBB to be the branch destination block and a + /// list of operands that evaluate the condition. These operands can be + /// passed to other TargetInstrInfo methods to create new branches. + /// 4. If this block ends with a conditional branch followed by an + /// unconditional branch, it returns the 'true' destination in TBB, the + /// 'false' destination in FBB, and a list of operands that evaluate the + /// condition. 
These operands can be passed to other TargetInstrInfo + /// methods to create new branches. + /// + /// Note that RemoveBranch and InsertBranch must be implemented to support + /// cases where this method returns success. + /// + /// If AllowModify is true, then this routine is allowed to modify the basic + /// block (e.g. delete instructions after the unconditional branch). + /// + bool AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify) const override; + + /// Remove the branching code at the end of the specific MBB. + /// This is only invoked in cases where AnalyzeBranch returns success. It + /// returns the number of instructions that were removed. + unsigned RemoveBranch(MachineBasicBlock &MBB) const override; + + /// Insert branch code into the end of the specified MachineBasicBlock. + /// The operands to this method are the same as those + /// returned by AnalyzeBranch. This is only invoked in cases where + /// AnalyzeBranch returns success. It returns the number of instructions + /// inserted. + /// + /// It is also invoked by tail merging to add unconditional branches in + /// cases where AnalyzeBranch doesn't apply because there was no original + /// branch to analyze. At least this much must be implemented, else tail + /// merging needs to be disabled. + unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond, + DebugLoc DL) const override; + + /// Return true if it's profitable to predicate + /// instructions with accumulated instruction latency of "NumCycles" + /// of the specified basic block, where the probability of the instructions + /// being executed is given by Probability, and Confidence is a measure + /// of our confidence that it will be properly predicted. + bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, + unsigned ExtraPredCycles, + BranchProbability Probability) const override; + + /// Second variant of isProfitableToIfCvt. This one + /// checks for the case where two basic blocks from true and false path + /// of a if-then-else (diamond) are predicated on mutally exclusive + /// predicates, where the probability of the true path being taken is given + /// by Probability, and Confidence is a measure of our confidence that it + /// will be properly predicted. + bool isProfitableToIfCvt(MachineBasicBlock &TMBB, + unsigned NumTCycles, unsigned ExtraTCycles, + MachineBasicBlock &FMBB, + unsigned NumFCycles, unsigned ExtraFCycles, + BranchProbability Probability) const override; + + /// Return true if it's profitable for if-converter to duplicate instructions + /// of specified accumulated instruction latencies in the specified MBB to + /// enable if-conversion. + /// The probability of the instructions being executed is given by + /// Probability, and Confidence is a measure of our confidence that it + /// will be properly predicted. + bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, + BranchProbability Probability) const override; + + /// Emit instructions to copy a pair of physical registers. + /// + /// This function should support copies within any legal register class as + /// well as any cross-class copies created during instruction selection. + /// + /// The source and destination registers may overlap, which may require a + /// careful implementation when multiple copy instructions are required for + /// large registers. 
See for example the ARM target. + void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const override; + + /// Store the specified register of the given register class to the specified + /// stack frame index. The store instruction is to be added to the given + /// machine basic block before the specified machine instruction. If isKill + /// is true, the register operand is the last use and must be marked kill. + void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const override; + + /// Load the specified register of the given register class from the specified + /// stack frame index. The load instruction is to be added to the given + /// machine basic block before the specified machine instruction. + void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const override; + + /// This function is called for all pseudo instructions + /// that remain after register allocation. Many pseudo instructions are + /// created to help register allocation. This is the place to convert them + /// into real instructions. The target can edit MI in place, or it can insert + /// new instructions and erase MI. The function should return true if + /// anything was changed. + bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override; + + /// Reverses the branch condition of the specified condition list, + /// returning false on success and true if it cannot be reversed. + bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) + const override; + + /// Insert a noop into the instruction stream at the specified point. + void insertNoop(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const override; + + /// Returns true if the instruction is already predicated. + bool isPredicated(const MachineInstr *MI) const override; + + /// Convert the instruction into a predicated instruction. + /// It returns true if the operation was successful. + bool PredicateInstruction(MachineInstr *MI, + ArrayRef<MachineOperand> Cond) const override; + + /// Returns true if the first specified predicate + /// subsumes the second, e.g. GE subsumes GT. + bool SubsumesPredicate(ArrayRef<MachineOperand> Pred1, + ArrayRef<MachineOperand> Pred2) const override; + + /// If the specified instruction defines any predicate + /// or condition code register(s) used for predication, returns true as well + /// as the definition predicate(s) by reference. + bool DefinesPredicate(MachineInstr *MI, + std::vector<MachineOperand> &Pred) const override; + + /// Return true if the specified instruction can be predicated. + /// By default, this returns true for every instruction with a + /// PredicateOperand. + bool isPredicable(MachineInstr *MI) const override; + + /// Test if the given instruction should be considered a scheduling boundary. + /// This primarily includes labels and terminators. + bool isSchedulingBoundary(const MachineInstr *MI, + const MachineBasicBlock *MBB, + const MachineFunction &MF) const override; + + /// Measure the specified inline asm to determine an approximation of its + /// length. 
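+  /// Used by getSize() to estimate how much space an INLINEASM occupies.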
+ unsigned getInlineAsmLength(const char *Str, + const MCAsmInfo &MAI) const override; + + /// Allocate and return a hazard recognizer to use for this target when + /// scheduling the machine instructions after register allocation. + ScheduleHazardRecognizer* + CreateTargetPostRAHazardRecognizer(const InstrItineraryData*, + const ScheduleDAG *DAG) const override; + + /// For a comparison instruction, return the source registers + /// in SrcReg and SrcReg2 if having two register operands, and the value it + /// compares against in CmpValue. Return true if the comparison instruction + /// can be analyzed. + bool analyzeCompare(const MachineInstr *MI, + unsigned &SrcReg, unsigned &SrcReg2, + int &Mask, int &Value) const override; + + /// Compute the instruction latency of a given instruction. + /// If the instruction has higher cost when predicated, it's returned via + /// PredCost. + unsigned getInstrLatency(const InstrItineraryData *ItinData, + const MachineInstr *MI, + unsigned *PredCost = 0) const override; + + /// Create machine specific model for scheduling. + DFAPacketizer * + CreateTargetScheduleState(const TargetSubtargetInfo &STI) const override; + + // Sometimes, it is possible for the target + // to tell, even without aliasing information, that two MIs access different + // memory addresses. This function returns true if two MIs access different + // memory addresses and false otherwise. + bool areMemAccessesTriviallyDisjoint(MachineInstr *MIa, MachineInstr *MIb, + AliasAnalysis *AA = nullptr) + const override; + + + /// HexagonInstrInfo specifics. + /// + + const HexagonRegisterInfo &getRegisterInfo() const { return RI; } + + unsigned createVR(MachineFunction* MF, MVT VT) const; + + bool isAbsoluteSet(const MachineInstr* MI) const; + bool isAccumulator(const MachineInstr *MI) const; + bool isComplex(const MachineInstr *MI) const; + bool isCompoundBranchInstr(const MachineInstr *MI) const; + bool isCondInst(const MachineInstr *MI) const; + bool isConditionalALU32 (const MachineInstr* MI) const; + bool isConditionalLoad(const MachineInstr* MI) const; + bool isConditionalStore(const MachineInstr* MI) const; + bool isConditionalTransfer(const MachineInstr* MI) const; + bool isConstExtended(const MachineInstr *MI) const; + bool isDeallocRet(const MachineInstr *MI) const; + bool isDependent(const MachineInstr *ProdMI, + const MachineInstr *ConsMI) const; + bool isDotCurInst(const MachineInstr* MI) const; + bool isDotNewInst(const MachineInstr* MI) const; + bool isDuplexPair(const MachineInstr *MIa, const MachineInstr *MIb) const; + bool isEarlySourceInstr(const MachineInstr *MI) const; + bool isEndLoopN(unsigned Opcode) const; + bool isExpr(unsigned OpType) const; + bool isExtendable(const MachineInstr* MI) const; + bool isExtended(const MachineInstr* MI) const; + bool isFloat(const MachineInstr *MI) const; + bool isHVXMemWithAIndirect(const MachineInstr *I, + const MachineInstr *J) const; + bool isIndirectCall(const MachineInstr *MI) const; + bool isIndirectL4Return(const MachineInstr *MI) const; + bool isJumpR(const MachineInstr *MI) const; + bool isJumpWithinBranchRange(const MachineInstr *MI, unsigned offset) const; + bool isLateInstrFeedsEarlyInstr(const MachineInstr *LRMI, + const MachineInstr *ESMI) const; + bool isLateResultInstr(const MachineInstr *MI) const; + bool isLateSourceInstr(const MachineInstr *MI) const; + bool isLoopN(const MachineInstr *MI) const; + bool isMemOp(const MachineInstr *MI) const; + bool isNewValue(const MachineInstr* MI) const; + bool 
isNewValue(unsigned Opcode) const; + bool isNewValueInst(const MachineInstr* MI) const; + bool isNewValueJump(const MachineInstr* MI) const; + bool isNewValueJump(unsigned Opcode) const; + bool isNewValueStore(const MachineInstr* MI) const; + bool isNewValueStore(unsigned Opcode) const; + bool isOperandExtended(const MachineInstr *MI, unsigned OperandNum) const; + bool isPostIncrement(const MachineInstr* MI) const; + bool isPredicatedNew(const MachineInstr *MI) const; + bool isPredicatedNew(unsigned Opcode) const; + bool isPredicatedTrue(const MachineInstr *MI) const; + bool isPredicatedTrue(unsigned Opcode) const; + bool isPredicated(unsigned Opcode) const; + bool isPredicateLate(unsigned Opcode) const; + bool isPredictedTaken(unsigned Opcode) const; + bool isSaveCalleeSavedRegsCall(const MachineInstr *MI) const; + bool isSolo(const MachineInstr* MI) const; + bool isSpillPredRegOp(const MachineInstr *MI) const; + bool isTC1(const MachineInstr *MI) const; + bool isTC2(const MachineInstr *MI) const; + bool isTC2Early(const MachineInstr *MI) const; + bool isTC4x(const MachineInstr *MI) const; + bool isV60VectorInstruction(const MachineInstr *MI) const; + bool isValidAutoIncImm(const EVT VT, const int Offset) const; + bool isValidOffset(unsigned Opcode, int Offset, bool Extend = true) const; + bool isVecAcc(const MachineInstr *MI) const; + bool isVecALU(const MachineInstr *MI) const; + bool isVecUsableNextPacket(const MachineInstr *ProdMI, + const MachineInstr *ConsMI) const; + + + bool canExecuteInBundle(const MachineInstr *First, + const MachineInstr *Second) const; + bool hasEHLabel(const MachineBasicBlock *B) const; + bool hasNonExtEquivalent(const MachineInstr *MI) const; + bool hasPseudoInstrPair(const MachineInstr *MI) const; + bool hasUncondBranch(const MachineBasicBlock *B) const; + bool mayBeCurLoad(const MachineInstr* MI) const; + bool mayBeNewStore(const MachineInstr* MI) const; + bool producesStall(const MachineInstr *ProdMI, + const MachineInstr *ConsMI) const; + bool producesStall(const MachineInstr *MI, + MachineBasicBlock::const_instr_iterator MII) const; + bool predCanBeUsedAsDotNew(const MachineInstr *MI, unsigned PredReg) const; + bool PredOpcodeHasJMP_c(unsigned Opcode) const; + bool predOpcodeHasNot(ArrayRef<MachineOperand> Cond) const; + + + unsigned getAddrMode(const MachineInstr* MI) const; + unsigned getBaseAndOffset(const MachineInstr *MI, int &Offset, + unsigned &AccessSize) const; + bool getBaseAndOffsetPosition(const MachineInstr *MI, unsigned &BasePos, + unsigned &OffsetPos) const; + SmallVector<MachineInstr*,2> getBranchingInstrs(MachineBasicBlock& MBB) const; + unsigned getCExtOpNum(const MachineInstr *MI) const; + HexagonII::CompoundGroup + getCompoundCandidateGroup(const MachineInstr *MI) const; + unsigned getCompoundOpcode(const MachineInstr *GA, + const MachineInstr *GB) const; + int getCondOpcode(int Opc, bool sense) const; + int getDotCurOp(const MachineInstr* MI) const; + int getDotNewOp(const MachineInstr* MI) const; + int getDotNewPredJumpOp(const MachineInstr *MI, + const MachineBranchProbabilityInfo *MBPI) const; + int getDotNewPredOp(const MachineInstr *MI, + const MachineBranchProbabilityInfo *MBPI) const; + int getDotOldOp(const int opc) const; + HexagonII::SubInstructionGroup getDuplexCandidateGroup(const MachineInstr *MI) + const; + short getEquivalentHWInstr(const MachineInstr *MI) const; + MachineInstr *getFirstNonDbgInst(MachineBasicBlock *BB) const; + unsigned getInstrTimingClassLatency(const InstrItineraryData *ItinData, + const 
MachineInstr *MI) const; + bool getInvertedPredSense(SmallVectorImpl<MachineOperand> &Cond) const; + unsigned getInvertedPredicatedOpcode(const int Opc) const; + int getMaxValue(const MachineInstr *MI) const; + unsigned getMemAccessSize(const MachineInstr* MI) const; + int getMinValue(const MachineInstr *MI) const; + short getNonExtOpcode(const MachineInstr *MI) const; + bool getPredReg(ArrayRef<MachineOperand> Cond, unsigned &PredReg, + unsigned &PredRegPos, unsigned &PredRegFlags) const; + short getPseudoInstrPair(const MachineInstr *MI) const; + short getRegForm(const MachineInstr *MI) const; + unsigned getSize(const MachineInstr *MI) const; + uint64_t getType(const MachineInstr* MI) const; + unsigned getUnits(const MachineInstr* MI) const; + unsigned getValidSubTargets(const unsigned Opcode) const; + + + /// getInstrTimingClassLatency - Compute the instruction latency of a given + /// instruction using Timing Class information, if available. + unsigned nonDbgBBSize(const MachineBasicBlock *BB) const; + unsigned nonDbgBundleSize(MachineBasicBlock::const_iterator BundleHead) const; + + + void immediateExtend(MachineInstr *MI) const; + bool invertAndChangeJumpTarget(MachineInstr* MI, + MachineBasicBlock* NewTarget) const; + void genAllInsnTimingClasses(MachineFunction &MF) const; + bool reversePredSense(MachineInstr* MI) const; + unsigned reversePrediction(unsigned Opcode) const; + bool validateBranchCond(const ArrayRef<MachineOperand> &Cond) const; +}; + +} + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td new file mode 100644 index 0000000..5cfeba7 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td @@ -0,0 +1,5809 @@ +//==- HexagonInstrInfo.td - Target Description for Hexagon -*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Hexagon instructions in TableGen format. +// +//===----------------------------------------------------------------------===// + +include "HexagonInstrFormats.td" +include "HexagonOperands.td" +include "HexagonInstrEnc.td" +// Pattern fragment that combines the value type and the register class +// into a single parameter. +// The pat frags in the definitions below need to have a named register, +// otherwise i32 will be assumed regardless of the register class. The +// name of the register does not matter. +def I1 : PatLeaf<(i1 PredRegs:$R)>; +def I32 : PatLeaf<(i32 IntRegs:$R)>; +def I64 : PatLeaf<(i64 DoubleRegs:$R)>; +def F32 : PatLeaf<(f32 IntRegs:$R)>; +def F64 : PatLeaf<(f64 DoubleRegs:$R)>; + +// Pattern fragments to extract the low and high subregisters from a +// 64-bit value. +def LoReg: OutPatFrag<(ops node:$Rs), + (EXTRACT_SUBREG (i64 $Rs), subreg_loreg)>; +def HiReg: OutPatFrag<(ops node:$Rs), + (EXTRACT_SUBREG (i64 $Rs), subreg_hireg)>; + +// SDNode for converting immediate C to C-1. +def DEC_CONST_SIGNED : SDNodeXForm<imm, [{ + // Return the byte immediate const-1 as an SDNode. + int32_t imm = N->getSExtValue(); + return XformSToSM1Imm(imm, SDLoc(N)); +}]>; + +// SDNode for converting immediate C to C-2. +def DEC2_CONST_SIGNED : SDNodeXForm<imm, [{ + // Return the byte immediate const-2 as an SDNode. 
+ int32_t imm = N->getSExtValue(); + return XformSToSM2Imm(imm, SDLoc(N)); +}]>; + +// SDNode for converting immediate C to C-3. +def DEC3_CONST_SIGNED : SDNodeXForm<imm, [{ + // Return the byte immediate const-3 as an SDNode. + int32_t imm = N->getSExtValue(); + return XformSToSM3Imm(imm, SDLoc(N)); +}]>; + +// SDNode for converting immediate C to C-1. +def DEC_CONST_UNSIGNED : SDNodeXForm<imm, [{ + // Return the byte immediate const-1 as an SDNode. + uint32_t imm = N->getZExtValue(); + return XformUToUM1Imm(imm, SDLoc(N)); +}]>; + +//===----------------------------------------------------------------------===// +// Compare +//===----------------------------------------------------------------------===// +let hasSideEffects = 0, isCompare = 1, InputType = "imm", isExtendable = 1, + opExtendable = 2 in +class T_CMP <string mnemonic, bits<2> MajOp, bit isNot, Operand ImmOp> + : ALU32Inst <(outs PredRegs:$dst), + (ins IntRegs:$src1, ImmOp:$src2), + "$dst = "#!if(isNot, "!","")#mnemonic#"($src1, #$src2)", + [], "",ALU32_2op_tc_2early_SLOT0123 >, ImmRegRel { + bits<2> dst; + bits<5> src1; + bits<10> src2; + let CextOpcode = mnemonic; + let opExtentBits = !if(!eq(mnemonic, "cmp.gtu"), 9, 10); + let isExtentSigned = !if(!eq(mnemonic, "cmp.gtu"), 0, 1); + + let IClass = 0b0111; + + let Inst{27-24} = 0b0101; + let Inst{23-22} = MajOp; + let Inst{21} = !if(!eq(mnemonic, "cmp.gtu"), 0, src2{9}); + let Inst{20-16} = src1; + let Inst{13-5} = src2{8-0}; + let Inst{4} = isNot; + let Inst{3-2} = 0b00; + let Inst{1-0} = dst; + } + +def C2_cmpeqi : T_CMP <"cmp.eq", 0b00, 0, s10Ext>; +def C2_cmpgti : T_CMP <"cmp.gt", 0b01, 0, s10Ext>; +def C2_cmpgtui : T_CMP <"cmp.gtu", 0b10, 0, u9Ext>; + +class T_CMP_pat <InstHexagon MI, PatFrag OpNode, PatLeaf ImmPred> + : Pat<(i1 (OpNode (i32 IntRegs:$src1), ImmPred:$src2)), + (MI IntRegs:$src1, ImmPred:$src2)>; + +def : T_CMP_pat <C2_cmpeqi, seteq, s10ImmPred>; +def : T_CMP_pat <C2_cmpgti, setgt, s10ImmPred>; +def : T_CMP_pat <C2_cmpgtui, setugt, u9ImmPred>; + +//===----------------------------------------------------------------------===// +// ALU32/ALU + +//===----------------------------------------------------------------------===// +// Add. 
+ +def SDT_Int32Leaf : SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>; +def SDT_Int32Unary : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; + +def SDTHexagonI64I32I32 : SDTypeProfile<1, 2, + [SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>; + +def HexagonCOMBINE : SDNode<"HexagonISD::COMBINE", SDTHexagonI64I32I32>; +def HexagonPACKHL : SDNode<"HexagonISD::PACKHL", SDTHexagonI64I32I32>; + +let hasSideEffects = 0, hasNewValue = 1, InputType = "reg" in +class T_ALU32_3op<string mnemonic, bits<3> MajOp, bits<3> MinOp, bit OpsRev, + bit IsComm> + : ALU32_rr<(outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt), + "$Rd = "#mnemonic#"($Rs, $Rt)", + [], "", ALU32_3op_tc_1_SLOT0123>, ImmRegRel, PredRel { + let isCommutable = IsComm; + let BaseOpcode = mnemonic#_rr; + let CextOpcode = mnemonic; + + bits<5> Rs; + bits<5> Rt; + bits<5> Rd; + + let IClass = 0b1111; + let Inst{27} = 0b0; + let Inst{26-24} = MajOp; + let Inst{23-21} = MinOp; + let Inst{20-16} = !if(OpsRev,Rt,Rs); + let Inst{12-8} = !if(OpsRev,Rs,Rt); + let Inst{4-0} = Rd; +} + +let hasSideEffects = 0, hasNewValue = 1 in +class T_ALU32_3op_pred<string mnemonic, bits<3> MajOp, bits<3> MinOp, + bit OpsRev, bit PredNot, bit PredNew> + : ALU32_rr<(outs IntRegs:$Rd), (ins PredRegs:$Pu, IntRegs:$Rs, IntRegs:$Rt), + "if ("#!if(PredNot,"!","")#"$Pu"#!if(PredNew,".new","")#") "# + "$Rd = "#mnemonic#"($Rs, $Rt)", + [], "", ALU32_3op_tc_1_SLOT0123>, ImmRegRel, PredNewRel { + let isPredicated = 1; + let isPredicatedFalse = PredNot; + let isPredicatedNew = PredNew; + let BaseOpcode = mnemonic#_rr; + let CextOpcode = mnemonic; + + bits<2> Pu; + bits<5> Rs; + bits<5> Rt; + bits<5> Rd; + + let IClass = 0b1111; + let Inst{27} = 0b1; + let Inst{26-24} = MajOp; + let Inst{23-21} = MinOp; + let Inst{20-16} = !if(OpsRev,Rt,Rs); + let Inst{13} = PredNew; + let Inst{12-8} = !if(OpsRev,Rs,Rt); + let Inst{7} = PredNot; + let Inst{6-5} = Pu; + let Inst{4-0} = Rd; +} + +class T_ALU32_combineh<string Op1, string Op2, bits<3> MajOp, bits<3> MinOp, + bit OpsRev> + : T_ALU32_3op<"", MajOp, MinOp, OpsRev, 0> { + let AsmString = "$Rd = combine($Rs"#Op1#", $Rt"#Op2#")"; +} + +def A2_combine_hh : T_ALU32_combineh<".h", ".h", 0b011, 0b100, 1>; +def A2_combine_hl : T_ALU32_combineh<".h", ".l", 0b011, 0b101, 1>; +def A2_combine_lh : T_ALU32_combineh<".l", ".h", 0b011, 0b110, 1>; +def A2_combine_ll : T_ALU32_combineh<".l", ".l", 0b011, 0b111, 1>; + +class T_ALU32_3op_sfx<string mnemonic, string suffix, bits<3> MajOp, + bits<3> MinOp, bit OpsRev, bit IsComm> + : T_ALU32_3op<"", MajOp, MinOp, OpsRev, IsComm> { + let AsmString = "$Rd = "#mnemonic#"($Rs, $Rt)"#suffix; +} + +def A2_svaddh : T_ALU32_3op<"vaddh", 0b110, 0b000, 0, 1>; +def A2_svsubh : T_ALU32_3op<"vsubh", 0b110, 0b100, 1, 0>; + +let Defs = [USR_OVF], Itinerary = ALU32_3op_tc_2_SLOT0123 in { + def A2_svaddhs : T_ALU32_3op_sfx<"vaddh", ":sat", 0b110, 0b001, 0, 1>; + def A2_addsat : T_ALU32_3op_sfx<"add", ":sat", 0b110, 0b010, 0, 1>; + def A2_svadduhs : T_ALU32_3op_sfx<"vadduh", ":sat", 0b110, 0b011, 0, 1>; + def A2_svsubhs : T_ALU32_3op_sfx<"vsubh", ":sat", 0b110, 0b101, 1, 0>; + def A2_subsat : T_ALU32_3op_sfx<"sub", ":sat", 0b110, 0b110, 1, 0>; + def A2_svsubuhs : T_ALU32_3op_sfx<"vsubuh", ":sat", 0b110, 0b111, 1, 0>; +} + +let Itinerary = ALU32_3op_tc_2_SLOT0123 in +def A2_svavghs : T_ALU32_3op_sfx<"vavgh", ":rnd", 0b111, 0b001, 0, 1>; + +def A2_svavgh : T_ALU32_3op<"vavgh", 0b111, 0b000, 0, 1>; +def A2_svnavgh : T_ALU32_3op<"vnavgh", 0b111, 0b011, 1, 0>; + +multiclass T_ALU32_3op_p<string mnemonic, 
                             bits<3> MajOp, bits<3> MinOp,
+                            bit OpsRev> {
+  def t    : T_ALU32_3op_pred<mnemonic, MajOp, MinOp, OpsRev, 0, 0>;
+  def f    : T_ALU32_3op_pred<mnemonic, MajOp, MinOp, OpsRev, 1, 0>;
+  def tnew : T_ALU32_3op_pred<mnemonic, MajOp, MinOp, OpsRev, 0, 1>;
+  def fnew : T_ALU32_3op_pred<mnemonic, MajOp, MinOp, OpsRev, 1, 1>;
+}
+
+multiclass T_ALU32_3op_A2<string mnemonic, bits<3> MajOp, bits<3> MinOp,
+                          bit OpsRev, bit IsComm> {
+  let isPredicable = 1 in
+  def  A2_#NAME  : T_ALU32_3op <mnemonic, MajOp, MinOp, OpsRev, IsComm>;
+  defm A2_p#NAME : T_ALU32_3op_p<mnemonic, MajOp, MinOp, OpsRev>;
+}
+
+defm add : T_ALU32_3op_A2<"add", 0b011, 0b000, 0, 1>;
+defm and : T_ALU32_3op_A2<"and", 0b001, 0b000, 0, 1>;
+defm or  : T_ALU32_3op_A2<"or",  0b001, 0b001, 0, 1>;
+defm sub : T_ALU32_3op_A2<"sub", 0b011, 0b001, 1, 0>;
+defm xor : T_ALU32_3op_A2<"xor", 0b001, 0b011, 0, 1>;
+
+// Pats for instruction selection.
+class BinOp32_pat<SDNode Op, InstHexagon MI, ValueType ResT>
+  : Pat<(ResT (Op (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))),
+        (ResT (MI IntRegs:$Rs, IntRegs:$Rt))>;
+
+def: BinOp32_pat<add, A2_add, i32>;
+def: BinOp32_pat<and, A2_and, i32>;
+def: BinOp32_pat<or,  A2_or,  i32>;
+def: BinOp32_pat<sub, A2_sub, i32>;
+def: BinOp32_pat<xor, A2_xor, i32>;
+
+// A few special cases producing register pairs:
+let OutOperandList = (outs DoubleRegs:$Rd), hasNewValue = 0 in {
+  def S2_packhl : T_ALU32_3op <"packhl", 0b101, 0b100, 0, 0>;
+
+  let isPredicable = 1 in
+  def A2_combinew : T_ALU32_3op <"combine", 0b101, 0b000, 0, 0>;
+
+  // Conditional combinew uses "newt/f" instead of "t/fnew".
+  def C2_ccombinewt    : T_ALU32_3op_pred<"combine", 0b101, 0b000, 0, 0, 0>;
+  def C2_ccombinewf    : T_ALU32_3op_pred<"combine", 0b101, 0b000, 0, 1, 0>;
+  def C2_ccombinewnewt : T_ALU32_3op_pred<"combine", 0b101, 0b000, 0, 0, 1>;
+  def C2_ccombinewnewf : T_ALU32_3op_pred<"combine", 0b101, 0b000, 0, 1, 1>;
+}
+
+def: BinOp32_pat<HexagonCOMBINE, A2_combinew, i64>;
+def: BinOp32_pat<HexagonPACKHL,  S2_packhl,   i64>;
+
+let hasSideEffects = 0, hasNewValue = 1, isCompare = 1, InputType = "reg" in
+class T_ALU32_3op_cmp<string mnemonic, bits<2> MinOp, bit IsNeg, bit IsComm>
+  : ALU32_rr<(outs PredRegs:$Pd), (ins IntRegs:$Rs, IntRegs:$Rt),
+             "$Pd = "#mnemonic#"($Rs, $Rt)",
+             [], "", ALU32_3op_tc_1_SLOT0123>, ImmRegRel {
+  let CextOpcode = mnemonic;
+  let isCommutable = IsComm;
+  bits<5> Rs;
+  bits<5> Rt;
+  bits<2> Pd;
+
+  let IClass = 0b1111;
+  let Inst{27-24} = 0b0010;
+  let Inst{22-21} = MinOp;
+  let Inst{20-16} = Rs;
+  let Inst{12-8} = Rt;
+  let Inst{4} = IsNeg;
+  let Inst{3-2} = 0b00;
+  let Inst{1-0} = Pd;
+}
+
+let Itinerary = ALU32_3op_tc_2early_SLOT0123 in {
+  def C2_cmpeq  : T_ALU32_3op_cmp< "cmp.eq",  0b00, 0, 1>;
+  def C2_cmpgt  : T_ALU32_3op_cmp< "cmp.gt",  0b10, 0, 0>;
+  def C2_cmpgtu : T_ALU32_3op_cmp< "cmp.gtu", 0b11, 0, 0>;
+}
+
+// Patfrag to convert the usual comparison patfrags (e.g. setlt) to ones
+// that reverse the order of the operands.
+class RevCmp<PatFrag F> : PatFrag<(ops node:$rhs, node:$lhs), F.Fragment>;
+
+// Pats for compares. They use PatFrags as operands, not SDNodes,
+// since seteq/setgt/etc. are defined as PatFrags.
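+// Editorial note (not part of the original source): to illustrate the
+// reversed-operand trick, the def below
+//   def: T_cmp32_rr_pat<C2_cmpgt, RevCmp<setlt>, i1>;
+// matches a node (setlt a, b) -- RevCmp lists the operands of setlt in the
+// opposite order -- and selects it to C2_cmpgt(b, a), so "a < b" is computed
+// by the existing "greater than" compare with its operands swapped.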
+class T_cmp32_rr_pat<InstHexagon MI, PatFrag Op, ValueType VT> + : Pat<(VT (Op (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), + (VT (MI IntRegs:$Rs, IntRegs:$Rt))>; + +def: T_cmp32_rr_pat<C2_cmpeq, seteq, i1>; +def: T_cmp32_rr_pat<C2_cmpgt, setgt, i1>; +def: T_cmp32_rr_pat<C2_cmpgtu, setugt, i1>; + +def: T_cmp32_rr_pat<C2_cmpgt, RevCmp<setlt>, i1>; +def: T_cmp32_rr_pat<C2_cmpgtu, RevCmp<setult>, i1>; + +let CextOpcode = "MUX", InputType = "reg", hasNewValue = 1 in +def C2_mux: ALU32_rr<(outs IntRegs:$Rd), + (ins PredRegs:$Pu, IntRegs:$Rs, IntRegs:$Rt), + "$Rd = mux($Pu, $Rs, $Rt)", [], "", ALU32_3op_tc_1_SLOT0123>, ImmRegRel { + bits<5> Rd; + bits<2> Pu; + bits<5> Rs; + bits<5> Rt; + + let CextOpcode = "mux"; + let InputType = "reg"; + let hasSideEffects = 0; + let IClass = 0b1111; + + let Inst{27-24} = 0b0100; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + let Inst{6-5} = Pu; + let Inst{4-0} = Rd; +} + +def: Pat<(i32 (select (i1 PredRegs:$Pu), (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), + (C2_mux PredRegs:$Pu, IntRegs:$Rs, IntRegs:$Rt)>; + +// Combines the two immediates into a double register. +// Increase complexity to make it greater than any complexity of a combine +// that involves a register. + +let isReMaterializable = 1, isMoveImm = 1, isAsCheapAsAMove = 1, + isExtentSigned = 1, isExtendable = 1, opExtentBits = 8, opExtendable = 1, + AddedComplexity = 75 in +def A2_combineii: ALU32Inst <(outs DoubleRegs:$Rdd), (ins s8Ext:$s8, s8Imm:$S8), + "$Rdd = combine(#$s8, #$S8)", + [(set (i64 DoubleRegs:$Rdd), + (i64 (HexagonCOMBINE(i32 s32ImmPred:$s8), (i32 s8ImmPred:$S8))))]> { + bits<5> Rdd; + bits<8> s8; + bits<8> S8; + + let IClass = 0b0111; + let Inst{27-23} = 0b11000; + let Inst{22-16} = S8{7-1}; + let Inst{13} = S8{0}; + let Inst{12-5} = s8; + let Inst{4-0} = Rdd; + } + +//===----------------------------------------------------------------------===// +// Template class for predicated ADD of a reg and an Immediate value. +//===----------------------------------------------------------------------===// +let hasNewValue = 1, hasSideEffects = 0 in +class T_Addri_Pred <bit PredNot, bit PredNew> + : ALU32_ri <(outs IntRegs:$Rd), + (ins PredRegs:$Pu, IntRegs:$Rs, s8Ext:$s8), + !if(PredNot, "if (!$Pu", "if ($Pu")#!if(PredNew,".new) $Rd = ", + ") $Rd = ")#"add($Rs, #$s8)"> { + bits<5> Rd; + bits<2> Pu; + bits<5> Rs; + bits<8> s8; + + let isPredicatedNew = PredNew; + let IClass = 0b0111; + + let Inst{27-24} = 0b0100; + let Inst{23} = PredNot; + let Inst{22-21} = Pu; + let Inst{20-16} = Rs; + let Inst{13} = PredNew; + let Inst{12-5} = s8; + let Inst{4-0} = Rd; + } + +//===----------------------------------------------------------------------===// +// A2_addi: Add a signed immediate to a register. +//===----------------------------------------------------------------------===// +let hasNewValue = 1, hasSideEffects = 0 in +class T_Addri <Operand immOp> + : ALU32_ri <(outs IntRegs:$Rd), + (ins IntRegs:$Rs, immOp:$s16), + "$Rd = add($Rs, #$s16)", [], "", ALU32_ADDI_tc_1_SLOT0123> { + bits<5> Rd; + bits<5> Rs; + bits<16> s16; + + let IClass = 0b1011; + + let Inst{27-21} = s16{15-9}; + let Inst{20-16} = Rs; + let Inst{13-5} = s16{8-0}; + let Inst{4-0} = Rd; + } + +//===----------------------------------------------------------------------===// +// Multiclass for ADD of a register and an immediate value. 
+//===----------------------------------------------------------------------===// +multiclass Addri_Pred<string mnemonic, bit PredNot> { + let isPredicatedFalse = PredNot in { + def NAME : T_Addri_Pred<PredNot, 0>; + // Predicate new + def NAME#new : T_Addri_Pred<PredNot, 1>; + } +} + +let isExtendable = 1, isExtentSigned = 1, InputType = "imm" in +multiclass Addri_base<string mnemonic, SDNode OpNode> { + let CextOpcode = mnemonic, BaseOpcode = mnemonic#_ri in { + let opExtendable = 2, opExtentBits = 16, isPredicable = 1 in + def A2_#NAME : T_Addri<s16Ext>; + + let opExtendable = 3, opExtentBits = 8, isPredicated = 1 in { + defm A2_p#NAME#t : Addri_Pred<mnemonic, 0>; + defm A2_p#NAME#f : Addri_Pred<mnemonic, 1>; + } + } +} + +defm addi : Addri_base<"add", add>, ImmRegRel, PredNewRel; + +def: Pat<(i32 (add I32:$Rs, s32ImmPred:$s16)), + (i32 (A2_addi I32:$Rs, imm:$s16))>; + +//===----------------------------------------------------------------------===// +// Template class used for the following ALU32 instructions. +// Rd=and(Rs,#s10) +// Rd=or(Rs,#s10) +//===----------------------------------------------------------------------===// +let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 10, +InputType = "imm", hasNewValue = 1 in +class T_ALU32ri_logical <string mnemonic, SDNode OpNode, bits<2> MinOp> + : ALU32_ri <(outs IntRegs:$Rd), + (ins IntRegs:$Rs, s10Ext:$s10), + "$Rd = "#mnemonic#"($Rs, #$s10)" , + [(set (i32 IntRegs:$Rd), (OpNode (i32 IntRegs:$Rs), s32ImmPred:$s10))]> { + bits<5> Rd; + bits<5> Rs; + bits<10> s10; + let CextOpcode = mnemonic; + + let IClass = 0b0111; + + let Inst{27-24} = 0b0110; + let Inst{23-22} = MinOp; + let Inst{21} = s10{9}; + let Inst{20-16} = Rs; + let Inst{13-5} = s10{8-0}; + let Inst{4-0} = Rd; + } + +def A2_orir : T_ALU32ri_logical<"or", or, 0b10>, ImmRegRel; +def A2_andir : T_ALU32ri_logical<"and", and, 0b00>, ImmRegRel; + +// Subtract register from immediate +// Rd32=sub(#s10,Rs32) +let isExtendable = 1, CextOpcode = "sub", opExtendable = 1, isExtentSigned = 1, + opExtentBits = 10, InputType = "imm", hasNewValue = 1, hasSideEffects = 0 in +def A2_subri: ALU32_ri <(outs IntRegs:$Rd), (ins s10Ext:$s10, IntRegs:$Rs), + "$Rd = sub(#$s10, $Rs)", []>, ImmRegRel { + bits<5> Rd; + bits<10> s10; + bits<5> Rs; + + let IClass = 0b0111; + + let Inst{27-22} = 0b011001; + let Inst{21} = s10{9}; + let Inst{20-16} = Rs; + let Inst{13-5} = s10{8-0}; + let Inst{4-0} = Rd; + } + +// Nop. +let hasSideEffects = 0 in +def A2_nop: ALU32Inst <(outs), (ins), "nop" > { + let IClass = 0b0111; + let Inst{27-24} = 0b1111; +} + +def: Pat<(sub s32ImmPred:$s10, IntRegs:$Rs), + (A2_subri imm:$s10, IntRegs:$Rs)>; + +// Rd = not(Rs) gets mapped to Rd=sub(#-1, Rs). +def: Pat<(not (i32 IntRegs:$src1)), + (A2_subri -1, IntRegs:$src1)>; + +let hasSideEffects = 0, hasNewValue = 1 in +class T_tfr16<bit isHi> + : ALU32Inst <(outs IntRegs:$Rx), (ins IntRegs:$src1, u16Imm:$u16), + "$Rx"#!if(isHi, ".h", ".l")#" = #$u16", + [], "$src1 = $Rx" > { + bits<5> Rx; + bits<16> u16; + + let IClass = 0b0111; + let Inst{27-26} = 0b00; + let Inst{25-24} = !if(isHi, 0b10, 0b01); + let Inst{23-22} = u16{15-14}; + let Inst{21} = 0b1; + let Inst{20-16} = Rx; + let Inst{13-0} = u16{13-0}; + } + +def A2_tfril: T_tfr16<0>; +def A2_tfrih: T_tfr16<1>; + +// Conditional transfer is an alias to conditional "Rd = add(Rs, #0)". 
+let isPredicated = 1, hasNewValue = 1, opNewValue = 0 in +class T_tfr_pred<bit isPredNot, bit isPredNew> + : ALU32Inst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2), + "if ("#!if(isPredNot, "!", "")# + "$src1"#!if(isPredNew, ".new", "")# + ") $dst = $src2"> { + bits<5> dst; + bits<2> src1; + bits<5> src2; + + let isPredicatedFalse = isPredNot; + let isPredicatedNew = isPredNew; + let IClass = 0b0111; + + let Inst{27-24} = 0b0100; + let Inst{23} = isPredNot; + let Inst{13} = isPredNew; + let Inst{12-5} = 0; + let Inst{4-0} = dst; + let Inst{22-21} = src1; + let Inst{20-16} = src2; + } + +let isPredicable = 1 in +class T_tfr : ALU32Inst<(outs IntRegs:$dst), (ins IntRegs:$src), + "$dst = $src"> { + bits<5> dst; + bits<5> src; + + let IClass = 0b0111; + + let Inst{27-21} = 0b0000011; + let Inst{20-16} = src; + let Inst{13} = 0b0; + let Inst{4-0} = dst; + } + +let InputType = "reg", hasNewValue = 1, hasSideEffects = 0 in +multiclass tfr_base<string CextOp> { + let CextOpcode = CextOp, BaseOpcode = CextOp in { + def NAME : T_tfr; + + // Predicate + def t : T_tfr_pred<0, 0>; + def f : T_tfr_pred<1, 0>; + // Predicate new + def tnew : T_tfr_pred<0, 1>; + def fnew : T_tfr_pred<1, 1>; + } +} + +// Assembler mapped to C2_ccombinew[t|f|newt|newf]. +// Please don't add bits to this instruction as it'll be converted into +// 'combine' before object code emission. +let isPredicated = 1 in +class T_tfrp_pred<bit PredNot, bit PredNew> + : ALU32_rr <(outs DoubleRegs:$dst), + (ins PredRegs:$src1, DoubleRegs:$src2), + "if ("#!if(PredNot, "!", "")#"$src1" + #!if(PredNew, ".new", "")#") $dst = $src2" > { + let isPredicatedFalse = PredNot; + let isPredicatedNew = PredNew; + } + +// Assembler mapped to A2_combinew. +// Please don't add bits to this instruction as it'll be converted into +// 'combine' before object code emission. 
+class T_tfrp : ALU32Inst <(outs DoubleRegs:$dst), + (ins DoubleRegs:$src), + "$dst = $src">; + +let hasSideEffects = 0 in +multiclass TFR64_base<string BaseName> { + let BaseOpcode = BaseName in { + let isPredicable = 1 in + def NAME : T_tfrp; + // Predicate + def t : T_tfrp_pred <0, 0>; + def f : T_tfrp_pred <1, 0>; + // Predicate new + def tnew : T_tfrp_pred <0, 1>; + def fnew : T_tfrp_pred <1, 1>; + } +} + +let InputType = "imm", isExtendable = 1, isExtentSigned = 1, opExtentBits = 12, + isMoveImm = 1, opExtendable = 2, BaseOpcode = "TFRI", CextOpcode = "TFR", + hasSideEffects = 0, isPredicated = 1, hasNewValue = 1 in +class T_TFRI_Pred<bit PredNot, bit PredNew> + : ALU32_ri<(outs IntRegs:$Rd), (ins PredRegs:$Pu, s12Ext:$s12), + "if ("#!if(PredNot,"!","")#"$Pu"#!if(PredNew,".new","")#") $Rd = #$s12", + [], "", ALU32_2op_tc_1_SLOT0123>, ImmRegRel, PredNewRel { + let isPredicatedFalse = PredNot; + let isPredicatedNew = PredNew; + + bits<5> Rd; + bits<2> Pu; + bits<12> s12; + + let IClass = 0b0111; + let Inst{27-24} = 0b1110; + let Inst{23} = PredNot; + let Inst{22-21} = Pu; + let Inst{20} = 0b0; + let Inst{19-16,12-5} = s12; + let Inst{13} = PredNew; + let Inst{4-0} = Rd; +} + +def C2_cmoveit : T_TFRI_Pred<0, 0>; +def C2_cmoveif : T_TFRI_Pred<1, 0>; +def C2_cmovenewit : T_TFRI_Pred<0, 1>; +def C2_cmovenewif : T_TFRI_Pred<1, 1>; + +let InputType = "imm", isExtendable = 1, isExtentSigned = 1, + CextOpcode = "TFR", BaseOpcode = "TFRI", hasNewValue = 1, opNewValue = 0, + isAsCheapAsAMove = 1 , opExtendable = 1, opExtentBits = 16, isMoveImm = 1, + isPredicated = 0, isPredicable = 1, isReMaterializable = 1 in +def A2_tfrsi : ALU32Inst<(outs IntRegs:$Rd), (ins s16Ext:$s16), "$Rd = #$s16", + [(set (i32 IntRegs:$Rd), s32ImmPred:$s16)], "", ALU32_2op_tc_1_SLOT0123>, + ImmRegRel, PredRel { + bits<5> Rd; + bits<16> s16; + + let IClass = 0b0111; + let Inst{27-24} = 0b1000; + let Inst{23-22,20-16,13-5} = s16; + let Inst{4-0} = Rd; +} + +defm A2_tfr : tfr_base<"TFR">, ImmRegRel, PredNewRel; +let isAsmParserOnly = 1 in +defm A2_tfrp : TFR64_base<"TFR64">, PredNewRel; + +// Assembler mapped +let isReMaterializable = 1, isMoveImm = 1, isAsCheapAsAMove = 1, + isAsmParserOnly = 1 in +def A2_tfrpi : ALU64_rr<(outs DoubleRegs:$dst), (ins s8Imm64:$src1), + "$dst = #$src1", + [(set (i64 DoubleRegs:$dst), s8Imm64Pred:$src1)]>; + +// TODO: see if this instruction can be deleted.. +let isExtendable = 1, opExtendable = 1, opExtentBits = 6, + isAsmParserOnly = 1 in { +def TFRI64_V4 : ALU64_rr<(outs DoubleRegs:$dst), (ins u64Imm:$src1), + "$dst = #$src1">; +def TFRI64_V2_ext : ALU64_rr<(outs DoubleRegs:$dst), + (ins s8Ext:$src1, s8Imm:$src2), + "$dst = combine(##$src1, #$src2)">; +} + +//===----------------------------------------------------------------------===// +// ALU32/ALU - +//===----------------------------------------------------------------------===// + + +//===----------------------------------------------------------------------===// +// ALU32/PERM + +//===----------------------------------------------------------------------===// +// Scalar mux register immediate. 
+let hasSideEffects = 0, isExtentSigned = 1, CextOpcode = "MUX", + InputType = "imm", hasNewValue = 1, isExtendable = 1, opExtentBits = 8 in +class T_MUX1 <bit MajOp, dag ins, string AsmStr> + : ALU32Inst <(outs IntRegs:$Rd), ins, AsmStr>, ImmRegRel { + bits<5> Rd; + bits<2> Pu; + bits<8> s8; + bits<5> Rs; + + let IClass = 0b0111; + let Inst{27-24} = 0b0011; + let Inst{23} = MajOp; + let Inst{22-21} = Pu; + let Inst{20-16} = Rs; + let Inst{13} = 0b0; + let Inst{12-5} = s8; + let Inst{4-0} = Rd; +} + +let opExtendable = 2 in +def C2_muxri : T_MUX1<0b1, (ins PredRegs:$Pu, s8Ext:$s8, IntRegs:$Rs), + "$Rd = mux($Pu, #$s8, $Rs)">; + +let opExtendable = 3 in +def C2_muxir : T_MUX1<0b0, (ins PredRegs:$Pu, IntRegs:$Rs, s8Ext:$s8), + "$Rd = mux($Pu, $Rs, #$s8)">; + +def : Pat<(i32 (select I1:$Pu, s32ImmPred:$s8, I32:$Rs)), + (C2_muxri I1:$Pu, s32ImmPred:$s8, I32:$Rs)>; + +def : Pat<(i32 (select I1:$Pu, I32:$Rs, s32ImmPred:$s8)), + (C2_muxir I1:$Pu, I32:$Rs, s32ImmPred:$s8)>; + +// C2_muxii: Scalar mux immediates. +let isExtentSigned = 1, hasNewValue = 1, isExtendable = 1, + opExtentBits = 8, opExtendable = 2 in +def C2_muxii: ALU32Inst <(outs IntRegs:$Rd), + (ins PredRegs:$Pu, s8Ext:$s8, s8Imm:$S8), + "$Rd = mux($Pu, #$s8, #$S8)" , + [(set (i32 IntRegs:$Rd), + (i32 (select I1:$Pu, s32ImmPred:$s8, s8ImmPred:$S8)))] > { + bits<5> Rd; + bits<2> Pu; + bits<8> s8; + bits<8> S8; + + let IClass = 0b0111; + + let Inst{27-25} = 0b101; + let Inst{24-23} = Pu; + let Inst{22-16} = S8{7-1}; + let Inst{13} = S8{0}; + let Inst{12-5} = s8; + let Inst{4-0} = Rd; + } + +let isCodeGenOnly = 1, isPseudo = 1 in +def MUX64_rr : ALU64_rr<(outs DoubleRegs:$Rd), + (ins PredRegs:$Pu, DoubleRegs:$Rs, DoubleRegs:$Rt), + ".error \"should not emit\" ", []>; + + +//===----------------------------------------------------------------------===// +// template class for non-predicated alu32_2op instructions +// - aslh, asrh, sxtb, sxth, zxth +//===----------------------------------------------------------------------===// +let hasNewValue = 1, opNewValue = 0 in +class T_ALU32_2op <string mnemonic, bits<3> minOp> : + ALU32Inst <(outs IntRegs:$Rd), (ins IntRegs:$Rs), + "$Rd = "#mnemonic#"($Rs)", [] > { + bits<5> Rd; + bits<5> Rs; + + let IClass = 0b0111; + + let Inst{27-24} = 0b0000; + let Inst{23-21} = minOp; + let Inst{13} = 0b0; + let Inst{4-0} = Rd; + let Inst{20-16} = Rs; +} + +//===----------------------------------------------------------------------===// +// template class for predicated alu32_2op instructions +// - aslh, asrh, sxtb, sxth, zxtb, zxth +//===----------------------------------------------------------------------===// +let hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +class T_ALU32_2op_Pred <string mnemonic, bits<3> minOp, bit isPredNot, + bit isPredNew > : + ALU32Inst <(outs IntRegs:$Rd), (ins PredRegs:$Pu, IntRegs:$Rs), + !if(isPredNot, "if (!$Pu", "if ($Pu") + #!if(isPredNew, ".new) ",") ")#"$Rd = "#mnemonic#"($Rs)"> { + bits<5> Rd; + bits<2> Pu; + bits<5> Rs; + + let IClass = 0b0111; + + let Inst{27-24} = 0b0000; + let Inst{23-21} = minOp; + let Inst{13} = 0b1; + let Inst{11} = isPredNot; + let Inst{10} = isPredNew; + let Inst{4-0} = Rd; + let Inst{9-8} = Pu; + let Inst{20-16} = Rs; +} + +multiclass ALU32_2op_Pred<string mnemonic, bits<3> minOp, bit PredNot> { + let isPredicatedFalse = PredNot in { + def NAME : T_ALU32_2op_Pred<mnemonic, minOp, PredNot, 0>; + + // Predicate new + let isPredicatedNew = 1 in + def NAME#new : T_ALU32_2op_Pred<mnemonic, minOp, PredNot, 1>; + } +} + +multiclass 
ALU32_2op_base<string mnemonic, bits<3> minOp> {
+  let BaseOpcode = mnemonic in {
+    let isPredicable = 1, hasSideEffects = 0 in
+    def A2_#NAME : T_ALU32_2op<mnemonic, minOp>;
+
+    let isPredicated = 1, hasSideEffects = 0 in {
+      defm A4_p#NAME#t : ALU32_2op_Pred<mnemonic, minOp, 0>;
+      defm A4_p#NAME#f : ALU32_2op_Pred<mnemonic, minOp, 1>;
+    }
+  }
+}
+
+defm aslh : ALU32_2op_base<"aslh", 0b000>, PredNewRel;
+defm asrh : ALU32_2op_base<"asrh", 0b001>, PredNewRel;
+defm sxtb : ALU32_2op_base<"sxtb", 0b101>, PredNewRel;
+defm sxth : ALU32_2op_base<"sxth", 0b111>, PredNewRel;
+defm zxth : ALU32_2op_base<"zxth", 0b110>, PredNewRel;
+
+// Rd=zxtb(Rs): assembler mapped to Rd=and(Rs,#255).
+// The compiler would want to generate 'zxtb' instead of 'and' because 'zxtb'
+// has predicated forms while 'and' doesn't. Since the integrated assembler
+// can't handle 'mapped' instructions, we need to encode 'zxtb' the same as
+// 'and' where the immediate operand is set to '255'.
+
+let hasNewValue = 1, opNewValue = 0 in
+class T_ZXTB: ALU32Inst < (outs IntRegs:$Rd), (ins IntRegs:$Rs),
+  "$Rd = zxtb($Rs)", [] > { // Rd = and(Rs,255)
+  bits<5> Rd;
+  bits<5> Rs;
+  bits<10> s10 = 255;
+
+  let IClass = 0b0111;
+
+  let Inst{27-22} = 0b011000;
+  let Inst{4-0} = Rd;
+  let Inst{20-16} = Rs;
+  let Inst{21} = s10{9};
+  let Inst{13-5} = s10{8-0};
+}
+
+// Rd=zxtb(Rs): assembler mapped to "Rd=and(Rs,#255)".
+multiclass ZXTB_base <string mnemonic, bits<3> minOp> {
+  let BaseOpcode = mnemonic in {
+    let isPredicable = 1, hasSideEffects = 0 in
+    def A2_#NAME : T_ZXTB;
+
+    let isPredicated = 1, hasSideEffects = 0 in {
+      defm A4_p#NAME#t : ALU32_2op_Pred<mnemonic, minOp, 0>;
+      defm A4_p#NAME#f : ALU32_2op_Pred<mnemonic, minOp, 1>;
+    }
+  }
+}
+
+defm zxtb : ZXTB_base<"zxtb",0b100>, PredNewRel;
+
+def: Pat<(shl I32:$src1, (i32 16)),   (A2_aslh I32:$src1)>;
+def: Pat<(sra I32:$src1, (i32 16)),   (A2_asrh I32:$src1)>;
+def: Pat<(sext_inreg I32:$src1, i8),  (A2_sxtb I32:$src1)>;
+def: Pat<(sext_inreg I32:$src1, i16), (A2_sxth I32:$src1)>;
+
+//===----------------------------------------------------------------------===//
+// Template class for vector add and avg
+//===----------------------------------------------------------------------===//
+
+class T_VectALU_64 <string opc, bits<3> majOp, bits<3> minOp,
+                    bit isSat, bit isRnd, bit isCrnd, bit SwapOps >
+  : ALU64_rr < (outs DoubleRegs:$Rdd),
+               (ins DoubleRegs:$Rss, DoubleRegs:$Rtt),
+    "$Rdd = "#opc#"($Rss, $Rtt)"#!if(isRnd, ":rnd", "")
+                                #!if(isCrnd,":crnd","")
+                                #!if(isSat, ":sat", ""),
+    [], "", ALU64_tc_2_SLOT23 > {
+    bits<5> Rdd;
+    bits<5> Rss;
+    bits<5> Rtt;
+
+    let IClass = 0b1101;
+
+    let Inst{27-24} = 0b0011;
+    let Inst{23-21} = majOp;
+    let Inst{20-16} = !if (SwapOps, Rtt, Rss);
+    let Inst{12-8} = !if (SwapOps, Rss, Rtt);
+    let Inst{7-5} = minOp;
+    let Inst{4-0} = Rdd;
+  }
+
+// ALU64 - Vector add
+// Rdd=vadd[u][bhw](Rss,Rtt)
+let Itinerary = ALU64_tc_1_SLOT23 in {
+  def A2_vaddub : T_VectALU_64 < "vaddub", 0b000, 0b000, 0, 0, 0, 0>;
+  def A2_vaddh  : T_VectALU_64 < "vaddh",  0b000, 0b010, 0, 0, 0, 0>;
+  def A2_vaddw  : T_VectALU_64 < "vaddw",  0b000, 0b101, 0, 0, 0, 0>;
+}
+
+// Rdd=vadd[u][bhw](Rss,Rtt):sat
+let Defs = [USR_OVF] in {
+  def A2_vaddubs : T_VectALU_64 < "vaddub", 0b000, 0b001, 1, 0, 0, 0>;
+  def A2_vaddhs  : T_VectALU_64 < "vaddh",  0b000, 0b011, 1, 0, 0, 0>;
+  def A2_vadduhs : T_VectALU_64 < "vadduh", 0b000, 0b100, 1, 0, 0, 0>;
+  def A2_vaddws  : T_VectALU_64 < "vaddw",  0b000, 0b110, 1, 0, 0, 0>;
+}
+
+// ALU64 - Vector average
+// Rdd=vavg[u][bhw](Rss,Rtt)
+let Itinerary =
ALU64_tc_1_SLOT23 in { + def A2_vavgub : T_VectALU_64 < "vavgub", 0b010, 0b000, 0, 0, 0, 0>; + def A2_vavgh : T_VectALU_64 < "vavgh", 0b010, 0b010, 0, 0, 0, 0>; + def A2_vavguh : T_VectALU_64 < "vavguh", 0b010, 0b101, 0, 0, 0, 0>; + def A2_vavgw : T_VectALU_64 < "vavgw", 0b011, 0b000, 0, 0, 0, 0>; + def A2_vavguw : T_VectALU_64 < "vavguw", 0b011, 0b011, 0, 0, 0, 0>; +} + +// Rdd=vavg[u][bhw](Rss,Rtt)[:rnd|:crnd] +def A2_vavgubr : T_VectALU_64 < "vavgub", 0b010, 0b001, 0, 1, 0, 0>; +def A2_vavghr : T_VectALU_64 < "vavgh", 0b010, 0b011, 0, 1, 0, 0>; +def A2_vavghcr : T_VectALU_64 < "vavgh", 0b010, 0b100, 0, 0, 1, 0>; +def A2_vavguhr : T_VectALU_64 < "vavguh", 0b010, 0b110, 0, 1, 0, 0>; + +def A2_vavgwr : T_VectALU_64 < "vavgw", 0b011, 0b001, 0, 1, 0, 0>; +def A2_vavgwcr : T_VectALU_64 < "vavgw", 0b011, 0b010, 0, 0, 1, 0>; +def A2_vavguwr : T_VectALU_64 < "vavguw", 0b011, 0b100, 0, 1, 0, 0>; + +// Rdd=vnavg[bh](Rss,Rtt) +let Itinerary = ALU64_tc_1_SLOT23 in { + def A2_vnavgh : T_VectALU_64 < "vnavgh", 0b100, 0b000, 0, 0, 0, 1>; + def A2_vnavgw : T_VectALU_64 < "vnavgw", 0b100, 0b011, 0, 0, 0, 1>; +} + +// Rdd=vnavg[bh](Rss,Rtt)[:rnd|:crnd]:sat +let Defs = [USR_OVF] in { + def A2_vnavghr : T_VectALU_64 < "vnavgh", 0b100, 0b001, 1, 1, 0, 1>; + def A2_vnavghcr : T_VectALU_64 < "vnavgh", 0b100, 0b010, 1, 0, 1, 1>; + def A2_vnavgwr : T_VectALU_64 < "vnavgw", 0b100, 0b100, 1, 1, 0, 1>; + def A2_vnavgwcr : T_VectALU_64 < "vnavgw", 0b100, 0b110, 1, 0, 1, 1>; +} + +// Rdd=vsub[u][bh](Rss,Rtt) +let Itinerary = ALU64_tc_1_SLOT23 in { + def A2_vsubub : T_VectALU_64 < "vsubub", 0b001, 0b000, 0, 0, 0, 1>; + def A2_vsubh : T_VectALU_64 < "vsubh", 0b001, 0b010, 0, 0, 0, 1>; + def A2_vsubw : T_VectALU_64 < "vsubw", 0b001, 0b101, 0, 0, 0, 1>; +} + +// Rdd=vsub[u][bh](Rss,Rtt):sat +let Defs = [USR_OVF] in { + def A2_vsububs : T_VectALU_64 < "vsubub", 0b001, 0b001, 1, 0, 0, 1>; + def A2_vsubhs : T_VectALU_64 < "vsubh", 0b001, 0b011, 1, 0, 0, 1>; + def A2_vsubuhs : T_VectALU_64 < "vsubuh", 0b001, 0b100, 1, 0, 0, 1>; + def A2_vsubws : T_VectALU_64 < "vsubw", 0b001, 0b110, 1, 0, 0, 1>; +} + +// Rdd=vmax[u][bhw](Rss,Rtt) +def A2_vmaxb : T_VectALU_64 < "vmaxb", 0b110, 0b110, 0, 0, 0, 1>; +def A2_vmaxub : T_VectALU_64 < "vmaxub", 0b110, 0b000, 0, 0, 0, 1>; +def A2_vmaxh : T_VectALU_64 < "vmaxh", 0b110, 0b001, 0, 0, 0, 1>; +def A2_vmaxuh : T_VectALU_64 < "vmaxuh", 0b110, 0b010, 0, 0, 0, 1>; +def A2_vmaxw : T_VectALU_64 < "vmaxw", 0b110, 0b011, 0, 0, 0, 1>; +def A2_vmaxuw : T_VectALU_64 < "vmaxuw", 0b101, 0b101, 0, 0, 0, 1>; + +// Rdd=vmin[u][bhw](Rss,Rtt) +def A2_vminb : T_VectALU_64 < "vminb", 0b110, 0b111, 0, 0, 0, 1>; +def A2_vminub : T_VectALU_64 < "vminub", 0b101, 0b000, 0, 0, 0, 1>; +def A2_vminh : T_VectALU_64 < "vminh", 0b101, 0b001, 0, 0, 0, 1>; +def A2_vminuh : T_VectALU_64 < "vminuh", 0b101, 0b010, 0, 0, 0, 1>; +def A2_vminw : T_VectALU_64 < "vminw", 0b101, 0b011, 0, 0, 0, 1>; +def A2_vminuw : T_VectALU_64 < "vminuw", 0b101, 0b100, 0, 0, 0, 1>; + +//===----------------------------------------------------------------------===// +// Template class for vector compare +//===----------------------------------------------------------------------===// +let hasSideEffects = 0 in +class T_vcmp <string Str, bits<4> minOp> + : ALU64_rr <(outs PredRegs:$Pd), + (ins DoubleRegs:$Rss, DoubleRegs:$Rtt), + "$Pd = "#Str#"($Rss, $Rtt)", [], + "", ALU64_tc_2early_SLOT23> { + bits<2> Pd; + bits<5> Rss; + bits<5> Rtt; + + let IClass = 0b1101; + + let Inst{27-23} = 0b00100; + let Inst{13} = minOp{3}; + let Inst{7-5} = minOp{2-0}; 
+ let Inst{1-0} = Pd; + let Inst{20-16} = Rss; + let Inst{12-8} = Rtt; + } + +class T_vcmp_pat<InstHexagon MI, PatFrag Op, ValueType T> + : Pat<(i1 (Op (T DoubleRegs:$Rss), (T DoubleRegs:$Rtt))), + (i1 (MI DoubleRegs:$Rss, DoubleRegs:$Rtt))>; + +// Vector compare bytes +def A2_vcmpbeq : T_vcmp <"vcmpb.eq", 0b0110>; +def A2_vcmpbgtu : T_vcmp <"vcmpb.gtu", 0b0111>; + +// Vector compare halfwords +def A2_vcmpheq : T_vcmp <"vcmph.eq", 0b0011>; +def A2_vcmphgt : T_vcmp <"vcmph.gt", 0b0100>; +def A2_vcmphgtu : T_vcmp <"vcmph.gtu", 0b0101>; + +// Vector compare words +def A2_vcmpweq : T_vcmp <"vcmpw.eq", 0b0000>; +def A2_vcmpwgt : T_vcmp <"vcmpw.gt", 0b0001>; +def A2_vcmpwgtu : T_vcmp <"vcmpw.gtu", 0b0010>; + +def: T_vcmp_pat<A2_vcmpbeq, seteq, v8i8>; +def: T_vcmp_pat<A2_vcmpbgtu, setugt, v8i8>; +def: T_vcmp_pat<A2_vcmpheq, seteq, v4i16>; +def: T_vcmp_pat<A2_vcmphgt, setgt, v4i16>; +def: T_vcmp_pat<A2_vcmphgtu, setugt, v4i16>; +def: T_vcmp_pat<A2_vcmpweq, seteq, v2i32>; +def: T_vcmp_pat<A2_vcmpwgt, setgt, v2i32>; +def: T_vcmp_pat<A2_vcmpwgtu, setugt, v2i32>; + +//===----------------------------------------------------------------------===// +// ALU32/PERM - +//===----------------------------------------------------------------------===// + + +//===----------------------------------------------------------------------===// +// ALU32/PRED + +//===----------------------------------------------------------------------===// +// No bits needed. If cmp.ge is found the assembler parser will +// transform it to cmp.gt subtracting 1 from the immediate. +let isPseudo = 1 in { +def C2_cmpgei: ALU32Inst < + (outs PredRegs:$Pd), (ins IntRegs:$Rs, s8Ext:$s8), + "$Pd = cmp.ge($Rs, #$s8)">; +def C2_cmpgeui: ALU32Inst < + (outs PredRegs:$Pd), (ins IntRegs:$Rs, u8Ext:$s8), + "$Pd = cmp.geu($Rs, #$s8)">; +} + + +//===----------------------------------------------------------------------===// +// ALU32/PRED - +//===----------------------------------------------------------------------===// + + +//===----------------------------------------------------------------------===// +// ALU64/ALU + +//===----------------------------------------------------------------------===// +// Add. +//===----------------------------------------------------------------------===// +// Template Class +// Add/Subtract halfword +// Rd=add(Rt.L,Rs.[HL])[:sat] +// Rd=sub(Rt.L,Rs.[HL])[:sat] +// Rd=add(Rt.[LH],Rs.[HL])[:sat][:<16] +// Rd=sub(Rt.[LH],Rs.[HL])[:sat][:<16] +//===----------------------------------------------------------------------===// + +let hasNewValue = 1, opNewValue = 0 in +class T_XTYPE_ADD_SUB <bits<2> LHbits, bit isSat, bit hasShift, bit isSub> + : ALU64Inst <(outs IntRegs:$Rd), (ins IntRegs:$Rt, IntRegs:$Rs), + "$Rd = "#!if(isSub,"sub","add")#"($Rt." + #!if(hasShift, !if(LHbits{1},"h","l"),"l") #", $Rs." 
+ #!if(hasShift, !if(LHbits{0},"h)","l)"), !if(LHbits{1},"h)","l)")) + #!if(isSat,":sat","") + #!if(hasShift,":<<16",""), [], "", ALU64_tc_1_SLOT23> { + bits<5> Rd; + bits<5> Rt; + bits<5> Rs; + let IClass = 0b1101; + + let Inst{27-23} = 0b01010; + let Inst{22} = hasShift; + let Inst{21} = isSub; + let Inst{7} = isSat; + let Inst{6-5} = LHbits; + let Inst{4-0} = Rd; + let Inst{12-8} = Rt; + let Inst{20-16} = Rs; + } + +//Rd=sub(Rt.L,Rs.[LH]) +def A2_subh_l16_ll : T_XTYPE_ADD_SUB <0b00, 0, 0, 1>; +def A2_subh_l16_hl : T_XTYPE_ADD_SUB <0b10, 0, 0, 1>; + +//Rd=add(Rt.L,Rs.[LH]) +def A2_addh_l16_ll : T_XTYPE_ADD_SUB <0b00, 0, 0, 0>; +def A2_addh_l16_hl : T_XTYPE_ADD_SUB <0b10, 0, 0, 0>; + +let Itinerary = ALU64_tc_2_SLOT23, Defs = [USR_OVF] in { + //Rd=sub(Rt.L,Rs.[LH]):sat + def A2_subh_l16_sat_ll : T_XTYPE_ADD_SUB <0b00, 1, 0, 1>; + def A2_subh_l16_sat_hl : T_XTYPE_ADD_SUB <0b10, 1, 0, 1>; + + //Rd=add(Rt.L,Rs.[LH]):sat + def A2_addh_l16_sat_ll : T_XTYPE_ADD_SUB <0b00, 1, 0, 0>; + def A2_addh_l16_sat_hl : T_XTYPE_ADD_SUB <0b10, 1, 0, 0>; +} + +//Rd=sub(Rt.[LH],Rs.[LH]):<<16 +def A2_subh_h16_ll : T_XTYPE_ADD_SUB <0b00, 0, 1, 1>; +def A2_subh_h16_lh : T_XTYPE_ADD_SUB <0b01, 0, 1, 1>; +def A2_subh_h16_hl : T_XTYPE_ADD_SUB <0b10, 0, 1, 1>; +def A2_subh_h16_hh : T_XTYPE_ADD_SUB <0b11, 0, 1, 1>; + +//Rd=add(Rt.[LH],Rs.[LH]):<<16 +def A2_addh_h16_ll : T_XTYPE_ADD_SUB <0b00, 0, 1, 0>; +def A2_addh_h16_lh : T_XTYPE_ADD_SUB <0b01, 0, 1, 0>; +def A2_addh_h16_hl : T_XTYPE_ADD_SUB <0b10, 0, 1, 0>; +def A2_addh_h16_hh : T_XTYPE_ADD_SUB <0b11, 0, 1, 0>; + +let Itinerary = ALU64_tc_2_SLOT23, Defs = [USR_OVF] in { + //Rd=sub(Rt.[LH],Rs.[LH]):sat:<<16 + def A2_subh_h16_sat_ll : T_XTYPE_ADD_SUB <0b00, 1, 1, 1>; + def A2_subh_h16_sat_lh : T_XTYPE_ADD_SUB <0b01, 1, 1, 1>; + def A2_subh_h16_sat_hl : T_XTYPE_ADD_SUB <0b10, 1, 1, 1>; + def A2_subh_h16_sat_hh : T_XTYPE_ADD_SUB <0b11, 1, 1, 1>; + + //Rd=add(Rt.[LH],Rs.[LH]):sat:<<16 + def A2_addh_h16_sat_ll : T_XTYPE_ADD_SUB <0b00, 1, 1, 0>; + def A2_addh_h16_sat_lh : T_XTYPE_ADD_SUB <0b01, 1, 1, 0>; + def A2_addh_h16_sat_hl : T_XTYPE_ADD_SUB <0b10, 1, 1, 0>; + def A2_addh_h16_sat_hh : T_XTYPE_ADD_SUB <0b11, 1, 1, 0>; +} + +// Add halfword. +def: Pat<(sext_inreg (add I32:$src1, I32:$src2), i16), + (A2_addh_l16_ll I32:$src1, I32:$src2)>; + +def: Pat<(sra (add (shl I32:$src1, (i32 16)), I32:$src2), (i32 16)), + (A2_addh_l16_hl I32:$src1, I32:$src2)>; + +def: Pat<(shl (add I32:$src1, I32:$src2), (i32 16)), + (A2_addh_h16_ll I32:$src1, I32:$src2)>; + +// Subtract halfword. 
+def: Pat<(sext_inreg (sub I32:$src1, I32:$src2), i16), + (A2_subh_l16_ll I32:$src1, I32:$src2)>; + +def: Pat<(shl (sub I32:$src1, I32:$src2), (i32 16)), + (A2_subh_h16_ll I32:$src1, I32:$src2)>; + +let hasSideEffects = 0, hasNewValue = 1 in +def S2_parityp: ALU64Inst<(outs IntRegs:$Rd), + (ins DoubleRegs:$Rs, DoubleRegs:$Rt), + "$Rd = parity($Rs, $Rt)", [], "", ALU64_tc_2_SLOT23> { + bits<5> Rd; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1101; + let Inst{27-24} = 0b0000; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + let Inst{4-0} = Rd; +} + +let hasNewValue = 1, opNewValue = 0, hasSideEffects = 0 in +class T_XTYPE_MIN_MAX < bit isMax, bit isUnsigned > + : ALU64Inst < (outs IntRegs:$Rd), (ins IntRegs:$Rt, IntRegs:$Rs), + "$Rd = "#!if(isMax,"max","min")#!if(isUnsigned,"u","") + #"($Rt, $Rs)", [], "", ALU64_tc_2_SLOT23> { + bits<5> Rd; + bits<5> Rt; + bits<5> Rs; + + let IClass = 0b1101; + + let Inst{27-23} = 0b01011; + let Inst{22-21} = !if(isMax, 0b10, 0b01); + let Inst{7} = isUnsigned; + let Inst{4-0} = Rd; + let Inst{12-8} = !if(isMax, Rs, Rt); + let Inst{20-16} = !if(isMax, Rt, Rs); + } + +def A2_min : T_XTYPE_MIN_MAX < 0, 0 >; +def A2_minu : T_XTYPE_MIN_MAX < 0, 1 >; +def A2_max : T_XTYPE_MIN_MAX < 1, 0 >; +def A2_maxu : T_XTYPE_MIN_MAX < 1, 1 >; + +// Here, depending on the operand being selected, we'll either generate a +// min or max instruction. +// Ex: +// (a>b)?a:b --> max(a,b) => Here check performed is '>' and the value selected +// is the larger of two. So, the corresponding HexagonInst is passed in 'Inst'. +// (a>b)?b:a --> min(a,b) => Here check performed is '>' but the smaller value +// is selected and the corresponding HexagonInst is passed in 'SwapInst'. + +multiclass T_MinMax_pats <PatFrag Op, RegisterClass RC, ValueType VT, + InstHexagon Inst, InstHexagon SwapInst> { + def: Pat<(select (i1 (Op (VT RC:$src1), (VT RC:$src2))), + (VT RC:$src1), (VT RC:$src2)), + (Inst RC:$src1, RC:$src2)>; + def: Pat<(select (i1 (Op (VT RC:$src1), (VT RC:$src2))), + (VT RC:$src2), (VT RC:$src1)), + (SwapInst RC:$src1, RC:$src2)>; +} + + +multiclass MinMax_pats <PatFrag Op, InstHexagon Inst, InstHexagon SwapInst> { + defm: T_MinMax_pats<Op, IntRegs, i32, Inst, SwapInst>; + + def: Pat<(sext_inreg (i32 (select (i1 (Op (i32 PositiveHalfWord:$src1), + (i32 PositiveHalfWord:$src2))), + (i32 PositiveHalfWord:$src1), + (i32 PositiveHalfWord:$src2))), i16), + (Inst IntRegs:$src1, IntRegs:$src2)>; + + def: Pat<(sext_inreg (i32 (select (i1 (Op (i32 PositiveHalfWord:$src1), + (i32 PositiveHalfWord:$src2))), + (i32 PositiveHalfWord:$src2), + (i32 PositiveHalfWord:$src1))), i16), + (SwapInst IntRegs:$src1, IntRegs:$src2)>; +} + +let AddedComplexity = 200 in { + defm: MinMax_pats<setge, A2_max, A2_min>; + defm: MinMax_pats<setgt, A2_max, A2_min>; + defm: MinMax_pats<setle, A2_min, A2_max>; + defm: MinMax_pats<setlt, A2_min, A2_max>; + defm: MinMax_pats<setuge, A2_maxu, A2_minu>; + defm: MinMax_pats<setugt, A2_maxu, A2_minu>; + defm: MinMax_pats<setule, A2_minu, A2_maxu>; + defm: MinMax_pats<setult, A2_minu, A2_maxu>; +} + +class T_cmp64_rr<string mnemonic, bits<3> MinOp, bit IsComm> + : ALU64_rr<(outs PredRegs:$Pd), (ins DoubleRegs:$Rs, DoubleRegs:$Rt), + "$Pd = "#mnemonic#"($Rs, $Rt)", [], "", ALU64_tc_2early_SLOT23> { + let isCompare = 1; + let isCommutable = IsComm; + let hasSideEffects = 0; + + bits<2> Pd; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1101; + let Inst{27-21} = 0b0010100; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + let Inst{7-5} = MinOp; + let Inst{1-0} = Pd; +} + +def 
C2_cmpeqp : T_cmp64_rr<"cmp.eq", 0b000, 1>; +def C2_cmpgtp : T_cmp64_rr<"cmp.gt", 0b010, 0>; +def C2_cmpgtup : T_cmp64_rr<"cmp.gtu", 0b100, 0>; + +class T_cmp64_rr_pat<InstHexagon MI, PatFrag CmpOp> + : Pat<(i1 (CmpOp (i64 DoubleRegs:$Rs), (i64 DoubleRegs:$Rt))), + (i1 (MI DoubleRegs:$Rs, DoubleRegs:$Rt))>; + +def: T_cmp64_rr_pat<C2_cmpeqp, seteq>; +def: T_cmp64_rr_pat<C2_cmpgtp, setgt>; +def: T_cmp64_rr_pat<C2_cmpgtup, setugt>; +def: T_cmp64_rr_pat<C2_cmpgtp, RevCmp<setlt>>; +def: T_cmp64_rr_pat<C2_cmpgtup, RevCmp<setult>>; + +def C2_vmux : ALU64_rr<(outs DoubleRegs:$Rd), + (ins PredRegs:$Pu, DoubleRegs:$Rs, DoubleRegs:$Rt), + "$Rd = vmux($Pu, $Rs, $Rt)", [], "", ALU64_tc_1_SLOT23> { + let hasSideEffects = 0; + + bits<5> Rd; + bits<2> Pu; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1101; + let Inst{27-24} = 0b0001; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + let Inst{6-5} = Pu; + let Inst{4-0} = Rd; +} + +class T_ALU64_rr<string mnemonic, string suffix, bits<4> RegType, + bits<3> MajOp, bits<3> MinOp, bit OpsRev, bit IsComm, + string Op2Pfx> + : ALU64_rr<(outs DoubleRegs:$Rd), (ins DoubleRegs:$Rs, DoubleRegs:$Rt), + "$Rd = " #mnemonic# "($Rs, " #Op2Pfx# "$Rt)" #suffix, [], + "", ALU64_tc_1_SLOT23> { + let hasSideEffects = 0; + let isCommutable = IsComm; + + bits<5> Rs; + bits<5> Rt; + bits<5> Rd; + + let IClass = 0b1101; + let Inst{27-24} = RegType; + let Inst{23-21} = MajOp; + let Inst{20-16} = !if (OpsRev,Rt,Rs); + let Inst{12-8} = !if (OpsRev,Rs,Rt); + let Inst{7-5} = MinOp; + let Inst{4-0} = Rd; +} + +class T_ALU64_arith<string mnemonic, bits<3> MajOp, bits<3> MinOp, bit IsSat, + bit OpsRev, bit IsComm> + : T_ALU64_rr<mnemonic, !if(IsSat,":sat",""), 0b0011, MajOp, MinOp, OpsRev, + IsComm, "">; + +def A2_addp : T_ALU64_arith<"add", 0b000, 0b111, 0, 0, 1>; +def A2_subp : T_ALU64_arith<"sub", 0b001, 0b111, 0, 1, 0>; + +def: Pat<(i64 (add I64:$Rs, I64:$Rt)), (A2_addp I64:$Rs, I64:$Rt)>; +def: Pat<(i64 (sub I64:$Rs, I64:$Rt)), (A2_subp I64:$Rs, I64:$Rt)>; + +class T_ALU64_logical<string mnemonic, bits<3> MinOp, bit OpsRev, bit IsComm, + bit IsNeg> + : T_ALU64_rr<mnemonic, "", 0b0011, 0b111, MinOp, OpsRev, IsComm, + !if(IsNeg,"~","")>; + +def A2_andp : T_ALU64_logical<"and", 0b000, 0, 1, 0>; +def A2_orp : T_ALU64_logical<"or", 0b010, 0, 1, 0>; +def A2_xorp : T_ALU64_logical<"xor", 0b100, 0, 1, 0>; + +def: Pat<(i64 (and I64:$Rs, I64:$Rt)), (A2_andp I64:$Rs, I64:$Rt)>; +def: Pat<(i64 (or I64:$Rs, I64:$Rt)), (A2_orp I64:$Rs, I64:$Rt)>; +def: Pat<(i64 (xor I64:$Rs, I64:$Rt)), (A2_xorp I64:$Rs, I64:$Rt)>; + +//===----------------------------------------------------------------------===// +// ALU64/ALU - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// ALU64/BIT + +//===----------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// +// ALU64/BIT - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// ALU64/PERM + +//===----------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// +// ALU64/PERM - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// 
+// CR + +//===----------------------------------------------------------------------===// +// Logical reductions on predicates. + +// Looping instructions. + +// Pipelined looping instructions. + +// Logical operations on predicates. +let hasSideEffects = 0 in +class T_LOGICAL_1OP<string MnOp, bits<2> OpBits> + : CRInst<(outs PredRegs:$Pd), (ins PredRegs:$Ps), + "$Pd = " # MnOp # "($Ps)", [], "", CR_tc_2early_SLOT23> { + bits<2> Pd; + bits<2> Ps; + + let IClass = 0b0110; + let Inst{27-23} = 0b10111; + let Inst{22-21} = OpBits; + let Inst{20} = 0b0; + let Inst{17-16} = Ps; + let Inst{13} = 0b0; + let Inst{1-0} = Pd; +} + +def C2_any8 : T_LOGICAL_1OP<"any8", 0b00>; +def C2_all8 : T_LOGICAL_1OP<"all8", 0b01>; +def C2_not : T_LOGICAL_1OP<"not", 0b10>; + +def: Pat<(i1 (not (i1 PredRegs:$Ps))), + (C2_not PredRegs:$Ps)>; + +let hasSideEffects = 0 in +class T_LOGICAL_2OP<string MnOp, bits<3> OpBits, bit IsNeg, bit Rev> + : CRInst<(outs PredRegs:$Pd), (ins PredRegs:$Ps, PredRegs:$Pt), + "$Pd = " # MnOp # "($Ps, " # !if (IsNeg,"!","") # "$Pt)", + [], "", CR_tc_2early_SLOT23> { + bits<2> Pd; + bits<2> Ps; + bits<2> Pt; + + let IClass = 0b0110; + let Inst{27-24} = 0b1011; + let Inst{23-21} = OpBits; + let Inst{20} = 0b0; + let Inst{17-16} = !if(Rev,Pt,Ps); // Rs and Rt are reversed for some + let Inst{13} = 0b0; // instructions. + let Inst{9-8} = !if(Rev,Ps,Pt); + let Inst{1-0} = Pd; +} + +def C2_and : T_LOGICAL_2OP<"and", 0b000, 0, 1>; +def C2_or : T_LOGICAL_2OP<"or", 0b001, 0, 1>; +def C2_xor : T_LOGICAL_2OP<"xor", 0b010, 0, 0>; +def C2_andn : T_LOGICAL_2OP<"and", 0b011, 1, 1>; +def C2_orn : T_LOGICAL_2OP<"or", 0b111, 1, 1>; + +def: Pat<(i1 (and I1:$Ps, I1:$Pt)), (C2_and I1:$Ps, I1:$Pt)>; +def: Pat<(i1 (or I1:$Ps, I1:$Pt)), (C2_or I1:$Ps, I1:$Pt)>; +def: Pat<(i1 (xor I1:$Ps, I1:$Pt)), (C2_xor I1:$Ps, I1:$Pt)>; +def: Pat<(i1 (and I1:$Ps, (not I1:$Pt))), (C2_andn I1:$Ps, I1:$Pt)>; +def: Pat<(i1 (or I1:$Ps, (not I1:$Pt))), (C2_orn I1:$Ps, I1:$Pt)>; + +let hasSideEffects = 0, hasNewValue = 1 in +def C2_vitpack : SInst<(outs IntRegs:$Rd), (ins PredRegs:$Ps, PredRegs:$Pt), + "$Rd = vitpack($Ps, $Pt)", [], "", S_2op_tc_1_SLOT23> { + bits<5> Rd; + bits<2> Ps; + bits<2> Pt; + + let IClass = 0b1000; + let Inst{27-24} = 0b1001; + let Inst{22-21} = 0b00; + let Inst{17-16} = Ps; + let Inst{9-8} = Pt; + let Inst{4-0} = Rd; +} + +let hasSideEffects = 0 in +def C2_mask : SInst<(outs DoubleRegs:$Rd), (ins PredRegs:$Pt), + "$Rd = mask($Pt)", [], "", S_2op_tc_1_SLOT23> { + bits<5> Rd; + bits<2> Pt; + + let IClass = 0b1000; + let Inst{27-24} = 0b0110; + let Inst{9-8} = Pt; + let Inst{4-0} = Rd; +} + +// User control register transfer. 
+//===----------------------------------------------------------------------===// +// CR - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// JR + +//===----------------------------------------------------------------------===// + +def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +def eh_return: SDNode<"HexagonISD::EH_RETURN", SDTNone, [SDNPHasChain]>; + +class CondStr<string CReg, bit True, bit New> { + string S = "if (" # !if(True,"","!") # CReg # !if(New,".new","") # ") "; +} +class JumpOpcStr<string Mnemonic, bit New, bit Taken> { + string S = Mnemonic # !if(Taken, ":t", !if(New, ":nt", "")); +} + +let isBranch = 1, isBarrier = 1, Defs = [PC], hasSideEffects = 0, + isPredicable = 1, + isExtendable = 1, opExtendable = 0, isExtentSigned = 1, + opExtentBits = 24, opExtentAlign = 2, InputType = "imm" in +class T_JMP<string ExtStr> + : JInst<(outs), (ins brtarget:$dst), + "jump " # ExtStr # "$dst", + [], "", J_tc_2early_SLOT23> { + bits<24> dst; + let IClass = 0b0101; + + let Inst{27-25} = 0b100; + let Inst{24-16} = dst{23-15}; + let Inst{13-1} = dst{14-2}; +} + +let isBranch = 1, Defs = [PC], hasSideEffects = 0, isPredicated = 1, + isExtendable = 1, opExtendable = 1, isExtentSigned = 1, + opExtentBits = 17, opExtentAlign = 2, InputType = "imm" in +class T_JMP_c<bit PredNot, bit isPredNew, bit isTak, string ExtStr> + : JInst<(outs), (ins PredRegs:$src, brtarget:$dst), + CondStr<"$src", !if(PredNot,0,1), isPredNew>.S # + JumpOpcStr<"jump", isPredNew, isTak>.S # " " # + ExtStr # "$dst", + [], "", J_tc_2early_SLOT23>, ImmRegRel { + let isTaken = isTak; + let isPredicatedFalse = PredNot; + let isPredicatedNew = isPredNew; + bits<2> src; + bits<17> dst; + + let IClass = 0b0101; + + let Inst{27-24} = 0b1100; + let Inst{21} = PredNot; + let Inst{12} = isTak; + let Inst{11} = isPredNew; + let Inst{9-8} = src; + let Inst{23-22} = dst{16-15}; + let Inst{20-16} = dst{14-10}; + let Inst{13} = dst{9}; + let Inst{7-1} = dst{8-2}; + } + +multiclass JMP_Pred<bit PredNot, string ExtStr> { + def NAME : T_JMP_c<PredNot, 0, 0, ExtStr>; // not taken + // Predicate new + def NAME#newpt : T_JMP_c<PredNot, 1, 1, ExtStr>; // taken + def NAME#new : T_JMP_c<PredNot, 1, 0, ExtStr>; // not taken +} + +multiclass JMP_base<string BaseOp, string ExtStr> { + let BaseOpcode = BaseOp in { + def NAME : T_JMP<ExtStr>; + defm t : JMP_Pred<0, ExtStr>; + defm f : JMP_Pred<1, ExtStr>; + } +} + +// Jumps to address stored in a register, JUMPR_MISC +// if ([[!]P[.new]]) jumpr[:t/nt] Rs +let isBranch = 1, isIndirectBranch = 1, isBarrier = 1, Defs = [PC], + isPredicable = 1, hasSideEffects = 0, InputType = "reg" in +class T_JMPr + : JRInst<(outs), (ins IntRegs:$dst), + "jumpr $dst", [], "", J_tc_2early_SLOT2> { + bits<5> dst; + + let IClass = 0b0101; + let Inst{27-21} = 0b0010100; + let Inst{20-16} = dst; +} + +let isBranch = 1, isIndirectBranch = 1, Defs = [PC], isPredicated = 1, + hasSideEffects = 0, InputType = "reg" in +class T_JMPr_c <bit PredNot, bit isPredNew, bit isTak> + : JRInst <(outs), (ins PredRegs:$src, IntRegs:$dst), + CondStr<"$src", !if(PredNot,0,1), isPredNew>.S # + JumpOpcStr<"jumpr", isPredNew, isTak>.S # " $dst", [], + "", J_tc_2early_SLOT2> { + + let isTaken = isTak; + let isPredicatedFalse = PredNot; + let isPredicatedNew = isPredNew; + bits<2> src; + bits<5> dst; + + let IClass = 0b0101; + + let Inst{27-22} = 0b001101; + let Inst{21} = 
PredNot;
+  let Inst{20-16} = dst;
+  let Inst{12} = isTak;
+  let Inst{11} = isPredNew;
+  let Inst{9-8} = src;
+}
+
+multiclass JMPR_Pred<bit PredNot> {
+  def NAME       : T_JMPr_c<PredNot, 0, 0>; // not taken
+  // Predicate new
+  def NAME#newpt : T_JMPr_c<PredNot, 1, 1>; // taken
+  def NAME#new   : T_JMPr_c<PredNot, 1, 0>; // not taken
+}
+
+multiclass JMPR_base<string BaseOp> {
+  let BaseOpcode = BaseOp in {
+    def NAME : T_JMPr;
+    defm t : JMPR_Pred<0>;
+    defm f : JMPR_Pred<1>;
+  }
+}
+
+let isCall = 1, hasSideEffects = 1 in
+class JUMPR_MISC_CALLR<bit isPred, bit isPredNot,
+                       dag InputDag = (ins IntRegs:$Rs)>
+  : JRInst<(outs), InputDag,
+           !if(isPred, !if(isPredNot, "if (!$Pu) callr $Rs",
+                                      "if ($Pu) callr $Rs"),
+                       "callr $Rs"),
+           [], "", J_tc_2early_SLOT2> {
+    bits<5> Rs;
+    bits<2> Pu;
+    let isPredicated = isPred;
+    let isPredicatedFalse = isPredNot;
+
+    let IClass = 0b0101;
+    let Inst{27-25} = 0b000;
+    let Inst{24-23} = !if (isPred, 0b10, 0b01);
+    let Inst{22} = 0;
+    let Inst{21} = isPredNot;
+    let Inst{9-8} = !if (isPred, Pu, 0b00);
+    let Inst{20-16} = Rs;
+
+  }
+
+let Defs = VolatileV3.Regs in {
+  def J2_callrt : JUMPR_MISC_CALLR<1, 0, (ins PredRegs:$Pu, IntRegs:$Rs)>;
+  def J2_callrf : JUMPR_MISC_CALLR<1, 1, (ins PredRegs:$Pu, IntRegs:$Rs)>;
+}
+
+let isTerminator = 1, hasSideEffects = 0 in {
+  defm J2_jump : JMP_base<"JMP", "">, PredNewRel;
+
+  // Deal with explicit assembly:
+  //  - never extend a jump #, always extend a jump ##
+  let isAsmParserOnly = 1 in {
+    defm J2_jump_ext   : JMP_base<"JMP", "##">;
+    defm J2_jump_noext : JMP_base<"JMP", "#">;
+  }
+
+  defm J2_jumpr : JMPR_base<"JMPr">, PredNewRel;
+
+  let isReturn = 1, isCodeGenOnly = 1 in
+  defm JMPret : JMPR_base<"JMPret">, PredNewRel;
+}
+
+def: Pat<(br bb:$dst),
+         (J2_jump brtarget:$dst)>;
+def: Pat<(retflag),
+         (JMPret (i32 R31))>;
+def: Pat<(brcond (i1 PredRegs:$src1), bb:$offset),
+         (J2_jumpt PredRegs:$src1, bb:$offset)>;
+
+// A return through builtin_eh_return.
+let isReturn = 1, isTerminator = 1, isBarrier = 1, hasSideEffects = 0, + isCodeGenOnly = 1, Defs = [PC], Uses = [R28], isPredicable = 0 in +def EH_RETURN_JMPR : T_JMPr; + +def: Pat<(eh_return), + (EH_RETURN_JMPR (i32 R31))>; +def: Pat<(brind (i32 IntRegs:$dst)), + (J2_jumpr IntRegs:$dst)>; + +//===----------------------------------------------------------------------===// +// JR - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// LD + +//===----------------------------------------------------------------------===// + +// Load - Base with Immediate offset addressing mode +let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, AddedComplexity = 20 in +class T_load_io <string mnemonic, RegisterClass RC, bits<4> MajOp, + Operand ImmOp> + : LDInst<(outs RC:$dst), (ins IntRegs:$src1, ImmOp:$offset), + "$dst = "#mnemonic#"($src1 + #$offset)", []>, AddrModeRel { + bits<4> name; + bits<5> dst; + bits<5> src1; + bits<14> offset; + bits<11> offsetBits; + + string ImmOpStr = !cast<string>(ImmOp); + let offsetBits = !if (!eq(ImmOpStr, "s11_3Ext"), offset{13-3}, + !if (!eq(ImmOpStr, "s11_2Ext"), offset{12-2}, + !if (!eq(ImmOpStr, "s11_1Ext"), offset{11-1}, + /* s11_0Ext */ offset{10-0}))); + let opExtentBits = !if (!eq(ImmOpStr, "s11_3Ext"), 14, + !if (!eq(ImmOpStr, "s11_2Ext"), 13, + !if (!eq(ImmOpStr, "s11_1Ext"), 12, + /* s11_0Ext */ 11))); + let hasNewValue = !if (!eq(!cast<string>(RC), "DoubleRegs"), 0, 1); + + let IClass = 0b1001; + + let Inst{27} = 0b0; + let Inst{26-25} = offsetBits{10-9}; + let Inst{24-21} = MajOp; + let Inst{20-16} = src1; + let Inst{13-5} = offsetBits{8-0}; + let Inst{4-0} = dst; + } + +let opExtendable = 3, isExtentSigned = 0, isPredicated = 1 in +class T_pload_io <string mnemonic, RegisterClass RC, bits<4>MajOp, + Operand ImmOp, bit isNot, bit isPredNew> + : LDInst<(outs RC:$dst), + (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset), + "if ("#!if(isNot, "!$src1", "$src1") + #!if(isPredNew, ".new", "") + #") $dst = "#mnemonic#"($src2 + #$offset)", + [],"", V2LDST_tc_ld_SLOT01> , AddrModeRel { + bits<5> dst; + bits<2> src1; + bits<5> src2; + bits<9> offset; + bits<6> offsetBits; + string ImmOpStr = !cast<string>(ImmOp); + + let offsetBits = !if (!eq(ImmOpStr, "u6_3Ext"), offset{8-3}, + !if (!eq(ImmOpStr, "u6_2Ext"), offset{7-2}, + !if (!eq(ImmOpStr, "u6_1Ext"), offset{6-1}, + /* u6_0Ext */ offset{5-0}))); + let opExtentBits = !if (!eq(ImmOpStr, "u6_3Ext"), 9, + !if (!eq(ImmOpStr, "u6_2Ext"), 8, + !if (!eq(ImmOpStr, "u6_1Ext"), 7, + /* u6_0Ext */ 6))); + let hasNewValue = !if (!eq(ImmOpStr, "u6_3Ext"), 0, 1); + let isPredicatedNew = isPredNew; + let isPredicatedFalse = isNot; + + let IClass = 0b0100; + + let Inst{27} = 0b0; + let Inst{27} = 0b0; + let Inst{26} = isNot; + let Inst{25} = isPredNew; + let Inst{24-21} = MajOp; + let Inst{20-16} = src2; + let Inst{13} = 0b0; + let Inst{12-11} = src1; + let Inst{10-5} = offsetBits; + let Inst{4-0} = dst; + } + +let isExtendable = 1, hasSideEffects = 0, addrMode = BaseImmOffset in +multiclass LD_Idxd<string mnemonic, string CextOp, RegisterClass RC, + Operand ImmOp, Operand predImmOp, bits<4>MajOp> { + let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed in { + let isPredicable = 1 in + def L2_#NAME#_io : T_load_io <mnemonic, RC, MajOp, ImmOp>; + + // Predicated + def L2_p#NAME#t_io : T_pload_io <mnemonic, RC, MajOp, predImmOp, 0, 0>; + def L2_p#NAME#f_io : T_pload_io <mnemonic, RC, MajOp, predImmOp, 1, 0>; + 
+ // Predicated new + def L2_p#NAME#tnew_io : T_pload_io <mnemonic, RC, MajOp, predImmOp, 0, 1>; + def L2_p#NAME#fnew_io : T_pload_io <mnemonic, RC, MajOp, predImmOp, 1, 1>; + } +} + +let accessSize = ByteAccess in { + defm loadrb: LD_Idxd <"memb", "LDrib", IntRegs, s11_0Ext, u6_0Ext, 0b1000>; + defm loadrub: LD_Idxd <"memub", "LDriub", IntRegs, s11_0Ext, u6_0Ext, 0b1001>; +} + +let accessSize = HalfWordAccess, opExtentAlign = 1 in { + defm loadrh: LD_Idxd <"memh", "LDrih", IntRegs, s11_1Ext, u6_1Ext, 0b1010>; + defm loadruh: LD_Idxd <"memuh", "LDriuh", IntRegs, s11_1Ext, u6_1Ext, 0b1011>; +} + +let accessSize = WordAccess, opExtentAlign = 2 in +defm loadri: LD_Idxd <"memw", "LDriw", IntRegs, s11_2Ext, u6_2Ext, 0b1100>; + +let accessSize = DoubleWordAccess, opExtentAlign = 3 in +defm loadrd: LD_Idxd <"memd", "LDrid", DoubleRegs, s11_3Ext, u6_3Ext, 0b1110>; + +let accessSize = HalfWordAccess, opExtentAlign = 1 in { + def L2_loadbsw2_io: T_load_io<"membh", IntRegs, 0b0001, s11_1Ext>; + def L2_loadbzw2_io: T_load_io<"memubh", IntRegs, 0b0011, s11_1Ext>; +} + +let accessSize = WordAccess, opExtentAlign = 2 in { + def L2_loadbzw4_io: T_load_io<"memubh", DoubleRegs, 0b0101, s11_2Ext>; + def L2_loadbsw4_io: T_load_io<"membh", DoubleRegs, 0b0111, s11_2Ext>; +} + +let addrMode = BaseImmOffset, isExtendable = 1, hasSideEffects = 0, + opExtendable = 3, isExtentSigned = 1 in +class T_loadalign_io <string str, bits<4> MajOp, Operand ImmOp> + : LDInst<(outs DoubleRegs:$dst), + (ins DoubleRegs:$src1, IntRegs:$src2, ImmOp:$offset), + "$dst = "#str#"($src2 + #$offset)", [], + "$src1 = $dst">, AddrModeRel { + bits<4> name; + bits<5> dst; + bits<5> src2; + bits<12> offset; + bits<11> offsetBits; + + let offsetBits = !if (!eq(!cast<string>(ImmOp), "s11_1Ext"), offset{11-1}, + /* s11_0Ext */ offset{10-0}); + let IClass = 0b1001; + + let Inst{27} = 0b0; + let Inst{26-25} = offsetBits{10-9}; + let Inst{24-21} = MajOp; + let Inst{20-16} = src2; + let Inst{13-5} = offsetBits{8-0}; + let Inst{4-0} = dst; + } + +let accessSize = HalfWordAccess, opExtentBits = 12, opExtentAlign = 1 in +def L2_loadalignh_io: T_loadalign_io <"memh_fifo", 0b0010, s11_1Ext>; + +let accessSize = ByteAccess, opExtentBits = 11 in +def L2_loadalignb_io: T_loadalign_io <"memb_fifo", 0b0100, s11_0Ext>; + +// Patterns to select load-indexed (i.e. load from base+offset). 
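+// As a rough illustration (referring only to the multiclass and the defm lines
+// below, nothing new is defined here): the instantiation
+//     defm: Loadx_pat<load, i32, s30_2ImmPred, L2_loadri_io>;
+// produces, among others, the pattern
+//     Pat<(i32 (load (add (i32 IntRegs:$Rs), s30_2ImmPred:$Off))),
+//         (i32 (L2_loadri_io IntRegs:$Rs, imm:$Off))>
+// so a base+offset word load is selected to L2_loadri_io and prints as,
+// e.g., "r0 = memw(r1 + #8)".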
+multiclass Loadx_pat<PatFrag Load, ValueType VT, PatLeaf ImmPred, + InstHexagon MI> { + def: Pat<(VT (Load AddrFI:$fi)), (VT (MI AddrFI:$fi, 0))>; + def: Pat<(VT (Load (add (i32 AddrFI:$fi), ImmPred:$Off))), + (VT (MI AddrFI:$fi, imm:$Off))>; + def: Pat<(VT (Load (add (i32 IntRegs:$Rs), ImmPred:$Off))), + (VT (MI IntRegs:$Rs, imm:$Off))>; + def: Pat<(VT (Load (i32 IntRegs:$Rs))), (VT (MI IntRegs:$Rs, 0))>; +} + +let AddedComplexity = 20 in { + defm: Loadx_pat<load, i32, s30_2ImmPred, L2_loadri_io>; + defm: Loadx_pat<load, i64, s29_3ImmPred, L2_loadrd_io>; + defm: Loadx_pat<atomic_load_8 , i32, s32_0ImmPred, L2_loadrub_io>; + defm: Loadx_pat<atomic_load_16, i32, s31_1ImmPred, L2_loadruh_io>; + defm: Loadx_pat<atomic_load_32, i32, s30_2ImmPred, L2_loadri_io>; + defm: Loadx_pat<atomic_load_64, i64, s29_3ImmPred, L2_loadrd_io>; + + defm: Loadx_pat<extloadi1, i32, s32_0ImmPred, L2_loadrub_io>; + defm: Loadx_pat<extloadi8, i32, s32_0ImmPred, L2_loadrub_io>; + defm: Loadx_pat<extloadi16, i32, s31_1ImmPred, L2_loadruh_io>; + defm: Loadx_pat<sextloadi8, i32, s32_0ImmPred, L2_loadrb_io>; + defm: Loadx_pat<sextloadi16, i32, s31_1ImmPred, L2_loadrh_io>; + defm: Loadx_pat<zextloadi1, i32, s32_0ImmPred, L2_loadrub_io>; + defm: Loadx_pat<zextloadi8, i32, s32_0ImmPred, L2_loadrub_io>; + defm: Loadx_pat<zextloadi16, i32, s31_1ImmPred, L2_loadruh_io>; + // No sextloadi1. +} + +// Sign-extending loads of i1 need to replicate the lowest bit throughout +// the 32-bit value. Since the loaded value can only be 0 or 1, 0-v should +// do the trick. +let AddedComplexity = 20 in +def: Pat<(i32 (sextloadi1 (i32 IntRegs:$Rs))), + (A2_subri 0, (L2_loadrub_io IntRegs:$Rs, 0))>; + +//===----------------------------------------------------------------------===// +// Post increment load +//===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// Template class for non-predicated post increment loads with immediate offset. +//===----------------------------------------------------------------------===// +let hasSideEffects = 0, addrMode = PostInc in +class T_load_pi <string mnemonic, RegisterClass RC, Operand ImmOp, + bits<4> MajOp > + : LDInstPI <(outs RC:$dst, IntRegs:$dst2), + (ins IntRegs:$src1, ImmOp:$offset), + "$dst = "#mnemonic#"($src1++#$offset)" , + [], + "$src1 = $dst2" > , + PredNewRel { + bits<5> dst; + bits<5> src1; + bits<7> offset; + bits<4> offsetBits; + + string ImmOpStr = !cast<string>(ImmOp); + let offsetBits = !if (!eq(ImmOpStr, "s4_3Imm"), offset{6-3}, + !if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2}, + !if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1}, + /* s4_0Imm */ offset{3-0}))); + let hasNewValue = !if (!eq(ImmOpStr, "s4_3Imm"), 0, 1); + + let IClass = 0b1001; + + let Inst{27-25} = 0b101; + let Inst{24-21} = MajOp; + let Inst{20-16} = src1; + let Inst{13-12} = 0b00; + let Inst{8-5} = offsetBits; + let Inst{4-0} = dst; + } + +//===----------------------------------------------------------------------===// +// Template class for predicated post increment loads with immediate offset. 
+//===----------------------------------------------------------------------===// +let isPredicated = 1, hasSideEffects = 0, addrMode = PostInc in +class T_pload_pi <string mnemonic, RegisterClass RC, Operand ImmOp, + bits<4> MajOp, bit isPredNot, bit isPredNew > + : LDInst <(outs RC:$dst, IntRegs:$dst2), + (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset), + !if(isPredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", + ") ")#"$dst = "#mnemonic#"($src2++#$offset)", + [] , + "$src2 = $dst2" > , + PredNewRel { + bits<5> dst; + bits<2> src1; + bits<5> src2; + bits<7> offset; + bits<4> offsetBits; + + let isPredicatedNew = isPredNew; + let isPredicatedFalse = isPredNot; + + string ImmOpStr = !cast<string>(ImmOp); + let offsetBits = !if (!eq(ImmOpStr, "s4_3Imm"), offset{6-3}, + !if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2}, + !if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1}, + /* s4_0Imm */ offset{3-0}))); + let hasNewValue = !if (!eq(ImmOpStr, "s4_3Imm"), 0, 1); + + let IClass = 0b1001; + + let Inst{27-25} = 0b101; + let Inst{24-21} = MajOp; + let Inst{20-16} = src2; + let Inst{13} = 0b1; + let Inst{12} = isPredNew; + let Inst{11} = isPredNot; + let Inst{10-9} = src1; + let Inst{8-5} = offsetBits; + let Inst{4-0} = dst; + } + +//===----------------------------------------------------------------------===// +// Multiclass for post increment loads with immediate offset. +//===----------------------------------------------------------------------===// + +multiclass LD_PostInc <string mnemonic, string BaseOp, RegisterClass RC, + Operand ImmOp, bits<4> MajOp> { + let BaseOpcode = "POST_"#BaseOp in { + let isPredicable = 1 in + def L2_#NAME#_pi : T_load_pi < mnemonic, RC, ImmOp, MajOp>; + + // Predicated + def L2_p#NAME#t_pi : T_pload_pi < mnemonic, RC, ImmOp, MajOp, 0, 0>; + def L2_p#NAME#f_pi : T_pload_pi < mnemonic, RC, ImmOp, MajOp, 1, 0>; + + // Predicated new + def L2_p#NAME#tnew_pi : T_pload_pi < mnemonic, RC, ImmOp, MajOp, 0, 1>; + def L2_p#NAME#fnew_pi : T_pload_pi < mnemonic, RC, ImmOp, MajOp, 1, 1>; + } +} + +// post increment byte loads with immediate offset +let accessSize = ByteAccess in { + defm loadrb : LD_PostInc <"memb", "LDrib", IntRegs, s4_0Imm, 0b1000>; + defm loadrub : LD_PostInc <"memub", "LDriub", IntRegs, s4_0Imm, 0b1001>; +} + +// post increment halfword loads with immediate offset +let accessSize = HalfWordAccess, opExtentAlign = 1 in { + defm loadrh : LD_PostInc <"memh", "LDrih", IntRegs, s4_1Imm, 0b1010>; + defm loadruh : LD_PostInc <"memuh", "LDriuh", IntRegs, s4_1Imm, 0b1011>; +} + +// post increment word loads with immediate offset +let accessSize = WordAccess, opExtentAlign = 2 in +defm loadri : LD_PostInc <"memw", "LDriw", IntRegs, s4_2Imm, 0b1100>; + +// post increment doubleword loads with immediate offset +let accessSize = DoubleWordAccess, opExtentAlign = 3 in +defm loadrd : LD_PostInc <"memd", "LDrid", DoubleRegs, s4_3Imm, 0b1110>; + +// Rd=memb[u]h(Rx++#s4:1) +// Rdd=memb[u]h(Rx++#s4:2) +let accessSize = HalfWordAccess, opExtentAlign = 1 in { + def L2_loadbsw2_pi : T_load_pi <"membh", IntRegs, s4_1Imm, 0b0001>; + def L2_loadbzw2_pi : T_load_pi <"memubh", IntRegs, s4_1Imm, 0b0011>; +} +let accessSize = WordAccess, opExtentAlign = 2, hasNewValue = 0 in { + def L2_loadbsw4_pi : T_load_pi <"membh", DoubleRegs, s4_2Imm, 0b0111>; + def L2_loadbzw4_pi : T_load_pi <"memubh", DoubleRegs, s4_2Imm, 0b0101>; +} + +//===----------------------------------------------------------------------===// +// Template class for post increment fifo loads with immediate offset. 
+//===----------------------------------------------------------------------===// +let hasSideEffects = 0, addrMode = PostInc in +class T_loadalign_pi <string mnemonic, Operand ImmOp, bits<4> MajOp > + : LDInstPI <(outs DoubleRegs:$dst, IntRegs:$dst2), + (ins DoubleRegs:$src1, IntRegs:$src2, ImmOp:$offset), + "$dst = "#mnemonic#"($src2++#$offset)" , + [], "$src2 = $dst2, $src1 = $dst" > , + PredNewRel { + bits<5> dst; + bits<5> src2; + bits<5> offset; + bits<4> offsetBits; + + let offsetBits = !if (!eq(!cast<string>(ImmOp), "s4_1Imm"), offset{4-1}, + /* s4_0Imm */ offset{3-0}); + let IClass = 0b1001; + + let Inst{27-25} = 0b101; + let Inst{24-21} = MajOp; + let Inst{20-16} = src2; + let Inst{13-12} = 0b00; + let Inst{8-5} = offsetBits; + let Inst{4-0} = dst; + } + +// Ryy=memh_fifo(Rx++#s4:1) +// Ryy=memb_fifo(Rx++#s4:0) +let accessSize = ByteAccess in +def L2_loadalignb_pi : T_loadalign_pi <"memb_fifo", s4_0Imm, 0b0100>; + +let accessSize = HalfWordAccess, opExtentAlign = 1 in +def L2_loadalignh_pi : T_loadalign_pi <"memh_fifo", s4_1Imm, 0b0010>; + +//===----------------------------------------------------------------------===// +// Template class for post increment loads with register offset. +//===----------------------------------------------------------------------===// +let hasSideEffects = 0, addrMode = PostInc in +class T_load_pr <string mnemonic, RegisterClass RC, bits<4> MajOp, + MemAccessSize AccessSz> + : LDInstPI <(outs RC:$dst, IntRegs:$_dst_), + (ins IntRegs:$src1, ModRegs:$src2), + "$dst = "#mnemonic#"($src1++$src2)" , + [], "$src1 = $_dst_" > { + bits<5> dst; + bits<5> src1; + bits<1> src2; + + let accessSize = AccessSz; + let IClass = 0b1001; + + let Inst{27-25} = 0b110; + let Inst{24-21} = MajOp; + let Inst{20-16} = src1; + let Inst{13} = src2; + let Inst{12} = 0b0; + let Inst{7} = 0b0; + let Inst{4-0} = dst; + } + +let hasNewValue = 1 in { + def L2_loadrb_pr : T_load_pr <"memb", IntRegs, 0b1000, ByteAccess>; + def L2_loadrub_pr : T_load_pr <"memub", IntRegs, 0b1001, ByteAccess>; + def L2_loadrh_pr : T_load_pr <"memh", IntRegs, 0b1010, HalfWordAccess>; + def L2_loadruh_pr : T_load_pr <"memuh", IntRegs, 0b1011, HalfWordAccess>; + def L2_loadri_pr : T_load_pr <"memw", IntRegs, 0b1100, WordAccess>; + + def L2_loadbzw2_pr : T_load_pr <"memubh", IntRegs, 0b0011, HalfWordAccess>; +} + +def L2_loadrd_pr : T_load_pr <"memd", DoubleRegs, 0b1110, DoubleWordAccess>; +def L2_loadbzw4_pr : T_load_pr <"memubh", DoubleRegs, 0b0101, WordAccess>; + +// Load predicate. +let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13, + isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in +def LDriw_pred : LDInst<(outs PredRegs:$dst), + (ins IntRegs:$addr, s11_2Ext:$off), + ".error \"should not emit\"", []>; + +let Defs = [R29, R30, R31], Uses = [R30], hasSideEffects = 0 in + def L2_deallocframe : LDInst<(outs), (ins), + "deallocframe", + []> { + let IClass = 0b1001; + + let Inst{27-16} = 0b000000011110; + let Inst{13} = 0b0; + let Inst{4-0} = 0b11110; +} + +// Load / Post increment circular addressing mode. 
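+// Illustrative note (inferred from the AsmString and operands below, not
+// quoted from the architecture manual): the modifier register Mu carries the
+// increment I and the circular-buffer parameters, and CS supplies the buffer
+// start (hence "Uses = [CS]"), so a load written as
+//     r0 = memw(r1 ++ I:circ(m0))
+// post-increments r1 and wraps it within the buffer described by m0/CS.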
+let Uses = [CS], hasSideEffects = 0 in +class T_load_pcr<string mnemonic, RegisterClass RC, bits<4> MajOp> + : LDInst <(outs RC:$dst, IntRegs:$_dst_), + (ins IntRegs:$Rz, ModRegs:$Mu), + "$dst = "#mnemonic#"($Rz ++ I:circ($Mu))", [], + "$Rz = $_dst_" > { + bits<5> dst; + bits<5> Rz; + bit Mu; + + let hasNewValue = !if (!eq(!cast<string>(RC), "DoubleRegs"), 0, 1); + let IClass = 0b1001; + + let Inst{27-25} = 0b100; + let Inst{24-21} = MajOp; + let Inst{20-16} = Rz; + let Inst{13} = Mu; + let Inst{12} = 0b0; + let Inst{9} = 0b1; + let Inst{7} = 0b0; + let Inst{4-0} = dst; + } + +let accessSize = ByteAccess in { + def L2_loadrb_pcr : T_load_pcr <"memb", IntRegs, 0b1000>; + def L2_loadrub_pcr : T_load_pcr <"memub", IntRegs, 0b1001>; +} + +let accessSize = HalfWordAccess in { + def L2_loadrh_pcr : T_load_pcr <"memh", IntRegs, 0b1010>; + def L2_loadruh_pcr : T_load_pcr <"memuh", IntRegs, 0b1011>; + def L2_loadbsw2_pcr : T_load_pcr <"membh", IntRegs, 0b0001>; + def L2_loadbzw2_pcr : T_load_pcr <"memubh", IntRegs, 0b0011>; +} + +let accessSize = WordAccess in { + def L2_loadri_pcr : T_load_pcr <"memw", IntRegs, 0b1100>; + let hasNewValue = 0 in { + def L2_loadbzw4_pcr : T_load_pcr <"memubh", DoubleRegs, 0b0101>; + def L2_loadbsw4_pcr : T_load_pcr <"membh", DoubleRegs, 0b0111>; + } +} + +let accessSize = DoubleWordAccess in +def L2_loadrd_pcr : T_load_pcr <"memd", DoubleRegs, 0b1110>; + +// Load / Post increment circular addressing mode. +let Uses = [CS], hasSideEffects = 0 in +class T_loadalign_pcr<string mnemonic, bits<4> MajOp, MemAccessSize AccessSz > + : LDInst <(outs DoubleRegs:$dst, IntRegs:$_dst_), + (ins DoubleRegs:$_src_, IntRegs:$Rz, ModRegs:$Mu), + "$dst = "#mnemonic#"($Rz ++ I:circ($Mu))", [], + "$Rz = $_dst_, $dst = $_src_" > { + bits<5> dst; + bits<5> Rz; + bit Mu; + + let accessSize = AccessSz; + let IClass = 0b1001; + + let Inst{27-25} = 0b100; + let Inst{24-21} = MajOp; + let Inst{20-16} = Rz; + let Inst{13} = Mu; + let Inst{12} = 0b0; + let Inst{9} = 0b1; + let Inst{7} = 0b0; + let Inst{4-0} = dst; + } + +def L2_loadalignb_pcr : T_loadalign_pcr <"memb_fifo", 0b0100, ByteAccess>; +def L2_loadalignh_pcr : T_loadalign_pcr <"memh_fifo", 0b0010, HalfWordAccess>; + +//===----------------------------------------------------------------------===// +// Circular loads with immediate offset. 
+//===----------------------------------------------------------------------===// +let Uses = [CS], mayLoad = 1, hasSideEffects = 0 in +class T_load_pci <string mnemonic, RegisterClass RC, + Operand ImmOp, bits<4> MajOp> + : LDInstPI<(outs RC:$dst, IntRegs:$_dst_), + (ins IntRegs:$Rz, ImmOp:$offset, ModRegs:$Mu), + "$dst = "#mnemonic#"($Rz ++ #$offset:circ($Mu))", [], + "$Rz = $_dst_"> { + bits<5> dst; + bits<5> Rz; + bits<1> Mu; + bits<7> offset; + bits<4> offsetBits; + + string ImmOpStr = !cast<string>(ImmOp); + let hasNewValue = !if (!eq(!cast<string>(RC), "DoubleRegs"), 0, 1); + let offsetBits = !if (!eq(ImmOpStr, "s4_3Imm"), offset{6-3}, + !if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2}, + !if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1}, + /* s4_0Imm */ offset{3-0}))); + let IClass = 0b1001; + let Inst{27-25} = 0b100; + let Inst{24-21} = MajOp; + let Inst{20-16} = Rz; + let Inst{13} = Mu; + let Inst{12} = 0b0; + let Inst{9} = 0b0; + let Inst{8-5} = offsetBits; + let Inst{4-0} = dst; + } + +// Byte variants of circ load +let accessSize = ByteAccess in { + def L2_loadrb_pci : T_load_pci <"memb", IntRegs, s4_0Imm, 0b1000>; + def L2_loadrub_pci : T_load_pci <"memub", IntRegs, s4_0Imm, 0b1001>; +} + +// Half word variants of circ load +let accessSize = HalfWordAccess in { + def L2_loadrh_pci : T_load_pci <"memh", IntRegs, s4_1Imm, 0b1010>; + def L2_loadruh_pci : T_load_pci <"memuh", IntRegs, s4_1Imm, 0b1011>; + def L2_loadbzw2_pci : T_load_pci <"memubh", IntRegs, s4_1Imm, 0b0011>; + def L2_loadbsw2_pci : T_load_pci <"membh", IntRegs, s4_1Imm, 0b0001>; +} + +// Word variants of circ load +let accessSize = WordAccess in +def L2_loadri_pci : T_load_pci <"memw", IntRegs, s4_2Imm, 0b1100>; + +let accessSize = WordAccess, hasNewValue = 0 in { + def L2_loadbzw4_pci : T_load_pci <"memubh", DoubleRegs, s4_2Imm, 0b0101>; + def L2_loadbsw4_pci : T_load_pci <"membh", DoubleRegs, s4_2Imm, 0b0111>; +} + +let accessSize = DoubleWordAccess, hasNewValue = 0 in +def L2_loadrd_pci : T_load_pci <"memd", DoubleRegs, s4_3Imm, 0b1110>; + +//===----------------------------------------------------------------------===// +// Circular loads - Pseudo +// +// Please note that the input operand order in the pseudo instructions +// doesn't match the real instructions. The pseudo instructions' operand +// order should mimic the ordering in the intrinsics. Also, 'src2' doesn't +// appear in the AsmString because it's the same as 'dst'. +//===----------------------------------------------------------------------===// +let isCodeGenOnly = 1, mayLoad = 1, hasSideEffects = 0, isPseudo = 1 in +class T_load_pci_pseudo <string opc, RegisterClass RC> + : LDInstPI<(outs IntRegs:$_dst_, RC:$dst), + (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4Imm:$src4), + ".error \"$dst = "#opc#"($src1++#$src4:circ($src3))\"", + [], "$src1 = $_dst_">; + +def L2_loadrb_pci_pseudo : T_load_pci_pseudo <"memb", IntRegs>; +def L2_loadrub_pci_pseudo : T_load_pci_pseudo <"memub", IntRegs>; +def L2_loadrh_pci_pseudo : T_load_pci_pseudo <"memh", IntRegs>; +def L2_loadruh_pci_pseudo : T_load_pci_pseudo <"memuh", IntRegs>; +def L2_loadri_pci_pseudo : T_load_pci_pseudo <"memw", IntRegs>; +def L2_loadrd_pci_pseudo : T_load_pci_pseudo <"memd", DoubleRegs>; + + +// TODO: memb_fifo and memh_fifo must take destination register as input. +// One-off circ loads - not enough in common to break into a class.
+let accessSize = ByteAccess in +def L2_loadalignb_pci : T_load_pci <"memb_fifo", DoubleRegs, s4_0Imm, 0b0100>; + +let accessSize = HalfWordAccess, opExtentAlign = 1 in +def L2_loadalignh_pci : T_load_pci <"memh_fifo", DoubleRegs, s4_1Imm, 0b0010>; + +// L[24]_load[wd]_locked: Load word/double with lock. +let isSoloAX = 1 in +class T_load_locked <string mnemonic, RegisterClass RC> + : LD0Inst <(outs RC:$dst), + (ins IntRegs:$src), + "$dst = "#mnemonic#"($src)"> { + bits<5> dst; + bits<5> src; + let IClass = 0b1001; + let Inst{27-21} = 0b0010000; + let Inst{20-16} = src; + let Inst{13-12} = !if (!eq(mnemonic, "memd_locked"), 0b01, 0b00); + let Inst{5} = 0; + let Inst{4-0} = dst; +} +let hasNewValue = 1, accessSize = WordAccess, opNewValue = 0 in + def L2_loadw_locked : T_load_locked <"memw_locked", IntRegs>; +let accessSize = DoubleWordAccess in + def L4_loadd_locked : T_load_locked <"memd_locked", DoubleRegs>; + +// S[24]_store[wd]_locked: Store word/double conditionally. +let isSoloAX = 1, isPredicateLate = 1 in +class T_store_locked <string mnemonic, RegisterClass RC> + : ST0Inst <(outs PredRegs:$Pd), (ins IntRegs:$Rs, RC:$Rt), + mnemonic#"($Rs, $Pd) = $Rt"> { + bits<2> Pd; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1010; + let Inst{27-23} = 0b00001; + let Inst{22} = !if (!eq(mnemonic, "memw_locked"), 0b0, 0b1); + let Inst{21} = 0b1; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + let Inst{1-0} = Pd; +} + +let accessSize = WordAccess in +def S2_storew_locked : T_store_locked <"memw_locked", IntRegs>; + +let accessSize = DoubleWordAccess in +def S4_stored_locked : T_store_locked <"memd_locked", DoubleRegs>; + +//===----------------------------------------------------------------------===// +// Bit-reversed loads with auto-increment register +//===----------------------------------------------------------------------===// +let hasSideEffects = 0 in +class T_load_pbr<string mnemonic, RegisterClass RC, + MemAccessSize addrSize, bits<4> majOp> + : LDInst + <(outs RC:$dst, IntRegs:$_dst_), + (ins IntRegs:$Rz, ModRegs:$Mu), + "$dst = "#mnemonic#"($Rz ++ $Mu:brev)" , + [] , "$Rz = $_dst_" > { + + let accessSize = addrSize; + + bits<5> dst; + bits<5> Rz; + bits<1> Mu; + + let IClass = 0b1001; + + let Inst{27-25} = 0b111; + let Inst{24-21} = majOp; + let Inst{20-16} = Rz; + let Inst{13} = Mu; + let Inst{12} = 0b0; + let Inst{7} = 0b0; + let Inst{4-0} = dst; + } + +let hasNewValue =1, opNewValue = 0 in { + def L2_loadrb_pbr : T_load_pbr <"memb", IntRegs, ByteAccess, 0b1000>; + def L2_loadrub_pbr : T_load_pbr <"memub", IntRegs, ByteAccess, 0b1001>; + def L2_loadrh_pbr : T_load_pbr <"memh", IntRegs, HalfWordAccess, 0b1010>; + def L2_loadruh_pbr : T_load_pbr <"memuh", IntRegs, HalfWordAccess, 0b1011>; + def L2_loadbsw2_pbr : T_load_pbr <"membh", IntRegs, HalfWordAccess, 0b0001>; + def L2_loadbzw2_pbr : T_load_pbr <"memubh", IntRegs, HalfWordAccess, 0b0011>; + def L2_loadri_pbr : T_load_pbr <"memw", IntRegs, WordAccess, 0b1100>; +} + +def L2_loadbzw4_pbr : T_load_pbr <"memubh", DoubleRegs, WordAccess, 0b0101>; +def L2_loadbsw4_pbr : T_load_pbr <"membh", DoubleRegs, WordAccess, 0b0111>; +def L2_loadrd_pbr : T_load_pbr <"memd", DoubleRegs, DoubleWordAccess, 0b1110>; + +def L2_loadalignb_pbr :T_load_pbr <"memb_fifo", DoubleRegs, ByteAccess, 0b0100>; +def L2_loadalignh_pbr :T_load_pbr <"memh_fifo", DoubleRegs, + HalfWordAccess, 0b0010>; + +//===----------------------------------------------------------------------===// +// Bit-reversed loads - Pseudo +// +// Please note that 'src2' doesn't appear in 
the AsmString because +// it's the same as 'dst'. +//===----------------------------------------------------------------------===// +let isCodeGenOnly = 1, mayLoad = 1, hasSideEffects = 0, isPseudo = 1 in +class T_load_pbr_pseudo <string opc, RegisterClass RC> + : LDInstPI<(outs IntRegs:$_dst_, RC:$dst), + (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), + ".error \"$dst = "#opc#"($src1++$src3:brev)\"", + [], "$src1 = $_dst_">; + +def L2_loadrb_pbr_pseudo : T_load_pbr_pseudo <"memb", IntRegs>; +def L2_loadrub_pbr_pseudo : T_load_pbr_pseudo <"memub", IntRegs>; +def L2_loadrh_pbr_pseudo : T_load_pbr_pseudo <"memh", IntRegs>; +def L2_loadruh_pbr_pseudo : T_load_pbr_pseudo <"memuh", IntRegs>; +def L2_loadri_pbr_pseudo : T_load_pbr_pseudo <"memw", IntRegs>; +def L2_loadrd_pbr_pseudo : T_load_pbr_pseudo <"memd", DoubleRegs>; + +//===----------------------------------------------------------------------===// +// LD - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MTYPE/ALU + +//===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// MTYPE/ALU - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MTYPE/COMPLEX + +//===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// MTYPE/COMPLEX - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MTYPE/MPYH + +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Template Class +// MPYS / Multiply signed/unsigned halfwords +//Rd=mpy[u](Rs.[H|L],Rt.[H|L])[:<<1][:rnd][:sat] +//===----------------------------------------------------------------------===// + +let hasNewValue = 1, opNewValue = 0 in +class T_M2_mpy < bits<2> LHbits, bit isSat, bit isRnd, + bit hasShift, bit isUnsigned> + : MInst < (outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt), + "$Rd = "#!if(isUnsigned,"mpyu","mpy")#"($Rs."#!if(LHbits{1},"h","l") + #", $Rt."#!if(LHbits{0},"h)","l)") + #!if(hasShift,":<<1","") + #!if(isRnd,":rnd","") + #!if(isSat,":sat",""), + [], "", M_tc_3x_SLOT23 > { + bits<5> Rd; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1110; + + let Inst{27-24} = 0b1100; + let Inst{23} = hasShift; + let Inst{22} = isUnsigned; + let Inst{21} = isRnd; + let Inst{7} = isSat; + let Inst{6-5} = LHbits; + let Inst{4-0} = Rd; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + } + +//Rd=mpy(Rs.[H|L],Rt.[H|L])[:<<1] +def M2_mpy_ll_s1: T_M2_mpy<0b00, 0, 0, 1, 0>; +def M2_mpy_ll_s0: T_M2_mpy<0b00, 0, 0, 0, 0>; +def M2_mpy_lh_s1: T_M2_mpy<0b01, 0, 0, 1, 0>; +def M2_mpy_lh_s0: T_M2_mpy<0b01, 0, 0, 0, 0>; +def M2_mpy_hl_s1: T_M2_mpy<0b10, 0, 0, 1, 0>; +def M2_mpy_hl_s0: T_M2_mpy<0b10, 0, 0, 0, 0>; +def M2_mpy_hh_s1: T_M2_mpy<0b11, 0, 0, 1, 0>; +def M2_mpy_hh_s0: T_M2_mpy<0b11, 0, 0, 0, 0>; + +//Rd=mpyu(Rs.[H|L],Rt.[H|L])[:<<1] +def M2_mpyu_ll_s1: T_M2_mpy<0b00, 0, 0, 1, 1>; +def M2_mpyu_ll_s0: T_M2_mpy<0b00, 0, 0, 0, 1>; +def M2_mpyu_lh_s1: T_M2_mpy<0b01, 0, 0, 1, 1>; +def M2_mpyu_lh_s0: T_M2_mpy<0b01, 0, 0, 0, 1>; +def
M2_mpyu_hl_s1: T_M2_mpy<0b10, 0, 0, 1, 1>; +def M2_mpyu_hl_s0: T_M2_mpy<0b10, 0, 0, 0, 1>; +def M2_mpyu_hh_s1: T_M2_mpy<0b11, 0, 0, 1, 1>; +def M2_mpyu_hh_s0: T_M2_mpy<0b11, 0, 0, 0, 1>; + +//Rd=mpy(Rs.[H|L],Rt.[H|L])[:<<1]:rnd +def M2_mpy_rnd_ll_s1: T_M2_mpy <0b00, 0, 1, 1, 0>; +def M2_mpy_rnd_ll_s0: T_M2_mpy <0b00, 0, 1, 0, 0>; +def M2_mpy_rnd_lh_s1: T_M2_mpy <0b01, 0, 1, 1, 0>; +def M2_mpy_rnd_lh_s0: T_M2_mpy <0b01, 0, 1, 0, 0>; +def M2_mpy_rnd_hl_s1: T_M2_mpy <0b10, 0, 1, 1, 0>; +def M2_mpy_rnd_hl_s0: T_M2_mpy <0b10, 0, 1, 0, 0>; +def M2_mpy_rnd_hh_s1: T_M2_mpy <0b11, 0, 1, 1, 0>; +def M2_mpy_rnd_hh_s0: T_M2_mpy <0b11, 0, 1, 0, 0>; + +//Rd=mpy(Rs.[H|L],Rt.[H|L])[:<<1][:sat] +//Rd=mpy(Rs.[H|L],Rt.[H|L])[:<<1][:rnd][:sat] +let Defs = [USR_OVF] in { + def M2_mpy_sat_ll_s1: T_M2_mpy <0b00, 1, 0, 1, 0>; + def M2_mpy_sat_ll_s0: T_M2_mpy <0b00, 1, 0, 0, 0>; + def M2_mpy_sat_lh_s1: T_M2_mpy <0b01, 1, 0, 1, 0>; + def M2_mpy_sat_lh_s0: T_M2_mpy <0b01, 1, 0, 0, 0>; + def M2_mpy_sat_hl_s1: T_M2_mpy <0b10, 1, 0, 1, 0>; + def M2_mpy_sat_hl_s0: T_M2_mpy <0b10, 1, 0, 0, 0>; + def M2_mpy_sat_hh_s1: T_M2_mpy <0b11, 1, 0, 1, 0>; + def M2_mpy_sat_hh_s0: T_M2_mpy <0b11, 1, 0, 0, 0>; + + def M2_mpy_sat_rnd_ll_s1: T_M2_mpy <0b00, 1, 1, 1, 0>; + def M2_mpy_sat_rnd_ll_s0: T_M2_mpy <0b00, 1, 1, 0, 0>; + def M2_mpy_sat_rnd_lh_s1: T_M2_mpy <0b01, 1, 1, 1, 0>; + def M2_mpy_sat_rnd_lh_s0: T_M2_mpy <0b01, 1, 1, 0, 0>; + def M2_mpy_sat_rnd_hl_s1: T_M2_mpy <0b10, 1, 1, 1, 0>; + def M2_mpy_sat_rnd_hl_s0: T_M2_mpy <0b10, 1, 1, 0, 0>; + def M2_mpy_sat_rnd_hh_s1: T_M2_mpy <0b11, 1, 1, 1, 0>; + def M2_mpy_sat_rnd_hh_s0: T_M2_mpy <0b11, 1, 1, 0, 0>; +} + +//===----------------------------------------------------------------------===// +// Template Class +// MPYS / Multiply signed/unsigned halfwords and add/subtract the +// result from the accumulator.
+//Rx [-+]= mpy[u](Rs.[H|L],Rt.[H|L])[:<<1][:sat] +//===----------------------------------------------------------------------===// + +let hasNewValue = 1, opNewValue = 0 in +class T_M2_mpy_acc < bits<2> LHbits, bit isSat, bit isNac, + bit hasShift, bit isUnsigned > + : MInst_acc<(outs IntRegs:$Rx), (ins IntRegs:$dst2, IntRegs:$Rs, IntRegs:$Rt), + "$Rx "#!if(isNac,"-= ","+= ")#!if(isUnsigned,"mpyu","mpy") + #"($Rs."#!if(LHbits{1},"h","l") + #", $Rt."#!if(LHbits{0},"h)","l)") + #!if(hasShift,":<<1","") + #!if(isSat,":sat",""), + [], "$dst2 = $Rx", M_tc_3x_SLOT23 > { + bits<5> Rx; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1110; + let Inst{27-24} = 0b1110; + let Inst{23} = hasShift; + let Inst{22} = isUnsigned; + let Inst{21} = isNac; + let Inst{7} = isSat; + let Inst{6-5} = LHbits; + let Inst{4-0} = Rx; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + } + +//Rx += mpy(Rs.[H|L],Rt.[H|L])[:<<1] +def M2_mpy_acc_ll_s1: T_M2_mpy_acc <0b00, 0, 0, 1, 0>; +def M2_mpy_acc_ll_s0: T_M2_mpy_acc <0b00, 0, 0, 0, 0>; +def M2_mpy_acc_lh_s1: T_M2_mpy_acc <0b01, 0, 0, 1, 0>; +def M2_mpy_acc_lh_s0: T_M2_mpy_acc <0b01, 0, 0, 0, 0>; +def M2_mpy_acc_hl_s1: T_M2_mpy_acc <0b10, 0, 0, 1, 0>; +def M2_mpy_acc_hl_s0: T_M2_mpy_acc <0b10, 0, 0, 0, 0>; +def M2_mpy_acc_hh_s1: T_M2_mpy_acc <0b11, 0, 0, 1, 0>; +def M2_mpy_acc_hh_s0: T_M2_mpy_acc <0b11, 0, 0, 0, 0>; + +//Rx += mpyu(Rs.[H|L],Rt.[H|L])[:<<1] +def M2_mpyu_acc_ll_s1: T_M2_mpy_acc <0b00, 0, 0, 1, 1>; +def M2_mpyu_acc_ll_s0: T_M2_mpy_acc <0b00, 0, 0, 0, 1>; +def M2_mpyu_acc_lh_s1: T_M2_mpy_acc <0b01, 0, 0, 1, 1>; +def M2_mpyu_acc_lh_s0: T_M2_mpy_acc <0b01, 0, 0, 0, 1>; +def M2_mpyu_acc_hl_s1: T_M2_mpy_acc <0b10, 0, 0, 1, 1>; +def M2_mpyu_acc_hl_s0: T_M2_mpy_acc <0b10, 0, 0, 0, 1>; +def M2_mpyu_acc_hh_s1: T_M2_mpy_acc <0b11, 0, 0, 1, 1>; +def M2_mpyu_acc_hh_s0: T_M2_mpy_acc <0b11, 0, 0, 0, 1>; + +//Rx -= mpy(Rs.[H|L],Rt.[H|L])[:<<1] +def M2_mpy_nac_ll_s1: T_M2_mpy_acc <0b00, 0, 1, 1, 0>; +def M2_mpy_nac_ll_s0: T_M2_mpy_acc <0b00, 0, 1, 0, 0>; +def M2_mpy_nac_lh_s1: T_M2_mpy_acc <0b01, 0, 1, 1, 0>; +def M2_mpy_nac_lh_s0: T_M2_mpy_acc <0b01, 0, 1, 0, 0>; +def M2_mpy_nac_hl_s1: T_M2_mpy_acc <0b10, 0, 1, 1, 0>; +def M2_mpy_nac_hl_s0: T_M2_mpy_acc <0b10, 0, 1, 0, 0>; +def M2_mpy_nac_hh_s1: T_M2_mpy_acc <0b11, 0, 1, 1, 0>; +def M2_mpy_nac_hh_s0: T_M2_mpy_acc <0b11, 0, 1, 0, 0>; + +//Rx -= mpyu(Rs.[H|L],Rt.[H|L])[:<<1] +def M2_mpyu_nac_ll_s1: T_M2_mpy_acc <0b00, 0, 1, 1, 1>; +def M2_mpyu_nac_ll_s0: T_M2_mpy_acc <0b00, 0, 1, 0, 1>; +def M2_mpyu_nac_lh_s1: T_M2_mpy_acc <0b01, 0, 1, 1, 1>; +def M2_mpyu_nac_lh_s0: T_M2_mpy_acc <0b01, 0, 1, 0, 1>; +def M2_mpyu_nac_hl_s1: T_M2_mpy_acc <0b10, 0, 1, 1, 1>; +def M2_mpyu_nac_hl_s0: T_M2_mpy_acc <0b10, 0, 1, 0, 1>; +def M2_mpyu_nac_hh_s1: T_M2_mpy_acc <0b11, 0, 1, 1, 1>; +def M2_mpyu_nac_hh_s0: T_M2_mpy_acc <0b11, 0, 1, 0, 1>; + +//Rx += mpy(Rs.[H|L],Rt.[H|L])[:<<1]:sat +def M2_mpy_acc_sat_ll_s1: T_M2_mpy_acc <0b00, 1, 0, 1, 0>; +def M2_mpy_acc_sat_ll_s0: T_M2_mpy_acc <0b00, 1, 0, 0, 0>; +def M2_mpy_acc_sat_lh_s1: T_M2_mpy_acc <0b01, 1, 0, 1, 0>; +def M2_mpy_acc_sat_lh_s0: T_M2_mpy_acc <0b01, 1, 0, 0, 0>; +def M2_mpy_acc_sat_hl_s1: T_M2_mpy_acc <0b10, 1, 0, 1, 0>; +def M2_mpy_acc_sat_hl_s0: T_M2_mpy_acc <0b10, 1, 0, 0, 0>; +def M2_mpy_acc_sat_hh_s1: T_M2_mpy_acc <0b11, 1, 0, 1, 0>; +def M2_mpy_acc_sat_hh_s0: T_M2_mpy_acc <0b11, 1, 0, 0, 0>; + +//Rx -= mpy(Rs.[H|L],Rt.[H|L])[:<<1]:sat +def M2_mpy_nac_sat_ll_s1: T_M2_mpy_acc <0b00, 1, 1, 1, 0>; +def M2_mpy_nac_sat_ll_s0: T_M2_mpy_acc <0b00, 1, 1, 0, 0>; +def M2_mpy_nac_sat_lh_s1: 
T_M2_mpy_acc <0b01, 1, 1, 1, 0>; +def M2_mpy_nac_sat_lh_s0: T_M2_mpy_acc <0b01, 1, 1, 0, 0>; +def M2_mpy_nac_sat_hl_s1: T_M2_mpy_acc <0b10, 1, 1, 1, 0>; +def M2_mpy_nac_sat_hl_s0: T_M2_mpy_acc <0b10, 1, 1, 0, 0>; +def M2_mpy_nac_sat_hh_s1: T_M2_mpy_acc <0b11, 1, 1, 1, 0>; +def M2_mpy_nac_sat_hh_s0: T_M2_mpy_acc <0b11, 1, 1, 0, 0>; + +//===----------------------------------------------------------------------===// +// Template Class +// MPYS / Multiply signed/unsigned halfwords and add/subtract the +// result from the 64-bit destination register. +//Rxx [-+]= mpy[u](Rs.[H|L],Rt.[H|L])[:<<1][:sat] +//===----------------------------------------------------------------------===// + +class T_M2_mpyd_acc < bits<2> LHbits, bit isNac, bit hasShift, bit isUnsigned> + : MInst_acc<(outs DoubleRegs:$Rxx), + (ins DoubleRegs:$dst2, IntRegs:$Rs, IntRegs:$Rt), + "$Rxx "#!if(isNac,"-= ","+= ")#!if(isUnsigned,"mpyu","mpy") + #"($Rs."#!if(LHbits{1},"h","l") + #", $Rt."#!if(LHbits{0},"h)","l)") + #!if(hasShift,":<<1",""), + [], "$dst2 = $Rxx", M_tc_3x_SLOT23 > { + bits<5> Rxx; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1110; + + let Inst{27-24} = 0b0110; + let Inst{23} = hasShift; + let Inst{22} = isUnsigned; + let Inst{21} = isNac; + let Inst{7} = 0; + let Inst{6-5} = LHbits; + let Inst{4-0} = Rxx; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + } + +def M2_mpyd_acc_hh_s0: T_M2_mpyd_acc <0b11, 0, 0, 0>; +def M2_mpyd_acc_hl_s0: T_M2_mpyd_acc <0b10, 0, 0, 0>; +def M2_mpyd_acc_lh_s0: T_M2_mpyd_acc <0b01, 0, 0, 0>; +def M2_mpyd_acc_ll_s0: T_M2_mpyd_acc <0b00, 0, 0, 0>; + +def M2_mpyd_acc_hh_s1: T_M2_mpyd_acc <0b11, 0, 1, 0>; +def M2_mpyd_acc_hl_s1: T_M2_mpyd_acc <0b10, 0, 1, 0>; +def M2_mpyd_acc_lh_s1: T_M2_mpyd_acc <0b01, 0, 1, 0>; +def M2_mpyd_acc_ll_s1: T_M2_mpyd_acc <0b00, 0, 1, 0>; + +def M2_mpyd_nac_hh_s0: T_M2_mpyd_acc <0b11, 1, 0, 0>; +def M2_mpyd_nac_hl_s0: T_M2_mpyd_acc <0b10, 1, 0, 0>; +def M2_mpyd_nac_lh_s0: T_M2_mpyd_acc <0b01, 1, 0, 0>; +def M2_mpyd_nac_ll_s0: T_M2_mpyd_acc <0b00, 1, 0, 0>; + +def M2_mpyd_nac_hh_s1: T_M2_mpyd_acc <0b11, 1, 1, 0>; +def M2_mpyd_nac_hl_s1: T_M2_mpyd_acc <0b10, 1, 1, 0>; +def M2_mpyd_nac_lh_s1: T_M2_mpyd_acc <0b01, 1, 1, 0>; +def M2_mpyd_nac_ll_s1: T_M2_mpyd_acc <0b00, 1, 1, 0>; + +def M2_mpyud_acc_hh_s0: T_M2_mpyd_acc <0b11, 0, 0, 1>; +def M2_mpyud_acc_hl_s0: T_M2_mpyd_acc <0b10, 0, 0, 1>; +def M2_mpyud_acc_lh_s0: T_M2_mpyd_acc <0b01, 0, 0, 1>; +def M2_mpyud_acc_ll_s0: T_M2_mpyd_acc <0b00, 0, 0, 1>; + +def M2_mpyud_acc_hh_s1: T_M2_mpyd_acc <0b11, 0, 1, 1>; +def M2_mpyud_acc_hl_s1: T_M2_mpyd_acc <0b10, 0, 1, 1>; +def M2_mpyud_acc_lh_s1: T_M2_mpyd_acc <0b01, 0, 1, 1>; +def M2_mpyud_acc_ll_s1: T_M2_mpyd_acc <0b00, 0, 1, 1>; + +def M2_mpyud_nac_hh_s0: T_M2_mpyd_acc <0b11, 1, 0, 1>; +def M2_mpyud_nac_hl_s0: T_M2_mpyd_acc <0b10, 1, 0, 1>; +def M2_mpyud_nac_lh_s0: T_M2_mpyd_acc <0b01, 1, 0, 1>; +def M2_mpyud_nac_ll_s0: T_M2_mpyd_acc <0b00, 1, 0, 1>; + +def M2_mpyud_nac_hh_s1: T_M2_mpyd_acc <0b11, 1, 1, 1>; +def M2_mpyud_nac_hl_s1: T_M2_mpyd_acc <0b10, 1, 1, 1>; +def M2_mpyud_nac_lh_s1: T_M2_mpyd_acc <0b01, 1, 1, 1>; +def M2_mpyud_nac_ll_s1: T_M2_mpyd_acc <0b00, 1, 1, 1>; + +//===----------------------------------------------------------------------===// +// Template Class -- Vector Multiply +// Used for complex multiply real or imaginary, dual multiply and even halfwords +//===----------------------------------------------------------------------===// +class T_M2_vmpy < string opc, bits<3> MajOp, bits<3> MinOp, bit hasShift, + bit isRnd, bit isSat > + : MInst <(outs
DoubleRegs:$Rdd), (ins DoubleRegs:$Rss, DoubleRegs:$Rtt), + "$Rdd = "#opc#"($Rss, $Rtt)"#!if(hasShift,":<<1","") + #!if(isRnd,":rnd","") + #!if(isSat,":sat",""), + [] > { + bits<5> Rdd; + bits<5> Rss; + bits<5> Rtt; + + let IClass = 0b1110; + + let Inst{27-24} = 0b1000; + let Inst{23-21} = MajOp; + let Inst{7-5} = MinOp; + let Inst{4-0} = Rdd; + let Inst{20-16} = Rss; + let Inst{12-8} = Rtt; + } + +// Vector complex multiply imaginary: Rdd=vcmpyi(Rss,Rtt)[:<<1]:sat +let Defs = [USR_OVF] in { +def M2_vcmpy_s1_sat_i: T_M2_vmpy <"vcmpyi", 0b110, 0b110, 1, 0, 1>; +def M2_vcmpy_s0_sat_i: T_M2_vmpy <"vcmpyi", 0b010, 0b110, 0, 0, 1>; + +// Vector complex multiply real: Rdd=vcmpyr(Rss,Rtt)[:<<1]:sat +def M2_vcmpy_s1_sat_r: T_M2_vmpy <"vcmpyr", 0b101, 0b110, 1, 0, 1>; +def M2_vcmpy_s0_sat_r: T_M2_vmpy <"vcmpyr", 0b001, 0b110, 0, 0, 1>; + +// Vector dual multiply: Rdd=vdmpy(Rss,Rtt)[:<<1]:sat +def M2_vdmpys_s1: T_M2_vmpy <"vdmpy", 0b100, 0b100, 1, 0, 1>; +def M2_vdmpys_s0: T_M2_vmpy <"vdmpy", 0b000, 0b100, 0, 0, 1>; + +// Vector multiply even halfwords: Rdd=vmpyeh(Rss,Rtt)[:<<1]:sat +def M2_vmpy2es_s1: T_M2_vmpy <"vmpyeh", 0b100, 0b110, 1, 0, 1>; +def M2_vmpy2es_s0: T_M2_vmpy <"vmpyeh", 0b000, 0b110, 0, 0, 1>; + +//Rdd=vmpywoh(Rss,Rtt)[:<<1][:rnd]:sat +def M2_mmpyh_s0: T_M2_vmpy <"vmpywoh", 0b000, 0b111, 0, 0, 1>; +def M2_mmpyh_s1: T_M2_vmpy <"vmpywoh", 0b100, 0b111, 1, 0, 1>; +def M2_mmpyh_rs0: T_M2_vmpy <"vmpywoh", 0b001, 0b111, 0, 1, 1>; +def M2_mmpyh_rs1: T_M2_vmpy <"vmpywoh", 0b101, 0b111, 1, 1, 1>; + +//Rdd=vmpyweh(Rss,Rtt)[:<<1][:rnd]:sat +def M2_mmpyl_s0: T_M2_vmpy <"vmpyweh", 0b000, 0b101, 0, 0, 1>; +def M2_mmpyl_s1: T_M2_vmpy <"vmpyweh", 0b100, 0b101, 1, 0, 1>; +def M2_mmpyl_rs0: T_M2_vmpy <"vmpyweh", 0b001, 0b101, 0, 1, 1>; +def M2_mmpyl_rs1: T_M2_vmpy <"vmpyweh", 0b101, 0b101, 1, 1, 1>; + +//Rdd=vmpywouh(Rss,Rtt)[:<<1][:rnd]:sat +def M2_mmpyuh_s0: T_M2_vmpy <"vmpywouh", 0b010, 0b111, 0, 0, 1>; +def M2_mmpyuh_s1: T_M2_vmpy <"vmpywouh", 0b110, 0b111, 1, 0, 1>; +def M2_mmpyuh_rs0: T_M2_vmpy <"vmpywouh", 0b011, 0b111, 0, 1, 1>; +def M2_mmpyuh_rs1: T_M2_vmpy <"vmpywouh", 0b111, 0b111, 1, 1, 1>; + +//Rdd=vmpyweuh(Rss,Rtt)[:<<1][:rnd]:sat +def M2_mmpyul_s0: T_M2_vmpy <"vmpyweuh", 0b010, 0b101, 0, 0, 1>; +def M2_mmpyul_s1: T_M2_vmpy <"vmpyweuh", 0b110, 0b101, 1, 0, 1>; +def M2_mmpyul_rs0: T_M2_vmpy <"vmpyweuh", 0b011, 0b101, 0, 1, 1>; +def M2_mmpyul_rs1: T_M2_vmpy <"vmpyweuh", 0b111, 0b101, 1, 1, 1>; +} + +let hasNewValue = 1, opNewValue = 0 in +class T_MType_mpy <string mnemonic, bits<4> RegTyBits, RegisterClass RC, + bits<3> MajOp, bits<3> MinOp, bit isSat = 0, bit isRnd = 0, + string op2Suffix = "", bit isRaw = 0, bit isHi = 0 > + : MInst <(outs IntRegs:$dst), (ins RC:$src1, RC:$src2), + "$dst = "#mnemonic + #"($src1, $src2"#op2Suffix#")" + #!if(MajOp{2}, ":<<1", "") + #!if(isRnd, ":rnd", "") + #!if(isSat, ":sat", "") + #!if(isRaw, !if(isHi, ":raw:hi", ":raw:lo"), ""), [] > { + bits<5> dst; + bits<5> src1; + bits<5> src2; + + let IClass = 0b1110; + + let Inst{27-24} = RegTyBits; + let Inst{23-21} = MajOp; + let Inst{20-16} = src1; + let Inst{13} = 0b0; + let Inst{12-8} = src2; + let Inst{7-5} = MinOp; + let Inst{4-0} = dst; + } + +class T_MType_vrcmpy <string mnemonic, bits<3> MajOp, bits<3> MinOp, bit isHi> + : T_MType_mpy <mnemonic, 0b1001, DoubleRegs, MajOp, MinOp, 1, 1, "", 1, isHi>; + +class T_MType_dd <string mnemonic, bits<3> MajOp, bits<3> MinOp, + bit isSat = 0, bit isRnd = 0 > + : T_MType_mpy <mnemonic, 0b1001, DoubleRegs, MajOp, MinOp, isSat, isRnd>; + +class T_MType_rr1 <string 
mnemonic, bits<3> MajOp, bits<3> MinOp, + bit isSat = 0, bit isRnd = 0 > + : T_MType_mpy<mnemonic, 0b1101, IntRegs, MajOp, MinOp, isSat, isRnd>; + +class T_MType_rr2 <string mnemonic, bits<3> MajOp, bits<3> MinOp, + bit isSat = 0, bit isRnd = 0, string op2str = "" > + : T_MType_mpy<mnemonic, 0b1101, IntRegs, MajOp, MinOp, isSat, isRnd, op2str>; + +def M2_vradduh : T_MType_dd <"vradduh", 0b000, 0b001, 0, 0>; +def M2_vdmpyrs_s0 : T_MType_dd <"vdmpy", 0b000, 0b000, 1, 1>; +def M2_vdmpyrs_s1 : T_MType_dd <"vdmpy", 0b100, 0b000, 1, 1>; + +let CextOpcode = "mpyi", InputType = "reg" in +def M2_mpyi : T_MType_rr1 <"mpyi", 0b000, 0b000>, ImmRegRel; + +def M2_mpy_up : T_MType_rr1 <"mpy", 0b000, 0b001>; +def M2_mpyu_up : T_MType_rr1 <"mpyu", 0b010, 0b001>; + +def M2_dpmpyss_rnd_s0 : T_MType_rr1 <"mpy", 0b001, 0b001, 0, 1>; + +def M2_vmpy2s_s0pack : T_MType_rr1 <"vmpyh", 0b001, 0b111, 1, 1>; +def M2_vmpy2s_s1pack : T_MType_rr1 <"vmpyh", 0b101, 0b111, 1, 1>; + +def M2_hmmpyh_rs1 : T_MType_rr2 <"mpy", 0b101, 0b100, 1, 1, ".h">; +def M2_hmmpyl_rs1 : T_MType_rr2 <"mpy", 0b111, 0b100, 1, 1, ".l">; + +def M2_cmpyrs_s0 : T_MType_rr2 <"cmpy", 0b001, 0b110, 1, 1>; +def M2_cmpyrs_s1 : T_MType_rr2 <"cmpy", 0b101, 0b110, 1, 1>; +def M2_cmpyrsc_s0 : T_MType_rr2 <"cmpy", 0b011, 0b110, 1, 1, "*">; +def M2_cmpyrsc_s1 : T_MType_rr2 <"cmpy", 0b111, 0b110, 1, 1, "*">; + +// V4 Instructions +def M2_vraddh : T_MType_dd <"vraddh", 0b001, 0b111, 0>; +def M2_mpysu_up : T_MType_rr1 <"mpysu", 0b011, 0b001, 0>; +def M2_mpy_up_s1 : T_MType_rr1 <"mpy", 0b101, 0b010, 0>; +def M2_mpy_up_s1_sat : T_MType_rr1 <"mpy", 0b111, 0b000, 1>; + +def M2_hmmpyh_s1 : T_MType_rr2 <"mpy", 0b101, 0b000, 1, 0, ".h">; +def M2_hmmpyl_s1 : T_MType_rr2 <"mpy", 0b101, 0b001, 1, 0, ".l">; + +def: Pat<(i32 (mul I32:$src1, I32:$src2)), (M2_mpyi I32:$src1, I32:$src2)>; +def: Pat<(i32 (mulhs I32:$src1, I32:$src2)), (M2_mpy_up I32:$src1, I32:$src2)>; +def: Pat<(i32 (mulhu I32:$src1, I32:$src2)), (M2_mpyu_up I32:$src1, I32:$src2)>; + +let hasNewValue = 1, opNewValue = 0 in +class T_MType_mpy_ri <bit isNeg, Operand ImmOp, list<dag> pattern> + : MInst < (outs IntRegs:$Rd), (ins IntRegs:$Rs, ImmOp:$u8), + "$Rd ="#!if(isNeg, "- ", "+ ")#"mpyi($Rs, #$u8)" , + pattern, "", M_tc_3x_SLOT23> { + bits<5> Rd; + bits<5> Rs; + bits<8> u8; + + let IClass = 0b1110; + + let Inst{27-24} = 0b0000; + let Inst{23} = isNeg; + let Inst{13} = 0b0; + let Inst{4-0} = Rd; + let Inst{20-16} = Rs; + let Inst{12-5} = u8; + } + +let isExtendable = 1, opExtentBits = 8, opExtendable = 2 in +def M2_mpysip : T_MType_mpy_ri <0, u8Ext, + [(set (i32 IntRegs:$Rd), (mul IntRegs:$Rs, u32ImmPred:$u8))]>; + +def M2_mpysin : T_MType_mpy_ri <1, u8Imm, + [(set (i32 IntRegs:$Rd), (ineg (mul IntRegs:$Rs, + u8ImmPred:$u8)))]>; + +// Assembler mapped to M2_mpyi +let isAsmParserOnly = 1 in +def M2_mpyui : MInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = mpyui($src1, $src2)">; + +// Rd=mpyi(Rs,#m9) +// s9 is NOT the same as m9 - but it works.. so far. +// Assembler maps to either Rd=+mpyi(Rs,#u8) or Rd=-mpyi(Rs,#u8) +// depending on the value of m9. See Arch Spec.
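+// For illustration (assumed from the mapping described above, not quoted from
+// the Arch Spec):
+//     Rd=mpyi(Rs,#200)   is accepted as   Rd=+mpyi(Rs,#200)   (M2_mpysip)
+//     Rd=mpyi(Rs,#-200)  is accepted as   Rd=-mpyi(Rs,#200)   (M2_mpysin)
+// i.e. the sign of #m9 chooses between M2_mpysip and M2_mpysin and the
+// magnitude is encoded as the #u8 operand.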
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 9, + CextOpcode = "mpyi", InputType = "imm", hasNewValue = 1, + isAsmParserOnly = 1 in +def M2_mpysmi : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s9Ext:$src2), + "$dst = mpyi($src1, #$src2)", + [(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1), + s32ImmPred:$src2))]>, ImmRegRel; + +let hasNewValue = 1, isExtendable = 1, opExtentBits = 8, opExtendable = 3, + InputType = "imm" in +class T_MType_acc_ri <string mnemonic, bits<3> MajOp, Operand ImmOp, + list<dag> pattern = []> + : MInst < (outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, ImmOp:$src3), + "$dst "#mnemonic#"($src2, #$src3)", + pattern, "$src1 = $dst", M_tc_2_SLOT23> { + bits<5> dst; + bits<5> src2; + bits<8> src3; + + let IClass = 0b1110; + + let Inst{27-26} = 0b00; + let Inst{25-23} = MajOp; + let Inst{20-16} = src2; + let Inst{13} = 0b0; + let Inst{12-5} = src3; + let Inst{4-0} = dst; + } + +let InputType = "reg", hasNewValue = 1 in +class T_MType_acc_rr <string mnemonic, bits<3> MajOp, bits<3> MinOp, + bit isSwap = 0, list<dag> pattern = [], bit hasNot = 0, + bit isSat = 0, bit isShift = 0> + : MInst < (outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "$dst "#mnemonic#"($src2, "#!if(hasNot, "~$src3)","$src3)") + #!if(isShift, ":<<1", "") + #!if(isSat, ":sat", ""), + pattern, "$src1 = $dst", M_tc_2_SLOT23 > { + bits<5> dst; + bits<5> src2; + bits<5> src3; + + let IClass = 0b1110; + + let Inst{27-24} = 0b1111; + let Inst{23-21} = MajOp; + let Inst{20-16} = !if(isSwap, src3, src2); + let Inst{13} = 0b0; + let Inst{12-8} = !if(isSwap, src2, src3); + let Inst{7-5} = MinOp; + let Inst{4-0} = dst; + } + +let CextOpcode = "MPYI_acc", Itinerary = M_tc_3x_SLOT23 in { + def M2_macsip : T_MType_acc_ri <"+= mpyi", 0b010, u8Ext, + [(set (i32 IntRegs:$dst), + (add (mul IntRegs:$src2, u32ImmPred:$src3), + IntRegs:$src1))]>, ImmRegRel; + + def M2_maci : T_MType_acc_rr <"+= mpyi", 0b000, 0b000, 0, + [(set (i32 IntRegs:$dst), + (add (mul IntRegs:$src2, IntRegs:$src3), + IntRegs:$src1))]>, ImmRegRel; +} + +let CextOpcode = "ADD_acc" in { + let isExtentSigned = 1 in + def M2_accii : T_MType_acc_ri <"+= add", 0b100, s8Ext, + [(set (i32 IntRegs:$dst), + (add (add (i32 IntRegs:$src2), s32ImmPred:$src3), + (i32 IntRegs:$src1)))]>, ImmRegRel; + + def M2_acci : T_MType_acc_rr <"+= add", 0b000, 0b001, 0, + [(set (i32 IntRegs:$dst), + (add (add (i32 IntRegs:$src2), (i32 IntRegs:$src3)), + (i32 IntRegs:$src1)))]>, ImmRegRel; +} + +let CextOpcode = "SUB_acc" in { + let isExtentSigned = 1 in + def M2_naccii : T_MType_acc_ri <"-= add", 0b101, s8Ext>, ImmRegRel; + + def M2_nacci : T_MType_acc_rr <"-= add", 0b100, 0b001, 0>, ImmRegRel; +} + +let Itinerary = M_tc_3x_SLOT23 in +def M2_macsin : T_MType_acc_ri <"-= mpyi", 0b011, u8Ext>; + +def M2_xor_xacc : T_MType_acc_rr < "^= xor", 0b100, 0b011, 0>; +def M2_subacc : T_MType_acc_rr <"+= sub", 0b000, 0b011, 1>; + +class T_MType_acc_pat1 <InstHexagon MI, SDNode firstOp, SDNode secOp, + PatLeaf ImmPred> + : Pat <(secOp IntRegs:$src1, (firstOp IntRegs:$src2, ImmPred:$src3)), + (MI IntRegs:$src1, IntRegs:$src2, ImmPred:$src3)>; + +class T_MType_acc_pat2 <InstHexagon MI, SDNode firstOp, SDNode secOp> + : Pat <(i32 (secOp IntRegs:$src1, (firstOp IntRegs:$src2, IntRegs:$src3))), + (MI IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; + +def : T_MType_acc_pat2 <M2_xor_xacc, xor, xor>; +def : T_MType_acc_pat1 <M2_macsin, mul, sub, u32ImmPred>; + +def : T_MType_acc_pat1 <M2_naccii, add, sub, s32ImmPred>; +def : 
T_MType_acc_pat2 <M2_nacci, add, sub>; + +//===----------------------------------------------------------------------===// +// Template Class -- XType Vector Instructions +//===----------------------------------------------------------------------===// +class T_XTYPE_Vect < string opc, bits<3> MajOp, bits<3> MinOp, bit isConj > + : MInst <(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss, DoubleRegs:$Rtt), + "$Rdd = "#opc#"($Rss, $Rtt"#!if(isConj,"*)",")"), + [] > { + bits<5> Rdd; + bits<5> Rss; + bits<5> Rtt; + + let IClass = 0b1110; + + let Inst{27-24} = 0b1000; + let Inst{23-21} = MajOp; + let Inst{7-5} = MinOp; + let Inst{4-0} = Rdd; + let Inst{20-16} = Rss; + let Inst{12-8} = Rtt; + } + +class T_XTYPE_Vect_acc < string opc, bits<3> MajOp, bits<3> MinOp, bit isConj > + : MInst <(outs DoubleRegs:$Rdd), + (ins DoubleRegs:$dst2, DoubleRegs:$Rss, DoubleRegs:$Rtt), + "$Rdd += "#opc#"($Rss, $Rtt"#!if(isConj,"*)",")"), + [], "$dst2 = $Rdd",M_tc_3x_SLOT23 > { + bits<5> Rdd; + bits<5> Rss; + bits<5> Rtt; + + let IClass = 0b1110; + + let Inst{27-24} = 0b1010; + let Inst{23-21} = MajOp; + let Inst{7-5} = MinOp; + let Inst{4-0} = Rdd; + let Inst{20-16} = Rss; + let Inst{12-8} = Rtt; + } + +class T_XTYPE_Vect_diff < bits<3> MajOp, string opc > + : MInst <(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rtt, DoubleRegs:$Rss), + "$Rdd = "#opc#"($Rtt, $Rss)", + [], "",M_tc_2_SLOT23 > { + bits<5> Rdd; + bits<5> Rss; + bits<5> Rtt; + + let IClass = 0b1110; + + let Inst{27-24} = 0b1000; + let Inst{23-21} = MajOp; + let Inst{7-5} = 0b000; + let Inst{4-0} = Rdd; + let Inst{20-16} = Rss; + let Inst{12-8} = Rtt; + } + +// Vector reduce add unsigned bytes: Rdd32=vrmpybu(Rss32,Rtt32) +def A2_vraddub: T_XTYPE_Vect <"vraddub", 0b010, 0b001, 0>; +def A2_vraddub_acc: T_XTYPE_Vect_acc <"vraddub", 0b010, 0b001, 0>; + +// Vector sum of absolute differences unsigned bytes: Rdd=vrsadub(Rss,Rtt) +def A2_vrsadub: T_XTYPE_Vect <"vrsadub", 0b010, 0b010, 0>; +def A2_vrsadub_acc: T_XTYPE_Vect_acc <"vrsadub", 0b010, 0b010, 0>; + +// Vector absolute difference: Rdd=vabsdiffh(Rtt,Rss) +def M2_vabsdiffh: T_XTYPE_Vect_diff<0b011, "vabsdiffh">; + +// Vector absolute difference words: Rdd=vabsdiffw(Rtt,Rss) +def M2_vabsdiffw: T_XTYPE_Vect_diff<0b001, "vabsdiffw">; + +// Vector reduce complex multiply real or imaginary: +// Rdd[+]=vrcmpy[ir](Rss,Rtt[*]) +def M2_vrcmpyi_s0: T_XTYPE_Vect <"vrcmpyi", 0b000, 0b000, 0>; +def M2_vrcmpyi_s0c: T_XTYPE_Vect <"vrcmpyi", 0b010, 0b000, 1>; +def M2_vrcmaci_s0: T_XTYPE_Vect_acc <"vrcmpyi", 0b000, 0b000, 0>; +def M2_vrcmaci_s0c: T_XTYPE_Vect_acc <"vrcmpyi", 0b010, 0b000, 1>; + +def M2_vrcmpyr_s0: T_XTYPE_Vect <"vrcmpyr", 0b000, 0b001, 0>; +def M2_vrcmpyr_s0c: T_XTYPE_Vect <"vrcmpyr", 0b011, 0b001, 1>; +def M2_vrcmacr_s0: T_XTYPE_Vect_acc <"vrcmpyr", 0b000, 0b001, 0>; +def M2_vrcmacr_s0c: T_XTYPE_Vect_acc <"vrcmpyr", 0b011, 0b001, 1>; + +// Vector reduce halfwords: +// Rdd[+]=vrmpyh(Rss,Rtt) +def M2_vrmpy_s0: T_XTYPE_Vect <"vrmpyh", 0b000, 0b010, 0>; +def M2_vrmac_s0: T_XTYPE_Vect_acc <"vrmpyh", 0b000, 0b010, 0>; + +//===----------------------------------------------------------------------===// +// Template Class -- Vector Multiply with accumulation.
+// Used for complex multiply real or imaginary, dual multiply and even halfwords +//===----------------------------------------------------------------------===// +let Defs = [USR_OVF] in +class T_M2_vmpy_acc_sat < string opc, bits<3> MajOp, bits<3> MinOp, + bit hasShift, bit isRnd > + : MInst <(outs DoubleRegs:$Rxx), + (ins DoubleRegs:$dst2, DoubleRegs:$Rss, DoubleRegs:$Rtt), + "$Rxx += "#opc#"($Rss, $Rtt)"#!if(hasShift,":<<1","") + #!if(isRnd,":rnd","")#":sat", + [], "$dst2 = $Rxx",M_tc_3x_SLOT23 > { + bits<5> Rxx; + bits<5> Rss; + bits<5> Rtt; + + let IClass = 0b1110; + + let Inst{27-24} = 0b1010; + let Inst{23-21} = MajOp; + let Inst{7-5} = MinOp; + let Inst{4-0} = Rxx; + let Inst{20-16} = Rss; + let Inst{12-8} = Rtt; + } + +class T_M2_vmpy_acc < string opc, bits<3> MajOp, bits<3> MinOp, + bit hasShift, bit isRnd > + : MInst <(outs DoubleRegs:$Rxx), + (ins DoubleRegs:$dst2, DoubleRegs:$Rss, DoubleRegs:$Rtt), + "$Rxx += "#opc#"($Rss, $Rtt)"#!if(hasShift,":<<1","") + #!if(isRnd,":rnd",""), + [], "$dst2 = $Rxx",M_tc_3x_SLOT23 > { + bits<5> Rxx; + bits<5> Rss; + bits<5> Rtt; + + let IClass = 0b1110; + + let Inst{27-24} = 0b1010; + let Inst{23-21} = MajOp; + let Inst{7-5} = MinOp; + let Inst{4-0} = Rxx; + let Inst{20-16} = Rss; + let Inst{12-8} = Rtt; + } + +// Vector multiply word by signed half with accumulation +// Rxx+=vmpyw[eo]h(Rss,Rtt)[:<<1][:rnd]:sat +def M2_mmacls_s1: T_M2_vmpy_acc_sat <"vmpyweh", 0b100, 0b101, 1, 0>; +def M2_mmacls_s0: T_M2_vmpy_acc_sat <"vmpyweh", 0b000, 0b101, 0, 0>; +def M2_mmacls_rs1: T_M2_vmpy_acc_sat <"vmpyweh", 0b101, 0b101, 1, 1>; +def M2_mmacls_rs0: T_M2_vmpy_acc_sat <"vmpyweh", 0b001, 0b101, 0, 1>; + +def M2_mmachs_s1: T_M2_vmpy_acc_sat <"vmpywoh", 0b100, 0b111, 1, 0>; +def M2_mmachs_s0: T_M2_vmpy_acc_sat <"vmpywoh", 0b000, 0b111, 0, 0>; +def M2_mmachs_rs1: T_M2_vmpy_acc_sat <"vmpywoh", 0b101, 0b111, 1, 1>; +def M2_mmachs_rs0: T_M2_vmpy_acc_sat <"vmpywoh", 0b001, 0b111, 0, 1>; + +// Vector multiply word by unsigned half with accumulation +// Rxx+=vmpyw[eo]uh(Rss,Rtt)[:<<1][:rnd]:sat +def M2_mmaculs_s1: T_M2_vmpy_acc_sat <"vmpyweuh", 0b110, 0b101, 1, 0>; +def M2_mmaculs_s0: T_M2_vmpy_acc_sat <"vmpyweuh", 0b010, 0b101, 0, 0>; +def M2_mmaculs_rs1: T_M2_vmpy_acc_sat <"vmpyweuh", 0b111, 0b101, 1, 1>; +def M2_mmaculs_rs0: T_M2_vmpy_acc_sat <"vmpyweuh", 0b011, 0b101, 0, 1>; + +def M2_mmacuhs_s1: T_M2_vmpy_acc_sat <"vmpywouh", 0b110, 0b111, 1, 0>; +def M2_mmacuhs_s0: T_M2_vmpy_acc_sat <"vmpywouh", 0b010, 0b111, 0, 0>; +def M2_mmacuhs_rs1: T_M2_vmpy_acc_sat <"vmpywouh", 0b111, 0b111, 1, 1>; +def M2_mmacuhs_rs0: T_M2_vmpy_acc_sat <"vmpywouh", 0b011, 0b111, 0, 1>; + +// Vector multiply even halfwords with accumulation +// Rxx+=vmpyeh(Rss,Rtt)[:<<1][:sat] +def M2_vmac2es: T_M2_vmpy_acc <"vmpyeh", 0b001, 0b010, 0, 0>; +def M2_vmac2es_s1: T_M2_vmpy_acc_sat <"vmpyeh", 0b100, 0b110, 1, 0>; +def M2_vmac2es_s0: T_M2_vmpy_acc_sat <"vmpyeh", 0b000, 0b110, 0, 0>; + +// Vector dual multiply with accumulation +// Rxx+=vdmpy(Rss,Rtt)[:sat] +def M2_vdmacs_s1: T_M2_vmpy_acc_sat <"vdmpy", 0b100, 0b100, 1, 0>; +def M2_vdmacs_s0: T_M2_vmpy_acc_sat <"vdmpy", 0b000, 0b100, 0, 0>; + +// Vector complex multiply real or imaginary with accumulation +// Rxx+=vcmpy[ir](Rss,Rtt):sat +def M2_vcmac_s0_sat_r: T_M2_vmpy_acc_sat <"vcmpyr", 0b001, 0b100, 0, 0>; +def M2_vcmac_s0_sat_i: T_M2_vmpy_acc_sat <"vcmpyi", 0b010, 0b100, 0, 0>; + +//===----------------------------------------------------------------------===// +// Template Class -- Multiply signed/unsigned halfwords with and without +// 
saturation and rounding +//===----------------------------------------------------------------------===// +class T_M2_mpyd < bits<2> LHbits, bit isRnd, bit hasShift, bit isUnsigned > + : MInst < (outs DoubleRegs:$Rdd), (ins IntRegs:$Rs, IntRegs:$Rt), + "$Rdd = "#!if(isUnsigned,"mpyu","mpy")#"($Rs."#!if(LHbits{1},"h","l") + #", $Rt."#!if(LHbits{0},"h)","l)") + #!if(hasShift,":<<1","") + #!if(isRnd,":rnd",""), + [] > { + bits<5> Rdd; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1110; + + let Inst{27-24} = 0b0100; + let Inst{23} = hasShift; + let Inst{22} = isUnsigned; + let Inst{21} = isRnd; + let Inst{6-5} = LHbits; + let Inst{4-0} = Rdd; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; +} + +def M2_mpyd_hh_s0: T_M2_mpyd<0b11, 0, 0, 0>; +def M2_mpyd_hl_s0: T_M2_mpyd<0b10, 0, 0, 0>; +def M2_mpyd_lh_s0: T_M2_mpyd<0b01, 0, 0, 0>; +def M2_mpyd_ll_s0: T_M2_mpyd<0b00, 0, 0, 0>; + +def M2_mpyd_hh_s1: T_M2_mpyd<0b11, 0, 1, 0>; +def M2_mpyd_hl_s1: T_M2_mpyd<0b10, 0, 1, 0>; +def M2_mpyd_lh_s1: T_M2_mpyd<0b01, 0, 1, 0>; +def M2_mpyd_ll_s1: T_M2_mpyd<0b00, 0, 1, 0>; + +def M2_mpyd_rnd_hh_s0: T_M2_mpyd<0b11, 1, 0, 0>; +def M2_mpyd_rnd_hl_s0: T_M2_mpyd<0b10, 1, 0, 0>; +def M2_mpyd_rnd_lh_s0: T_M2_mpyd<0b01, 1, 0, 0>; +def M2_mpyd_rnd_ll_s0: T_M2_mpyd<0b00, 1, 0, 0>; + +def M2_mpyd_rnd_hh_s1: T_M2_mpyd<0b11, 1, 1, 0>; +def M2_mpyd_rnd_hl_s1: T_M2_mpyd<0b10, 1, 1, 0>; +def M2_mpyd_rnd_lh_s1: T_M2_mpyd<0b01, 1, 1, 0>; +def M2_mpyd_rnd_ll_s1: T_M2_mpyd<0b00, 1, 1, 0>; + +//Rdd=mpyu(Rs.[HL],Rt.[HL])[:<<1] +def M2_mpyud_hh_s0: T_M2_mpyd<0b11, 0, 0, 1>; +def M2_mpyud_hl_s0: T_M2_mpyd<0b10, 0, 0, 1>; +def M2_mpyud_lh_s0: T_M2_mpyd<0b01, 0, 0, 1>; +def M2_mpyud_ll_s0: T_M2_mpyd<0b00, 0, 0, 1>; + +def M2_mpyud_hh_s1: T_M2_mpyd<0b11, 0, 1, 1>; +def M2_mpyud_hl_s1: T_M2_mpyd<0b10, 0, 1, 1>; +def M2_mpyud_lh_s1: T_M2_mpyd<0b01, 0, 1, 1>; +def M2_mpyud_ll_s1: T_M2_mpyd<0b00, 0, 1, 1>; + +//===----------------------------------------------------------------------===// +// Template Class for xtype mpy: +// Vector multiply +// Complex multiply +// multiply 32X32 and use full result +//===----------------------------------------------------------------------===// +let hasSideEffects = 0 in +class T_XTYPE_mpy64 <string mnemonic, bits<3> MajOp, bits<3> MinOp, + bit isSat, bit hasShift, bit isConj> + : MInst <(outs DoubleRegs:$Rdd), + (ins IntRegs:$Rs, IntRegs:$Rt), + "$Rdd = "#mnemonic#"($Rs, $Rt"#!if(isConj,"*)",")") + #!if(hasShift,":<<1","") + #!if(isSat,":sat",""), + [] > { + bits<5> Rdd; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1110; + + let Inst{27-24} = 0b0101; + let Inst{23-21} = MajOp; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + let Inst{7-5} = MinOp; + let Inst{4-0} = Rdd; + } + +//===----------------------------------------------------------------------===// +// Template Class for xtype mpy with accumulation into 64-bit: +// Vector multiply +// Complex multiply +// multiply 32X32 and use full result +//===----------------------------------------------------------------------===// +class T_XTYPE_mpy64_acc <string op1, string op2, bits<3> MajOp, bits<3> MinOp, + bit isSat, bit hasShift, bit isConj> + : MInst <(outs DoubleRegs:$Rxx), + (ins DoubleRegs:$dst2, IntRegs:$Rs, IntRegs:$Rt), + "$Rxx "#op2#"= "#op1#"($Rs, $Rt"#!if(isConj,"*)",")") + #!if(hasShift,":<<1","") + #!if(isSat,":sat",""), + + [] , "$dst2 = $Rxx" > { + bits<5> Rxx; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1110; + + let Inst{27-24} = 0b0111; + let Inst{23-21} = MajOp; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + let Inst{7-5} = 
MinOp; + let Inst{4-0} = Rxx; + } + +// MPY - Multiply and use full result +// Rdd = mpy[u](Rs,Rt) +def M2_dpmpyss_s0 : T_XTYPE_mpy64 < "mpy", 0b000, 0b000, 0, 0, 0>; +def M2_dpmpyuu_s0 : T_XTYPE_mpy64 < "mpyu", 0b010, 0b000, 0, 0, 0>; + +// Rxx[+-]= mpy[u](Rs,Rt) +def M2_dpmpyss_acc_s0 : T_XTYPE_mpy64_acc < "mpy", "+", 0b000, 0b000, 0, 0, 0>; +def M2_dpmpyss_nac_s0 : T_XTYPE_mpy64_acc < "mpy", "-", 0b001, 0b000, 0, 0, 0>; +def M2_dpmpyuu_acc_s0 : T_XTYPE_mpy64_acc < "mpyu", "+", 0b010, 0b000, 0, 0, 0>; +def M2_dpmpyuu_nac_s0 : T_XTYPE_mpy64_acc < "mpyu", "-", 0b011, 0b000, 0, 0, 0>; + +// Complex multiply real or imaginary +// Rxx=cmpy[ir](Rs,Rt) +def M2_cmpyi_s0 : T_XTYPE_mpy64 < "cmpyi", 0b000, 0b001, 0, 0, 0>; +def M2_cmpyr_s0 : T_XTYPE_mpy64 < "cmpyr", 0b000, 0b010, 0, 0, 0>; + +// Rxx+=cmpy[ir](Rs,Rt) +def M2_cmaci_s0 : T_XTYPE_mpy64_acc < "cmpyi", "+", 0b000, 0b001, 0, 0, 0>; +def M2_cmacr_s0 : T_XTYPE_mpy64_acc < "cmpyr", "+", 0b000, 0b010, 0, 0, 0>; + +// Complex multiply +// Rdd=cmpy(Rs,Rt)[:<<]:sat +def M2_cmpys_s0 : T_XTYPE_mpy64 < "cmpy", 0b000, 0b110, 1, 0, 0>; +def M2_cmpys_s1 : T_XTYPE_mpy64 < "cmpy", 0b100, 0b110, 1, 1, 0>; + +// Rdd=cmpy(Rs,Rt*)[:<<]:sat +def M2_cmpysc_s0 : T_XTYPE_mpy64 < "cmpy", 0b010, 0b110, 1, 0, 1>; +def M2_cmpysc_s1 : T_XTYPE_mpy64 < "cmpy", 0b110, 0b110, 1, 1, 1>; + +// Rxx[-+]=cmpy(Rs,Rt)[:<<1]:sat +def M2_cmacs_s0 : T_XTYPE_mpy64_acc < "cmpy", "+", 0b000, 0b110, 1, 0, 0>; +def M2_cnacs_s0 : T_XTYPE_mpy64_acc < "cmpy", "-", 0b000, 0b111, 1, 0, 0>; +def M2_cmacs_s1 : T_XTYPE_mpy64_acc < "cmpy", "+", 0b100, 0b110, 1, 1, 0>; +def M2_cnacs_s1 : T_XTYPE_mpy64_acc < "cmpy", "-", 0b100, 0b111, 1, 1, 0>; + +// Rxx[-+]=cmpy(Rs,Rt*)[:<<1]:sat +def M2_cmacsc_s0 : T_XTYPE_mpy64_acc < "cmpy", "+", 0b010, 0b110, 1, 0, 1>; +def M2_cnacsc_s0 : T_XTYPE_mpy64_acc < "cmpy", "-", 0b010, 0b111, 1, 0, 1>; +def M2_cmacsc_s1 : T_XTYPE_mpy64_acc < "cmpy", "+", 0b110, 0b110, 1, 1, 1>; +def M2_cnacsc_s1 : T_XTYPE_mpy64_acc < "cmpy", "-", 0b110, 0b111, 1, 1, 1>; + +// Vector multiply halfwords +// Rdd=vmpyh(Rs,Rt)[:<<]:sat +//let Defs = [USR_OVF] in { + def M2_vmpy2s_s1 : T_XTYPE_mpy64 < "vmpyh", 0b100, 0b101, 1, 1, 0>; + def M2_vmpy2s_s0 : T_XTYPE_mpy64 < "vmpyh", 0b000, 0b101, 1, 0, 0>; +//} + +// Rxx+=vmpyh(Rs,Rt)[:<<1][:sat] +def M2_vmac2 : T_XTYPE_mpy64_acc < "vmpyh", "+", 0b001, 0b001, 0, 0, 0>; +def M2_vmac2s_s1 : T_XTYPE_mpy64_acc < "vmpyh", "+", 0b100, 0b101, 1, 1, 0>; +def M2_vmac2s_s0 : T_XTYPE_mpy64_acc < "vmpyh", "+", 0b000, 0b101, 1, 0, 0>; + +def: Pat<(i64 (mul (i64 (anyext (i32 IntRegs:$src1))), + (i64 (anyext (i32 IntRegs:$src2))))), + (M2_dpmpyuu_s0 IntRegs:$src1, IntRegs:$src2)>; + +def: Pat<(i64 (mul (i64 (sext (i32 IntRegs:$src1))), + (i64 (sext (i32 IntRegs:$src2))))), + (M2_dpmpyss_s0 IntRegs:$src1, IntRegs:$src2)>; + +def: Pat<(i64 (mul (is_sext_i32:$src1), + (is_sext_i32:$src2))), + (M2_dpmpyss_s0 (LoReg DoubleRegs:$src1), (LoReg DoubleRegs:$src2))>; + +// Multiply and accumulate, use full result. 
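+// For instance (an illustrative C-level view of the patterns below):
+//     int64_t acc; int32_t a, b;
+//     acc += (int64_t)a * (int64_t)b;
+// matches (add i64, (mul (sext i32), (sext i32))) and selects
+// M2_dpmpyss_acc_s0, which prints as, e.g., "r5:4 += mpy(r0, r1)".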
+// Rxx[+-]=mpy(Rs,Rt) + +def: Pat<(i64 (add (i64 DoubleRegs:$src1), + (mul (i64 (sext (i32 IntRegs:$src2))), + (i64 (sext (i32 IntRegs:$src3)))))), + (M2_dpmpyss_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; + +def: Pat<(i64 (sub (i64 DoubleRegs:$src1), + (mul (i64 (sext (i32 IntRegs:$src2))), + (i64 (sext (i32 IntRegs:$src3)))))), + (M2_dpmpyss_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; + +def: Pat<(i64 (add (i64 DoubleRegs:$src1), + (mul (i64 (anyext (i32 IntRegs:$src2))), + (i64 (anyext (i32 IntRegs:$src3)))))), + (M2_dpmpyuu_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; + +def: Pat<(i64 (add (i64 DoubleRegs:$src1), + (mul (i64 (zext (i32 IntRegs:$src2))), + (i64 (zext (i32 IntRegs:$src3)))))), + (M2_dpmpyuu_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; + +def: Pat<(i64 (sub (i64 DoubleRegs:$src1), + (mul (i64 (anyext (i32 IntRegs:$src2))), + (i64 (anyext (i32 IntRegs:$src3)))))), + (M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; + +def: Pat<(i64 (sub (i64 DoubleRegs:$src1), + (mul (i64 (zext (i32 IntRegs:$src2))), + (i64 (zext (i32 IntRegs:$src3)))))), + (M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; + +//===----------------------------------------------------------------------===// +// MTYPE/MPYH - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MTYPE/MPYS + +//===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// MTYPE/MPYS - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MTYPE/VB + +//===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// MTYPE/VB - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MTYPE/VH + +//===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// MTYPE/VH - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// ST + +//===----------------------------------------------------------------------===// +/// +// Store doubleword. 
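For orientation, a minimal C sketch (names invented) of a doubleword store through a post-incremented pointer, the addressing form handled by the post-increment store templates that follow; whether the post-increment form, e.g. "memd($Rx++#8) = $Rtt", is actually chosen depends on addressing-mode optimization in the backend.

    #include <stdint.h>

    /* Copy n 64-bit elements; each trip stores through a pointer that is
       then advanced by 8, the shape the S2_storerd_pi-style definitions
       below are meant for. */
    void copy64(int64_t *dst, const int64_t *src, int n) {
        for (int i = 0; i < n; ++i)
            *dst++ = *src++;
    }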
+//===----------------------------------------------------------------------===// +// Template class for non-predicated post increment stores with immediate offset +//===----------------------------------------------------------------------===// +let isPredicable = 1, hasSideEffects = 0, addrMode = PostInc in +class T_store_pi <string mnemonic, RegisterClass RC, Operand ImmOp, + bits<4> MajOp, bit isHalf > + : STInst <(outs IntRegs:$_dst_), + (ins IntRegs:$src1, ImmOp:$offset, RC:$src2), + mnemonic#"($src1++#$offset) = $src2"#!if(isHalf, ".h", ""), + [], "$src1 = $_dst_" >, + AddrModeRel { + bits<5> src1; + bits<5> src2; + bits<7> offset; + bits<4> offsetBits; + + string ImmOpStr = !cast<string>(ImmOp); + let offsetBits = !if (!eq(ImmOpStr, "s4_3Imm"), offset{6-3}, + !if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2}, + !if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1}, + /* s4_0Imm */ offset{3-0}))); + // Store upper-half and store doubleword cannot be NV. + let isNVStorable = !if (!eq(ImmOpStr, "s4_3Imm"), 0, !if(isHalf,0,1)); + + let IClass = 0b1010; + + let Inst{27-25} = 0b101; + let Inst{24-21} = MajOp; + let Inst{20-16} = src1; + let Inst{13} = 0b0; + let Inst{12-8} = src2; + let Inst{7} = 0b0; + let Inst{6-3} = offsetBits; + let Inst{1} = 0b0; + } + +//===----------------------------------------------------------------------===// +// Template class for predicated post increment stores with immediate offset +//===----------------------------------------------------------------------===// +let isPredicated = 1, hasSideEffects = 0, addrMode = PostInc in +class T_pstore_pi <string mnemonic, RegisterClass RC, Operand ImmOp, + bits<4> MajOp, bit isHalf, bit isPredNot, bit isPredNew> + : STInst <(outs IntRegs:$_dst_), + (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset, RC:$src3), + !if(isPredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", + ") ")#mnemonic#"($src2++#$offset) = $src3"#!if(isHalf, ".h", ""), + [], "$src2 = $_dst_" >, + AddrModeRel { + bits<2> src1; + bits<5> src2; + bits<7> offset; + bits<5> src3; + bits<4> offsetBits; + + string ImmOpStr = !cast<string>(ImmOp); + let offsetBits = !if (!eq(ImmOpStr, "s4_3Imm"), offset{6-3}, + !if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2}, + !if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1}, + /* s4_0Imm */ offset{3-0}))); + + // Store upper-half and store doubleword cannot be NV. 
+ let isNVStorable = !if (!eq(ImmOpStr, "s4_3Imm"), 0, !if(isHalf,0,1)); + let isPredicatedNew = isPredNew; + let isPredicatedFalse = isPredNot; + + let IClass = 0b1010; + + let Inst{27-25} = 0b101; + let Inst{24-21} = MajOp; + let Inst{20-16} = src2; + let Inst{13} = 0b1; + let Inst{12-8} = src3; + let Inst{7} = isPredNew; + let Inst{6-3} = offsetBits; + let Inst{2} = isPredNot; + let Inst{1-0} = src1; + } + +multiclass ST_PostInc<string mnemonic, string BaseOp, RegisterClass RC, + Operand ImmOp, bits<4> MajOp, bit isHalf = 0 > { + + let BaseOpcode = "POST_"#BaseOp in { + def S2_#NAME#_pi : T_store_pi <mnemonic, RC, ImmOp, MajOp, isHalf>; + + // Predicated + def S2_p#NAME#t_pi : T_pstore_pi <mnemonic, RC, ImmOp, MajOp, isHalf, 0, 0>; + def S2_p#NAME#f_pi : T_pstore_pi <mnemonic, RC, ImmOp, MajOp, isHalf, 1, 0>; + + // Predicated new + def S2_p#NAME#tnew_pi : T_pstore_pi <mnemonic, RC, ImmOp, MajOp, + isHalf, 0, 1>; + def S2_p#NAME#fnew_pi : T_pstore_pi <mnemonic, RC, ImmOp, MajOp, + isHalf, 1, 1>; + } +} + +let accessSize = ByteAccess in +defm storerb: ST_PostInc <"memb", "STrib", IntRegs, s4_0Imm, 0b1000>; + +let accessSize = HalfWordAccess in +defm storerh: ST_PostInc <"memh", "STrih", IntRegs, s4_1Imm, 0b1010>; + +let accessSize = WordAccess in +defm storeri: ST_PostInc <"memw", "STriw", IntRegs, s4_2Imm, 0b1100>; + +let accessSize = DoubleWordAccess in +defm storerd: ST_PostInc <"memd", "STrid", DoubleRegs, s4_3Imm, 0b1110>; + +let accessSize = HalfWordAccess, isNVStorable = 0 in +defm storerf: ST_PostInc <"memh", "STrih_H", IntRegs, s4_1Imm, 0b1011, 1>; + +class Storepi_pat<PatFrag Store, PatFrag Value, PatFrag Offset, + InstHexagon MI> + : Pat<(Store Value:$src1, I32:$src2, Offset:$offset), + (MI I32:$src2, imm:$offset, Value:$src1)>; + +def: Storepi_pat<post_truncsti8, I32, s4_0ImmPred, S2_storerb_pi>; +def: Storepi_pat<post_truncsti16, I32, s4_1ImmPred, S2_storerh_pi>; +def: Storepi_pat<post_store, I32, s4_2ImmPred, S2_storeri_pi>; +def: Storepi_pat<post_store, I64, s4_3ImmPred, S2_storerd_pi>; + +//===----------------------------------------------------------------------===// +// Template class for post increment stores with register offset. +//===----------------------------------------------------------------------===// +class T_store_pr <string mnemonic, RegisterClass RC, bits<3> MajOp, + MemAccessSize AccessSz, bit isHalf = 0> + : STInst <(outs IntRegs:$_dst_), + (ins IntRegs:$src1, ModRegs:$src2, RC:$src3), + mnemonic#"($src1++$src2) = $src3"#!if(isHalf, ".h", ""), + [], "$src1 = $_dst_" > { + bits<5> src1; + bits<1> src2; + bits<5> src3; + let accessSize = AccessSz; + + // Store upper-half and store doubleword cannot be NV. 
+ let isNVStorable = !if(!eq(mnemonic,"memd"), 0, !if(isHalf,0,1)); + + let IClass = 0b1010; + + let Inst{27-24} = 0b1101; + let Inst{23-21} = MajOp; + let Inst{20-16} = src1; + let Inst{13} = src2; + let Inst{12-8} = src3; + let Inst{7} = 0b0; + } + +def S2_storerb_pr : T_store_pr<"memb", IntRegs, 0b000, ByteAccess>; +def S2_storerh_pr : T_store_pr<"memh", IntRegs, 0b010, HalfWordAccess>; +def S2_storeri_pr : T_store_pr<"memw", IntRegs, 0b100, WordAccess>; +def S2_storerd_pr : T_store_pr<"memd", DoubleRegs, 0b110, DoubleWordAccess>; +def S2_storerf_pr : T_store_pr<"memh", IntRegs, 0b011, HalfWordAccess, 1>; + +let opExtendable = 1, isExtentSigned = 1, isPredicable = 1 in +class T_store_io <string mnemonic, RegisterClass RC, Operand ImmOp, + bits<3> MajOp, bit isH = 0> + : STInst <(outs), + (ins IntRegs:$src1, ImmOp:$src2, RC:$src3), + mnemonic#"($src1+#$src2) = $src3"#!if(isH,".h","")>, + AddrModeRel, ImmRegRel { + bits<5> src1; + bits<14> src2; // Actual address offset + bits<5> src3; + bits<11> offsetBits; // Represents offset encoding + + string ImmOpStr = !cast<string>(ImmOp); + + let opExtentBits = !if (!eq(ImmOpStr, "s11_3Ext"), 14, + !if (!eq(ImmOpStr, "s11_2Ext"), 13, + !if (!eq(ImmOpStr, "s11_1Ext"), 12, + /* s11_0Ext */ 11))); + let offsetBits = !if (!eq(ImmOpStr, "s11_3Ext"), src2{13-3}, + !if (!eq(ImmOpStr, "s11_2Ext"), src2{12-2}, + !if (!eq(ImmOpStr, "s11_1Ext"), src2{11-1}, + /* s11_0Ext */ src2{10-0}))); + // Store upper-half and store doubleword cannot be NV. + let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isH,0,1)); + let IClass = 0b1010; + + let Inst{27} = 0b0; + let Inst{26-25} = offsetBits{10-9}; + let Inst{24} = 0b1; + let Inst{23-21} = MajOp; + let Inst{20-16} = src1; + let Inst{13} = offsetBits{8}; + let Inst{12-8} = src3; + let Inst{7-0} = offsetBits{7-0}; + } + +let opExtendable = 2, isPredicated = 1 in +class T_pstore_io <string mnemonic, RegisterClass RC, Operand ImmOp, + bits<3>MajOp, bit PredNot, bit isPredNew, bit isH = 0> + : STInst <(outs), + (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4), + !if(PredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", + ") ")#mnemonic#"($src2+#$src3) = $src4"#!if(isH,".h",""), + [],"",V2LDST_tc_st_SLOT01 >, + AddrModeRel, ImmRegRel { + bits<2> src1; + bits<5> src2; + bits<9> src3; // Actual address offset + bits<5> src4; + bits<6> offsetBits; // Represents offset encoding + + let isPredicatedNew = isPredNew; + let isPredicatedFalse = PredNot; + + string ImmOpStr = !cast<string>(ImmOp); + let opExtentBits = !if (!eq(ImmOpStr, "u6_3Ext"), 9, + !if (!eq(ImmOpStr, "u6_2Ext"), 8, + !if (!eq(ImmOpStr, "u6_1Ext"), 7, + /* u6_0Ext */ 6))); + let offsetBits = !if (!eq(ImmOpStr, "u6_3Ext"), src3{8-3}, + !if (!eq(ImmOpStr, "u6_2Ext"), src3{7-2}, + !if (!eq(ImmOpStr, "u6_1Ext"), src3{6-1}, + /* u6_0Ext */ src3{5-0}))); + // Store upper-half and store doubleword cannot be NV. 
+ let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isH,0,1)); + + let IClass = 0b0100; + + let Inst{27} = 0b0; + let Inst{26} = PredNot; + let Inst{25} = isPredNew; + let Inst{24} = 0b0; + let Inst{23-21} = MajOp; + let Inst{20-16} = src2; + let Inst{13} = offsetBits{5}; + let Inst{12-8} = src4; + let Inst{7-3} = offsetBits{4-0}; + let Inst{1-0} = src1; + } + +let isExtendable = 1, hasSideEffects = 0 in +multiclass ST_Idxd<string mnemonic, string CextOp, RegisterClass RC, + Operand ImmOp, Operand predImmOp, bits<3> MajOp, bit isH = 0> { + let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed in { + def S2_#NAME#_io : T_store_io <mnemonic, RC, ImmOp, MajOp, isH>; + + // Predicated + def S2_p#NAME#t_io : T_pstore_io<mnemonic, RC, predImmOp, MajOp, 0, 0, isH>; + def S2_p#NAME#f_io : T_pstore_io<mnemonic, RC, predImmOp, MajOp, 1, 0, isH>; + + // Predicated new + def S4_p#NAME#tnew_io : T_pstore_io <mnemonic, RC, predImmOp, + MajOp, 0, 1, isH>; + def S4_p#NAME#fnew_io : T_pstore_io <mnemonic, RC, predImmOp, + MajOp, 1, 1, isH>; + } +} + +let addrMode = BaseImmOffset, InputType = "imm" in { + let accessSize = ByteAccess in + defm storerb: ST_Idxd < "memb", "STrib", IntRegs, s11_0Ext, u6_0Ext, 0b000>; + + let accessSize = HalfWordAccess, opExtentAlign = 1 in + defm storerh: ST_Idxd < "memh", "STrih", IntRegs, s11_1Ext, u6_1Ext, 0b010>; + + let accessSize = WordAccess, opExtentAlign = 2 in + defm storeri: ST_Idxd < "memw", "STriw", IntRegs, s11_2Ext, u6_2Ext, 0b100>; + + let accessSize = DoubleWordAccess, isNVStorable = 0, opExtentAlign = 3 in + defm storerd: ST_Idxd < "memd", "STrid", DoubleRegs, s11_3Ext, + u6_3Ext, 0b110>; + + let accessSize = HalfWordAccess, opExtentAlign = 1 in + defm storerf: ST_Idxd < "memh", "STrif", IntRegs, s11_1Ext, + u6_1Ext, 0b011, 1>; +} + +// Patterns for generating stores, where the address takes different forms: +// - frameindex, +// - frameindex + offset, +// - base + offset, +// - simple (base address without offset). +// These would usually be used together (via Storex_pat defined below), but +// in some cases one may want to apply different properties (such as +// AddedComplexity) to the individual patterns. +class Storex_fi_pat<PatFrag Store, PatFrag Value, InstHexagon MI> + : Pat<(Store Value:$Rs, AddrFI:$fi), (MI AddrFI:$fi, 0, Value:$Rs)>; +class Storex_fi_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred, + InstHexagon MI> + : Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)), + (MI AddrFI:$fi, imm:$Off, Value:$Rs)>; +class Storex_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred, + InstHexagon MI> + : Pat<(Store Value:$Rt, (add (i32 IntRegs:$Rs), ImmPred:$Off)), + (MI IntRegs:$Rs, imm:$Off, Value:$Rt)>; +class Storex_simple_pat<PatFrag Store, PatFrag Value, InstHexagon MI> + : Pat<(Store Value:$Rt, (i32 IntRegs:$Rs)), + (MI IntRegs:$Rs, 0, Value:$Rt)>; + +// Patterns for generating stores, where the address takes different forms, +// and where the value being stored is transformed through the value modifier +// ValueMod. The address forms are same as above. 
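A small C sketch (illustrative only, names invented) of a store whose value must first be narrowed, which is the case the ValueMod variants below address: the stored i64 is expected to go through the LoReg modifier before an ordinary byte store such as S2_storerb_io.

    #include <stdint.h>

    void store_low_byte(uint8_t *p, uint64_t v) {
        /* truncstorei8 of a 64-bit value: expected to be selected via the
           Storexm patterns with the LoReg value modifier. */
        *p = (uint8_t)v;
    }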
+class Storexm_fi_pat<PatFrag Store, PatFrag Value, PatFrag ValueMod, + InstHexagon MI> + : Pat<(Store Value:$Rs, AddrFI:$fi), + (MI AddrFI:$fi, 0, (ValueMod Value:$Rs))>; +class Storexm_fi_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred, + PatFrag ValueMod, InstHexagon MI> + : Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)), + (MI AddrFI:$fi, imm:$Off, (ValueMod Value:$Rs))>; +class Storexm_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred, + PatFrag ValueMod, InstHexagon MI> + : Pat<(Store Value:$Rt, (add (i32 IntRegs:$Rs), ImmPred:$Off)), + (MI IntRegs:$Rs, imm:$Off, (ValueMod Value:$Rt))>; +class Storexm_simple_pat<PatFrag Store, PatFrag Value, PatFrag ValueMod, + InstHexagon MI> + : Pat<(Store Value:$Rt, (i32 IntRegs:$Rs)), + (MI IntRegs:$Rs, 0, (ValueMod Value:$Rt))>; + +multiclass Storex_pat<PatFrag Store, PatFrag Value, PatLeaf ImmPred, + InstHexagon MI> { + def: Storex_fi_pat <Store, Value, MI>; + def: Storex_fi_add_pat <Store, Value, ImmPred, MI>; + def: Storex_add_pat <Store, Value, ImmPred, MI>; +} + +multiclass Storexm_pat<PatFrag Store, PatFrag Value, PatLeaf ImmPred, + PatFrag ValueMod, InstHexagon MI> { + def: Storexm_fi_pat <Store, Value, ValueMod, MI>; + def: Storexm_fi_add_pat <Store, Value, ImmPred, ValueMod, MI>; + def: Storexm_add_pat <Store, Value, ImmPred, ValueMod, MI>; +} + +// Regular stores in the DAG have two operands: value and address. +// Atomic stores also have two, but they are reversed: address, value. +// To use atomic stores with the patterns, they need to have their operands +// swapped. This relies on the knowledge that the F.Fragment uses names +// "ptr" and "val". +class SwapSt<PatFrag F> + : PatFrag<(ops node:$val, node:$ptr), F.Fragment>; + +let AddedComplexity = 20 in { + defm: Storex_pat<truncstorei8, I32, s32_0ImmPred, S2_storerb_io>; + defm: Storex_pat<truncstorei16, I32, s31_1ImmPred, S2_storerh_io>; + defm: Storex_pat<store, I32, s30_2ImmPred, S2_storeri_io>; + defm: Storex_pat<store, I64, s29_3ImmPred, S2_storerd_io>; + + defm: Storex_pat<SwapSt<atomic_store_8>, I32, s32_0ImmPred, S2_storerb_io>; + defm: Storex_pat<SwapSt<atomic_store_16>, I32, s31_1ImmPred, S2_storerh_io>; + defm: Storex_pat<SwapSt<atomic_store_32>, I32, s30_2ImmPred, S2_storeri_io>; + defm: Storex_pat<SwapSt<atomic_store_64>, I64, s29_3ImmPred, S2_storerd_io>; +} + +// Simple patterns should be tried with the least priority. +def: Storex_simple_pat<truncstorei8, I32, S2_storerb_io>; +def: Storex_simple_pat<truncstorei16, I32, S2_storerh_io>; +def: Storex_simple_pat<store, I32, S2_storeri_io>; +def: Storex_simple_pat<store, I64, S2_storerd_io>; + +def: Storex_simple_pat<SwapSt<atomic_store_8>, I32, S2_storerb_io>; +def: Storex_simple_pat<SwapSt<atomic_store_16>, I32, S2_storerh_io>; +def: Storex_simple_pat<SwapSt<atomic_store_32>, I32, S2_storeri_io>; +def: Storex_simple_pat<SwapSt<atomic_store_64>, I64, S2_storerd_io>; + +let AddedComplexity = 20 in { + defm: Storexm_pat<truncstorei8, I64, s32_0ImmPred, LoReg, S2_storerb_io>; + defm: Storexm_pat<truncstorei16, I64, s31_1ImmPred, LoReg, S2_storerh_io>; + defm: Storexm_pat<truncstorei32, I64, s30_2ImmPred, LoReg, S2_storeri_io>; +} + +def: Storexm_simple_pat<truncstorei8, I64, LoReg, S2_storerb_io>; +def: Storexm_simple_pat<truncstorei16, I64, LoReg, S2_storerh_io>; +def: Storexm_simple_pat<truncstorei32, I64, LoReg, S2_storeri_io>; + +// Store predicate. 
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13, + isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in +def STriw_pred : STInst<(outs), + (ins IntRegs:$addr, s11_2Ext:$off, PredRegs:$src1), + ".error \"should not emit\"", []>; + +// S2_allocframe: Allocate stack frame. +let Defs = [R29, R30], Uses = [R29, R31, R30], + hasSideEffects = 0, accessSize = DoubleWordAccess in +def S2_allocframe: ST0Inst < + (outs), (ins u11_3Imm:$u11_3), + "allocframe(#$u11_3)" > { + bits<14> u11_3; + + let IClass = 0b1010; + let Inst{27-16} = 0b000010011101; + let Inst{13-11} = 0b000; + let Inst{10-0} = u11_3{13-3}; + } + +// S2_storer[bhwdf]_pci: Store byte/half/word/double. +// S2_storer[bhwdf]_pci -> S2_storerbnew_pci +let Uses = [CS] in +class T_store_pci <string mnemonic, RegisterClass RC, + Operand Imm, bits<4>MajOp, + MemAccessSize AlignSize, string RegSrc = "Rt"> + : STInst <(outs IntRegs:$_dst_), + (ins IntRegs:$Rz, Imm:$offset, ModRegs:$Mu, RC:$Rt), + #mnemonic#"($Rz ++ #$offset:circ($Mu)) = $"#RegSrc#"", + [] , + "$Rz = $_dst_" > { + bits<5> Rz; + bits<7> offset; + bits<1> Mu; + bits<5> Rt; + let accessSize = AlignSize; + let isNVStorable = !if(!eq(mnemonic,"memd"), 0, + !if(!eq(RegSrc,"Rt.h"), 0, 1)); + + let IClass = 0b1010; + let Inst{27-25} = 0b100; + let Inst{24-21} = MajOp; + let Inst{20-16} = Rz; + let Inst{13} = Mu; + let Inst{12-8} = Rt; + let Inst{7} = 0b0; + let Inst{6-3} = + !if (!eq(!cast<string>(AlignSize), "DoubleWordAccess"), offset{6-3}, + !if (!eq(!cast<string>(AlignSize), "WordAccess"), offset{5-2}, + !if (!eq(!cast<string>(AlignSize), "HalfWordAccess"), offset{4-1}, + /* ByteAccess */ offset{3-0}))); + let Inst{1} = 0b0; + } + +def S2_storerb_pci : T_store_pci<"memb", IntRegs, s4_0Imm, 0b1000, + ByteAccess>; +def S2_storerh_pci : T_store_pci<"memh", IntRegs, s4_1Imm, 0b1010, + HalfWordAccess>; +def S2_storerf_pci : T_store_pci<"memh", IntRegs, s4_1Imm, 0b1011, + HalfWordAccess, "Rt.h">; +def S2_storeri_pci : T_store_pci<"memw", IntRegs, s4_2Imm, 0b1100, + WordAccess>; +def S2_storerd_pci : T_store_pci<"memd", DoubleRegs, s4_3Imm, 0b1110, + DoubleWordAccess>; + +let Uses = [CS], isNewValue = 1, mayStore = 1, isNVStore = 1, opNewValue = 4 in +class T_storenew_pci <string mnemonic, Operand Imm, + bits<2>MajOp, MemAccessSize AlignSize> + : NVInst < (outs IntRegs:$_dst_), + (ins IntRegs:$Rz, Imm:$offset, ModRegs:$Mu, IntRegs:$Nt), + #mnemonic#"($Rz ++ #$offset:circ($Mu)) = $Nt.new", + [], + "$Rz = $_dst_"> { + bits<5> Rz; + bits<6> offset; + bits<1> Mu; + bits<3> Nt; + + let accessSize = AlignSize; + + let IClass = 0b1010; + let Inst{27-21} = 0b1001101; + let Inst{20-16} = Rz; + let Inst{13} = Mu; + let Inst{12-11} = MajOp; + let Inst{10-8} = Nt; + let Inst{7} = 0b0; + let Inst{6-3} = + !if (!eq(!cast<string>(AlignSize), "WordAccess"), offset{5-2}, + !if (!eq(!cast<string>(AlignSize), "HalfWordAccess"), offset{4-1}, + /* ByteAccess */ offset{3-0})); + let Inst{1} = 0b0; + } + +def S2_storerbnew_pci : T_storenew_pci <"memb", s4_0Imm, 0b00, ByteAccess>; +def S2_storerhnew_pci : T_storenew_pci <"memh", s4_1Imm, 0b01, HalfWordAccess>; +def S2_storerinew_pci : T_storenew_pci <"memw", s4_2Imm, 0b10, WordAccess>; + +//===----------------------------------------------------------------------===// +// Circular stores - Pseudo +// +// Please note that the input operand order in the pseudo instructions +// doesn't match with the real instructions. Pseudo instructions operand +// order should mimics the ordering in the intrinsics. 
+//===----------------------------------------------------------------------===// +let isCodeGenOnly = 1, mayStore = 1, hasSideEffects = 0, isPseudo = 1 in +class T_store_pci_pseudo <string opc, RegisterClass RC> + : STInstPI<(outs IntRegs:$_dst_), + (ins IntRegs:$src1, RC:$src2, IntRegs:$src3, s4Imm:$src4), + ".error \""#opc#"($src1++#$src4:circ($src3)) = $src2\"", + [], "$_dst_ = $src1">; + +def S2_storerb_pci_pseudo : T_store_pci_pseudo <"memb", IntRegs>; +def S2_storerh_pci_pseudo : T_store_pci_pseudo <"memh", IntRegs>; +def S2_storerf_pci_pseudo : T_store_pci_pseudo <"memh", IntRegs>; +def S2_storeri_pci_pseudo : T_store_pci_pseudo <"memw", IntRegs>; +def S2_storerd_pci_pseudo : T_store_pci_pseudo <"memd", DoubleRegs>; + +//===----------------------------------------------------------------------===// +// Circular stores with auto-increment register +//===----------------------------------------------------------------------===// +let Uses = [CS] in +class T_store_pcr <string mnemonic, RegisterClass RC, bits<4>MajOp, + MemAccessSize AlignSize, string RegSrc = "Rt"> + : STInst <(outs IntRegs:$_dst_), + (ins IntRegs:$Rz, ModRegs:$Mu, RC:$Rt), + #mnemonic#"($Rz ++ I:circ($Mu)) = $"#RegSrc#"", + [], + "$Rz = $_dst_" > { + bits<5> Rz; + bits<1> Mu; + bits<5> Rt; + + let accessSize = AlignSize; + let isNVStorable = !if(!eq(mnemonic,"memd"), 0, + !if(!eq(RegSrc,"Rt.h"), 0, 1)); + + let IClass = 0b1010; + let Inst{27-25} = 0b100; + let Inst{24-21} = MajOp; + let Inst{20-16} = Rz; + let Inst{13} = Mu; + let Inst{12-8} = Rt; + let Inst{7} = 0b0; + let Inst{1} = 0b1; + } + +def S2_storerb_pcr : T_store_pcr<"memb", IntRegs, 0b1000, ByteAccess>; +def S2_storerh_pcr : T_store_pcr<"memh", IntRegs, 0b1010, HalfWordAccess>; +def S2_storeri_pcr : T_store_pcr<"memw", IntRegs, 0b1100, WordAccess>; +def S2_storerd_pcr : T_store_pcr<"memd", DoubleRegs, 0b1110, DoubleWordAccess>; +def S2_storerf_pcr : T_store_pcr<"memh", IntRegs, 0b1011, + HalfWordAccess, "Rt.h">; + +//===----------------------------------------------------------------------===// +// Circular .new stores with auto-increment register +//===----------------------------------------------------------------------===// +let Uses = [CS], isNewValue = 1, mayStore = 1, isNVStore = 1, opNewValue = 3 in +class T_storenew_pcr <string mnemonic, bits<2>MajOp, + MemAccessSize AlignSize> + : NVInst <(outs IntRegs:$_dst_), + (ins IntRegs:$Rz, ModRegs:$Mu, IntRegs:$Nt), + #mnemonic#"($Rz ++ I:circ($Mu)) = $Nt.new" , + [] , + "$Rz = $_dst_"> { + bits<5> Rz; + bits<1> Mu; + bits<3> Nt; + + let accessSize = AlignSize; + + let IClass = 0b1010; + let Inst{27-21} = 0b1001101; + let Inst{20-16} = Rz; + let Inst{13} = Mu; + let Inst{12-11} = MajOp; + let Inst{10-8} = Nt; + let Inst{7} = 0b0; + let Inst{1} = 0b1; + } + +def S2_storerbnew_pcr : T_storenew_pcr <"memb", 0b00, ByteAccess>; +def S2_storerhnew_pcr : T_storenew_pcr <"memh", 0b01, HalfWordAccess>; +def S2_storerinew_pcr : T_storenew_pcr <"memw", 0b10, WordAccess>; + +//===----------------------------------------------------------------------===// +// Bit-reversed stores with auto-increment register +//===----------------------------------------------------------------------===// +let hasSideEffects = 0 in +class T_store_pbr<string mnemonic, RegisterClass RC, + MemAccessSize addrSize, bits<3> majOp, + bit isHalf = 0> + : STInst + <(outs IntRegs:$_dst_), + (ins IntRegs:$Rz, ModRegs:$Mu, RC:$src), + #mnemonic#"($Rz ++ $Mu:brev) = $src"#!if (!eq(isHalf, 1), ".h", ""), + [], "$Rz = $_dst_" > { + + let accessSize 
= addrSize; + + bits<5> Rz; + bits<1> Mu; + bits<5> src; + + let IClass = 0b1010; + + let Inst{27-24} = 0b1111; + let Inst{23-21} = majOp; + let Inst{7} = 0b0; + let Inst{20-16} = Rz; + let Inst{13} = Mu; + let Inst{12-8} = src; + } + +let isNVStorable = 1 in { + let BaseOpcode = "S2_storerb_pbr" in + def S2_storerb_pbr : T_store_pbr<"memb", IntRegs, ByteAccess, + 0b000>, NewValueRel; + let BaseOpcode = "S2_storerh_pbr" in + def S2_storerh_pbr : T_store_pbr<"memh", IntRegs, HalfWordAccess, + 0b010>, NewValueRel; + let BaseOpcode = "S2_storeri_pbr" in + def S2_storeri_pbr : T_store_pbr<"memw", IntRegs, WordAccess, + 0b100>, NewValueRel; +} + +def S2_storerf_pbr : T_store_pbr<"memh", IntRegs, HalfWordAccess, 0b011, 1>; +def S2_storerd_pbr : T_store_pbr<"memd", DoubleRegs, DoubleWordAccess, 0b110>; + +//===----------------------------------------------------------------------===// +// Bit-reversed .new stores with auto-increment register +//===----------------------------------------------------------------------===// +let isNewValue = 1, mayStore = 1, isNVStore = 1, opNewValue = 3, + hasSideEffects = 0 in +class T_storenew_pbr<string mnemonic, MemAccessSize addrSize, bits<2> majOp> + : NVInst <(outs IntRegs:$_dst_), + (ins IntRegs:$Rz, ModRegs:$Mu, IntRegs:$Nt), + #mnemonic#"($Rz ++ $Mu:brev) = $Nt.new", [], + "$Rz = $_dst_">, NewValueRel { + let accessSize = addrSize; + bits<5> Rz; + bits<1> Mu; + bits<3> Nt; + + let IClass = 0b1010; + + let Inst{27-21} = 0b1111101; + let Inst{12-11} = majOp; + let Inst{7} = 0b0; + let Inst{20-16} = Rz; + let Inst{13} = Mu; + let Inst{10-8} = Nt; + } + +let BaseOpcode = "S2_storerb_pbr" in +def S2_storerbnew_pbr : T_storenew_pbr<"memb", ByteAccess, 0b00>; + +let BaseOpcode = "S2_storerh_pbr" in +def S2_storerhnew_pbr : T_storenew_pbr<"memh", HalfWordAccess, 0b01>; + +let BaseOpcode = "S2_storeri_pbr" in +def S2_storerinew_pbr : T_storenew_pbr<"memw", WordAccess, 0b10>; + +//===----------------------------------------------------------------------===// +// Bit-reversed stores - Pseudo +// +// Please note that the input operand order in the pseudo instructions +// doesn't match with the real instructions. Pseudo instructions operand +// order should mimics the ordering in the intrinsics. +//===----------------------------------------------------------------------===// +let isCodeGenOnly = 1, mayStore = 1, hasSideEffects = 0, isPseudo = 1 in +class T_store_pbr_pseudo <string opc, RegisterClass RC> + : STInstPI<(outs IntRegs:$_dst_), + (ins IntRegs:$src1, RC:$src2, IntRegs:$src3), + ".error \""#opc#"($src1++$src3:brev) = $src2\"", + [], "$_dst_ = $src1">; + +def S2_storerb_pbr_pseudo : T_store_pbr_pseudo <"memb", IntRegs>; +def S2_storerh_pbr_pseudo : T_store_pbr_pseudo <"memh", IntRegs>; +def S2_storeri_pbr_pseudo : T_store_pbr_pseudo <"memw", IntRegs>; +def S2_storerf_pbr_pseudo : T_store_pbr_pseudo <"memh", IntRegs>; +def S2_storerd_pbr_pseudo : T_store_pbr_pseudo <"memd", DoubleRegs>; + +//===----------------------------------------------------------------------===// +// ST - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Template class for S_2op instructions. 
+//===----------------------------------------------------------------------===// +let hasSideEffects = 0 in +class T_S2op_1 <string mnemonic, bits<4> RegTyBits, RegisterClass RCOut, + RegisterClass RCIn, bits<2> MajOp, bits<3> MinOp, bit isSat> + : SInst <(outs RCOut:$dst), (ins RCIn:$src), + "$dst = "#mnemonic#"($src)"#!if(isSat, ":sat", ""), + [], "", S_2op_tc_1_SLOT23 > { + bits<5> dst; + bits<5> src; + + let IClass = 0b1000; + + let Inst{27-24} = RegTyBits; + let Inst{23-22} = MajOp; + let Inst{21} = 0b0; + let Inst{20-16} = src; + let Inst{7-5} = MinOp; + let Inst{4-0} = dst; + } + +class T_S2op_1_di <string mnemonic, bits<2> MajOp, bits<3> MinOp> + : T_S2op_1 <mnemonic, 0b0100, DoubleRegs, IntRegs, MajOp, MinOp, 0>; + +let hasNewValue = 1 in +class T_S2op_1_id <string mnemonic, bits<2> MajOp, bits<3> MinOp, bit isSat = 0> + : T_S2op_1 <mnemonic, 0b1000, IntRegs, DoubleRegs, MajOp, MinOp, isSat>; + +let hasNewValue = 1 in +class T_S2op_1_ii <string mnemonic, bits<2> MajOp, bits<3> MinOp, bit isSat = 0> + : T_S2op_1 <mnemonic, 0b1100, IntRegs, IntRegs, MajOp, MinOp, isSat>; + +// Vector sign/zero extend +let isReMaterializable = 1, isAsCheapAsAMove = 1 in { + def S2_vsxtbh : T_S2op_1_di <"vsxtbh", 0b00, 0b000>; + def S2_vsxthw : T_S2op_1_di <"vsxthw", 0b00, 0b100>; + def S2_vzxtbh : T_S2op_1_di <"vzxtbh", 0b00, 0b010>; + def S2_vzxthw : T_S2op_1_di <"vzxthw", 0b00, 0b110>; +} + +// Vector splat bytes/halfwords +let isReMaterializable = 1, isAsCheapAsAMove = 1 in { + def S2_vsplatrb : T_S2op_1_ii <"vsplatb", 0b01, 0b111>; + def S2_vsplatrh : T_S2op_1_di <"vsplath", 0b01, 0b010>; +} + +// Sign extend word to doubleword +def A2_sxtw : T_S2op_1_di <"sxtw", 0b01, 0b000>; + +def: Pat <(i64 (sext I32:$src)), (A2_sxtw I32:$src)>; + +// Vector saturate and pack +let Defs = [USR_OVF] in { + def S2_svsathb : T_S2op_1_ii <"vsathb", 0b10, 0b000>; + def S2_svsathub : T_S2op_1_ii <"vsathub", 0b10, 0b010>; + def S2_vsathb : T_S2op_1_id <"vsathb", 0b00, 0b110>; + def S2_vsathub : T_S2op_1_id <"vsathub", 0b00, 0b000>; + def S2_vsatwh : T_S2op_1_id <"vsatwh", 0b00, 0b010>; + def S2_vsatwuh : T_S2op_1_id <"vsatwuh", 0b00, 0b100>; +} + +// Vector truncate +def S2_vtrunohb : T_S2op_1_id <"vtrunohb", 0b10, 0b000>; +def S2_vtrunehb : T_S2op_1_id <"vtrunehb", 0b10, 0b010>; + +// Swizzle the bytes of a word +def A2_swiz : T_S2op_1_ii <"swiz", 0b10, 0b111>; + +// Saturate +let Defs = [USR_OVF] in { + def A2_sat : T_S2op_1_id <"sat", 0b11, 0b000>; + def A2_satb : T_S2op_1_ii <"satb", 0b11, 0b111>; + def A2_satub : T_S2op_1_ii <"satub", 0b11, 0b110>; + def A2_sath : T_S2op_1_ii <"sath", 0b11, 0b100>; + def A2_satuh : T_S2op_1_ii <"satuh", 0b11, 0b101>; + def A2_roundsat : T_S2op_1_id <"round", 0b11, 0b001, 0b1>; +} + +let Itinerary = S_2op_tc_2_SLOT23 in { + // Vector round and pack + def S2_vrndpackwh : T_S2op_1_id <"vrndwh", 0b10, 0b100>; + + let Defs = [USR_OVF] in + def S2_vrndpackwhs : T_S2op_1_id <"vrndwh", 0b10, 0b110, 1>; + + // Bit reverse + def S2_brev : T_S2op_1_ii <"brev", 0b01, 0b110>; + + // Absolute value word + def A2_abs : T_S2op_1_ii <"abs", 0b10, 0b100>; + + let Defs = [USR_OVF] in + def A2_abssat : T_S2op_1_ii <"abs", 0b10, 0b101, 1>; + + // Negate with saturation + let Defs = [USR_OVF] in + def A2_negsat : T_S2op_1_ii <"neg", 0b10, 0b110, 1>; +} + +def: Pat<(i32 (select (i1 (setlt (i32 IntRegs:$src), 0)), + (i32 (sub 0, (i32 IntRegs:$src))), + (i32 IntRegs:$src))), + (A2_abs IntRegs:$src)>; + +let AddedComplexity = 50 in +def: Pat<(i32 (xor (add (sra (i32 IntRegs:$src), (i32 31)), + (i32 
IntRegs:$src)), + (sra (i32 IntRegs:$src), (i32 31)))), + (A2_abs IntRegs:$src)>; + +class T_S2op_2 <string mnemonic, bits<4> RegTyBits, RegisterClass RCOut, + RegisterClass RCIn, bits<3> MajOp, bits<3> MinOp, + bit isSat, bit isRnd, list<dag> pattern = []> + : SInst <(outs RCOut:$dst), + (ins RCIn:$src, u5Imm:$u5), + "$dst = "#mnemonic#"($src, #$u5)"#!if(isSat, ":sat", "") + #!if(isRnd, ":rnd", ""), + pattern, "", S_2op_tc_2_SLOT23> { + bits<5> dst; + bits<5> src; + bits<5> u5; + + let IClass = 0b1000; + + let Inst{27-24} = RegTyBits; + let Inst{23-21} = MajOp; + let Inst{20-16} = src; + let Inst{13} = 0b0; + let Inst{12-8} = u5; + let Inst{7-5} = MinOp; + let Inst{4-0} = dst; + } + +class T_S2op_2_di <string mnemonic, bits<3> MajOp, bits<3> MinOp> + : T_S2op_2 <mnemonic, 0b1000, DoubleRegs, IntRegs, MajOp, MinOp, 0, 0>; + +let hasNewValue = 1 in +class T_S2op_2_id <string mnemonic, bits<3> MajOp, bits<3> MinOp> + : T_S2op_2 <mnemonic, 0b1000, IntRegs, DoubleRegs, MajOp, MinOp, 0, 0>; + +let hasNewValue = 1 in +class T_S2op_2_ii <string mnemonic, bits<3> MajOp, bits<3> MinOp, + bit isSat = 0, bit isRnd = 0, list<dag> pattern = []> + : T_S2op_2 <mnemonic, 0b1100, IntRegs, IntRegs, MajOp, MinOp, + isSat, isRnd, pattern>; + +class T_S2op_shift <string mnemonic, bits<3> MajOp, bits<3> MinOp, SDNode OpNd> + : T_S2op_2_ii <mnemonic, MajOp, MinOp, 0, 0, + [(set (i32 IntRegs:$dst), (OpNd (i32 IntRegs:$src), + (u5ImmPred:$u5)))]>; + +// Vector arithmetic shift right by immediate with truncate and pack +def S2_asr_i_svw_trun : T_S2op_2_id <"vasrw", 0b110, 0b010>; + +// Arithmetic/logical shift right/left by immediate +let Itinerary = S_2op_tc_1_SLOT23 in { + def S2_asr_i_r : T_S2op_shift <"asr", 0b000, 0b000, sra>; + def S2_lsr_i_r : T_S2op_shift <"lsr", 0b000, 0b001, srl>; + def S2_asl_i_r : T_S2op_shift <"asl", 0b000, 0b010, shl>; +} + +// Shift left by immediate with saturation +let Defs = [USR_OVF] in +def S2_asl_i_r_sat : T_S2op_2_ii <"asl", 0b010, 0b010, 1>; + +// Shift right with round +def S2_asr_i_r_rnd : T_S2op_2_ii <"asr", 0b010, 0b000, 0, 1>; + +let isAsmParserOnly = 1 in +def S2_asr_i_r_rnd_goodsyntax + : SInst <(outs IntRegs:$dst), (ins IntRegs:$src, u5Imm:$u5), + "$dst = asrrnd($src, #$u5)", + [], "", S_2op_tc_1_SLOT23>; + +let isAsmParserOnly = 1 in +def A2_not: ALU32_rr<(outs IntRegs:$dst),(ins IntRegs:$src), + "$dst = not($src)">; + +def: Pat<(i32 (sra (i32 (add (i32 (sra I32:$src1, u5ImmPred:$src2)), + (i32 1))), + (i32 1))), + (S2_asr_i_r_rnd IntRegs:$src1, u5ImmPred:$src2)>; + +class T_S2op_3<string opc, bits<2>MajOp, bits<3>minOp, bits<1> sat = 0> + : SInst<(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss), + "$Rdd = "#opc#"($Rss)"#!if(!eq(sat, 1),":sat","")> { + bits<5> Rss; + bits<5> Rdd; + let IClass = 0b1000; + let Inst{27-24} = 0; + let Inst{23-22} = MajOp; + let Inst{20-16} = Rss; + let Inst{7-5} = minOp; + let Inst{4-0} = Rdd; +} + +def A2_absp : T_S2op_3 <"abs", 0b10, 0b110>; +def A2_negp : T_S2op_3 <"neg", 0b10, 0b101>; +def A2_notp : T_S2op_3 <"not", 0b10, 0b100>; + +// Interleave/deinterleave +def S2_interleave : T_S2op_3 <"interleave", 0b11, 0b101>; +def S2_deinterleave : T_S2op_3 <"deinterleave", 0b11, 0b100>; + +// Vector Complex conjugate +def A2_vconj : T_S2op_3 <"vconj", 0b10, 0b111, 1>; + +// Vector saturate without pack +def S2_vsathb_nopack : T_S2op_3 <"vsathb", 0b00, 0b111>; +def S2_vsathub_nopack : T_S2op_3 <"vsathub", 0b00, 0b100>; +def S2_vsatwh_nopack : T_S2op_3 <"vsatwh", 0b00, 0b110>; +def S2_vsatwuh_nopack : T_S2op_3 <"vsatwuh", 0b00, 0b101>; + +// 
Vector absolute value halfwords with and without saturation +// Rdd64=vabsh(Rss64)[:sat] +def A2_vabsh : T_S2op_3 <"vabsh", 0b01, 0b100>; +def A2_vabshsat : T_S2op_3 <"vabsh", 0b01, 0b101, 1>; + +// Vector absolute value words with and without saturation +def A2_vabsw : T_S2op_3 <"vabsw", 0b01, 0b110>; +def A2_vabswsat : T_S2op_3 <"vabsw", 0b01, 0b111, 1>; + +def : Pat<(not (i64 DoubleRegs:$src1)), + (A2_notp DoubleRegs:$src1)>; + +//===----------------------------------------------------------------------===// +// STYPE/BIT + +//===----------------------------------------------------------------------===// +// Bit count + +let hasSideEffects = 0, hasNewValue = 1 in +class T_COUNT_LEADING<string MnOp, bits<3> MajOp, bits<3> MinOp, bit Is32, + dag Out, dag Inp> + : SInst<Out, Inp, "$Rd = "#MnOp#"($Rs)", [], "", S_2op_tc_1_SLOT23> { + bits<5> Rs; + bits<5> Rd; + let IClass = 0b1000; + let Inst{27} = 0b1; + let Inst{26} = Is32; + let Inst{25-24} = 0b00; + let Inst{23-21} = MajOp; + let Inst{20-16} = Rs; + let Inst{7-5} = MinOp; + let Inst{4-0} = Rd; +} + +class T_COUNT_LEADING_32<string MnOp, bits<3> MajOp, bits<3> MinOp> + : T_COUNT_LEADING<MnOp, MajOp, MinOp, 0b1, + (outs IntRegs:$Rd), (ins IntRegs:$Rs)>; + +class T_COUNT_LEADING_64<string MnOp, bits<3> MajOp, bits<3> MinOp> + : T_COUNT_LEADING<MnOp, MajOp, MinOp, 0b0, + (outs IntRegs:$Rd), (ins DoubleRegs:$Rs)>; + +def S2_cl0 : T_COUNT_LEADING_32<"cl0", 0b000, 0b101>; +def S2_cl1 : T_COUNT_LEADING_32<"cl1", 0b000, 0b110>; +def S2_ct0 : T_COUNT_LEADING_32<"ct0", 0b010, 0b100>; +def S2_ct1 : T_COUNT_LEADING_32<"ct1", 0b010, 0b101>; +def S2_cl0p : T_COUNT_LEADING_64<"cl0", 0b010, 0b010>; +def S2_cl1p : T_COUNT_LEADING_64<"cl1", 0b010, 0b100>; +def S2_clb : T_COUNT_LEADING_32<"clb", 0b000, 0b100>; +def S2_clbp : T_COUNT_LEADING_64<"clb", 0b010, 0b000>; +def S2_clbnorm : T_COUNT_LEADING_32<"normamt", 0b000, 0b111>; + +// Count leading zeros. +def: Pat<(i32 (ctlz I32:$Rs)), (S2_cl0 I32:$Rs)>; +def: Pat<(i32 (trunc (ctlz I64:$Rss))), (S2_cl0p I64:$Rss)>; +def: Pat<(i32 (ctlz_zero_undef I32:$Rs)), (S2_cl0 I32:$Rs)>; +def: Pat<(i32 (trunc (ctlz_zero_undef I64:$Rss))), (S2_cl0p I64:$Rss)>; + +// Count trailing zeros: 32-bit. +def: Pat<(i32 (cttz I32:$Rs)), (S2_ct0 I32:$Rs)>; +def: Pat<(i32 (cttz_zero_undef I32:$Rs)), (S2_ct0 I32:$Rs)>; + +// Count leading ones. +def: Pat<(i32 (ctlz (not I32:$Rs))), (S2_cl1 I32:$Rs)>; +def: Pat<(i32 (trunc (ctlz (not I64:$Rss)))), (S2_cl1p I64:$Rss)>; +def: Pat<(i32 (ctlz_zero_undef (not I32:$Rs))), (S2_cl1 I32:$Rs)>; +def: Pat<(i32 (trunc (ctlz_zero_undef (not I64:$Rss)))), (S2_cl1p I64:$Rss)>; + +// Count trailing ones: 32-bit. +def: Pat<(i32 (cttz (not I32:$Rs))), (S2_ct1 I32:$Rs)>; +def: Pat<(i32 (cttz_zero_undef (not I32:$Rs))), (S2_ct1 I32:$Rs)>; + +// The 64-bit counts leading/trailing are defined in HexagonInstrInfoV4.td. 
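For illustration (not part of the patch), the C below shows the builtins that lower to the ctlz/cttz nodes matched above; the zero guards avoid the builtins' undefined-at-zero behavior, which the *_zero_undef patterns cover separately. Function names are invented.

    #include <stdint.h>

    int leading_zeros(uint32_t x) {
        /* ctlz: expected to select S2_cl0, "Rd = cl0(Rs)". */
        return x ? __builtin_clz(x) : 32;
    }

    int trailing_zeros(uint32_t x) {
        /* cttz: expected to select S2_ct0, "Rd = ct0(Rs)". */
        return x ? __builtin_ctz(x) : 32;
    }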
+ +// Bit set/clear/toggle + +let hasSideEffects = 0, hasNewValue = 1 in +class T_SCT_BIT_IMM<string MnOp, bits<3> MinOp> + : SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, u5Imm:$u5), + "$Rd = "#MnOp#"($Rs, #$u5)", [], "", S_2op_tc_1_SLOT23> { + bits<5> Rd; + bits<5> Rs; + bits<5> u5; + let IClass = 0b1000; + let Inst{27-21} = 0b1100110; + let Inst{20-16} = Rs; + let Inst{13} = 0b0; + let Inst{12-8} = u5; + let Inst{7-5} = MinOp; + let Inst{4-0} = Rd; +} + +let hasSideEffects = 0, hasNewValue = 1 in +class T_SCT_BIT_REG<string MnOp, bits<2> MinOp> + : SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt), + "$Rd = "#MnOp#"($Rs, $Rt)", [], "", S_3op_tc_1_SLOT23> { + bits<5> Rd; + bits<5> Rs; + bits<5> Rt; + let IClass = 0b1100; + let Inst{27-22} = 0b011010; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + let Inst{7-6} = MinOp; + let Inst{4-0} = Rd; +} + +def S2_clrbit_i : T_SCT_BIT_IMM<"clrbit", 0b001>; +def S2_setbit_i : T_SCT_BIT_IMM<"setbit", 0b000>; +def S2_togglebit_i : T_SCT_BIT_IMM<"togglebit", 0b010>; +def S2_clrbit_r : T_SCT_BIT_REG<"clrbit", 0b01>; +def S2_setbit_r : T_SCT_BIT_REG<"setbit", 0b00>; +def S2_togglebit_r : T_SCT_BIT_REG<"togglebit", 0b10>; + +def: Pat<(i32 (and (i32 IntRegs:$Rs), (not (shl 1, u5ImmPred:$u5)))), + (S2_clrbit_i IntRegs:$Rs, u5ImmPred:$u5)>; +def: Pat<(i32 (or (i32 IntRegs:$Rs), (shl 1, u5ImmPred:$u5))), + (S2_setbit_i IntRegs:$Rs, u5ImmPred:$u5)>; +def: Pat<(i32 (xor (i32 IntRegs:$Rs), (shl 1, u5ImmPred:$u5))), + (S2_togglebit_i IntRegs:$Rs, u5ImmPred:$u5)>; +def: Pat<(i32 (and (i32 IntRegs:$Rs), (not (shl 1, (i32 IntRegs:$Rt))))), + (S2_clrbit_r IntRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(i32 (or (i32 IntRegs:$Rs), (shl 1, (i32 IntRegs:$Rt)))), + (S2_setbit_r IntRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(i32 (xor (i32 IntRegs:$Rs), (shl 1, (i32 IntRegs:$Rt)))), + (S2_togglebit_r IntRegs:$Rs, IntRegs:$Rt)>; + +// Bit test + +let hasSideEffects = 0 in +class T_TEST_BIT_IMM<string MnOp, bits<3> MajOp> + : SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, u5Imm:$u5), + "$Pd = "#MnOp#"($Rs, #$u5)", + [], "", S_2op_tc_2early_SLOT23> { + bits<2> Pd; + bits<5> Rs; + bits<5> u5; + let IClass = 0b1000; + let Inst{27-24} = 0b0101; + let Inst{23-21} = MajOp; + let Inst{20-16} = Rs; + let Inst{13} = 0; + let Inst{12-8} = u5; + let Inst{1-0} = Pd; +} + +let hasSideEffects = 0 in +class T_TEST_BIT_REG<string MnOp, bit IsNeg> + : SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, IntRegs:$Rt), + "$Pd = "#MnOp#"($Rs, $Rt)", + [], "", S_3op_tc_2early_SLOT23> { + bits<2> Pd; + bits<5> Rs; + bits<5> Rt; + let IClass = 0b1100; + let Inst{27-22} = 0b011100; + let Inst{21} = IsNeg; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + let Inst{1-0} = Pd; +} + +def S2_tstbit_i : T_TEST_BIT_IMM<"tstbit", 0b000>; +def S2_tstbit_r : T_TEST_BIT_REG<"tstbit", 0>; + +let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm. 
+ def: Pat<(i1 (setne (and (shl 1, u5ImmPred:$u5), (i32 IntRegs:$Rs)), 0)), + (S2_tstbit_i IntRegs:$Rs, u5ImmPred:$u5)>; + def: Pat<(i1 (setne (and (shl 1, (i32 IntRegs:$Rt)), (i32 IntRegs:$Rs)), 0)), + (S2_tstbit_r IntRegs:$Rs, IntRegs:$Rt)>; + def: Pat<(i1 (trunc (i32 IntRegs:$Rs))), + (S2_tstbit_i IntRegs:$Rs, 0)>; + def: Pat<(i1 (trunc (i64 DoubleRegs:$Rs))), + (S2_tstbit_i (LoReg DoubleRegs:$Rs), 0)>; +} + +let hasSideEffects = 0 in +class T_TEST_BITS_IMM<string MnOp, bits<2> MajOp, bit IsNeg> + : SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, u6Imm:$u6), + "$Pd = "#MnOp#"($Rs, #$u6)", + [], "", S_2op_tc_2early_SLOT23> { + bits<2> Pd; + bits<5> Rs; + bits<6> u6; + let IClass = 0b1000; + let Inst{27-24} = 0b0101; + let Inst{23-22} = MajOp; + let Inst{21} = IsNeg; + let Inst{20-16} = Rs; + let Inst{13-8} = u6; + let Inst{1-0} = Pd; +} + +let hasSideEffects = 0 in +class T_TEST_BITS_REG<string MnOp, bits<2> MajOp, bit IsNeg> + : SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, IntRegs:$Rt), + "$Pd = "#MnOp#"($Rs, $Rt)", + [], "", S_3op_tc_2early_SLOT23> { + bits<2> Pd; + bits<5> Rs; + bits<5> Rt; + let IClass = 0b1100; + let Inst{27-24} = 0b0111; + let Inst{23-22} = MajOp; + let Inst{21} = IsNeg; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + let Inst{1-0} = Pd; +} + +def C2_bitsclri : T_TEST_BITS_IMM<"bitsclr", 0b10, 0>; +def C2_bitsclr : T_TEST_BITS_REG<"bitsclr", 0b10, 0>; +def C2_bitsset : T_TEST_BITS_REG<"bitsset", 0b01, 0>; + +let AddedComplexity = 20 in { // Complexity greater than compare reg-imm. + def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), u6ImmPred:$u6), 0)), + (C2_bitsclri IntRegs:$Rs, u6ImmPred:$u6)>; + def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), 0)), + (C2_bitsclr IntRegs:$Rs, IntRegs:$Rt)>; +} + +let AddedComplexity = 10 in // Complexity greater than compare reg-reg. +def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), IntRegs:$Rt)), + (C2_bitsset IntRegs:$Rs, IntRegs:$Rt)>; + +//===----------------------------------------------------------------------===// +// STYPE/BIT - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// STYPE/COMPLEX + +//===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// STYPE/COMPLEX - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// XTYPE/PERM + +//===----------------------------------------------------------------------===// + +def: Pat<(or (or (shl (or (shl (i32 (extloadi8 (add (i32 IntRegs:$b), 3))), + (i32 8)), + (i32 (zextloadi8 (add (i32 IntRegs:$b), 2)))), + (i32 16)), + (shl (i32 (zextloadi8 (add (i32 IntRegs:$b), 1))), (i32 8))), + (zextloadi8 (i32 IntRegs:$b))), + (A2_swiz (L2_loadri_io IntRegs:$b, 0))>; + +//===----------------------------------------------------------------------===// +// XTYPE/PERM - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// STYPE/PRED + +//===----------------------------------------------------------------------===// + +// Predicate transfer. 
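A minimal C sketch (name invented) of where the predicate transfers below typically show up: a boolean loaded from memory is read with a byte load and, per the i1-load patterns further down, moved into a predicate register with C2_tfrrp when it is consumed as an i1.

    #include <stdbool.h>

    bool load_flag(const bool *p) {
        /* Expected lowering: L2_loadrub_io followed by a transfer into a
           predicate register (C2_tfrrp), if the value is used as an i1. */
        return *p;
    }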
+let hasSideEffects = 0, hasNewValue = 1 in +def C2_tfrpr : SInst<(outs IntRegs:$Rd), (ins PredRegs:$Ps), + "$Rd = $Ps", [], "", S_2op_tc_1_SLOT23> { + bits<5> Rd; + bits<2> Ps; + + let IClass = 0b1000; + let Inst{27-24} = 0b1001; + let Inst{22} = 0b1; + let Inst{17-16} = Ps; + let Inst{4-0} = Rd; +} + +// Transfer general register to predicate. +let hasSideEffects = 0 in +def C2_tfrrp: SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs), + "$Pd = $Rs", [], "", S_2op_tc_2early_SLOT23> { + bits<2> Pd; + bits<5> Rs; + + let IClass = 0b1000; + let Inst{27-21} = 0b0101010; + let Inst{20-16} = Rs; + let Inst{1-0} = Pd; +} + +let hasSideEffects = 0, isCodeGenOnly = 1 in +def C2_pxfer_map: SInst<(outs PredRegs:$dst), (ins PredRegs:$src), + "$dst = $src">; + + +// Patterns for loads of i1: +def: Pat<(i1 (load AddrFI:$fi)), + (C2_tfrrp (L2_loadrub_io AddrFI:$fi, 0))>; +def: Pat<(i1 (load (add (i32 IntRegs:$Rs), s32ImmPred:$Off))), + (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, imm:$Off))>; +def: Pat<(i1 (load (i32 IntRegs:$Rs))), + (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, 0))>; + +def I1toI32: OutPatFrag<(ops node:$Rs), + (C2_muxii (i1 $Rs), 1, 0)>; + +def I32toI1: OutPatFrag<(ops node:$Rs), + (i1 (C2_tfrrp (i32 $Rs)))>; + +defm: Storexm_pat<store, I1, s32ImmPred, I1toI32, S2_storerb_io>; +def: Storexm_simple_pat<store, I1, I1toI32, S2_storerb_io>; + +//===----------------------------------------------------------------------===// +// STYPE/PRED - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// STYPE/SHIFT + +//===----------------------------------------------------------------------===// +class S_2OpInstImm<string Mnemonic, bits<3>MajOp, bits<3>MinOp, + Operand Imm, list<dag> pattern = [], bit isRnd = 0> + : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, Imm:$src2), + "$dst = "#Mnemonic#"($src1, #$src2)"#!if(isRnd, ":rnd", ""), + pattern> { + bits<5> src1; + bits<5> dst; + let IClass = 0b1000; + let Inst{27-24} = 0; + let Inst{23-21} = MajOp; + let Inst{20-16} = src1; + let Inst{7-5} = MinOp; + let Inst{4-0} = dst; +} + +class S_2OpInstImmI6<string Mnemonic, SDNode OpNode, bits<3>MinOp> + : S_2OpInstImm<Mnemonic, 0b000, MinOp, u6Imm, + [(set (i64 DoubleRegs:$dst), (OpNode (i64 DoubleRegs:$src1), + u6ImmPred:$src2))]> { + bits<6> src2; + let Inst{13-8} = src2; +} + +// Shift by immediate. +def S2_asr_i_p : S_2OpInstImmI6<"asr", sra, 0b000>; +def S2_asl_i_p : S_2OpInstImmI6<"asl", shl, 0b010>; +def S2_lsr_i_p : S_2OpInstImmI6<"lsr", srl, 0b001>; + +// Shift left by small amount and add. 
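As a quick illustration (function name invented), the C below is the add-of-shifted-operand shape that the S2_addasl_rrri pattern defined next is meant to match, "Rd = addasl(Rt, Rs, #u3)".

    unsigned scaled_add(unsigned base, unsigned idx) {
        /* base + (idx << 2): expected to select S2_addasl_rrri with #2. */
        return base + (idx << 2);
    }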
+let AddedComplexity = 100, hasNewValue = 1, hasSideEffects = 0 in +def S2_addasl_rrri: SInst <(outs IntRegs:$Rd), + (ins IntRegs:$Rt, IntRegs:$Rs, u3Imm:$u3), + "$Rd = addasl($Rt, $Rs, #$u3)" , + [(set (i32 IntRegs:$Rd), (add (i32 IntRegs:$Rt), + (shl (i32 IntRegs:$Rs), u3ImmPred:$u3)))], + "", S_3op_tc_2_SLOT23> { + bits<5> Rd; + bits<5> Rt; + bits<5> Rs; + bits<3> u3; + + let IClass = 0b1100; + + let Inst{27-21} = 0b0100000; + let Inst{20-16} = Rs; + let Inst{13} = 0b0; + let Inst{12-8} = Rt; + let Inst{7-5} = u3; + let Inst{4-0} = Rd; + } + +//===----------------------------------------------------------------------===// +// STYPE/SHIFT - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// STYPE/VH + +//===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// STYPE/VH - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// STYPE/VW + +//===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// STYPE/VW - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// SYSTEM/SUPER + +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// SYSTEM/USER + +//===----------------------------------------------------------------------===// +def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDTNone, [SDNPHasChain]>; + +let hasSideEffects = 1, isSoloAX = 1 in +def Y2_barrier : SYSInst<(outs), (ins), + "barrier", + [(HexagonBARRIER)],"",ST_tc_st_SLOT0> { + let Inst{31-28} = 0b1010; + let Inst{27-21} = 0b1000000; +} + +//===----------------------------------------------------------------------===// +// SYSTEM/SUPER - +//===----------------------------------------------------------------------===// + +// Generate frameindex addresses. The main reason for the offset operand is +// that every instruction that is allowed to have frame index as an operand +// will then have that operand followed by an immediate operand (the offset). +// This simplifies the frame-index elimination code. +// +let isMoveImm = 1, isAsCheapAsAMove = 1, isReMaterializable = 1, + isPseudo = 1, isCodeGenOnly = 1, hasSideEffects = 0 in { + def TFR_FI : ALU32_ri<(outs IntRegs:$Rd), + (ins IntRegs:$fi, s32Imm:$off), "">; + def TFR_FIA : ALU32_ri<(outs IntRegs:$Rd), + (ins IntRegs:$Rs, IntRegs:$fi, s32Imm:$off), "">; +} + +//===----------------------------------------------------------------------===// +// CRUSER - Type. 
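For orientation (illustrative only), a simple counted loop of the kind the hardware-loop instructions defined below are used for; whether loop0/endloop0 is actually emitted depends on the hardware-loops pass, not on these definitions alone.

    int sum(const int *a, int n) {
        int s = 0;
        /* With hardware-loop generation, the trip count may be set up with
           "loop0(start, #n)" and the back edge replaced by ":endloop0". */
        for (int i = 0; i < n; ++i)
            s += a[i];
        return s;
    }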
+//===----------------------------------------------------------------------===// +// HW loop +let isExtendable = 1, isExtentSigned = 1, opExtentBits = 9, opExtentAlign = 2, + opExtendable = 0, hasSideEffects = 0 in +class LOOP_iBase<string mnemonic, Operand brOp, bit mustExtend = 0> + : CRInst<(outs), (ins brOp:$offset, u10Imm:$src2), + #mnemonic#"($offset, #$src2)", + [], "" , CR_tc_3x_SLOT3> { + bits<9> offset; + bits<10> src2; + + let IClass = 0b0110; + + let Inst{27-22} = 0b100100; + let Inst{21} = !if (!eq(mnemonic, "loop0"), 0b0, 0b1); + let Inst{20-16} = src2{9-5}; + let Inst{12-8} = offset{8-4}; + let Inst{7-5} = src2{4-2}; + let Inst{4-3} = offset{3-2}; + let Inst{1-0} = src2{1-0}; +} + +let isExtendable = 1, isExtentSigned = 1, opExtentBits = 9, opExtentAlign = 2, + opExtendable = 0, hasSideEffects = 0 in +class LOOP_rBase<string mnemonic, Operand brOp, bit mustExtend = 0> + : CRInst<(outs), (ins brOp:$offset, IntRegs:$src2), + #mnemonic#"($offset, $src2)", + [], "" ,CR_tc_3x_SLOT3> { + bits<9> offset; + bits<5> src2; + + let IClass = 0b0110; + + let Inst{27-22} = 0b000000; + let Inst{21} = !if (!eq(mnemonic, "loop0"), 0b0, 0b1); + let Inst{20-16} = src2; + let Inst{12-8} = offset{8-4}; + let Inst{4-3} = offset{3-2}; + } + +multiclass LOOP_ri<string mnemonic> { + def i : LOOP_iBase<mnemonic, brtarget>; + def r : LOOP_rBase<mnemonic, brtarget>; + + let isCodeGenOnly = 1, isExtended = 1, opExtendable = 0 in { + def iext: LOOP_iBase<mnemonic, brtargetExt, 1>; + def rext: LOOP_rBase<mnemonic, brtargetExt, 1>; + } +} + + +let Defs = [SA0, LC0, USR] in +defm J2_loop0 : LOOP_ri<"loop0">; + +// Interestingly only loop0's appear to set usr.lpcfg +let Defs = [SA1, LC1] in +defm J2_loop1 : LOOP_ri<"loop1">; + +let isBranch = 1, isTerminator = 1, hasSideEffects = 0, + Defs = [PC, LC0], Uses = [SA0, LC0] in { +def ENDLOOP0 : Endloop<(outs), (ins brtarget:$offset), + ":endloop0", + []>; +} + +let isBranch = 1, isTerminator = 1, hasSideEffects = 0, + Defs = [PC, LC1], Uses = [SA1, LC1] in { +def ENDLOOP1 : Endloop<(outs), (ins brtarget:$offset), + ":endloop1", + []>; +} + +// Pipelined loop instructions, sp[123]loop0 +let Defs = [LC0, SA0, P3, USR], hasSideEffects = 0, + isExtentSigned = 1, isExtendable = 1, opExtentBits = 9, opExtentAlign = 2, + opExtendable = 0, isPredicateLate = 1 in +class SPLOOP_iBase<string SP, bits<2> op> + : CRInst <(outs), (ins brtarget:$r7_2, u10Imm:$U10), + "p3 = sp"#SP#"loop0($r7_2, #$U10)" > { + bits<9> r7_2; + bits<10> U10; + + let IClass = 0b0110; + + let Inst{22-21} = op; + let Inst{27-23} = 0b10011; + let Inst{20-16} = U10{9-5}; + let Inst{12-8} = r7_2{8-4}; + let Inst{7-5} = U10{4-2}; + let Inst{4-3} = r7_2{3-2}; + let Inst{1-0} = U10{1-0}; + } + +let Defs = [LC0, SA0, P3, USR], hasSideEffects = 0, + isExtentSigned = 1, isExtendable = 1, opExtentBits = 9, opExtentAlign = 2, + opExtendable = 0, isPredicateLate = 1 in +class SPLOOP_rBase<string SP, bits<2> op> + : CRInst <(outs), (ins brtarget:$r7_2, IntRegs:$Rs), + "p3 = sp"#SP#"loop0($r7_2, $Rs)" > { + bits<9> r7_2; + bits<5> Rs; + + let IClass = 0b0110; + + let Inst{22-21} = op; + let Inst{27-23} = 0b00001; + let Inst{20-16} = Rs; + let Inst{12-8} = r7_2{8-4}; + let Inst{4-3} = r7_2{3-2}; + } + +multiclass SPLOOP_ri<string mnemonic, bits<2> op> { + def i : SPLOOP_iBase<mnemonic, op>; + def r : SPLOOP_rBase<mnemonic, op>; +} + +defm J2_ploop1s : SPLOOP_ri<"1", 0b01>; +defm J2_ploop2s : SPLOOP_ri<"2", 0b10>; +defm J2_ploop3s : SPLOOP_ri<"3", 0b11>; + +// if (Rs[!>=<]=#0) jump:[t/nt] +let Defs = [PC], 
isPredicated = 1, isBranch = 1, hasSideEffects = 0, + hasSideEffects = 0 in +class J2_jump_0_Base<string compare, bit isTak, bits<2> op> + : CRInst <(outs), (ins IntRegs:$Rs, brtarget:$r13_2), + "if ($Rs"#compare#"#0) jump"#!if(isTak, ":t", ":nt")#" $r13_2" > { + bits<5> Rs; + bits<15> r13_2; + + let IClass = 0b0110; + + let Inst{27-24} = 0b0001; + let Inst{23-22} = op; + let Inst{12} = isTak; + let Inst{21} = r13_2{14}; + let Inst{20-16} = Rs; + let Inst{11-1} = r13_2{12-2}; + let Inst{13} = r13_2{13}; + } + +multiclass J2_jump_compare_0<string compare, bits<2> op> { + def NAME : J2_jump_0_Base<compare, 0, op>; + def NAME#pt : J2_jump_0_Base<compare, 1, op>; +} + +defm J2_jumprz : J2_jump_compare_0<"!=", 0b00>; +defm J2_jumprgtez : J2_jump_compare_0<">=", 0b01>; +defm J2_jumprnz : J2_jump_compare_0<"==", 0b10>; +defm J2_jumprltez : J2_jump_compare_0<"<=", 0b11>; + +// Transfer to/from Control/GPR Guest/GPR +let hasSideEffects = 0 in +class TFR_CR_RS_base<RegisterClass CTRC, RegisterClass RC, bit isDouble> + : CRInst <(outs CTRC:$dst), (ins RC:$src), + "$dst = $src", [], "", CR_tc_3x_SLOT3> { + bits<5> dst; + bits<5> src; + + let IClass = 0b0110; + + let Inst{27-25} = 0b001; + let Inst{24} = isDouble; + let Inst{23-21} = 0b001; + let Inst{20-16} = src; + let Inst{4-0} = dst; + } + +def A2_tfrrcr : TFR_CR_RS_base<CtrRegs, IntRegs, 0b0>; +def A4_tfrpcp : TFR_CR_RS_base<CtrRegs64, DoubleRegs, 0b1>; +def : InstAlias<"m0 = $Rs", (A2_tfrrcr C6, IntRegs:$Rs)>; +def : InstAlias<"m1 = $Rs", (A2_tfrrcr C7, IntRegs:$Rs)>; + +let hasSideEffects = 0 in +class TFR_RD_CR_base<RegisterClass RC, RegisterClass CTRC, bit isSingle> + : CRInst <(outs RC:$dst), (ins CTRC:$src), + "$dst = $src", [], "", CR_tc_3x_SLOT3> { + bits<5> dst; + bits<5> src; + + let IClass = 0b0110; + + let Inst{27-26} = 0b10; + let Inst{25} = isSingle; + let Inst{24-21} = 0b0000; + let Inst{20-16} = src; + let Inst{4-0} = dst; + } + +let hasNewValue = 1, opNewValue = 0 in +def A2_tfrcrr : TFR_RD_CR_base<IntRegs, CtrRegs, 1>; +def A4_tfrcpp : TFR_RD_CR_base<DoubleRegs, CtrRegs64, 0>; +def : InstAlias<"$Rd = m0", (A2_tfrcrr IntRegs:$Rd, C6)>; +def : InstAlias<"$Rd = m1", (A2_tfrcrr IntRegs:$Rd, C7)>; + +// Y4_trace: Send value to etm trace. +let isSoloAX = 1, hasSideEffects = 0 in +def Y4_trace: CRInst <(outs), (ins IntRegs:$Rs), + "trace($Rs)"> { + bits<5> Rs; + + let IClass = 0b0110; + let Inst{27-21} = 0b0010010; + let Inst{20-16} = Rs; + } + +// Support for generating global address. +// Taken from X86InstrInfo.td. 
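A short C sketch (names invented) of where the high/low immediate transfers defined below come into play: taking the address of a global in non-PIC code is typically materialized as a pair of half-word immediates, roughly "Rd.h = #HI(g)" followed by "Rd.l = #LO(g)", though the backend may also use a constant-extended transfer.

    extern int g;

    int *addr_of_g(void) {
        return &g;
    }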
+def SDTHexagonCONST32 : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, + SDTCisVT<1, i32>, + SDTCisPtrTy<0>]>; +def HexagonCONST32 : SDNode<"HexagonISD::CONST32", SDTHexagonCONST32>; +def HexagonCONST32_GP : SDNode<"HexagonISD::CONST32_GP", SDTHexagonCONST32>; + +// HI/LO Instructions +let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0, + hasNewValue = 1, opNewValue = 0 in +class REG_IMMED<string RegHalf, string Op, bit Rs, bits<3> MajOp, bit MinOp> + : ALU32_ri<(outs IntRegs:$dst), + (ins i32imm:$imm_value), + "$dst"#RegHalf#" = #"#Op#"($imm_value)", []> { + bits<5> dst; + bits<32> imm_value; + let IClass = 0b0111; + + let Inst{27} = Rs; + let Inst{26-24} = MajOp; + let Inst{21} = MinOp; + let Inst{20-16} = dst; + let Inst{23-22} = !if (!eq(Op, "LO"), imm_value{15-14}, imm_value{31-30}); + let Inst{13-0} = !if (!eq(Op, "LO"), imm_value{13-0}, imm_value{29-16}); +} + +let isAsmParserOnly = 1 in { + def LO : REG_IMMED<".l", "LO", 0b0, 0b001, 0b1>; + def LO_H : REG_IMMED<".l", "HI", 0b0, 0b001, 0b1>; + def HI : REG_IMMED<".h", "HI", 0b0, 0b010, 0b1>; + def HI_L : REG_IMMED<".h", "LO", 0b0, 0b010, 0b1>; +} + +let isMoveImm = 1, isCodeGenOnly = 1 in +def LO_PIC : ALU32_ri<(outs IntRegs:$dst), (ins bblabel:$label), + "$dst.l = #LO($label@GOTREL)", + []>; + +let isMoveImm = 1, isCodeGenOnly = 1 in +def HI_PIC : ALU32_ri<(outs IntRegs:$dst), (ins bblabel:$label), + "$dst.h = #HI($label@GOTREL)", + []>; + +let isReMaterializable = 1, isMoveImm = 1, + isCodeGenOnly = 1, hasSideEffects = 0 in +def HI_GOT : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global), + "$dst.h = #HI($global@GOT)", + []>; + +let isReMaterializable = 1, isMoveImm = 1, + isCodeGenOnly = 1, hasSideEffects = 0 in +def LO_GOT : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global), + "$dst.l = #LO($global@GOT)", + []>; + +let isReMaterializable = 1, isMoveImm = 1, + isCodeGenOnly = 1, hasSideEffects = 0 in +def HI_GOTREL : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global), + "$dst.h = #HI($global@GOTREL)", + []>; + +let isReMaterializable = 1, isMoveImm = 1, + isCodeGenOnly = 1, hasSideEffects = 0 in +def LO_GOTREL : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global), + "$dst.l = #LO($global@GOTREL)", + []>; + +// This pattern is incorrect. When we add small data, we should change +// this pattern to use memw(#foo). +// This is for sdata. 
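For illustration (not from the patch), a 64-bit immediate of the kind the CONST64_Int_Real definition below can cover; whether it is emitted as a CONST64 or assembled from 32-bit pieces is a backend decision.

    #include <stdint.h>

    uint64_t wide_const(void) {
        return 0x123456789abcdef0ull;
    }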
+let isMoveImm = 1, isAsmParserOnly = 1 in +def CONST32 : CONSTLDInst<(outs IntRegs:$dst), (ins globaladdress:$global), + "$dst = CONST32(#$global)", + [(set (i32 IntRegs:$dst), + (load (HexagonCONST32 tglobaltlsaddr:$global)))]>; + +let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in +def CONST32_Int_Real : CONSTLDInst<(outs IntRegs:$dst), (ins i32imm:$global), + "$dst = CONST32(#$global)", + [(set (i32 IntRegs:$dst), imm:$global) ]>; + +// Map TLS addressses to a CONST32 instruction +def: Pat<(HexagonCONST32 tglobaltlsaddr:$addr), (A2_tfrsi s16Ext:$addr)>; +def: Pat<(HexagonCONST32 bbl:$label), (A2_tfrsi s16Ext:$label)>; + +let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in +def CONST64_Int_Real : CONSTLDInst<(outs DoubleRegs:$dst), (ins i64imm:$global), + "$dst = CONST64(#$global)", + [(set (i64 DoubleRegs:$dst), imm:$global)]>; + +let hasSideEffects = 0, isReMaterializable = 1, isPseudo = 1, + isCodeGenOnly = 1 in +def TFR_PdTrue : SInst<(outs PredRegs:$dst), (ins), "", + [(set (i1 PredRegs:$dst), 1)]>; + +let hasSideEffects = 0, isReMaterializable = 1, isPseudo = 1, + isCodeGenOnly = 1 in +def TFR_PdFalse : SInst<(outs PredRegs:$dst), (ins), "$dst = xor($dst, $dst)", + [(set (i1 PredRegs:$dst), 0)]>; + +// Pseudo instructions. +def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>; +def SDT_SPCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, + SDTCisVT<1, i32> ]>; + +def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPCallSeqStart, + [SDNPHasChain, SDNPOutGlue]>; +def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPCallSeqEnd, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; + +def SDT_SPCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; + +// For tailcalls a HexagonTCRet SDNode has 3 SDNode Properties - a chain, +// Optional Flag and Variable Arguments. +// Its 1 Operand has pointer type. +def HexagonTCRet : SDNode<"HexagonISD::TC_RETURN", SDT_SPCall, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; + +let Defs = [R29, R30], Uses = [R31, R30, R29], isPseudo = 1 in +def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt), + ".error \"should not emit\" ", + [(callseq_start timm:$amt)]>; + +let Defs = [R29, R30, R31], Uses = [R29], isPseudo = 1 in +def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), + ".error \"should not emit\" ", + [(callseq_end timm:$amt1, timm:$amt2)]>; + +// Call subroutine indirectly. +let Defs = VolatileV3.Regs in +def J2_callr : JUMPR_MISC_CALLR<0, 1>; + +// Indirect tail-call. +let isPseudo = 1, isCall = 1, isReturn = 1, isBarrier = 1, isPredicable = 0, + isTerminator = 1, isCodeGenOnly = 1 in +def TCRETURNr : T_JMPr; + +// Direct tail-calls. +let isPseudo = 1, isCall = 1, isReturn = 1, isBarrier = 1, isPredicable = 0, + isTerminator = 1, isCodeGenOnly = 1 in +def TCRETURNi : JInst<(outs), (ins calltarget:$dst), "", []>; + +//Tail calls. +def: Pat<(HexagonTCRet tglobaladdr:$dst), + (TCRETURNi tglobaladdr:$dst)>; +def: Pat<(HexagonTCRet texternalsym:$dst), + (TCRETURNi texternalsym:$dst)>; +def: Pat<(HexagonTCRet (i32 IntRegs:$dst)), + (TCRETURNr IntRegs:$dst)>; + +// Map from r0 = and(r1, 65535) to r0 = zxth(r1) +def: Pat<(and (i32 IntRegs:$src1), 65535), + (A2_zxth IntRegs:$src1)>; + +// Map from r0 = and(r1, 255) to r0 = zxtb(r1). +def: Pat<(and (i32 IntRegs:$src1), 255), + (A2_zxtb IntRegs:$src1)>; + +// Map Add(p1, true) to p1 = not(p1). +// Add(p1, false) should never be produced, +// if it does, it got to be mapped to NOOP. 
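To see why the pattern just below is sound: in the i1 world "true" is the all-ones value (-1) and addition wraps modulo 2, so p plus true equals p XOR 1, i.e. not(p), while p plus false leaves p unchanged, which is why add(p, false) would amount to a no-op. A tiny self-check, illustrative only and not taken from the source:

#include <cassert>

int main() {
  for (unsigned p = 0; p <= 1; ++p) {
    unsigned sum_true  = (p + 1u) & 1u;   // adding "true" (-1 mod 2 == 1)
    unsigned sum_false = (p + 0u) & 1u;   // adding "false"
    assert(sum_true == (p ^ 1u));         // add(p, true)  ==> not(p)
    assert(sum_false == p);               // add(p, false) ==> p (a no-op)
  }
  return 0;
}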
+def: Pat<(add (i1 PredRegs:$src1), -1), + (C2_not PredRegs:$src1)>; + +// Map from p0 = pnot(p0); r0 = mux(p0, #i, #j) => r0 = mux(p0, #j, #i). +def: Pat<(select (not (i1 PredRegs:$src1)), s8ImmPred:$src2, s32ImmPred:$src3), + (C2_muxii PredRegs:$src1, s32ImmPred:$src3, s8ImmPred:$src2)>; + +// Map from p0 = pnot(p0); r0 = select(p0, #i, r1) +// => r0 = C2_muxir(p0, r1, #i) +def: Pat<(select (not (i1 PredRegs:$src1)), s32ImmPred:$src2, + (i32 IntRegs:$src3)), + (C2_muxir PredRegs:$src1, IntRegs:$src3, s32ImmPred:$src2)>; + +// Map from p0 = pnot(p0); r0 = mux(p0, r1, #i) +// => r0 = C2_muxri (p0, #i, r1) +def: Pat<(select (not (i1 PredRegs:$src1)), IntRegs:$src2, s32ImmPred:$src3), + (C2_muxri PredRegs:$src1, s32ImmPred:$src3, IntRegs:$src2)>; + +// Map from p0 = pnot(p0); if (p0) jump => if (!p0) jump. +def: Pat<(brcond (not (i1 PredRegs:$src1)), bb:$offset), + (J2_jumpf PredRegs:$src1, bb:$offset)>; + +// Map from Rdd = sign_extend_inreg(Rss, i32) -> Rdd = A2_sxtw(Rss.lo). +def: Pat<(i64 (sext_inreg (i64 DoubleRegs:$src1), i32)), + (A2_sxtw (LoReg DoubleRegs:$src1))>; + +// Map from Rdd = sign_extend_inreg(Rss, i16) -> Rdd = A2_sxtw(A2_sxth(Rss.lo)). +def: Pat<(i64 (sext_inreg (i64 DoubleRegs:$src1), i16)), + (A2_sxtw (A2_sxth (LoReg DoubleRegs:$src1)))>; + +// Map from Rdd = sign_extend_inreg(Rss, i8) -> Rdd = A2_sxtw(A2_sxtb(Rss.lo)). +def: Pat<(i64 (sext_inreg (i64 DoubleRegs:$src1), i8)), + (A2_sxtw (A2_sxtb (LoReg DoubleRegs:$src1)))>; + +// We want to prevent emitting pnot's as much as possible. +// Map brcond with an unsupported setcc to a J2_jumpf. +def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), (i32 IntRegs:$src2))), + bb:$offset), + (J2_jumpf (C2_cmpeq (i32 IntRegs:$src1), (i32 IntRegs:$src2)), + bb:$offset)>; + +def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), s10ImmPred:$src2)), + bb:$offset), + (J2_jumpf (C2_cmpeqi (i32 IntRegs:$src1), s10ImmPred:$src2), bb:$offset)>; + +def: Pat<(brcond (i1 (setne (i1 PredRegs:$src1), (i1 -1))), bb:$offset), + (J2_jumpf PredRegs:$src1, bb:$offset)>; + +def: Pat<(brcond (i1 (setne (i1 PredRegs:$src1), (i1 0))), bb:$offset), + (J2_jumpt PredRegs:$src1, bb:$offset)>; + +// cmp.lt(Rs, Imm) -> !cmp.ge(Rs, Imm) -> !cmp.gt(Rs, Imm-1) +def: Pat<(brcond (i1 (setlt (i32 IntRegs:$src1), s8ImmPred:$src2)), bb:$offset), + (J2_jumpf (C2_cmpgti IntRegs:$src1, (DEC_CONST_SIGNED s8ImmPred:$src2)), + bb:$offset)>; + +// Map from a 64-bit select to an emulated 64-bit mux. +// Hexagon does not support 64-bit MUXes; so emulate with combines. +def: Pat<(select (i1 PredRegs:$src1), (i64 DoubleRegs:$src2), + (i64 DoubleRegs:$src3)), + (A2_combinew (C2_mux PredRegs:$src1, (HiReg DoubleRegs:$src2), + (HiReg DoubleRegs:$src3)), + (C2_mux PredRegs:$src1, (LoReg DoubleRegs:$src2), + (LoReg DoubleRegs:$src3)))>; + +// Map from a 1-bit select to logical ops. +// From LegalizeDAG.cpp: (B1 ? B2 : B3) <=> (B1 & B2)|(!B1&B3). +def: Pat<(select (i1 PredRegs:$src1), (i1 PredRegs:$src2), (i1 PredRegs:$src3)), + (C2_or (C2_and PredRegs:$src1, PredRegs:$src2), + (C2_and (C2_not PredRegs:$src1), PredRegs:$src3))>; + +// Map for truncating from 64 immediates to 32 bit immediates. +def: Pat<(i32 (trunc (i64 DoubleRegs:$src))), + (LoReg DoubleRegs:$src)>; + +// Map for truncating from i64 immediates to i1 bit immediates. +def: Pat<(i1 (trunc (i64 DoubleRegs:$src))), + (C2_tfrrp (LoReg DoubleRegs:$src))>; + +// rs <= rt -> !(rs > rt). 
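The comparison rewrites here and in the patterns that follow all lean on Hexagon exposing only "greater than" style compares: x <= y becomes !(x > y), a signed x >= #c becomes x > #c-1 (DEC_CONST_SIGNED), and an unsigned x >= #u with u > 0 becomes x > #u-1 (DEC_CONST_UNSIGNED), with u == 0 handled by the cmpeq(Rs, Rs) pattern. A brute-force check over a few arbitrary test values (an illustrative sketch, not code from the patch):

#include <cassert>
#include <cstdint>

int main() {
  const int32_t xs[] = {-5, -1, 0, 1, 7, 100};
  const int32_t cs[] = {-3, 0, 2, 7};          // small, so c - 1 cannot wrap
  for (int32_t x : xs)
    for (int32_t y : xs)
      assert((x <= y) == !(x > y));            // setle  -> !cmp.gt
  for (int32_t x : xs)
    for (int32_t c : cs) {
      assert((x >= c) == (x > c - 1));         // setge  -> cmp.gt(Rs, #c-1)
      assert((x <  c) == !(x > c - 1));        // setlt  -> !cmp.gt(Rs, #c-1)
    }
  const uint32_t us[] = {0, 1, 9, 255};
  const uint32_t ks[] = {1, 8, 255};           // k > 0; k == 0 has its own pattern
  for (uint32_t x : us)
    for (uint32_t k : ks)
      assert((x >= k) == (x > k - 1));         // setuge -> cmp.gtu(Rs, #k-1)
  return 0;
}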
+let AddedComplexity = 30 in +def: Pat<(i1 (setle (i32 IntRegs:$src1), s32ImmPred:$src2)), + (C2_not (C2_cmpgti IntRegs:$src1, s32ImmPred:$src2))>; + +// rs <= rt -> !(rs > rt). +def : Pat<(i1 (setle (i32 IntRegs:$src1), (i32 IntRegs:$src2))), + (i1 (C2_not (C2_cmpgt (i32 IntRegs:$src1), (i32 IntRegs:$src2))))>; + +// Rss <= Rtt -> !(Rss > Rtt). +def: Pat<(i1 (setle (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), + (C2_not (C2_cmpgtp DoubleRegs:$src1, DoubleRegs:$src2))>; + +// Map cmpne -> cmpeq. +// Hexagon_TODO: We should improve on this. +// rs != rt -> !(rs == rt). +let AddedComplexity = 30 in +def: Pat<(i1 (setne (i32 IntRegs:$src1), s32ImmPred:$src2)), + (C2_not (C2_cmpeqi IntRegs:$src1, s32ImmPred:$src2))>; + +// Convert setne back to xor for hexagon since we compute w/ pred registers. +def: Pat<(i1 (setne (i1 PredRegs:$src1), (i1 PredRegs:$src2))), + (C2_xor PredRegs:$src1, PredRegs:$src2)>; + +// Map cmpne(Rss) -> !cmpew(Rss). +// rs != rt -> !(rs == rt). +def: Pat<(i1 (setne (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), + (C2_not (C2_cmpeqp DoubleRegs:$src1, DoubleRegs:$src2))>; + +// Map cmpge(Rs, Rt) -> !(cmpgt(Rs, Rt). +// rs >= rt -> !(rt > rs). +def : Pat <(i1 (setge (i32 IntRegs:$src1), (i32 IntRegs:$src2))), + (i1 (C2_not (i1 (C2_cmpgt (i32 IntRegs:$src2), (i32 IntRegs:$src1)))))>; + +// cmpge(Rs, Imm) -> cmpgt(Rs, Imm-1) +let AddedComplexity = 30 in +def: Pat<(i1 (setge (i32 IntRegs:$src1), s32ImmPred:$src2)), + (C2_cmpgti IntRegs:$src1, (DEC_CONST_SIGNED s32ImmPred:$src2))>; + +// Map cmpge(Rss, Rtt) -> !cmpgt(Rtt, Rss). +// rss >= rtt -> !(rtt > rss). +def: Pat<(i1 (setge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), + (C2_not (C2_cmpgtp DoubleRegs:$src2, DoubleRegs:$src1))>; + +// Map cmplt(Rs, Imm) -> !cmpge(Rs, Imm). +// !cmpge(Rs, Imm) -> !cmpgt(Rs, Imm-1). +// rs < rt -> !(rs >= rt). +let AddedComplexity = 30 in +def: Pat<(i1 (setlt (i32 IntRegs:$src1), s32ImmPred:$src2)), + (C2_not (C2_cmpgti IntRegs:$src1, + (DEC_CONST_SIGNED s32ImmPred:$src2)))>; + +// Generate cmpgeu(Rs, #0) -> cmpeq(Rs, Rs) +def: Pat<(i1 (setuge (i32 IntRegs:$src1), 0)), + (C2_cmpeq IntRegs:$src1, IntRegs:$src1)>; + +// Generate cmpgeu(Rs, #u8) -> cmpgtu(Rs, #u8 -1) +def: Pat<(i1 (setuge (i32 IntRegs:$src1), u32ImmPred:$src2)), + (C2_cmpgtui IntRegs:$src1, (DEC_CONST_UNSIGNED u32ImmPred:$src2))>; + +// Generate cmpgtu(Rs, #u9) +def: Pat<(i1 (setugt (i32 IntRegs:$src1), u32ImmPred:$src2)), + (C2_cmpgtui IntRegs:$src1, u32ImmPred:$src2)>; + +// Map from Rs >= Rt -> !(Rt > Rs). +// rs >= rt -> !(rt > rs). +def: Pat<(i1 (setuge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), + (C2_not (C2_cmpgtup DoubleRegs:$src2, DoubleRegs:$src1))>; + +// Map from cmpleu(Rss, Rtt) -> !cmpgtu(Rss, Rtt-1). +// Map from (Rs <= Rt) -> !(Rs > Rt). +def: Pat<(i1 (setule (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), + (C2_not (C2_cmpgtup DoubleRegs:$src1, DoubleRegs:$src2))>; + +// Sign extends. +// i1 -> i32 +def: Pat<(i32 (sext (i1 PredRegs:$src1))), + (C2_muxii PredRegs:$src1, -1, 0)>; + +// i1 -> i64 +def: Pat<(i64 (sext (i1 PredRegs:$src1))), + (A2_combinew (A2_tfrsi -1), (C2_muxii PredRegs:$src1, -1, 0))>; + +// Zero extends. 
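One aside before this section continues: the 64-bit mulhu expansion that appears after the zero-extension patterns below assembles the upper half of a 64x64 product from 32-bit partial products (M2_dpmpyuu_s0 / M2_dpmpyuu_acc_s0 plus shifts and adds), i.e. the usual schoolbook decomposition. A plain C++ rendering of that decomposition, checked against the compiler's 128-bit integer extension (assumed available on the host; the helper name is invented):

#include <cassert>
#include <cstdint>

// High 64 bits of an unsigned 64x64 multiply, built from 32-bit halves.
static uint64_t mulhu64(uint64_t a, uint64_t b) {
  uint64_t aL = a & 0xFFFFFFFFu, aH = a >> 32;
  uint64_t bL = b & 0xFFFFFFFFu, bH = b >> 32;
  uint64_t ll = aL * bL, lh = aL * bH, hl = aH * bL, hh = aH * bH;
  uint64_t mid = (ll >> 32) + (lh & 0xFFFFFFFFu) + (hl & 0xFFFFFFFFu);
  return hh + (lh >> 32) + (hl >> 32) + (mid >> 32);
}

int main() {
  const uint64_t tests[] = {0, 1, 0xFFFFFFFFu, 0x100000000ull,
                            0xDEADBEEFCAFEBABEull, ~0ull};
  for (uint64_t a : tests)
    for (uint64_t b : tests)
      assert(mulhu64(a, b) ==
             (uint64_t)(((unsigned __int128)a * b) >> 64));
  return 0;
}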
+// i1 -> i32 +def: Pat<(i32 (zext (i1 PredRegs:$src1))), + (C2_muxii PredRegs:$src1, 1, 0)>; + +// Map from Rs = Pd to Pd = mux(Pd, #1, #0) +def: Pat<(i32 (anyext (i1 PredRegs:$src1))), + (C2_muxii PredRegs:$src1, 1, 0)>; + +// Map from Rss = Pd to Rdd = sxtw (mux(Pd, #1, #0)) +def: Pat<(i64 (anyext (i1 PredRegs:$src1))), + (A2_sxtw (C2_muxii PredRegs:$src1, 1, 0))>; + +// Multiply 64-bit unsigned and use upper result. +def : Pat <(mulhu (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)), + (A2_addp + (M2_dpmpyuu_acc_s0 + (S2_lsr_i_p + (A2_addp + (M2_dpmpyuu_acc_s0 + (S2_lsr_i_p (M2_dpmpyuu_s0 (LoReg $src1), (LoReg $src2)), 32), + (HiReg $src1), + (LoReg $src2)), + (A2_combinew (A2_tfrsi 0), + (LoReg (M2_dpmpyuu_s0 (LoReg $src1), (HiReg $src2))))), + 32), + (HiReg $src1), + (HiReg $src2)), + (S2_lsr_i_p (M2_dpmpyuu_s0 (LoReg $src1), (HiReg $src2)), 32) +)>; + +// Hexagon specific ISD nodes. +def SDTHexagonALLOCA : SDTypeProfile<1, 2, + [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; +def HexagonALLOCA : SDNode<"HexagonISD::ALLOCA", SDTHexagonALLOCA, + [SDNPHasChain]>; + +// The reason for the custom inserter is to record all ALLOCA instructions +// in MachineFunctionInfo. +let Defs = [R29], isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 1, + usesCustomInserter = 1 in +def ALLOCA: ALU32Inst<(outs IntRegs:$Rd), + (ins IntRegs:$Rs, u32Imm:$A), "", + [(set (i32 IntRegs:$Rd), + (HexagonALLOCA (i32 IntRegs:$Rs), (i32 imm:$A)))]>; + +let isCodeGenOnly = 1, isPseudo = 1, Uses = [R30], hasSideEffects = 0 in +def ALIGNA : ALU32Inst<(outs IntRegs:$Rd), (ins u32Imm:$A), "", []>; + +def SDTHexagonARGEXTEND : SDTypeProfile<1, 1, [SDTCisVT<0, i32>]>; +def Hexagon_ARGEXTEND : SDNode<"HexagonISD::ARGEXTEND", SDTHexagonARGEXTEND>; +let isCodeGenOnly = 1 in +def ARGEXTEND : ALU32_rr <(outs IntRegs:$dst), (ins IntRegs:$src1), + "$dst = $src1", + [(set (i32 IntRegs:$dst), + (Hexagon_ARGEXTEND (i32 IntRegs:$src1)))]>; + +let AddedComplexity = 100 in +def: Pat<(i32 (sext_inreg (Hexagon_ARGEXTEND (i32 IntRegs:$src1)), i16)), + (i32 IntRegs:$src1)>; + +def HexagonJT: SDNode<"HexagonISD::JT", SDTIntUnaryOp>; +def HexagonCP: SDNode<"HexagonISD::CP", SDTIntUnaryOp>; + +def: Pat<(HexagonJT tjumptable:$dst), (A2_tfrsi s16Ext:$dst)>; +def: Pat<(HexagonCP tconstpool:$dst), (A2_tfrsi s16Ext:$dst)>; + +// XTYPE/SHIFT +// +//===----------------------------------------------------------------------===// +// Template Class +// Shift by immediate/register and accumulate/logical +//===----------------------------------------------------------------------===// + +// Rx[+-&|]=asr(Rs,#u5) +// Rx[+-&|^]=lsr(Rs,#u5) +// Rx[+-&|^]=asl(Rs,#u5) + +let hasNewValue = 1, opNewValue = 0 in +class T_shift_imm_acc_r <string opc1, string opc2, SDNode OpNode1, + SDNode OpNode2, bits<3> majOp, bits<2> minOp> + : SInst_acc<(outs IntRegs:$Rx), + (ins IntRegs:$src1, IntRegs:$Rs, u5Imm:$u5), + "$Rx "#opc2#opc1#"($Rs, #$u5)", + [(set (i32 IntRegs:$Rx), + (OpNode2 (i32 IntRegs:$src1), + (OpNode1 (i32 IntRegs:$Rs), u5ImmPred:$u5)))], + "$src1 = $Rx", S_2op_tc_2_SLOT23> { + bits<5> Rx; + bits<5> Rs; + bits<5> u5; + + let IClass = 0b1000; + + let Inst{27-24} = 0b1110; + let Inst{23-22} = majOp{2-1}; + let Inst{13} = 0b0; + let Inst{7} = majOp{0}; + let Inst{6-5} = minOp; + let Inst{4-0} = Rx; + let Inst{20-16} = Rs; + let Inst{12-8} = u5; + } + +// Rx[+-&|]=asr(Rs,Rt) +// Rx[+-&|^]=lsr(Rs,Rt) +// Rx[+-&|^]=asl(Rs,Rt) + +let hasNewValue = 1, opNewValue = 0 in +class T_shift_reg_acc_r <string opc1, string opc2, SDNode OpNode1, + SDNode OpNode2, bits<2> majOp, 
bits<2> minOp> + : SInst_acc<(outs IntRegs:$Rx), + (ins IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt), + "$Rx "#opc2#opc1#"($Rs, $Rt)", + [(set (i32 IntRegs:$Rx), + (OpNode2 (i32 IntRegs:$src1), + (OpNode1 (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))], + "$src1 = $Rx", S_3op_tc_2_SLOT23 > { + bits<5> Rx; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1100; + + let Inst{27-24} = 0b1100; + let Inst{23-22} = majOp; + let Inst{7-6} = minOp; + let Inst{4-0} = Rx; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + } + +// Rxx[+-&|]=asr(Rss,#u6) +// Rxx[+-&|^]=lsr(Rss,#u6) +// Rxx[+-&|^]=asl(Rss,#u6) + +class T_shift_imm_acc_p <string opc1, string opc2, SDNode OpNode1, + SDNode OpNode2, bits<3> majOp, bits<2> minOp> + : SInst_acc<(outs DoubleRegs:$Rxx), + (ins DoubleRegs:$src1, DoubleRegs:$Rss, u6Imm:$u6), + "$Rxx "#opc2#opc1#"($Rss, #$u6)", + [(set (i64 DoubleRegs:$Rxx), + (OpNode2 (i64 DoubleRegs:$src1), + (OpNode1 (i64 DoubleRegs:$Rss), u6ImmPred:$u6)))], + "$src1 = $Rxx", S_2op_tc_2_SLOT23> { + bits<5> Rxx; + bits<5> Rss; + bits<6> u6; + + let IClass = 0b1000; + + let Inst{27-24} = 0b0010; + let Inst{23-22} = majOp{2-1}; + let Inst{7} = majOp{0}; + let Inst{6-5} = minOp; + let Inst{4-0} = Rxx; + let Inst{20-16} = Rss; + let Inst{13-8} = u6; + } + + +// Rxx[+-&|]=asr(Rss,Rt) +// Rxx[+-&|^]=lsr(Rss,Rt) +// Rxx[+-&|^]=asl(Rss,Rt) +// Rxx[+-&|^]=lsl(Rss,Rt) + +class T_shift_reg_acc_p <string opc1, string opc2, SDNode OpNode1, + SDNode OpNode2, bits<3> majOp, bits<2> minOp> + : SInst_acc<(outs DoubleRegs:$Rxx), + (ins DoubleRegs:$src1, DoubleRegs:$Rss, IntRegs:$Rt), + "$Rxx "#opc2#opc1#"($Rss, $Rt)", + [(set (i64 DoubleRegs:$Rxx), + (OpNode2 (i64 DoubleRegs:$src1), + (OpNode1 (i64 DoubleRegs:$Rss), (i32 IntRegs:$Rt))))], + "$src1 = $Rxx", S_3op_tc_2_SLOT23> { + bits<5> Rxx; + bits<5> Rss; + bits<5> Rt; + + let IClass = 0b1100; + + let Inst{27-24} = 0b1011; + let Inst{23-21} = majOp; + let Inst{20-16} = Rss; + let Inst{12-8} = Rt; + let Inst{7-6} = minOp; + let Inst{4-0} = Rxx; + } + +//===----------------------------------------------------------------------===// +// Multi-class for the shift instructions with logical/arithmetic operators. 
+//===----------------------------------------------------------------------===// + +multiclass xtype_imm_base<string OpcStr1, string OpcStr2, SDNode OpNode1, + SDNode OpNode2, bits<3> majOp, bits<2> minOp > { + def _i_r#NAME : T_shift_imm_acc_r< OpcStr1, OpcStr2, OpNode1, + OpNode2, majOp, minOp >; + def _i_p#NAME : T_shift_imm_acc_p< OpcStr1, OpcStr2, OpNode1, + OpNode2, majOp, minOp >; +} + +multiclass xtype_imm_acc<string opc1, SDNode OpNode, bits<2>minOp> { + let AddedComplexity = 100 in + defm _acc : xtype_imm_base< opc1, "+= ", OpNode, add, 0b001, minOp>; + + defm _nac : xtype_imm_base< opc1, "-= ", OpNode, sub, 0b000, minOp>; + defm _and : xtype_imm_base< opc1, "&= ", OpNode, and, 0b010, minOp>; + defm _or : xtype_imm_base< opc1, "|= ", OpNode, or, 0b011, minOp>; +} + +multiclass xtype_xor_imm_acc<string opc1, SDNode OpNode, bits<2>minOp> { +let AddedComplexity = 100 in + defm _xacc : xtype_imm_base< opc1, "^= ", OpNode, xor, 0b100, minOp>; +} + +defm S2_asr : xtype_imm_acc<"asr", sra, 0b00>; + +defm S2_lsr : xtype_imm_acc<"lsr", srl, 0b01>, + xtype_xor_imm_acc<"lsr", srl, 0b01>; + +defm S2_asl : xtype_imm_acc<"asl", shl, 0b10>, + xtype_xor_imm_acc<"asl", shl, 0b10>; + +multiclass xtype_reg_acc_r<string opc1, SDNode OpNode, bits<2>minOp> { + let AddedComplexity = 100 in + def _acc : T_shift_reg_acc_r <opc1, "+= ", OpNode, add, 0b11, minOp>; + + def _nac : T_shift_reg_acc_r <opc1, "-= ", OpNode, sub, 0b10, minOp>; + def _and : T_shift_reg_acc_r <opc1, "&= ", OpNode, and, 0b01, minOp>; + def _or : T_shift_reg_acc_r <opc1, "|= ", OpNode, or, 0b00, minOp>; +} + +multiclass xtype_reg_acc_p<string opc1, SDNode OpNode, bits<2>minOp> { + let AddedComplexity = 100 in + def _acc : T_shift_reg_acc_p <opc1, "+= ", OpNode, add, 0b110, minOp>; + + def _nac : T_shift_reg_acc_p <opc1, "-= ", OpNode, sub, 0b100, minOp>; + def _and : T_shift_reg_acc_p <opc1, "&= ", OpNode, and, 0b010, minOp>; + def _or : T_shift_reg_acc_p <opc1, "|= ", OpNode, or, 0b000, minOp>; + def _xor : T_shift_reg_acc_p <opc1, "^= ", OpNode, xor, 0b011, minOp>; +} + +multiclass xtype_reg_acc<string OpcStr, SDNode OpNode, bits<2> minOp > { + defm _r_r : xtype_reg_acc_r <OpcStr, OpNode, minOp>; + defm _r_p : xtype_reg_acc_p <OpcStr, OpNode, minOp>; +} + +defm S2_asl : xtype_reg_acc<"asl", shl, 0b10>; +defm S2_asr : xtype_reg_acc<"asr", sra, 0b00>; +defm S2_lsr : xtype_reg_acc<"lsr", srl, 0b01>; +defm S2_lsl : xtype_reg_acc<"lsl", shl, 0b11>; + +//===----------------------------------------------------------------------===// +let hasSideEffects = 0 in +class T_S3op_1 <string mnemonic, RegisterClass RC, bits<2> MajOp, bits<3> MinOp, + bit SwapOps, bit isSat = 0, bit isRnd = 0, bit hasShift = 0> + : SInst <(outs RC:$dst), + (ins DoubleRegs:$src1, DoubleRegs:$src2), + "$dst = "#mnemonic#"($src1, $src2)"#!if(isRnd, ":rnd", "") + #!if(hasShift,":>>1","") + #!if(isSat, ":sat", ""), + [], "", S_3op_tc_2_SLOT23 > { + bits<5> dst; + bits<5> src1; + bits<5> src2; + + let IClass = 0b1100; + + let Inst{27-24} = 0b0001; + let Inst{23-22} = MajOp; + let Inst{20-16} = !if (SwapOps, src2, src1); + let Inst{12-8} = !if (SwapOps, src1, src2); + let Inst{7-5} = MinOp; + let Inst{4-0} = dst; + } + +class T_S3op_64 <string mnemonic, bits<2> MajOp, bits<3> MinOp, bit SwapOps, + bit isSat = 0, bit isRnd = 0, bit hasShift = 0 > + : T_S3op_1 <mnemonic, DoubleRegs, MajOp, MinOp, SwapOps, + isSat, isRnd, hasShift>; + +let Itinerary = S_3op_tc_1_SLOT23 in { + def S2_shuffeb : T_S3op_64 < "shuffeb", 0b00, 0b010, 0>; + def S2_shuffeh : T_S3op_64 < "shuffeh", 
0b00, 0b110, 0>; + def S2_shuffob : T_S3op_64 < "shuffob", 0b00, 0b100, 1>; + def S2_shuffoh : T_S3op_64 < "shuffoh", 0b10, 0b000, 1>; + + def S2_vtrunewh : T_S3op_64 < "vtrunewh", 0b10, 0b010, 0>; + def S2_vtrunowh : T_S3op_64 < "vtrunowh", 0b10, 0b100, 0>; +} + +def S2_lfsp : T_S3op_64 < "lfs", 0b10, 0b110, 0>; + +let hasSideEffects = 0 in +class T_S3op_2 <string mnemonic, bits<3> MajOp, bit SwapOps> + : SInst < (outs DoubleRegs:$Rdd), + (ins DoubleRegs:$Rss, DoubleRegs:$Rtt, PredRegs:$Pu), + "$Rdd = "#mnemonic#"($Rss, $Rtt, $Pu)", + [], "", S_3op_tc_1_SLOT23 > { + bits<5> Rdd; + bits<5> Rss; + bits<5> Rtt; + bits<2> Pu; + + let IClass = 0b1100; + + let Inst{27-24} = 0b0010; + let Inst{23-21} = MajOp; + let Inst{20-16} = !if (SwapOps, Rtt, Rss); + let Inst{12-8} = !if (SwapOps, Rss, Rtt); + let Inst{6-5} = Pu; + let Inst{4-0} = Rdd; + } + +def S2_valignrb : T_S3op_2 < "valignb", 0b000, 1>; +def S2_vsplicerb : T_S3op_2 < "vspliceb", 0b100, 0>; + +//===----------------------------------------------------------------------===// +// Template class used by vector shift, vector rotate, vector neg, +// 32-bit shift, 64-bit shifts, etc. +//===----------------------------------------------------------------------===// + +let hasSideEffects = 0 in +class T_S3op_3 <string mnemonic, RegisterClass RC, bits<2> MajOp, + bits<2> MinOp, bit isSat = 0, list<dag> pattern = [] > + : SInst <(outs RC:$dst), + (ins RC:$src1, IntRegs:$src2), + "$dst = "#mnemonic#"($src1, $src2)"#!if(isSat, ":sat", ""), + pattern, "", S_3op_tc_1_SLOT23> { + bits<5> dst; + bits<5> src1; + bits<5> src2; + + let IClass = 0b1100; + + let Inst{27-24} = !if(!eq(!cast<string>(RC), "IntRegs"), 0b0110, 0b0011); + let Inst{23-22} = MajOp; + let Inst{20-16} = src1; + let Inst{12-8} = src2; + let Inst{7-6} = MinOp; + let Inst{4-0} = dst; + } + +let hasNewValue = 1 in +class T_S3op_shift32 <string mnemonic, SDNode OpNode, bits<2> MinOp> + : T_S3op_3 <mnemonic, IntRegs, 0b01, MinOp, 0, + [(set (i32 IntRegs:$dst), (OpNode (i32 IntRegs:$src1), + (i32 IntRegs:$src2)))]>; + +let hasNewValue = 1, Itinerary = S_3op_tc_2_SLOT23 in +class T_S3op_shift32_Sat <string mnemonic, bits<2> MinOp> + : T_S3op_3 <mnemonic, IntRegs, 0b00, MinOp, 1, []>; + + +class T_S3op_shift64 <string mnemonic, SDNode OpNode, bits<2> MinOp> + : T_S3op_3 <mnemonic, DoubleRegs, 0b10, MinOp, 0, + [(set (i64 DoubleRegs:$dst), (OpNode (i64 DoubleRegs:$src1), + (i32 IntRegs:$src2)))]>; + + +class T_S3op_shiftVect <string mnemonic, bits<2> MajOp, bits<2> MinOp> + : T_S3op_3 <mnemonic, DoubleRegs, MajOp, MinOp, 0, []>; + + +// Shift by register +// Rdd=[asr|lsr|asl|lsl](Rss,Rt) + +def S2_asr_r_p : T_S3op_shift64 < "asr", sra, 0b00>; +def S2_lsr_r_p : T_S3op_shift64 < "lsr", srl, 0b01>; +def S2_asl_r_p : T_S3op_shift64 < "asl", shl, 0b10>; +def S2_lsl_r_p : T_S3op_shift64 < "lsl", shl, 0b11>; + +// Rd=[asr|lsr|asl|lsl](Rs,Rt) + +def S2_asr_r_r : T_S3op_shift32<"asr", sra, 0b00>; +def S2_lsr_r_r : T_S3op_shift32<"lsr", srl, 0b01>; +def S2_asl_r_r : T_S3op_shift32<"asl", shl, 0b10>; +def S2_lsl_r_r : T_S3op_shift32<"lsl", shl, 0b11>; + +// Shift by register with saturation +// Rd=asr(Rs,Rt):sat +// Rd=asl(Rs,Rt):sat + +let Defs = [USR_OVF] in { + def S2_asr_r_r_sat : T_S3op_shift32_Sat<"asr", 0b00>; + def S2_asl_r_r_sat : T_S3op_shift32_Sat<"asl", 0b10>; +} + +let hasNewValue = 1, hasSideEffects = 0 in +class T_S3op_8 <string opc, bits<3> MinOp, bit isSat, bit isRnd, bit hasShift, bit hasSplat = 0> + : SInst < (outs IntRegs:$Rd), + (ins DoubleRegs:$Rss, IntRegs:$Rt), + "$Rd = 
"#opc#"($Rss, $Rt"#!if(hasSplat, "*", "")#")" + #!if(hasShift, ":<<1", "") + #!if(isRnd, ":rnd", "") + #!if(isSat, ":sat", ""), + [], "", S_3op_tc_1_SLOT23 > { + bits<5> Rd; + bits<5> Rss; + bits<5> Rt; + + let IClass = 0b1100; + + let Inst{27-24} = 0b0101; + let Inst{20-16} = Rss; + let Inst{12-8} = Rt; + let Inst{7-5} = MinOp; + let Inst{4-0} = Rd; + } + +def S2_asr_r_svw_trun : T_S3op_8<"vasrw", 0b010, 0, 0, 0>; + +let Defs = [USR_OVF], Itinerary = S_3op_tc_2_SLOT23 in +def S2_vcrotate : T_S3op_shiftVect < "vcrotate", 0b11, 0b00>; + +let hasSideEffects = 0 in +class T_S3op_7 <string mnemonic, bit MajOp > + : SInst <(outs DoubleRegs:$Rdd), + (ins DoubleRegs:$Rss, DoubleRegs:$Rtt, u3Imm:$u3), + "$Rdd = "#mnemonic#"($Rss, $Rtt, #$u3)" , + [], "", S_3op_tc_1_SLOT23 > { + bits<5> Rdd; + bits<5> Rss; + bits<5> Rtt; + bits<3> u3; + + let IClass = 0b1100; + + let Inst{27-24} = 0b0000; + let Inst{23} = MajOp; + let Inst{20-16} = !if(MajOp, Rss, Rtt); + let Inst{12-8} = !if(MajOp, Rtt, Rss); + let Inst{7-5} = u3; + let Inst{4-0} = Rdd; + } + +def S2_valignib : T_S3op_7 < "valignb", 0>; +def S2_vspliceib : T_S3op_7 < "vspliceb", 1>; + +//===----------------------------------------------------------------------===// +// Template class for 'insert bitfield' instructions +//===----------------------------------------------------------------------===// +let hasSideEffects = 0 in +class T_S3op_insert <string mnemonic, RegisterClass RC> + : SInst <(outs RC:$dst), + (ins RC:$src1, RC:$src2, DoubleRegs:$src3), + "$dst = "#mnemonic#"($src2, $src3)" , + [], "$src1 = $dst", S_3op_tc_1_SLOT23 > { + bits<5> dst; + bits<5> src2; + bits<5> src3; + + let IClass = 0b1100; + + let Inst{27-26} = 0b10; + let Inst{25-24} = !if(!eq(!cast<string>(RC), "IntRegs"), 0b00, 0b10); + let Inst{23} = 0b0; + let Inst{20-16} = src2; + let Inst{12-8} = src3; + let Inst{4-0} = dst; + } + +let hasSideEffects = 0 in +class T_S2op_insert <bits<4> RegTyBits, RegisterClass RC, Operand ImmOp> + : SInst <(outs RC:$dst), (ins RC:$dst2, RC:$src1, ImmOp:$src2, ImmOp:$src3), + "$dst = insert($src1, #$src2, #$src3)", + [], "$dst2 = $dst", S_2op_tc_2_SLOT23> { + bits<5> dst; + bits<5> src1; + bits<6> src2; + bits<6> src3; + bit bit23; + bit bit13; + string ImmOpStr = !cast<string>(ImmOp); + + let bit23 = !if (!eq(ImmOpStr, "u6Imm"), src3{5}, 0); + let bit13 = !if (!eq(ImmOpStr, "u6Imm"), src2{5}, 0); + + let IClass = 0b1000; + + let Inst{27-24} = RegTyBits; + let Inst{23} = bit23; + let Inst{22-21} = src3{4-3}; + let Inst{20-16} = src1; + let Inst{13} = bit13; + let Inst{12-8} = src2{4-0}; + let Inst{7-5} = src3{2-0}; + let Inst{4-0} = dst; + } + +// Rx=insert(Rs,Rtt) +// Rx=insert(Rs,#u5,#U5) +let hasNewValue = 1 in { + def S2_insert_rp : T_S3op_insert <"insert", IntRegs>; + def S2_insert : T_S2op_insert <0b1111, IntRegs, u5Imm>; +} + +// Rxx=insert(Rss,Rtt) +// Rxx=insert(Rss,#u6,#U6) +def S2_insertp_rp : T_S3op_insert<"insert", DoubleRegs>; +def S2_insertp : T_S2op_insert <0b0011, DoubleRegs, u6Imm>; + + +def SDTHexagonINSERT: + SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, + SDTCisInt<0>, SDTCisVT<3, i32>, SDTCisVT<4, i32>]>; +def SDTHexagonINSERTRP: + SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, + SDTCisInt<0>, SDTCisVT<3, i64>]>; + +def HexagonINSERT : SDNode<"HexagonISD::INSERT", SDTHexagonINSERT>; +def HexagonINSERTRP : SDNode<"HexagonISD::INSERTRP", SDTHexagonINSERTRP>; + +def: Pat<(HexagonINSERT I32:$Rs, I32:$Rt, u5ImmPred:$u1, u5ImmPred:$u2), + (S2_insert I32:$Rs, I32:$Rt, u5ImmPred:$u1, 
u5ImmPred:$u2)>; +def: Pat<(HexagonINSERT I64:$Rs, I64:$Rt, u6ImmPred:$u1, u6ImmPred:$u2), + (S2_insertp I64:$Rs, I64:$Rt, u6ImmPred:$u1, u6ImmPred:$u2)>; +def: Pat<(HexagonINSERTRP I32:$Rs, I32:$Rt, I64:$Ru), + (S2_insert_rp I32:$Rs, I32:$Rt, I64:$Ru)>; +def: Pat<(HexagonINSERTRP I64:$Rs, I64:$Rt, I64:$Ru), + (S2_insertp_rp I64:$Rs, I64:$Rt, I64:$Ru)>; + +let AddedComplexity = 100 in +def: Pat<(or (or (shl (HexagonINSERT (i32 (zextloadi8 (add I32:$b, 2))), + (i32 (extloadi8 (add I32:$b, 3))), + 24, 8), + (i32 16)), + (shl (i32 (zextloadi8 (add I32:$b, 1))), (i32 8))), + (zextloadi8 I32:$b)), + (A2_swiz (L2_loadri_io I32:$b, 0))>; + + +//===----------------------------------------------------------------------===// +// Template class for 'extract bitfield' instructions +//===----------------------------------------------------------------------===// +let hasNewValue = 1, hasSideEffects = 0 in +class T_S3op_extract <string mnemonic, bits<2> MinOp> + : SInst <(outs IntRegs:$Rd), (ins IntRegs:$Rs, DoubleRegs:$Rtt), + "$Rd = "#mnemonic#"($Rs, $Rtt)", + [], "", S_3op_tc_2_SLOT23 > { + bits<5> Rd; + bits<5> Rs; + bits<5> Rtt; + + let IClass = 0b1100; + + let Inst{27-22} = 0b100100; + let Inst{20-16} = Rs; + let Inst{12-8} = Rtt; + let Inst{7-6} = MinOp; + let Inst{4-0} = Rd; + } + +let hasSideEffects = 0 in +class T_S2op_extract <string mnemonic, bits<4> RegTyBits, + RegisterClass RC, Operand ImmOp> + : SInst <(outs RC:$dst), (ins RC:$src1, ImmOp:$src2, ImmOp:$src3), + "$dst = "#mnemonic#"($src1, #$src2, #$src3)", + [], "", S_2op_tc_2_SLOT23> { + bits<5> dst; + bits<5> src1; + bits<6> src2; + bits<6> src3; + bit bit23; + bit bit13; + string ImmOpStr = !cast<string>(ImmOp); + + let bit23 = !if (!eq(ImmOpStr, "u6Imm"), src3{5}, + !if (!eq(mnemonic, "extractu"), 0, 1)); + + let bit13 = !if (!eq(ImmOpStr, "u6Imm"), src2{5}, 0); + + let IClass = 0b1000; + + let Inst{27-24} = RegTyBits; + let Inst{23} = bit23; + let Inst{22-21} = src3{4-3}; + let Inst{20-16} = src1; + let Inst{13} = bit13; + let Inst{12-8} = src2{4-0}; + let Inst{7-5} = src3{2-0}; + let Inst{4-0} = dst; + } + +// Extract bitfield + +// Rdd=extractu(Rss,Rtt) +// Rdd=extractu(Rss,#u6,#U6) +def S2_extractup_rp : T_S3op_64 < "extractu", 0b00, 0b000, 0>; +def S2_extractup : T_S2op_extract <"extractu", 0b0001, DoubleRegs, u6Imm>; + +// Rd=extractu(Rs,Rtt) +// Rd=extractu(Rs,#u5,#U5) +let hasNewValue = 1 in { + def S2_extractu_rp : T_S3op_extract<"extractu", 0b00>; + def S2_extractu : T_S2op_extract <"extractu", 0b1101, IntRegs, u5Imm>; +} + +def SDTHexagonEXTRACTU: + SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<1>, + SDTCisVT<2, i32>, SDTCisVT<3, i32>]>; +def SDTHexagonEXTRACTURP: + SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<1>, + SDTCisVT<2, i64>]>; + +def HexagonEXTRACTU : SDNode<"HexagonISD::EXTRACTU", SDTHexagonEXTRACTU>; +def HexagonEXTRACTURP : SDNode<"HexagonISD::EXTRACTURP", SDTHexagonEXTRACTURP>; + +def: Pat<(HexagonEXTRACTU I32:$src1, u5ImmPred:$src2, u5ImmPred:$src3), + (S2_extractu I32:$src1, u5ImmPred:$src2, u5ImmPred:$src3)>; +def: Pat<(HexagonEXTRACTU I64:$src1, u6ImmPred:$src2, u6ImmPred:$src3), + (S2_extractup I64:$src1, u6ImmPred:$src2, u6ImmPred:$src3)>; +def: Pat<(HexagonEXTRACTURP I32:$src1, I64:$src2), + (S2_extractu_rp I32:$src1, I64:$src2)>; +def: Pat<(HexagonEXTRACTURP I64:$src1, I64:$src2), + (S2_extractup_rp I64:$src1, I64:$src2)>; + +// Change the sign of the immediate for Rd=-mpyi(Rs,#u8) +def: Pat<(mul (i32 IntRegs:$src1), (ineg n8ImmPred:$src2)), + (M2_mpysin 
IntRegs:$src1, u8ImmPred:$src2)>; + +//===----------------------------------------------------------------------===// +// :raw for of tableindx[bdhw] insns +//===----------------------------------------------------------------------===// + +let hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +class tableidxRaw<string OpStr, bits<2>MinOp> + : SInst <(outs IntRegs:$Rx), + (ins IntRegs:$_dst_, IntRegs:$Rs, u4Imm:$u4, s6Imm:$S6), + "$Rx = "#OpStr#"($Rs, #$u4, #$S6):raw", + [], "$Rx = $_dst_" > { + bits<5> Rx; + bits<5> Rs; + bits<4> u4; + bits<6> S6; + + let IClass = 0b1000; + + let Inst{27-24} = 0b0111; + let Inst{23-22} = MinOp; + let Inst{21} = u4{3}; + let Inst{20-16} = Rs; + let Inst{13-8} = S6; + let Inst{7-5} = u4{2-0}; + let Inst{4-0} = Rx; + } + +def S2_tableidxb : tableidxRaw<"tableidxb", 0b00>; +def S2_tableidxh : tableidxRaw<"tableidxh", 0b01>; +def S2_tableidxw : tableidxRaw<"tableidxw", 0b10>; +def S2_tableidxd : tableidxRaw<"tableidxd", 0b11>; + +//===----------------------------------------------------------------------===// +// Template class for 'table index' instructions which are assembler mapped +// to their :raw format. +//===----------------------------------------------------------------------===// +let isPseudo = 1 in +class tableidx_goodsyntax <string mnemonic> + : SInst <(outs IntRegs:$Rx), + (ins IntRegs:$_dst_, IntRegs:$Rs, u4Imm:$u4, u5Imm:$u5), + "$Rx = "#mnemonic#"($Rs, #$u4, #$u5)", + [], "$Rx = $_dst_" >; + +def S2_tableidxb_goodsyntax : tableidx_goodsyntax<"tableidxb">; +def S2_tableidxh_goodsyntax : tableidx_goodsyntax<"tableidxh">; +def S2_tableidxw_goodsyntax : tableidx_goodsyntax<"tableidxw">; +def S2_tableidxd_goodsyntax : tableidx_goodsyntax<"tableidxd">; + +//===----------------------------------------------------------------------===// +// V3 Instructions + +//===----------------------------------------------------------------------===// + +include "HexagonInstrInfoV3.td" + +//===----------------------------------------------------------------------===// +// V3 Instructions - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// V4 Instructions + +//===----------------------------------------------------------------------===// + +include "HexagonInstrInfoV4.td" + +//===----------------------------------------------------------------------===// +// V4 Instructions - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// V5 Instructions + +//===----------------------------------------------------------------------===// + +include "HexagonInstrInfoV5.td" + +//===----------------------------------------------------------------------===// +// V5 Instructions - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// V60 Instructions + +//===----------------------------------------------------------------------===// + +include "HexagonInstrInfoV60.td" + +//===----------------------------------------------------------------------===// +// V60 Instructions - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// ALU32/64/Vector + 
+//===----------------------------------------------------------------------===/// + +include "HexagonInstrInfoVector.td" + +include "HexagonInstrAlias.td" diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV3.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV3.td new file mode 100644 index 0000000..84d035d --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV3.td @@ -0,0 +1,266 @@ +//=- HexagonInstrInfoV3.td - Target Desc. for Hexagon Target -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Hexagon V3 instructions in TableGen format. +// +//===----------------------------------------------------------------------===// + +def callv3 : SDNode<"HexagonISD::CALLv3", SDT_SPCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; + +def callv3nr : SDNode<"HexagonISD::CALLv3nr", SDT_SPCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; + +//===----------------------------------------------------------------------===// +// J + +//===----------------------------------------------------------------------===// +// Call subroutine. +let isCall = 1, hasSideEffects = 1, Defs = VolatileV3.Regs, isPredicable = 1, + isExtended = 0, isExtendable = 1, opExtendable = 0, + isExtentSigned = 1, opExtentBits = 24, opExtentAlign = 2 in +class T_Call<string ExtStr> + : JInst<(outs), (ins calltarget:$dst), + "call " # ExtStr # "$dst", [], "", J_tc_2early_SLOT23> { + let BaseOpcode = "call"; + bits<24> dst; + + let IClass = 0b0101; + let Inst{27-25} = 0b101; + let Inst{24-16,13-1} = dst{23-2}; + let Inst{0} = 0b0; +} + +let isCall = 1, hasSideEffects = 1, Defs = VolatileV3.Regs, isPredicated = 1, + isExtended = 0, isExtendable = 1, opExtendable = 1, + isExtentSigned = 1, opExtentBits = 17, opExtentAlign = 2 in +class T_CallPred<bit IfTrue, string ExtStr> + : JInst<(outs), (ins PredRegs:$Pu, calltarget:$dst), + CondStr<"$Pu", IfTrue, 0>.S # "call " # ExtStr # "$dst", + [], "", J_tc_2early_SLOT23> { + let BaseOpcode = "call"; + let isPredicatedFalse = !if(IfTrue,0,1); + bits<2> Pu; + bits<17> dst; + + let IClass = 0b0101; + let Inst{27-24} = 0b1101; + let Inst{23-22,20-16,13,7-1} = dst{16-2}; + let Inst{21} = !if(IfTrue,0,1); + let Inst{11} = 0b0; + let Inst{9-8} = Pu; +} + +multiclass T_Calls<string ExtStr> { + def NAME : T_Call<ExtStr>; + def t : T_CallPred<1, ExtStr>; + def f : T_CallPred<0, ExtStr>; +} + +defm J2_call: T_Calls<"">, PredRel; + +let isCodeGenOnly = 1, isCall = 1, hasSideEffects = 1, Defs = VolatileV3.Regs in +def CALLv3nr : T_Call<"">, PredRel; + +//===----------------------------------------------------------------------===// +// J - +//===----------------------------------------------------------------------===// + + +//===----------------------------------------------------------------------===// +// JR + +//===----------------------------------------------------------------------===// +// Call subroutine from register. + +let isCodeGenOnly = 1, Defs = VolatileV3.Regs in { + def CALLRv3nr : JUMPR_MISC_CALLR<0, 1>; // Call, no return. 
+} + +//===----------------------------------------------------------------------===// +// JR - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// ALU64/ALU + +//===----------------------------------------------------------------------===// + +let Defs = [USR_OVF], Itinerary = ALU64_tc_2_SLOT23 in +def A2_addpsat : T_ALU64_arith<"add", 0b011, 0b101, 1, 0, 1>; + +class T_ALU64_addsp_hl<string suffix, bits<3> MinOp> + : T_ALU64_rr<"add", suffix, 0b0011, 0b011, MinOp, 0, 0, "">; + +def A2_addspl : T_ALU64_addsp_hl<":raw:lo", 0b110>; +def A2_addsph : T_ALU64_addsp_hl<":raw:hi", 0b111>; + +let hasSideEffects = 0, isAsmParserOnly = 1 in +def A2_addsp : ALU64_rr<(outs DoubleRegs:$Rd), + (ins IntRegs:$Rs, DoubleRegs:$Rt), "$Rd = add($Rs, $Rt)", + [(set (i64 DoubleRegs:$Rd), (i64 (add (i64 (sext (i32 IntRegs:$Rs))), + (i64 DoubleRegs:$Rt))))], + "", ALU64_tc_1_SLOT23>; + + +let hasSideEffects = 0 in +class T_XTYPE_MIN_MAX_P<bit isMax, bit isUnsigned> + : ALU64Inst<(outs DoubleRegs:$Rd), (ins DoubleRegs:$Rt, DoubleRegs:$Rs), + "$Rd = "#!if(isMax,"max","min")#!if(isUnsigned,"u","") + #"($Rt, $Rs)", [], "", ALU64_tc_2_SLOT23> { + bits<5> Rd; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1101; + + let Inst{27-23} = 0b00111; + let Inst{22-21} = !if(isMax, 0b10, 0b01); + let Inst{20-16} = !if(isMax, Rt, Rs); + let Inst{12-8} = !if(isMax, Rs, Rt); + let Inst{7} = 0b1; + let Inst{6} = !if(isMax, 0b0, 0b1); + let Inst{5} = isUnsigned; + let Inst{4-0} = Rd; +} + +def A2_minp : T_XTYPE_MIN_MAX_P<0, 0>; +def A2_minup : T_XTYPE_MIN_MAX_P<0, 1>; +def A2_maxp : T_XTYPE_MIN_MAX_P<1, 0>; +def A2_maxup : T_XTYPE_MIN_MAX_P<1, 1>; + +multiclass MinMax_pats_p<PatFrag Op, InstHexagon Inst, InstHexagon SwapInst> { + defm: T_MinMax_pats<Op, DoubleRegs, i64, Inst, SwapInst>; +} + +let AddedComplexity = 200 in { + defm: MinMax_pats_p<setge, A2_maxp, A2_minp>; + defm: MinMax_pats_p<setgt, A2_maxp, A2_minp>; + defm: MinMax_pats_p<setle, A2_minp, A2_maxp>; + defm: MinMax_pats_p<setlt, A2_minp, A2_maxp>; + defm: MinMax_pats_p<setuge, A2_maxup, A2_minup>; + defm: MinMax_pats_p<setugt, A2_maxup, A2_minup>; + defm: MinMax_pats_p<setule, A2_minup, A2_maxup>; + defm: MinMax_pats_p<setult, A2_minup, A2_maxup>; +} + +//===----------------------------------------------------------------------===// +// ALU64/ALU - +//===----------------------------------------------------------------------===// + + + + +//def : Pat <(brcond (i1 (seteq (i32 IntRegs:$src1), 0)), bb:$offset), +// (JMP_RegEzt (i32 IntRegs:$src1), bb:$offset)>; + +//def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), 0)), bb:$offset), +// (JMP_RegNzt (i32 IntRegs:$src1), bb:$offset)>; + +//def : Pat <(brcond (i1 (setle (i32 IntRegs:$src1), 0)), bb:$offset), +// (JMP_RegLezt (i32 IntRegs:$src1), bb:$offset)>; + +//def : Pat <(brcond (i1 (setge (i32 IntRegs:$src1), 0)), bb:$offset), +// (JMP_RegGezt (i32 IntRegs:$src1), bb:$offset)>; + +//def : Pat <(brcond (i1 (setgt (i32 IntRegs:$src1), -1)), bb:$offset), +// (JMP_RegGezt (i32 IntRegs:$src1), bb:$offset)>; + +// Map call instruction +def : Pat<(callv3 (i32 IntRegs:$dst)), + (J2_callr (i32 IntRegs:$dst))>; +def : Pat<(callv3 tglobaladdr:$dst), + (J2_call tglobaladdr:$dst)>; +def : Pat<(callv3 texternalsym:$dst), + (J2_call texternalsym:$dst)>; +def : Pat<(callv3 tglobaltlsaddr:$dst), + (J2_call tglobaltlsaddr:$dst)>; + +def : Pat<(callv3nr (i32 IntRegs:$dst)), + (CALLRv3nr (i32 IntRegs:$dst))>; +def : 
Pat<(callv3nr tglobaladdr:$dst), + (CALLv3nr tglobaladdr:$dst)>; +def : Pat<(callv3nr texternalsym:$dst), + (CALLv3nr texternalsym:$dst)>; + +//===----------------------------------------------------------------------===// +// :raw form of vrcmpys:hi/lo insns +//===----------------------------------------------------------------------===// +// Vector reduce complex multiply by scalar. +let Defs = [USR_OVF], hasSideEffects = 0 in +class T_vrcmpRaw<string HiLo, bits<3>MajOp>: + MInst<(outs DoubleRegs:$Rdd), + (ins DoubleRegs:$Rss, DoubleRegs:$Rtt), + "$Rdd = vrcmpys($Rss, $Rtt):<<1:sat:raw:"#HiLo, []> { + bits<5> Rdd; + bits<5> Rss; + bits<5> Rtt; + + let IClass = 0b1110; + + let Inst{27-24} = 0b1000; + let Inst{23-21} = MajOp; + let Inst{20-16} = Rss; + let Inst{12-8} = Rtt; + let Inst{7-5} = 0b100; + let Inst{4-0} = Rdd; +} + +def M2_vrcmpys_s1_h: T_vrcmpRaw<"hi", 0b101>; +def M2_vrcmpys_s1_l: T_vrcmpRaw<"lo", 0b111>; + +// Assembler mapped to M2_vrcmpys_s1_h or M2_vrcmpys_s1_l +let hasSideEffects = 0, isAsmParserOnly = 1 in +def M2_vrcmpys_s1 + : MInst<(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss, IntRegs:$Rt), + "$Rdd=vrcmpys($Rss,$Rt):<<1:sat">; + +// Vector reduce complex multiply by scalar with accumulation. +let Defs = [USR_OVF], hasSideEffects = 0 in +class T_vrcmpys_acc<string HiLo, bits<3>MajOp>: + MInst <(outs DoubleRegs:$Rxx), + (ins DoubleRegs:$_src_, DoubleRegs:$Rss, DoubleRegs:$Rtt), + "$Rxx += vrcmpys($Rss, $Rtt):<<1:sat:raw:"#HiLo, [], + "$Rxx = $_src_"> { + bits<5> Rxx; + bits<5> Rss; + bits<5> Rtt; + + let IClass = 0b1110; + + let Inst{27-24} = 0b1010; + let Inst{23-21} = MajOp; + let Inst{20-16} = Rss; + let Inst{12-8} = Rtt; + let Inst{7-5} = 0b100; + let Inst{4-0} = Rxx; + } + +def M2_vrcmpys_acc_s1_h: T_vrcmpys_acc<"hi", 0b101>; +def M2_vrcmpys_acc_s1_l: T_vrcmpys_acc<"lo", 0b111>; + +// Assembler mapped to M2_vrcmpys_acc_s1_h or M2_vrcmpys_acc_s1_l + +let isAsmParserOnly = 1 in +def M2_vrcmpys_acc_s1 + : MInst <(outs DoubleRegs:$dst), + (ins DoubleRegs:$dst2, DoubleRegs:$src1, IntRegs:$src2), + "$dst += vrcmpys($src1, $src2):<<1:sat", [], + "$dst2 = $dst">; + +def M2_vrcmpys_s1rp_h : T_MType_vrcmpy <"vrcmpys", 0b101, 0b110, 1>; +def M2_vrcmpys_s1rp_l : T_MType_vrcmpy <"vrcmpys", 0b101, 0b111, 0>; + +// Assembler mapped to M2_vrcmpys_s1rp_h or M2_vrcmpys_s1rp_l +let isAsmParserOnly = 1 in +def M2_vrcmpys_s1rp + : MInst <(outs IntRegs:$Rd), (ins DoubleRegs:$Rss, IntRegs:$Rt), + "$Rd=vrcmpys($Rss,$Rt):<<1:rnd:sat">; + + +// S2_cabacdecbin: Cabac decode bin. +let Defs = [P0], isPredicateLate = 1, Itinerary = S_3op_tc_1_SLOT23 in +def S2_cabacdecbin : T_S3op_64 < "decbin", 0b11, 0b110, 0>; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td new file mode 100644 index 0000000..87d6b35 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td @@ -0,0 +1,4251 @@ +//=- HexagonInstrInfoV4.td - Target Desc. for Hexagon Target -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Hexagon V4 instructions in TableGen format. 
+// +//===----------------------------------------------------------------------===// + +def DuplexIClass0: InstDuplex < 0 >; +def DuplexIClass1: InstDuplex < 1 >; +def DuplexIClass2: InstDuplex < 2 >; +let isExtendable = 1 in { + def DuplexIClass3: InstDuplex < 3 >; + def DuplexIClass4: InstDuplex < 4 >; + def DuplexIClass5: InstDuplex < 5 >; + def DuplexIClass6: InstDuplex < 6 >; + def DuplexIClass7: InstDuplex < 7 >; +} +def DuplexIClass8: InstDuplex < 8 >; +def DuplexIClass9: InstDuplex < 9 >; +def DuplexIClassA: InstDuplex < 0xA >; +def DuplexIClassB: InstDuplex < 0xB >; +def DuplexIClassC: InstDuplex < 0xC >; +def DuplexIClassD: InstDuplex < 0xD >; +def DuplexIClassE: InstDuplex < 0xE >; +def DuplexIClassF: InstDuplex < 0xF >; + +def addrga: PatLeaf<(i32 AddrGA:$Addr)>; +def addrgp: PatLeaf<(i32 AddrGP:$Addr)>; + +let hasSideEffects = 0 in +class T_Immext<Operand ImmType> + : EXTENDERInst<(outs), (ins ImmType:$imm), + "immext(#$imm)", []> { + bits<32> imm; + let IClass = 0b0000; + + let Inst{27-16} = imm{31-20}; + let Inst{13-0} = imm{19-6}; + } + +def A4_ext : T_Immext<u26_6Imm>; +let isCodeGenOnly = 1 in { + let isBranch = 1 in + def A4_ext_b : T_Immext<brtarget>; + let isCall = 1 in + def A4_ext_c : T_Immext<calltarget>; + def A4_ext_g : T_Immext<globaladdress>; +} + +def BITPOS32 : SDNodeXForm<imm, [{ + // Return the bit position we will set [0-31]. + // As an SDNode. + int32_t imm = N->getSExtValue(); + return XformMskToBitPosU5Imm(imm, SDLoc(N)); +}]>; + + +// Hexagon V4 Architecture spec defines 8 instruction classes: +// LD ST ALU32 XTYPE J JR MEMOP NV CR SYSTEM(system is not implemented in the +// compiler) + +// LD Instructions: +// ======================================== +// Loads (8/16/32/64 bit) +// Deallocframe + +// ST Instructions: +// ======================================== +// Stores (8/16/32/64 bit) +// Allocframe + +// ALU32 Instructions: +// ======================================== +// Arithmetic / Logical (32 bit) +// Vector Halfword + +// XTYPE Instructions (32/64 bit): +// ======================================== +// Arithmetic, Logical, Bit Manipulation +// Multiply (Integer, Fractional, Complex) +// Permute / Vector Permute Operations +// Predicate Operations +// Shift / Shift with Add/Sub/Logical +// Vector Byte ALU +// Vector Halfword (ALU, Shift, Multiply) +// Vector Word (ALU, Shift) + +// J Instructions: +// ======================================== +// Jump/Call PC-relative + +// JR Instructions: +// ======================================== +// Jump/Call Register + +// MEMOP Instructions: +// ======================================== +// Operation on memory (8/16/32 bit) + +// NV Instructions: +// ======================================== +// New-value Jumps +// New-value Stores + +// CR Instructions: +// ======================================== +// Control-Register Transfers +// Hardware Loop Setup +// Predicate Logicals & Reductions + +// SYSTEM Instructions (not implemented in the compiler): +// ======================================== +// Prefetch +// Cache Maintenance +// Bus Operations + + +//===----------------------------------------------------------------------===// +// ALU32 + +//===----------------------------------------------------------------------===// + +class T_ALU32_3op_not<string mnemonic, bits<3> MajOp, bits<3> MinOp, + bit OpsRev> + : T_ALU32_3op<mnemonic, MajOp, MinOp, OpsRev, 0> { + let AsmString = "$Rd = "#mnemonic#"($Rs, ~$Rt)"; +} + +let BaseOpcode = "andn_rr", CextOpcode = "andn" in +def A4_andn : T_ALU32_3op_not<"and", 
0b001, 0b100, 1>; +let BaseOpcode = "orn_rr", CextOpcode = "orn" in +def A4_orn : T_ALU32_3op_not<"or", 0b001, 0b101, 1>; + +let CextOpcode = "rcmp.eq" in +def A4_rcmpeq : T_ALU32_3op<"cmp.eq", 0b011, 0b010, 0, 1>; +let CextOpcode = "!rcmp.eq" in +def A4_rcmpneq : T_ALU32_3op<"!cmp.eq", 0b011, 0b011, 0, 1>; + +def C4_cmpneq : T_ALU32_3op_cmp<"!cmp.eq", 0b00, 1, 1>; +def C4_cmplte : T_ALU32_3op_cmp<"!cmp.gt", 0b10, 1, 0>; +def C4_cmplteu : T_ALU32_3op_cmp<"!cmp.gtu", 0b11, 1, 0>; + +// Pats for instruction selection. + +// A class to embed the usual comparison patfrags within a zext to i32. +// The seteq/setne frags use "lhs" and "rhs" as operands, so use the same +// names, or else the frag's "body" won't match the operands. +class CmpInReg<PatFrag Op> + : PatFrag<(ops node:$lhs, node:$rhs),(i32 (zext (i1 Op.Fragment)))>; + +def: T_cmp32_rr_pat<A4_rcmpeq, CmpInReg<seteq>, i32>; +def: T_cmp32_rr_pat<A4_rcmpneq, CmpInReg<setne>, i32>; + +def: T_cmp32_rr_pat<C4_cmpneq, setne, i1>; +def: T_cmp32_rr_pat<C4_cmplteu, setule, i1>; + +def: T_cmp32_rr_pat<C4_cmplteu, RevCmp<setuge>, i1>; + +class T_CMP_rrbh<string mnemonic, bits<3> MinOp, bit IsComm> + : SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, IntRegs:$Rt), + "$Pd = "#mnemonic#"($Rs, $Rt)", [], "", S_3op_tc_2early_SLOT23>, + ImmRegRel { + let InputType = "reg"; + let CextOpcode = mnemonic; + let isCompare = 1; + let isCommutable = IsComm; + let hasSideEffects = 0; + + bits<2> Pd; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1100; + let Inst{27-21} = 0b0111110; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + let Inst{7-5} = MinOp; + let Inst{1-0} = Pd; +} + +def A4_cmpbeq : T_CMP_rrbh<"cmpb.eq", 0b110, 1>; +def A4_cmpbgt : T_CMP_rrbh<"cmpb.gt", 0b010, 0>; +def A4_cmpbgtu : T_CMP_rrbh<"cmpb.gtu", 0b111, 0>; +def A4_cmpheq : T_CMP_rrbh<"cmph.eq", 0b011, 1>; +def A4_cmphgt : T_CMP_rrbh<"cmph.gt", 0b100, 0>; +def A4_cmphgtu : T_CMP_rrbh<"cmph.gtu", 0b101, 0>; + +let AddedComplexity = 100 in { + def: Pat<(i1 (seteq (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), + 255), 0)), + (A4_cmpbeq IntRegs:$Rs, IntRegs:$Rt)>; + def: Pat<(i1 (setne (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), + 255), 0)), + (C2_not (A4_cmpbeq IntRegs:$Rs, IntRegs:$Rt))>; + def: Pat<(i1 (seteq (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), + 65535), 0)), + (A4_cmpheq IntRegs:$Rs, IntRegs:$Rt)>; + def: Pat<(i1 (setne (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), + 65535), 0)), + (C2_not (A4_cmpheq IntRegs:$Rs, IntRegs:$Rt))>; +} + +class T_CMP_ribh<string mnemonic, bits<2> MajOp, bit IsHalf, bit IsComm, + Operand ImmType, bit IsImmExt, bit IsImmSigned, int ImmBits> + : ALU64Inst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, ImmType:$Imm), + "$Pd = "#mnemonic#"($Rs, #$Imm)", [], "", ALU64_tc_2early_SLOT23>, + ImmRegRel { + let InputType = "imm"; + let CextOpcode = mnemonic; + let isCompare = 1; + let isCommutable = IsComm; + let hasSideEffects = 0; + let isExtendable = IsImmExt; + let opExtendable = !if (IsImmExt, 2, 0); + let isExtentSigned = IsImmSigned; + let opExtentBits = ImmBits; + + bits<2> Pd; + bits<5> Rs; + bits<8> Imm; + + let IClass = 0b1101; + let Inst{27-24} = 0b1101; + let Inst{22-21} = MajOp; + let Inst{20-16} = Rs; + let Inst{12-5} = Imm; + let Inst{4} = 0b0; + let Inst{3} = IsHalf; + let Inst{1-0} = Pd; +} + +def A4_cmpbeqi : T_CMP_ribh<"cmpb.eq", 0b00, 0, 1, u8Imm, 0, 0, 8>; +def A4_cmpbgti : T_CMP_ribh<"cmpb.gt", 0b01, 0, 0, s8Imm, 0, 1, 8>; +def A4_cmpbgtui : T_CMP_ribh<"cmpb.gtu", 0b10, 0, 0, u7Ext, 1, 0, 7>; +def A4_cmpheqi : T_CMP_ribh<"cmph.eq", 0b00, 
1, 1, s8Ext, 1, 1, 8>; +def A4_cmphgti : T_CMP_ribh<"cmph.gt", 0b01, 1, 0, s8Ext, 1, 1, 8>; +def A4_cmphgtui : T_CMP_ribh<"cmph.gtu", 0b10, 1, 0, u7Ext, 1, 0, 7>; + +class T_RCMP_EQ_ri<string mnemonic, bit IsNeg> + : ALU32_ri<(outs IntRegs:$Rd), (ins IntRegs:$Rs, s8Ext:$s8), + "$Rd = "#mnemonic#"($Rs, #$s8)", [], "", ALU32_2op_tc_1_SLOT0123>, + ImmRegRel { + let InputType = "imm"; + let CextOpcode = !if (IsNeg, "!rcmp.eq", "rcmp.eq"); + let isExtendable = 1; + let opExtendable = 2; + let isExtentSigned = 1; + let opExtentBits = 8; + let hasNewValue = 1; + + bits<5> Rd; + bits<5> Rs; + bits<8> s8; + + let IClass = 0b0111; + let Inst{27-24} = 0b0011; + let Inst{22} = 0b1; + let Inst{21} = IsNeg; + let Inst{20-16} = Rs; + let Inst{13} = 0b1; + let Inst{12-5} = s8; + let Inst{4-0} = Rd; +} + +def A4_rcmpeqi : T_RCMP_EQ_ri<"cmp.eq", 0>; +def A4_rcmpneqi : T_RCMP_EQ_ri<"!cmp.eq", 1>; + +def: Pat<(i32 (zext (i1 (seteq (i32 IntRegs:$Rs), s32ImmPred:$s8)))), + (A4_rcmpeqi IntRegs:$Rs, s32ImmPred:$s8)>; +def: Pat<(i32 (zext (i1 (setne (i32 IntRegs:$Rs), s32ImmPred:$s8)))), + (A4_rcmpneqi IntRegs:$Rs, s32ImmPred:$s8)>; + +// Preserve the S2_tstbit_r generation +def: Pat<(i32 (zext (i1 (setne (i32 (and (i32 (shl 1, (i32 IntRegs:$src2))), + (i32 IntRegs:$src1))), 0)))), + (C2_muxii (S2_tstbit_r IntRegs:$src1, IntRegs:$src2), 1, 0)>; + +//===----------------------------------------------------------------------===// +// ALU32 - +//===----------------------------------------------------------------------===// + + +//===----------------------------------------------------------------------===// +// ALU32/PERM + +//===----------------------------------------------------------------------===// + +// Combine a word and an immediate into a register pair. +let hasSideEffects = 0, isExtentSigned = 1, isExtendable = 1, + opExtentBits = 8 in +class T_Combine1 <bits<2> MajOp, dag ins, string AsmStr> + : ALU32Inst <(outs DoubleRegs:$Rdd), ins, AsmStr> { + bits<5> Rdd; + bits<5> Rs; + bits<8> s8; + + let IClass = 0b0111; + let Inst{27-24} = 0b0011; + let Inst{22-21} = MajOp; + let Inst{20-16} = Rs; + let Inst{13} = 0b1; + let Inst{12-5} = s8; + let Inst{4-0} = Rdd; + } + +let opExtendable = 2 in +def A4_combineri : T_Combine1<0b00, (ins IntRegs:$Rs, s8Ext:$s8), + "$Rdd = combine($Rs, #$s8)">; + +let opExtendable = 1 in +def A4_combineir : T_Combine1<0b01, (ins s8Ext:$s8, IntRegs:$Rs), + "$Rdd = combine(#$s8, $Rs)">; + +// The complexity of the combines involving immediates should be greater +// than the complexity of the combine with two registers. +let AddedComplexity = 50 in { +def: Pat<(HexagonCOMBINE IntRegs:$r, s32ImmPred:$i), + (A4_combineri IntRegs:$r, s32ImmPred:$i)>; + +def: Pat<(HexagonCOMBINE s32ImmPred:$i, IntRegs:$r), + (A4_combineir s32ImmPred:$i, IntRegs:$r)>; +} + +// A4_combineii: Set two small immediates. +let hasSideEffects = 0, isExtendable = 1, opExtentBits = 6, opExtendable = 2 in +def A4_combineii: ALU32Inst<(outs DoubleRegs:$Rdd), (ins s8Imm:$s8, u6Ext:$U6), + "$Rdd = combine(#$s8, #$U6)"> { + bits<5> Rdd; + bits<8> s8; + bits<6> U6; + + let IClass = 0b0111; + let Inst{27-23} = 0b11001; + let Inst{20-16} = U6{5-1}; + let Inst{13} = U6{0}; + let Inst{12-5} = s8; + let Inst{4-0} = Rdd; + } + +// The complexity of the combine with two immediates should be greater than +// the complexity of a combine involving a register. 
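For context on the pattern that follows: combine(#s8, #U6) writes its first operand into the upper word of the destination pair and its second into the lower word, and giving the two-immediate form the higher AddedComplexity makes instruction selection prefer it over first materializing one of the immediates in a register. The same layout is what the Zext64 fragment defined further below relies on (a zero upper word gives the zero-extension). A small illustrative model of the packing (helper name and values invented for the example):

#include <cassert>
#include <cstdint>

// Models "Rdd = combine(a, b)": a goes to the upper 32 bits, b to the lower.
static uint64_t combine_ii(int32_t hi, uint32_t lo) {
  return (uint64_t(uint32_t(hi)) << 32) | uint64_t(lo);
}

int main() {
  assert(combine_ii(0, 0x2A) == 0x000000000000002Aull);  // zero upper word: zext-style
  assert(combine_ii(-1, 7)   == 0xFFFFFFFF00000007ull);
  return 0;
}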
+let AddedComplexity = 75 in +def: Pat<(HexagonCOMBINE s8ImmPred:$s8, u32ImmPred:$u6), + (A4_combineii imm:$s8, imm:$u6)>; + +//===----------------------------------------------------------------------===// +// ALU32/PERM - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// LD + +//===----------------------------------------------------------------------===// + +def Zext64: OutPatFrag<(ops node:$Rs), + (i64 (A4_combineir 0, (i32 $Rs)))>; +def Sext64: OutPatFrag<(ops node:$Rs), + (i64 (A2_sxtw (i32 $Rs)))>; + +// Patterns to generate indexed loads with different forms of the address: +// - frameindex, +// - base + offset, +// - base (without offset). +multiclass Loadxm_pat<PatFrag Load, ValueType VT, PatFrag ValueMod, + PatLeaf ImmPred, InstHexagon MI> { + def: Pat<(VT (Load AddrFI:$fi)), + (VT (ValueMod (MI AddrFI:$fi, 0)))>; + def: Pat<(VT (Load (add AddrFI:$fi, ImmPred:$Off))), + (VT (ValueMod (MI AddrFI:$fi, imm:$Off)))>; + def: Pat<(VT (Load (add IntRegs:$Rs, ImmPred:$Off))), + (VT (ValueMod (MI IntRegs:$Rs, imm:$Off)))>; + def: Pat<(VT (Load (i32 IntRegs:$Rs))), + (VT (ValueMod (MI IntRegs:$Rs, 0)))>; +} + +defm: Loadxm_pat<extloadi1, i64, Zext64, s32_0ImmPred, L2_loadrub_io>; +defm: Loadxm_pat<extloadi8, i64, Zext64, s32_0ImmPred, L2_loadrub_io>; +defm: Loadxm_pat<extloadi16, i64, Zext64, s31_1ImmPred, L2_loadruh_io>; +defm: Loadxm_pat<zextloadi1, i64, Zext64, s32_0ImmPred, L2_loadrub_io>; +defm: Loadxm_pat<zextloadi8, i64, Zext64, s32_0ImmPred, L2_loadrub_io>; +defm: Loadxm_pat<zextloadi16, i64, Zext64, s31_1ImmPred, L2_loadruh_io>; +defm: Loadxm_pat<sextloadi8, i64, Sext64, s32_0ImmPred, L2_loadrb_io>; +defm: Loadxm_pat<sextloadi16, i64, Sext64, s31_1ImmPred, L2_loadrh_io>; + +// Map Rdd = anyext(Rs) -> Rdd = combine(#0, Rs). +def: Pat<(i64 (anyext (i32 IntRegs:$src1))), (Zext64 IntRegs:$src1)>; + +//===----------------------------------------------------------------------===// +// Template class for load instructions with Absolute set addressing mode. 
+//===----------------------------------------------------------------------===// +let isExtended = 1, opExtendable = 2, opExtentBits = 6, addrMode = AbsoluteSet, + hasSideEffects = 0 in +class T_LD_abs_set<string mnemonic, RegisterClass RC, bits<4>MajOp>: + LDInst<(outs RC:$dst1, IntRegs:$dst2), + (ins u6Ext:$addr), + "$dst1 = "#mnemonic#"($dst2 = #$addr)", + []> { + bits<7> name; + bits<5> dst1; + bits<5> dst2; + bits<6> addr; + + let IClass = 0b1001; + let Inst{27-25} = 0b101; + let Inst{24-21} = MajOp; + let Inst{13-12} = 0b01; + let Inst{4-0} = dst1; + let Inst{20-16} = dst2; + let Inst{11-8} = addr{5-2}; + let Inst{6-5} = addr{1-0}; +} + +let accessSize = ByteAccess, hasNewValue = 1 in { + def L4_loadrb_ap : T_LD_abs_set <"memb", IntRegs, 0b1000>; + def L4_loadrub_ap : T_LD_abs_set <"memub", IntRegs, 0b1001>; +} + +let accessSize = HalfWordAccess, hasNewValue = 1 in { + def L4_loadrh_ap : T_LD_abs_set <"memh", IntRegs, 0b1010>; + def L4_loadruh_ap : T_LD_abs_set <"memuh", IntRegs, 0b1011>; + def L4_loadbsw2_ap : T_LD_abs_set <"membh", IntRegs, 0b0001>; + def L4_loadbzw2_ap : T_LD_abs_set <"memubh", IntRegs, 0b0011>; +} + +let accessSize = WordAccess, hasNewValue = 1 in + def L4_loadri_ap : T_LD_abs_set <"memw", IntRegs, 0b1100>; + +let accessSize = WordAccess in { + def L4_loadbzw4_ap : T_LD_abs_set <"memubh", DoubleRegs, 0b0101>; + def L4_loadbsw4_ap : T_LD_abs_set <"membh", DoubleRegs, 0b0111>; +} + +let accessSize = DoubleWordAccess in +def L4_loadrd_ap : T_LD_abs_set <"memd", DoubleRegs, 0b1110>; + +let accessSize = ByteAccess in + def L4_loadalignb_ap : T_LD_abs_set <"memb_fifo", DoubleRegs, 0b0100>; + +let accessSize = HalfWordAccess in +def L4_loadalignh_ap : T_LD_abs_set <"memh_fifo", DoubleRegs, 0b0010>; + +// Load - Indirect with long offset +let InputType = "imm", addrMode = BaseLongOffset, isExtended = 1, +opExtentBits = 6, opExtendable = 3 in +class T_LoadAbsReg <string mnemonic, string CextOp, RegisterClass RC, + bits<4> MajOp> + : LDInst <(outs RC:$dst), (ins IntRegs:$src1, u2Imm:$src2, u6Ext:$src3), + "$dst = "#mnemonic#"($src1<<#$src2 + #$src3)", + [] >, ImmRegShl { + bits<5> dst; + bits<5> src1; + bits<2> src2; + bits<6> src3; + let CextOpcode = CextOp; + let hasNewValue = !if (!eq(!cast<string>(RC), "DoubleRegs"), 0, 1); + + let IClass = 0b1001; + let Inst{27-25} = 0b110; + let Inst{24-21} = MajOp; + let Inst{20-16} = src1; + let Inst{13} = src2{1}; + let Inst{12} = 0b1; + let Inst{11-8} = src3{5-2}; + let Inst{7} = src2{0}; + let Inst{6-5} = src3{1-0}; + let Inst{4-0} = dst; + } + +let accessSize = ByteAccess in { + def L4_loadrb_ur : T_LoadAbsReg<"memb", "LDrib", IntRegs, 0b1000>; + def L4_loadrub_ur : T_LoadAbsReg<"memub", "LDriub", IntRegs, 0b1001>; + def L4_loadalignb_ur : T_LoadAbsReg<"memb_fifo", "LDrib_fifo", + DoubleRegs, 0b0100>; +} + +let accessSize = HalfWordAccess in { + def L4_loadrh_ur : T_LoadAbsReg<"memh", "LDrih", IntRegs, 0b1010>; + def L4_loadruh_ur : T_LoadAbsReg<"memuh", "LDriuh", IntRegs, 0b1011>; + def L4_loadbsw2_ur : T_LoadAbsReg<"membh", "LDribh2", IntRegs, 0b0001>; + def L4_loadbzw2_ur : T_LoadAbsReg<"memubh", "LDriubh2", IntRegs, 0b0011>; + def L4_loadalignh_ur : T_LoadAbsReg<"memh_fifo", "LDrih_fifo", + DoubleRegs, 0b0010>; +} + +let accessSize = WordAccess in { + def L4_loadri_ur : T_LoadAbsReg<"memw", "LDriw", IntRegs, 0b1100>; + def L4_loadbsw4_ur : T_LoadAbsReg<"membh", "LDribh4", DoubleRegs, 0b0111>; + def L4_loadbzw4_ur : T_LoadAbsReg<"memubh", "LDriubh4", DoubleRegs, 0b0101>; +} + +let accessSize = DoubleWordAccess in +def 
L4_loadrd_ur : T_LoadAbsReg<"memd", "LDrid", DoubleRegs, 0b1110>; + + +multiclass T_LoadAbsReg_Pat <PatFrag ldOp, InstHexagon MI, ValueType VT = i32> { + def : Pat <(VT (ldOp (add (shl IntRegs:$src1, u2ImmPred:$src2), + (HexagonCONST32 tglobaladdr:$src3)))), + (MI IntRegs:$src1, u2ImmPred:$src2, tglobaladdr:$src3)>; + def : Pat <(VT (ldOp (add IntRegs:$src1, + (HexagonCONST32 tglobaladdr:$src2)))), + (MI IntRegs:$src1, 0, tglobaladdr:$src2)>; + + def : Pat <(VT (ldOp (add (shl IntRegs:$src1, u2ImmPred:$src2), + (HexagonCONST32 tconstpool:$src3)))), + (MI IntRegs:$src1, u2ImmPred:$src2, tconstpool:$src3)>; + def : Pat <(VT (ldOp (add IntRegs:$src1, + (HexagonCONST32 tconstpool:$src2)))), + (MI IntRegs:$src1, 0, tconstpool:$src2)>; + + def : Pat <(VT (ldOp (add (shl IntRegs:$src1, u2ImmPred:$src2), + (HexagonCONST32 tjumptable:$src3)))), + (MI IntRegs:$src1, u2ImmPred:$src2, tjumptable:$src3)>; + def : Pat <(VT (ldOp (add IntRegs:$src1, + (HexagonCONST32 tjumptable:$src2)))), + (MI IntRegs:$src1, 0, tjumptable:$src2)>; +} + +let AddedComplexity = 60 in { +defm : T_LoadAbsReg_Pat <sextloadi8, L4_loadrb_ur>; +defm : T_LoadAbsReg_Pat <zextloadi8, L4_loadrub_ur>; +defm : T_LoadAbsReg_Pat <extloadi8, L4_loadrub_ur>; + +defm : T_LoadAbsReg_Pat <sextloadi16, L4_loadrh_ur>; +defm : T_LoadAbsReg_Pat <zextloadi16, L4_loadruh_ur>; +defm : T_LoadAbsReg_Pat <extloadi16, L4_loadruh_ur>; + +defm : T_LoadAbsReg_Pat <load, L4_loadri_ur>; +defm : T_LoadAbsReg_Pat <load, L4_loadrd_ur, i64>; +} + +//===----------------------------------------------------------------------===// +// Template classes for the non-predicated load instructions with +// base + register offset addressing mode +//===----------------------------------------------------------------------===// +class T_load_rr <string mnemonic, RegisterClass RC, bits<3> MajOp>: + LDInst<(outs RC:$dst), (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$u2), + "$dst = "#mnemonic#"($src1 + $src2<<#$u2)", + [], "", V4LDST_tc_ld_SLOT01>, ImmRegShl, AddrModeRel { + bits<5> dst; + bits<5> src1; + bits<5> src2; + bits<2> u2; + + let IClass = 0b0011; + + let Inst{27-24} = 0b1010; + let Inst{23-21} = MajOp; + let Inst{20-16} = src1; + let Inst{12-8} = src2; + let Inst{13} = u2{1}; + let Inst{7} = u2{0}; + let Inst{4-0} = dst; + } + +//===----------------------------------------------------------------------===// +// Template classes for the predicated load instructions with +// base + register offset addressing mode +//===----------------------------------------------------------------------===// +let isPredicated = 1 in +class T_pload_rr <string mnemonic, RegisterClass RC, bits<3> MajOp, + bit isNot, bit isPredNew>: + LDInst <(outs RC:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$u2), + !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", + ") ")#"$dst = "#mnemonic#"($src2+$src3<<#$u2)", + [], "", V4LDST_tc_ld_SLOT01>, AddrModeRel { + bits<5> dst; + bits<2> src1; + bits<5> src2; + bits<5> src3; + bits<2> u2; + + let isPredicatedFalse = isNot; + let isPredicatedNew = isPredNew; + + let IClass = 0b0011; + + let Inst{27-26} = 0b00; + let Inst{25} = isPredNew; + let Inst{24} = isNot; + let Inst{23-21} = MajOp; + let Inst{20-16} = src2; + let Inst{12-8} = src3; + let Inst{13} = u2{1}; + let Inst{7} = u2{0}; + let Inst{6-5} = src1; + let Inst{4-0} = dst; + } + +//===----------------------------------------------------------------------===// +// multiclass for load instructions with base + register offset +// addressing mode 
+//===----------------------------------------------------------------------===// +let hasSideEffects = 0, addrMode = BaseRegOffset in +multiclass ld_idxd_shl <string mnemonic, string CextOp, RegisterClass RC, + bits<3> MajOp > { + let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed_shl, + InputType = "reg" in { + let isPredicable = 1 in + def L4_#NAME#_rr : T_load_rr <mnemonic, RC, MajOp>; + + // Predicated + def L4_p#NAME#t_rr : T_pload_rr <mnemonic, RC, MajOp, 0, 0>; + def L4_p#NAME#f_rr : T_pload_rr <mnemonic, RC, MajOp, 1, 0>; + + // Predicated new + def L4_p#NAME#tnew_rr : T_pload_rr <mnemonic, RC, MajOp, 0, 1>; + def L4_p#NAME#fnew_rr : T_pload_rr <mnemonic, RC, MajOp, 1, 1>; + } +} + +let hasNewValue = 1, accessSize = ByteAccess in { + defm loadrb : ld_idxd_shl<"memb", "LDrib", IntRegs, 0b000>; + defm loadrub : ld_idxd_shl<"memub", "LDriub", IntRegs, 0b001>; +} + +let hasNewValue = 1, accessSize = HalfWordAccess in { + defm loadrh : ld_idxd_shl<"memh", "LDrih", IntRegs, 0b010>; + defm loadruh : ld_idxd_shl<"memuh", "LDriuh", IntRegs, 0b011>; +} + +let hasNewValue = 1, accessSize = WordAccess in +defm loadri : ld_idxd_shl<"memw", "LDriw", IntRegs, 0b100>; + +let accessSize = DoubleWordAccess in +defm loadrd : ld_idxd_shl<"memd", "LDrid", DoubleRegs, 0b110>; + +// 'def pats' for load instructions with base + register offset and non-zero +// immediate value. Immediate value is used to left-shift the second +// register operand. +class Loadxs_pat<PatFrag Load, ValueType VT, InstHexagon MI> + : Pat<(VT (Load (add (i32 IntRegs:$Rs), + (i32 (shl (i32 IntRegs:$Rt), u2ImmPred:$u2))))), + (VT (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2))>; + +let AddedComplexity = 40 in { + def: Loadxs_pat<extloadi8, i32, L4_loadrub_rr>; + def: Loadxs_pat<zextloadi8, i32, L4_loadrub_rr>; + def: Loadxs_pat<sextloadi8, i32, L4_loadrb_rr>; + def: Loadxs_pat<extloadi16, i32, L4_loadruh_rr>; + def: Loadxs_pat<zextloadi16, i32, L4_loadruh_rr>; + def: Loadxs_pat<sextloadi16, i32, L4_loadrh_rr>; + def: Loadxs_pat<load, i32, L4_loadri_rr>; + def: Loadxs_pat<load, i64, L4_loadrd_rr>; +} + +// 'def pats' for load instruction base + register offset and +// zero immediate value. +class Loadxs_simple_pat<PatFrag Load, ValueType VT, InstHexagon MI> + : Pat<(VT (Load (add (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)))), + (VT (MI IntRegs:$Rs, IntRegs:$Rt, 0))>; + +let AddedComplexity = 20 in { + def: Loadxs_simple_pat<extloadi8, i32, L4_loadrub_rr>; + def: Loadxs_simple_pat<zextloadi8, i32, L4_loadrub_rr>; + def: Loadxs_simple_pat<sextloadi8, i32, L4_loadrb_rr>; + def: Loadxs_simple_pat<extloadi16, i32, L4_loadruh_rr>; + def: Loadxs_simple_pat<zextloadi16, i32, L4_loadruh_rr>; + def: Loadxs_simple_pat<sextloadi16, i32, L4_loadrh_rr>; + def: Loadxs_simple_pat<load, i32, L4_loadri_rr>; + def: Loadxs_simple_pat<load, i64, L4_loadrd_rr>; +} + +// zext i1->i64 +def: Pat<(i64 (zext (i1 PredRegs:$src1))), + (Zext64 (C2_muxii PredRegs:$src1, 1, 0))>; + +// zext i32->i64 +def: Pat<(i64 (zext (i32 IntRegs:$src1))), + (Zext64 IntRegs:$src1)>; + +//===----------------------------------------------------------------------===// +// LD - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// ST + +//===----------------------------------------------------------------------===// +/// +//===----------------------------------------------------------------------===// +// Template class for store instructions with Absolute set addressing mode. 
+//===----------------------------------------------------------------------===// +let isExtended = 1, opExtendable = 1, opExtentBits = 6, + addrMode = AbsoluteSet in +class T_ST_absset <string mnemonic, string BaseOp, RegisterClass RC, + bits<3> MajOp, MemAccessSize AccessSz, bit isHalf = 0> + : STInst<(outs IntRegs:$dst), + (ins u6Ext:$addr, RC:$src), + mnemonic#"($dst = #$addr) = $src"#!if(isHalf, ".h","")>, NewValueRel { + bits<5> dst; + bits<6> addr; + bits<5> src; + let accessSize = AccessSz; + let BaseOpcode = BaseOp#"_AbsSet"; + + // Store upper-half and store doubleword cannot be NV. + let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isHalf,0,1)); + + let IClass = 0b1010; + + let Inst{27-24} = 0b1011; + let Inst{23-21} = MajOp; + let Inst{20-16} = dst; + let Inst{13} = 0b0; + let Inst{12-8} = src; + let Inst{7} = 0b1; + let Inst{5-0} = addr; + } + +def S4_storerb_ap : T_ST_absset <"memb", "STrib", IntRegs, 0b000, ByteAccess>; +def S4_storerh_ap : T_ST_absset <"memh", "STrih", IntRegs, 0b010, + HalfWordAccess>; +def S4_storeri_ap : T_ST_absset <"memw", "STriw", IntRegs, 0b100, WordAccess>; + +let isNVStorable = 0 in { + def S4_storerf_ap : T_ST_absset <"memh", "STrif", IntRegs, + 0b011, HalfWordAccess, 1>; + def S4_storerd_ap : T_ST_absset <"memd", "STrid", DoubleRegs, + 0b110, DoubleWordAccess>; +} + +let opExtendable = 1, isNewValue = 1, isNVStore = 1, opNewValue = 2, +isExtended = 1, opExtentBits= 6 in +class T_ST_absset_nv <string mnemonic, string BaseOp, bits<2> MajOp, + MemAccessSize AccessSz > + : NVInst <(outs IntRegs:$dst), + (ins u6Ext:$addr, IntRegs:$src), + mnemonic#"($dst = #$addr) = $src.new">, NewValueRel { + bits<5> dst; + bits<6> addr; + bits<3> src; + let accessSize = AccessSz; + let BaseOpcode = BaseOp#"_AbsSet"; + + let IClass = 0b1010; + + let Inst{27-21} = 0b1011101; + let Inst{20-16} = dst; + let Inst{13-11} = 0b000; + let Inst{12-11} = MajOp; + let Inst{10-8} = src; + let Inst{7} = 0b1; + let Inst{5-0} = addr; + } + +let mayStore = 1, addrMode = AbsoluteSet in { + def S4_storerbnew_ap : T_ST_absset_nv <"memb", "STrib", 0b00, ByteAccess>; + def S4_storerhnew_ap : T_ST_absset_nv <"memh", "STrih", 0b01, HalfWordAccess>; + def S4_storerinew_ap : T_ST_absset_nv <"memw", "STriw", 0b10, WordAccess>; +} + +let isExtended = 1, opExtendable = 2, opExtentBits = 6, InputType = "imm", + addrMode = BaseLongOffset, AddedComplexity = 40 in +class T_StoreAbsReg <string mnemonic, string CextOp, RegisterClass RC, + bits<3> MajOp, MemAccessSize AccessSz, bit isHalf = 0> + : STInst<(outs), + (ins IntRegs:$src1, u2Imm:$src2, u6Ext:$src3, RC:$src4), + mnemonic#"($src1<<#$src2 + #$src3) = $src4"#!if(isHalf, ".h",""), + []>, ImmRegShl, NewValueRel { + + bits<5> src1; + bits<2> src2; + bits<6> src3; + bits<5> src4; + + let accessSize = AccessSz; + let CextOpcode = CextOp; + let BaseOpcode = CextOp#"_shl"; + + // Store upper-half and store doubleword cannot be NV. 
+ let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isHalf,0,1)); + + let IClass = 0b1010; + + let Inst{27-24} =0b1101; + let Inst{23-21} = MajOp; + let Inst{20-16} = src1; + let Inst{13} = src2{1}; + let Inst{12-8} = src4; + let Inst{7} = 0b1; + let Inst{6} = src2{0}; + let Inst{5-0} = src3; +} + +def S4_storerb_ur : T_StoreAbsReg <"memb", "STrib", IntRegs, 0b000, ByteAccess>; +def S4_storerh_ur : T_StoreAbsReg <"memh", "STrih", IntRegs, 0b010, + HalfWordAccess>; +def S4_storerf_ur : T_StoreAbsReg <"memh", "STrif", IntRegs, 0b011, + HalfWordAccess, 1>; +def S4_storeri_ur : T_StoreAbsReg <"memw", "STriw", IntRegs, 0b100, WordAccess>; +def S4_storerd_ur : T_StoreAbsReg <"memd", "STrid", DoubleRegs, 0b110, + DoubleWordAccess>; + +let AddedComplexity = 40 in +multiclass T_StoreAbsReg_Pats <InstHexagon MI, RegisterClass RC, ValueType VT, + PatFrag stOp> { + def : Pat<(stOp (VT RC:$src4), + (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2), + u32ImmPred:$src3)), + (MI IntRegs:$src1, u2ImmPred:$src2, u32ImmPred:$src3, RC:$src4)>; + + def : Pat<(stOp (VT RC:$src4), + (add (shl IntRegs:$src1, u2ImmPred:$src2), + (HexagonCONST32 tglobaladdr:$src3))), + (MI IntRegs:$src1, u2ImmPred:$src2, tglobaladdr:$src3, RC:$src4)>; + + def : Pat<(stOp (VT RC:$src4), + (add IntRegs:$src1, (HexagonCONST32 tglobaladdr:$src3))), + (MI IntRegs:$src1, 0, tglobaladdr:$src3, RC:$src4)>; +} + +defm : T_StoreAbsReg_Pats <S4_storerd_ur, DoubleRegs, i64, store>; +defm : T_StoreAbsReg_Pats <S4_storeri_ur, IntRegs, i32, store>; +defm : T_StoreAbsReg_Pats <S4_storerb_ur, IntRegs, i32, truncstorei8>; +defm : T_StoreAbsReg_Pats <S4_storerh_ur, IntRegs, i32, truncstorei16>; + +let mayStore = 1, isNVStore = 1, isExtended = 1, addrMode = BaseLongOffset, + opExtentBits = 6, isNewValue = 1, opNewValue = 3, opExtendable = 2 in +class T_StoreAbsRegNV <string mnemonic, string CextOp, bits<2> MajOp, + MemAccessSize AccessSz> + : NVInst <(outs ), + (ins IntRegs:$src1, u2Imm:$src2, u6Ext:$src3, IntRegs:$src4), + mnemonic#"($src1<<#$src2 + #$src3) = $src4.new">, NewValueRel { + bits<5> src1; + bits<2> src2; + bits<6> src3; + bits<3> src4; + + let CextOpcode = CextOp; + let BaseOpcode = CextOp#"_shl"; + let IClass = 0b1010; + + let Inst{27-21} = 0b1101101; + let Inst{12-11} = 0b00; + let Inst{7} = 0b1; + let Inst{20-16} = src1; + let Inst{13} = src2{1}; + let Inst{12-11} = MajOp; + let Inst{10-8} = src4; + let Inst{6} = src2{0}; + let Inst{5-0} = src3; + } + +def S4_storerbnew_ur : T_StoreAbsRegNV <"memb", "STrib", 0b00, ByteAccess>; +def S4_storerhnew_ur : T_StoreAbsRegNV <"memh", "STrih", 0b01, HalfWordAccess>; +def S4_storerinew_ur : T_StoreAbsRegNV <"memw", "STriw", 0b10, WordAccess>; + +//===----------------------------------------------------------------------===// +// Template classes for the non-predicated store instructions with +// base + register offset addressing mode +//===----------------------------------------------------------------------===// +let isPredicable = 1 in +class T_store_rr <string mnemonic, RegisterClass RC, bits<3> MajOp, bit isH> + : STInst < (outs ), (ins IntRegs:$Rs, IntRegs:$Ru, u2Imm:$u2, RC:$Rt), + mnemonic#"($Rs + $Ru<<#$u2) = $Rt"#!if(isH, ".h",""), + [],"",V4LDST_tc_st_SLOT01>, ImmRegShl, AddrModeRel { + + bits<5> Rs; + bits<5> Ru; + bits<2> u2; + bits<5> Rt; + + // Store upper-half and store doubleword cannot be NV. 
+ let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isH,0,1)); + + let IClass = 0b0011; + + let Inst{27-24} = 0b1011; + let Inst{23-21} = MajOp; + let Inst{20-16} = Rs; + let Inst{12-8} = Ru; + let Inst{13} = u2{1}; + let Inst{7} = u2{0}; + let Inst{4-0} = Rt; + } + +//===----------------------------------------------------------------------===// +// Template classes for the predicated store instructions with +// base + register offset addressing mode +//===----------------------------------------------------------------------===// +let isPredicated = 1 in +class T_pstore_rr <string mnemonic, RegisterClass RC, bits<3> MajOp, + bit isNot, bit isPredNew, bit isH> + : STInst <(outs), + (ins PredRegs:$Pv, IntRegs:$Rs, IntRegs:$Ru, u2Imm:$u2, RC:$Rt), + + !if(isNot, "if (!$Pv", "if ($Pv")#!if(isPredNew, ".new) ", + ") ")#mnemonic#"($Rs+$Ru<<#$u2) = $Rt"#!if(isH, ".h",""), + [], "", V4LDST_tc_st_SLOT01> , AddrModeRel{ + bits<2> Pv; + bits<5> Rs; + bits<5> Ru; + bits<2> u2; + bits<5> Rt; + + let isPredicatedFalse = isNot; + let isPredicatedNew = isPredNew; + // Store upper-half and store doubleword cannot be NV. + let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isH,0,1)); + + let IClass = 0b0011; + + let Inst{27-26} = 0b01; + let Inst{25} = isPredNew; + let Inst{24} = isNot; + let Inst{23-21} = MajOp; + let Inst{20-16} = Rs; + let Inst{12-8} = Ru; + let Inst{13} = u2{1}; + let Inst{7} = u2{0}; + let Inst{6-5} = Pv; + let Inst{4-0} = Rt; + } + +//===----------------------------------------------------------------------===// +// Template classes for the new-value store instructions with +// base + register offset addressing mode +//===----------------------------------------------------------------------===// +let isPredicable = 1, isNewValue = 1, opNewValue = 3 in +class T_store_new_rr <string mnemonic, bits<2> MajOp> : + NVInst < (outs ), (ins IntRegs:$Rs, IntRegs:$Ru, u2Imm:$u2, IntRegs:$Nt), + mnemonic#"($Rs + $Ru<<#$u2) = $Nt.new", + [],"",V4LDST_tc_st_SLOT0>, ImmRegShl, AddrModeRel { + + bits<5> Rs; + bits<5> Ru; + bits<2> u2; + bits<3> Nt; + + let IClass = 0b0011; + + let Inst{27-21} = 0b1011101; + let Inst{20-16} = Rs; + let Inst{12-8} = Ru; + let Inst{13} = u2{1}; + let Inst{7} = u2{0}; + let Inst{4-3} = MajOp; + let Inst{2-0} = Nt; + } + +//===----------------------------------------------------------------------===// +// Template classes for the predicated new-value store instructions with +// base + register offset addressing mode +//===----------------------------------------------------------------------===// +let isPredicated = 1, isNewValue = 1, opNewValue = 4 in +class T_pstore_new_rr <string mnemonic, bits<2> MajOp, bit isNot, bit isPredNew> + : NVInst<(outs), + (ins PredRegs:$Pv, IntRegs:$Rs, IntRegs:$Ru, u2Imm:$u2, IntRegs:$Nt), + !if(isNot, "if (!$Pv", "if ($Pv")#!if(isPredNew, ".new) ", + ") ")#mnemonic#"($Rs+$Ru<<#$u2) = $Nt.new", + [], "", V4LDST_tc_st_SLOT0>, AddrModeRel { + bits<2> Pv; + bits<5> Rs; + bits<5> Ru; + bits<2> u2; + bits<3> Nt; + + let isPredicatedFalse = isNot; + let isPredicatedNew = isPredNew; + + let IClass = 0b0011; + let Inst{27-26} = 0b01; + let Inst{25} = isPredNew; + let Inst{24} = isNot; + let Inst{23-21} = 0b101; + let Inst{20-16} = Rs; + let Inst{12-8} = Ru; + let Inst{13} = u2{1}; + let Inst{7} = u2{0}; + let Inst{6-5} = Pv; + let Inst{4-3} = MajOp; + let Inst{2-0} = Nt; + } + +//===----------------------------------------------------------------------===// +// multiclass for store instructions with base + register offset addressing +// 
mode +//===----------------------------------------------------------------------===// +let isNVStorable = 1 in +multiclass ST_Idxd_shl<string mnemonic, string CextOp, RegisterClass RC, + bits<3> MajOp, bit isH = 0> { + let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed_shl in { + def S4_#NAME#_rr : T_store_rr <mnemonic, RC, MajOp, isH>; + + // Predicated + def S4_p#NAME#t_rr : T_pstore_rr <mnemonic, RC, MajOp, 0, 0, isH>; + def S4_p#NAME#f_rr : T_pstore_rr <mnemonic, RC, MajOp, 1, 0, isH>; + + // Predicated new + def S4_p#NAME#tnew_rr : T_pstore_rr <mnemonic, RC, MajOp, 0, 1, isH>; + def S4_p#NAME#fnew_rr : T_pstore_rr <mnemonic, RC, MajOp, 1, 1, isH>; + } +} + +//===----------------------------------------------------------------------===// +// multiclass for new-value store instructions with base + register offset +// addressing mode. +//===----------------------------------------------------------------------===// +let mayStore = 1, isNVStore = 1 in +multiclass ST_Idxd_shl_nv <string mnemonic, string CextOp, RegisterClass RC, + bits<2> MajOp> { + let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed_shl in { + def S4_#NAME#new_rr : T_store_new_rr<mnemonic, MajOp>; + + // Predicated + def S4_p#NAME#newt_rr : T_pstore_new_rr <mnemonic, MajOp, 0, 0>; + def S4_p#NAME#newf_rr : T_pstore_new_rr <mnemonic, MajOp, 1, 0>; + + // Predicated new + def S4_p#NAME#newtnew_rr : T_pstore_new_rr <mnemonic, MajOp, 0, 1>; + def S4_p#NAME#newfnew_rr : T_pstore_new_rr <mnemonic, MajOp, 1, 1>; + } +} + +let addrMode = BaseRegOffset, InputType = "reg", hasSideEffects = 0 in { + let accessSize = ByteAccess in + defm storerb: ST_Idxd_shl<"memb", "STrib", IntRegs, 0b000>, + ST_Idxd_shl_nv<"memb", "STrib", IntRegs, 0b00>; + + let accessSize = HalfWordAccess in + defm storerh: ST_Idxd_shl<"memh", "STrih", IntRegs, 0b010>, + ST_Idxd_shl_nv<"memh", "STrih", IntRegs, 0b01>; + + let accessSize = WordAccess in + defm storeri: ST_Idxd_shl<"memw", "STriw", IntRegs, 0b100>, + ST_Idxd_shl_nv<"memw", "STriw", IntRegs, 0b10>; + + let isNVStorable = 0, accessSize = DoubleWordAccess in + defm storerd: ST_Idxd_shl<"memd", "STrid", DoubleRegs, 0b110>; + + let isNVStorable = 0, accessSize = HalfWordAccess in + defm storerf: ST_Idxd_shl<"memh", "STrif", IntRegs, 0b011, 1>; +} + +class Storexs_pat<PatFrag Store, PatFrag Value, InstHexagon MI> + : Pat<(Store Value:$Ru, (add (i32 IntRegs:$Rs), + (i32 (shl (i32 IntRegs:$Rt), u2ImmPred:$u2)))), + (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2, Value:$Ru)>; + +let AddedComplexity = 40 in { + def: Storexs_pat<truncstorei8, I32, S4_storerb_rr>; + def: Storexs_pat<truncstorei16, I32, S4_storerh_rr>; + def: Storexs_pat<store, I32, S4_storeri_rr>; + def: Storexs_pat<store, I64, S4_storerd_rr>; +} + +// memd(Rx++#s4:3)=Rtt +// memd(Rx++#s4:3:circ(Mu))=Rtt +// memd(Rx++I:circ(Mu))=Rtt +// memd(Rx++Mu)=Rtt +// memd(Rx++Mu:brev)=Rtt +// memd(gp+#u16:3)=Rtt + +// Store doubleword conditionally. +// if ([!]Pv[.new]) memd(#u6)=Rtt +// TODO: needs to be implemented. 
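+
+// For illustration: with the Storexs_pat patterns above, a 32-bit store to an
+// address of the form (Rs + (Rt << #u2)), such as a C array store "p[i] = v"
+// for a 4-byte element type, matches Storexs_pat<store, I32, S4_storeri_rr>
+// and is emitted with the T_store_rr syntax "memw($Rs + $Ru<<#$u2) = $Rt".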
+
+//===----------------------------------------------------------------------===//
+// Template classes for store instructions with base + immediate offset
+// addressing mode and an immediate stored value.
+//===----------------------------------------------------------------------===//
+let isPredicable = 1, isExtendable = 1, isExtentSigned = 1, opExtentBits = 8,
+    opExtendable = 2 in
+class T_StoreImm <string mnemonic, Operand OffsetOp, bits<2> MajOp >
+  : STInst <(outs ), (ins IntRegs:$Rs, OffsetOp:$offset, s8Ext:$S8),
+      mnemonic#"($Rs+#$offset)=#$S8",
+      [], "", V4LDST_tc_st_SLOT01>,
+      ImmRegRel, PredNewRel {
+    bits<5> Rs;
+    bits<8> S8;
+    bits<8> offset;
+    bits<6> offsetBits;
+
+    string OffsetOpStr = !cast<string>(OffsetOp);
+    let offsetBits = !if (!eq(OffsetOpStr, "u6_2Imm"), offset{7-2},
+                     !if (!eq(OffsetOpStr, "u6_1Imm"), offset{6-1},
+                                      /* u6_0Imm */ offset{5-0}));
+
+    let IClass = 0b0011;
+
+    let Inst{27-25} = 0b110;
+    let Inst{22-21} = MajOp;
+    let Inst{20-16} = Rs;
+    let Inst{12-7} = offsetBits;
+    let Inst{13} = S8{7};
+    let Inst{6-0} = S8{6-0};
+  }
+
+let isPredicated = 1, isExtendable = 1, isExtentSigned = 1, opExtentBits = 6,
+    opExtendable = 3 in
+class T_StoreImm_pred <string mnemonic, Operand OffsetOp, bits<2> MajOp,
+                       bit isPredNot, bit isPredNew >
+  : STInst <(outs ),
+            (ins PredRegs:$Pv, IntRegs:$Rs, OffsetOp:$offset, s6Ext:$S6),
+      !if(isPredNot, "if (!$Pv", "if ($Pv")#!if(isPredNew, ".new) ",
+       ") ")#mnemonic#"($Rs+#$offset)=#$S6",
+      [], "", V4LDST_tc_st_SLOT01>,
+      ImmRegRel, PredNewRel {
+    bits<2> Pv;
+    bits<5> Rs;
+    bits<6> S6;
+    bits<8> offset;
+    bits<6> offsetBits;
+
+    string OffsetOpStr = !cast<string>(OffsetOp);
+    let offsetBits = !if (!eq(OffsetOpStr, "u6_2Imm"), offset{7-2},
+                     !if (!eq(OffsetOpStr, "u6_1Imm"), offset{6-1},
+                                      /* u6_0Imm */ offset{5-0}));
+    let isPredicatedNew = isPredNew;
+    let isPredicatedFalse = isPredNot;
+
+    let IClass = 0b0011;
+
+    let Inst{27-25} = 0b100;
+    let Inst{24} = isPredNew;
+    let Inst{23} = isPredNot;
+    let Inst{22-21} = MajOp;
+    let Inst{20-16} = Rs;
+    let Inst{13} = S6{5};
+    let Inst{12-7} = offsetBits;
+    let Inst{6-5} = Pv;
+    let Inst{4-0} = S6{4-0};
+  }
+
+
+//===----------------------------------------------------------------------===//
+// multiclass for store instructions with base + immediate offset
+// addressing mode and immediate stored value.
+// mem[bhw](Rs+#u6:[012])=#S8
+// if ([!]Pv[.new]) mem[bhw](Rs+#u6:[012])=#S6
+//===----------------------------------------------------------------------===//
+
+multiclass ST_Imm_Pred <string mnemonic, Operand OffsetOp, bits<2> MajOp,
+                        bit PredNot> {
+  def _io : T_StoreImm_pred <mnemonic, OffsetOp, MajOp, PredNot, 0>;
+  // Predicate new
+  def new_io : T_StoreImm_pred <mnemonic, OffsetOp, MajOp, PredNot, 1>;
+}
+
+multiclass ST_Imm <string mnemonic, string CextOp, Operand OffsetOp,
+                   bits<2> MajOp> {
+  let CextOpcode = CextOp, BaseOpcode = CextOp#_imm in {
+    def _io : T_StoreImm <mnemonic, OffsetOp, MajOp>;
+
+    defm t : ST_Imm_Pred <mnemonic, OffsetOp, MajOp, 0>;
+    defm f : ST_Imm_Pred <mnemonic, OffsetOp, MajOp, 1>;
+  }
+}
+
+let hasSideEffects = 0, addrMode = BaseImmOffset,
+    InputType = "imm" in {
+  let accessSize = ByteAccess in
+  defm S4_storeirb : ST_Imm<"memb", "STrib", u6_0Imm, 0b00>;
+
+  let accessSize = HalfWordAccess in
+  defm S4_storeirh : ST_Imm<"memh", "STrih", u6_1Imm, 0b01>;
+
+  let accessSize = WordAccess in
+  defm S4_storeiri : ST_Imm<"memw", "STriw", u6_2Imm, 0b10>;
+}
+
+def IMM_BYTE : SDNodeXForm<imm, [{
+  // -1 etc is represented as 255 etc
+  // assigning to a byte restores our desired signed value.
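+  // For example, a DAG immediate of 255 truncates to int8_t -1, which is then
+  // re-emitted below as the desired signed byte immediate.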
+ int8_t imm = N->getSExtValue(); + return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32); +}]>; + +def IMM_HALF : SDNodeXForm<imm, [{ + // -1 etc is represented as 65535 etc + // assigning to a short restores our desired signed value. + int16_t imm = N->getSExtValue(); + return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32); +}]>; + +def IMM_WORD : SDNodeXForm<imm, [{ + // -1 etc can be represented as 4294967295 etc + // Currently, it's not doing this. But some optimization + // might convert -1 to a large +ve number. + // assigning to a word restores our desired signed value. + int32_t imm = N->getSExtValue(); + return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32); +}]>; + +def ToImmByte : OutPatFrag<(ops node:$R), (IMM_BYTE $R)>; +def ToImmHalf : OutPatFrag<(ops node:$R), (IMM_HALF $R)>; +def ToImmWord : OutPatFrag<(ops node:$R), (IMM_WORD $R)>; + +let AddedComplexity = 40 in { + // Not using frameindex patterns for these stores, because the offset + // is not extendable. This could cause problems during removing the frame + // indices, since the offset with respect to R29/R30 may not fit in the + // u6 field. + def: Storexm_add_pat<truncstorei8, s32ImmPred, u6_0ImmPred, ToImmByte, + S4_storeirb_io>; + def: Storexm_add_pat<truncstorei16, s32ImmPred, u6_1ImmPred, ToImmHalf, + S4_storeirh_io>; + def: Storexm_add_pat<store, s32ImmPred, u6_2ImmPred, ToImmWord, + S4_storeiri_io>; +} + +def: Storexm_simple_pat<truncstorei8, s32ImmPred, ToImmByte, S4_storeirb_io>; +def: Storexm_simple_pat<truncstorei16, s32ImmPred, ToImmHalf, S4_storeirh_io>; +def: Storexm_simple_pat<store, s32ImmPred, ToImmWord, S4_storeiri_io>; + +// memb(Rx++#s4:0:circ(Mu))=Rt +// memb(Rx++I:circ(Mu))=Rt +// memb(Rx++Mu)=Rt +// memb(Rx++Mu:brev)=Rt +// memb(gp+#u16:0)=Rt + +// Store halfword. +// TODO: needs to be implemented +// memh(Re=#U6)=Rt.H +// memh(Rs+#s11:1)=Rt.H +// memh(Rs+Ru<<#u2)=Rt.H +// TODO: needs to be implemented. + +// memh(Ru<<#u2+#U6)=Rt.H +// memh(Rx++#s4:1:circ(Mu))=Rt.H +// memh(Rx++#s4:1:circ(Mu))=Rt +// memh(Rx++I:circ(Mu))=Rt.H +// memh(Rx++I:circ(Mu))=Rt +// memh(Rx++Mu)=Rt.H +// memh(Rx++Mu)=Rt +// memh(Rx++Mu:brev)=Rt.H +// memh(Rx++Mu:brev)=Rt +// memh(gp+#u16:1)=Rt +// if ([!]Pv[.new]) memh(#u6)=Rt.H +// if ([!]Pv[.new]) memh(#u6)=Rt + +// if ([!]Pv[.new]) memh(Rs+#u6:1)=Rt.H +// TODO: needs to be implemented. + +// if ([!]Pv[.new]) memh(Rx++#s4:1)=Rt.H +// TODO: Needs to be implemented. + +// Store word. +// memw(Re=#U6)=Rt +// TODO: Needs to be implemented. 
+// memw(Rx++#s4:2)=Rt +// memw(Rx++#s4:2:circ(Mu))=Rt +// memw(Rx++I:circ(Mu))=Rt +// memw(Rx++Mu)=Rt +// memw(Rx++Mu:brev)=Rt + +//===----------------------------------------------------------------------=== +// ST - +//===----------------------------------------------------------------------=== + + +//===----------------------------------------------------------------------===// +// NV/ST + +//===----------------------------------------------------------------------===// + +let opNewValue = 2, opExtendable = 1, isExtentSigned = 1, isPredicable = 1 in +class T_store_io_nv <string mnemonic, RegisterClass RC, + Operand ImmOp, bits<2>MajOp> + : NVInst_V4 <(outs), + (ins IntRegs:$src1, ImmOp:$src2, RC:$src3), + mnemonic#"($src1+#$src2) = $src3.new", + [],"",ST_tc_st_SLOT0> { + bits<5> src1; + bits<13> src2; // Actual address offset + bits<3> src3; + bits<11> offsetBits; // Represents offset encoding + + let opExtentBits = !if (!eq(mnemonic, "memb"), 11, + !if (!eq(mnemonic, "memh"), 12, + !if (!eq(mnemonic, "memw"), 13, 0))); + + let opExtentAlign = !if (!eq(mnemonic, "memb"), 0, + !if (!eq(mnemonic, "memh"), 1, + !if (!eq(mnemonic, "memw"), 2, 0))); + + let offsetBits = !if (!eq(mnemonic, "memb"), src2{10-0}, + !if (!eq(mnemonic, "memh"), src2{11-1}, + !if (!eq(mnemonic, "memw"), src2{12-2}, 0))); + + let IClass = 0b1010; + + let Inst{27} = 0b0; + let Inst{26-25} = offsetBits{10-9}; + let Inst{24-21} = 0b1101; + let Inst{20-16} = src1; + let Inst{13} = offsetBits{8}; + let Inst{12-11} = MajOp; + let Inst{10-8} = src3; + let Inst{7-0} = offsetBits{7-0}; + } + +let opExtendable = 2, opNewValue = 3, isPredicated = 1 in +class T_pstore_io_nv <string mnemonic, RegisterClass RC, Operand predImmOp, + bits<2>MajOp, bit PredNot, bit isPredNew> + : NVInst_V4 <(outs), + (ins PredRegs:$src1, IntRegs:$src2, predImmOp:$src3, RC:$src4), + !if(PredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", + ") ")#mnemonic#"($src2+#$src3) = $src4.new", + [],"",V2LDST_tc_st_SLOT0> { + bits<2> src1; + bits<5> src2; + bits<9> src3; + bits<3> src4; + bits<6> offsetBits; // Represents offset encoding + + let isPredicatedNew = isPredNew; + let isPredicatedFalse = PredNot; + let opExtentBits = !if (!eq(mnemonic, "memb"), 6, + !if (!eq(mnemonic, "memh"), 7, + !if (!eq(mnemonic, "memw"), 8, 0))); + + let opExtentAlign = !if (!eq(mnemonic, "memb"), 0, + !if (!eq(mnemonic, "memh"), 1, + !if (!eq(mnemonic, "memw"), 2, 0))); + + let offsetBits = !if (!eq(mnemonic, "memb"), src3{5-0}, + !if (!eq(mnemonic, "memh"), src3{6-1}, + !if (!eq(mnemonic, "memw"), src3{7-2}, 0))); + + let IClass = 0b0100; + + let Inst{27} = 0b0; + let Inst{26} = PredNot; + let Inst{25} = isPredNew; + let Inst{24-21} = 0b0101; + let Inst{20-16} = src2; + let Inst{13} = offsetBits{5}; + let Inst{12-11} = MajOp; + let Inst{10-8} = src4; + let Inst{7-3} = offsetBits{4-0}; + let Inst{2} = 0b0; + let Inst{1-0} = src1; + } + +// multiclass for new-value store instructions with base + immediate offset. 
+// +let mayStore = 1, isNVStore = 1, isNewValue = 1, hasSideEffects = 0, + isExtendable = 1 in +multiclass ST_Idxd_nv<string mnemonic, string CextOp, RegisterClass RC, + Operand ImmOp, Operand predImmOp, bits<2> MajOp> { + + let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed in { + def S2_#NAME#new_io : T_store_io_nv <mnemonic, RC, ImmOp, MajOp>; + // Predicated + def S2_p#NAME#newt_io :T_pstore_io_nv <mnemonic, RC, predImmOp, MajOp, 0, 0>; + def S2_p#NAME#newf_io :T_pstore_io_nv <mnemonic, RC, predImmOp, MajOp, 1, 0>; + // Predicated new + def S4_p#NAME#newtnew_io :T_pstore_io_nv <mnemonic, RC, predImmOp, + MajOp, 0, 1>; + def S4_p#NAME#newfnew_io :T_pstore_io_nv <mnemonic, RC, predImmOp, + MajOp, 1, 1>; + } +} + +let addrMode = BaseImmOffset, InputType = "imm" in { + let accessSize = ByteAccess in + defm storerb: ST_Idxd_nv<"memb", "STrib", IntRegs, s11_0Ext, + u6_0Ext, 0b00>, AddrModeRel; + + let accessSize = HalfWordAccess, opExtentAlign = 1 in + defm storerh: ST_Idxd_nv<"memh", "STrih", IntRegs, s11_1Ext, + u6_1Ext, 0b01>, AddrModeRel; + + let accessSize = WordAccess, opExtentAlign = 2 in + defm storeri: ST_Idxd_nv<"memw", "STriw", IntRegs, s11_2Ext, + u6_2Ext, 0b10>, AddrModeRel; +} + +//===----------------------------------------------------------------------===// +// Post increment loads with register offset. +//===----------------------------------------------------------------------===// + +let hasNewValue = 1 in +def L2_loadbsw2_pr : T_load_pr <"membh", IntRegs, 0b0001, HalfWordAccess>; + +def L2_loadbsw4_pr : T_load_pr <"membh", DoubleRegs, 0b0111, WordAccess>; + +let hasSideEffects = 0, addrMode = PostInc in +class T_loadalign_pr <string mnemonic, bits<4> MajOp, MemAccessSize AccessSz> + : LDInstPI <(outs DoubleRegs:$dst, IntRegs:$_dst_), + (ins DoubleRegs:$src1, IntRegs:$src2, ModRegs:$src3), + "$dst = "#mnemonic#"($src2++$src3)", [], + "$src1 = $dst, $src2 = $_dst_"> { + bits<5> dst; + bits<5> src2; + bits<1> src3; + + let accessSize = AccessSz; + let IClass = 0b1001; + + let Inst{27-25} = 0b110; + let Inst{24-21} = MajOp; + let Inst{20-16} = src2; + let Inst{13} = src3; + let Inst{12} = 0b0; + let Inst{7} = 0b0; + let Inst{4-0} = dst; + } + +def L2_loadalignb_pr : T_loadalign_pr <"memb_fifo", 0b0100, ByteAccess>; +def L2_loadalignh_pr : T_loadalign_pr <"memh_fifo", 0b0010, HalfWordAccess>; + +//===----------------------------------------------------------------------===// +// Template class for non-predicated post increment .new stores +// mem[bhwd](Rx++#s4:[0123])=Nt.new +//===----------------------------------------------------------------------===// +let isPredicable = 1, hasSideEffects = 0, addrMode = PostInc, isNVStore = 1, + isNewValue = 1, opNewValue = 3 in +class T_StorePI_nv <string mnemonic, Operand ImmOp, bits<2> MajOp > + : NVInstPI_V4 <(outs IntRegs:$_dst_), + (ins IntRegs:$src1, ImmOp:$offset, IntRegs:$src2), + mnemonic#"($src1++#$offset) = $src2.new", + [], "$src1 = $_dst_">, + AddrModeRel { + bits<5> src1; + bits<3> src2; + bits<7> offset; + bits<4> offsetBits; + + string ImmOpStr = !cast<string>(ImmOp); + let offsetBits = !if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2}, + !if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1}, + /* s4_0Imm */ offset{3-0})); + let IClass = 0b1010; + + let Inst{27-21} = 0b1011101; + let Inst{20-16} = src1; + let Inst{13} = 0b0; + let Inst{12-11} = MajOp; + let Inst{10-8} = src2; + let Inst{7} = 0b0; + let Inst{6-3} = offsetBits; + let Inst{1} = 0b0; + } + +//===----------------------------------------------------------------------===// 
+// Template class for predicated post increment .new stores +// if([!]Pv[.new]) mem[bhwd](Rx++#s4:[0123])=Nt.new +//===----------------------------------------------------------------------===// +let isPredicated = 1, hasSideEffects = 0, addrMode = PostInc, isNVStore = 1, + isNewValue = 1, opNewValue = 4 in +class T_StorePI_nv_pred <string mnemonic, Operand ImmOp, + bits<2> MajOp, bit isPredNot, bit isPredNew > + : NVInstPI_V4 <(outs IntRegs:$_dst_), + (ins PredRegs:$src1, IntRegs:$src2, + ImmOp:$offset, IntRegs:$src3), + !if(isPredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", + ") ")#mnemonic#"($src2++#$offset) = $src3.new", + [], "$src2 = $_dst_">, + AddrModeRel { + bits<2> src1; + bits<5> src2; + bits<3> src3; + bits<7> offset; + bits<4> offsetBits; + + string ImmOpStr = !cast<string>(ImmOp); + let offsetBits = !if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2}, + !if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1}, + /* s4_0Imm */ offset{3-0})); + let isPredicatedNew = isPredNew; + let isPredicatedFalse = isPredNot; + + let IClass = 0b1010; + + let Inst{27-21} = 0b1011101; + let Inst{20-16} = src2; + let Inst{13} = 0b1; + let Inst{12-11} = MajOp; + let Inst{10-8} = src3; + let Inst{7} = isPredNew; + let Inst{6-3} = offsetBits; + let Inst{2} = isPredNot; + let Inst{1-0} = src1; + } + +multiclass ST_PostInc_Pred_nv<string mnemonic, Operand ImmOp, + bits<2> MajOp, bit PredNot> { + def _pi : T_StorePI_nv_pred <mnemonic, ImmOp, MajOp, PredNot, 0>; + + // Predicate new + def new_pi : T_StorePI_nv_pred <mnemonic, ImmOp, MajOp, PredNot, 1>; +} + +multiclass ST_PostInc_nv<string mnemonic, string BaseOp, Operand ImmOp, + bits<2> MajOp> { + let BaseOpcode = "POST_"#BaseOp in { + def S2_#NAME#_pi : T_StorePI_nv <mnemonic, ImmOp, MajOp>; + + // Predicated + defm S2_p#NAME#t : ST_PostInc_Pred_nv <mnemonic, ImmOp, MajOp, 0>; + defm S2_p#NAME#f : ST_PostInc_Pred_nv <mnemonic, ImmOp, MajOp, 1>; + } +} + +let accessSize = ByteAccess in +defm storerbnew: ST_PostInc_nv <"memb", "STrib", s4_0Imm, 0b00>; + +let accessSize = HalfWordAccess in +defm storerhnew: ST_PostInc_nv <"memh", "STrih", s4_1Imm, 0b01>; + +let accessSize = WordAccess in +defm storerinew: ST_PostInc_nv <"memw", "STriw", s4_2Imm, 0b10>; + +//===----------------------------------------------------------------------===// +// Template class for post increment .new stores with register offset +//===----------------------------------------------------------------------===// +let isNewValue = 1, mayStore = 1, isNVStore = 1, opNewValue = 3 in +class T_StorePI_RegNV <string mnemonic, bits<2> MajOp, MemAccessSize AccessSz> + : NVInstPI_V4 <(outs IntRegs:$_dst_), + (ins IntRegs:$src1, ModRegs:$src2, IntRegs:$src3), + #mnemonic#"($src1++$src2) = $src3.new", + [], "$src1 = $_dst_"> { + bits<5> src1; + bits<1> src2; + bits<3> src3; + let accessSize = AccessSz; + + let IClass = 0b1010; + + let Inst{27-21} = 0b1101101; + let Inst{20-16} = src1; + let Inst{13} = src2; + let Inst{12-11} = MajOp; + let Inst{10-8} = src3; + let Inst{7} = 0b0; + } + +def S2_storerbnew_pr : T_StorePI_RegNV<"memb", 0b00, ByteAccess>; +def S2_storerhnew_pr : T_StorePI_RegNV<"memh", 0b01, HalfWordAccess>; +def S2_storerinew_pr : T_StorePI_RegNV<"memw", 0b10, WordAccess>; + +// memb(Rx++#s4:0:circ(Mu))=Nt.new +// memb(Rx++I:circ(Mu))=Nt.new +// memb(Rx++Mu:brev)=Nt.new +// memh(Rx++#s4:1:circ(Mu))=Nt.new +// memh(Rx++I:circ(Mu))=Nt.new +// memh(Rx++Mu)=Nt.new +// memh(Rx++Mu:brev)=Nt.new + +// memw(Rx++#s4:2:circ(Mu))=Nt.new +// memw(Rx++I:circ(Mu))=Nt.new +// memw(Rx++Mu)=Nt.new +// 
memw(Rx++Mu:brev)=Nt.new + +//===----------------------------------------------------------------------===// +// NV/ST - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// NV/J + +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// multiclass/template class for the new-value compare jumps with the register +// operands. +//===----------------------------------------------------------------------===// + +let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 11, + opExtentAlign = 2 in +class NVJrr_template<string mnemonic, bits<3> majOp, bit NvOpNum, + bit isNegCond, bit isTak> + : NVInst_V4<(outs), + (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset), + "if ("#!if(isNegCond, "!","")#mnemonic# + "($src1"#!if(!eq(NvOpNum, 0),".new, ",", ")# + "$src2"#!if(!eq(NvOpNum, 1),".new))","))")#" jump:" + #!if(isTak, "t","nt")#" $offset", []> { + + bits<5> src1; + bits<5> src2; + bits<3> Ns; // New-Value Operand + bits<5> RegOp; // Non-New-Value Operand + bits<11> offset; + + let isTaken = isTak; + let isPredicatedFalse = isNegCond; + let opNewValue{0} = NvOpNum; + + let Ns = !if(!eq(NvOpNum, 0), src1{2-0}, src2{2-0}); + let RegOp = !if(!eq(NvOpNum, 0), src2, src1); + + let IClass = 0b0010; + let Inst{27-26} = 0b00; + let Inst{25-23} = majOp; + let Inst{22} = isNegCond; + let Inst{18-16} = Ns; + let Inst{13} = isTak; + let Inst{12-8} = RegOp; + let Inst{21-20} = offset{10-9}; + let Inst{7-1} = offset{8-2}; +} + + +multiclass NVJrr_cond<string mnemonic, bits<3> majOp, bit NvOpNum, + bit isNegCond> { + // Branch not taken: + def _nt: NVJrr_template<mnemonic, majOp, NvOpNum, isNegCond, 0>; + // Branch taken: + def _t : NVJrr_template<mnemonic, majOp, NvOpNum, isNegCond, 1>; +} + +// NvOpNum = 0 -> First Operand is a new-value Register +// NvOpNum = 1 -> Second Operand is a new-value Register + +multiclass NVJrr_base<string mnemonic, string BaseOp, bits<3> majOp, + bit NvOpNum> { + let BaseOpcode = BaseOp#_NVJ in { + defm _t_jumpnv : NVJrr_cond<mnemonic, majOp, NvOpNum, 0>; // True cond + defm _f_jumpnv : NVJrr_cond<mnemonic, majOp, NvOpNum, 1>; // False cond + } +} + +// if ([!]cmp.eq(Ns.new,Rt)) jump:[n]t #r9:2 +// if ([!]cmp.gt(Ns.new,Rt)) jump:[n]t #r9:2 +// if ([!]cmp.gtu(Ns.new,Rt)) jump:[n]t #r9:2 +// if ([!]cmp.gt(Rt,Ns.new)) jump:[n]t #r9:2 +// if ([!]cmp.gtu(Rt,Ns.new)) jump:[n]t #r9:2 + +let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator = 1, + Defs = [PC], hasSideEffects = 0 in { + defm J4_cmpeq : NVJrr_base<"cmp.eq", "CMPEQ", 0b000, 0>, PredRel; + defm J4_cmpgt : NVJrr_base<"cmp.gt", "CMPGT", 0b001, 0>, PredRel; + defm J4_cmpgtu : NVJrr_base<"cmp.gtu", "CMPGTU", 0b010, 0>, PredRel; + defm J4_cmplt : NVJrr_base<"cmp.gt", "CMPLT", 0b011, 1>, PredRel; + defm J4_cmpltu : NVJrr_base<"cmp.gtu", "CMPLTU", 0b100, 1>, PredRel; +} + +//===----------------------------------------------------------------------===// +// multiclass/template class for the new-value compare jumps instruction +// with a register and an unsigned immediate (U5) operand. 
+//===----------------------------------------------------------------------===//
+
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 11,
+    opExtentAlign = 2 in
+class NVJri_template<string mnemonic, bits<3> majOp, bit isNegCond,
+                     bit isTak>
+  : NVInst_V4<(outs),
+              (ins IntRegs:$src1, u5Imm:$src2, brtarget:$offset),
+          "if ("#!if(isNegCond, "!","")#mnemonic#"($src1.new, #$src2)) jump:"
+          #!if(isTak, "t","nt")#" $offset", []> {
+
+    let isTaken = isTak;
+    let isPredicatedFalse = isNegCond;
+
+    bits<3> src1;
+    bits<5> src2;
+    bits<11> offset;
+
+    let IClass = 0b0010;
+    let Inst{26} = 0b1;
+    let Inst{25-23} = majOp;
+    let Inst{22} = isNegCond;
+    let Inst{18-16} = src1;
+    let Inst{13} = isTak;
+    let Inst{12-8} = src2;
+    let Inst{21-20} = offset{10-9};
+    let Inst{7-1} = offset{8-2};
+}
+
+multiclass NVJri_cond<string mnemonic, bits<3> majOp, bit isNegCond> {
+  // Branch not taken:
+  def _nt: NVJri_template<mnemonic, majOp, isNegCond, 0>;
+  // Branch taken:
+  def _t : NVJri_template<mnemonic, majOp, isNegCond, 1>;
+}
+
+multiclass NVJri_base<string mnemonic, string BaseOp, bits<3> majOp> {
+  let BaseOpcode = BaseOp#_NVJri in {
+    defm _t_jumpnv : NVJri_cond<mnemonic, majOp, 0>; // True Cond
+    defm _f_jumpnv : NVJri_cond<mnemonic, majOp, 1>; // False cond
+  }
+}
+
+// if ([!]cmp.eq(Ns.new,#U5)) jump:[n]t #r9:2
+// if ([!]cmp.gt(Ns.new,#U5)) jump:[n]t #r9:2
+// if ([!]cmp.gtu(Ns.new,#U5)) jump:[n]t #r9:2
+
+let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator = 1,
+    Defs = [PC], hasSideEffects = 0 in {
+  defm J4_cmpeqi  : NVJri_base<"cmp.eq",  "CMPEQ",  0b000>, PredRel;
+  defm J4_cmpgti  : NVJri_base<"cmp.gt",  "CMPGT",  0b001>, PredRel;
+  defm J4_cmpgtui : NVJri_base<"cmp.gtu", "CMPGTU", 0b010>, PredRel;
+}
+
+//===----------------------------------------------------------------------===//
+// multiclass/template class for the new-value compare jumps instruction
+// with a register and a hardcoded 0/-1 immediate value.
+//===----------------------------------------------------------------------===//
+
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 11,
+    opExtentAlign = 2 in
+class NVJ_ConstImm_template<string mnemonic, bits<3> majOp, string ImmVal,
+                            bit isNegCond, bit isTak>
+  : NVInst_V4<(outs),
+              (ins IntRegs:$src1, brtarget:$offset),
+          "if ("#!if(isNegCond, "!","")#mnemonic
+          #"($src1.new, #"#ImmVal#")) jump:"
+          #!if(isTak, "t","nt")#" $offset", []> {
+
+    let isTaken = isTak;
+    let isPredicatedFalse = isNegCond;
+
+    bits<3> src1;
+    bits<11> offset;
+    let IClass = 0b0010;
+    let Inst{26} = 0b1;
+    let Inst{25-23} = majOp;
+    let Inst{22} = isNegCond;
+    let Inst{18-16} = src1;
+    let Inst{13} = isTak;
+    let Inst{21-20} = offset{10-9};
+    let Inst{7-1} = offset{8-2};
+}
+
+multiclass NVJ_ConstImm_cond<string mnemonic, bits<3> majOp, string ImmVal,
+                             bit isNegCond> {
+  // Branch not taken:
+  def _nt: NVJ_ConstImm_template<mnemonic, majOp, ImmVal, isNegCond, 0>;
+  // Branch taken:
+  def _t : NVJ_ConstImm_template<mnemonic, majOp, ImmVal, isNegCond, 1>;
+}
+
+multiclass NVJ_ConstImm_base<string mnemonic, string BaseOp, bits<3> majOp,
+                             string ImmVal> {
+  let BaseOpcode = BaseOp#_NVJ_ConstImm in {
+    defm _t_jumpnv : NVJ_ConstImm_cond<mnemonic, majOp, ImmVal, 0>; // True
+    defm _f_jumpnv : NVJ_ConstImm_cond<mnemonic, majOp, ImmVal, 1>; // False
+  }
+}
+
+// if ([!]tstbit(Ns.new,#0)) jump:[n]t #r9:2
+// if ([!]cmp.eq(Ns.new,#-1)) jump:[n]t #r9:2
+// if ([!]cmp.gt(Ns.new,#-1)) jump:[n]t #r9:2
+
+let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator = 1,
+    Defs = [PC], hasSideEffects = 0 in {
+  defm J4_tstbit0 : NVJ_ConstImm_base<"tstbit", "TSTBIT", 0b011, "0">, PredRel;
+  defm J4_cmpeqn1 : NVJ_ConstImm_base<"cmp.eq", "CMPEQ", 0b100, "-1">, PredRel;
+  defm J4_cmpgtn1 : NVJ_ConstImm_base<"cmp.gt", "CMPGT", 0b101, "-1">, PredRel;
+}
+
+// J4_hintjumpr: Hint the target of an indirect jump.
+let isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in +def J4_hintjumpr: JRInst < + (outs), + (ins IntRegs:$Rs), + "hintjr($Rs)"> { + bits<5> Rs; + let IClass = 0b0101; + let Inst{27-21} = 0b0010101; + let Inst{20-16} = Rs; + } + +//===----------------------------------------------------------------------===// +// NV/J - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// CR + +//===----------------------------------------------------------------------===// + +// PC-relative add +let hasNewValue = 1, isExtendable = 1, opExtendable = 1, + isExtentSigned = 0, opExtentBits = 6, hasSideEffects = 0, Uses = [PC] in +def C4_addipc : CRInst <(outs IntRegs:$Rd), (ins u6Ext:$u6), + "$Rd = add(pc, #$u6)", [], "", CR_tc_2_SLOT3 > { + bits<5> Rd; + bits<6> u6; + + let IClass = 0b0110; + let Inst{27-16} = 0b101001001001; + let Inst{12-7} = u6; + let Inst{4-0} = Rd; + } + + + +let hasSideEffects = 0 in +class T_LOGICAL_3OP<string MnOp1, string MnOp2, bits<2> OpBits, bit IsNeg> + : CRInst<(outs PredRegs:$Pd), + (ins PredRegs:$Ps, PredRegs:$Pt, PredRegs:$Pu), + "$Pd = " # MnOp1 # "($Ps, " # MnOp2 # "($Pt, " # + !if (IsNeg,"!","") # "$Pu))", + [], "", CR_tc_2early_SLOT23> { + bits<2> Pd; + bits<2> Ps; + bits<2> Pt; + bits<2> Pu; + + let IClass = 0b0110; + let Inst{27-24} = 0b1011; + let Inst{23} = IsNeg; + let Inst{22-21} = OpBits; + let Inst{20} = 0b1; + let Inst{17-16} = Ps; + let Inst{13} = 0b0; + let Inst{9-8} = Pt; + let Inst{7-6} = Pu; + let Inst{1-0} = Pd; +} + +def C4_and_and : T_LOGICAL_3OP<"and", "and", 0b00, 0>; +def C4_and_or : T_LOGICAL_3OP<"and", "or", 0b01, 0>; +def C4_or_and : T_LOGICAL_3OP<"or", "and", 0b10, 0>; +def C4_or_or : T_LOGICAL_3OP<"or", "or", 0b11, 0>; +def C4_and_andn : T_LOGICAL_3OP<"and", "and", 0b00, 1>; +def C4_and_orn : T_LOGICAL_3OP<"and", "or", 0b01, 1>; +def C4_or_andn : T_LOGICAL_3OP<"or", "and", 0b10, 1>; +def C4_or_orn : T_LOGICAL_3OP<"or", "or", 0b11, 1>; + +// op(Ps, op(Pt, Pu)) +class LogLog_pat<SDNode Op1, SDNode Op2, InstHexagon MI> + : Pat<(i1 (Op1 I1:$Ps, (Op2 I1:$Pt, I1:$Pu))), + (MI I1:$Ps, I1:$Pt, I1:$Pu)>; + +// op(Ps, op(Pt, ~Pu)) +class LogLogNot_pat<SDNode Op1, SDNode Op2, InstHexagon MI> + : Pat<(i1 (Op1 I1:$Ps, (Op2 I1:$Pt, (not I1:$Pu)))), + (MI I1:$Ps, I1:$Pt, I1:$Pu)>; + +def: LogLog_pat<and, and, C4_and_and>; +def: LogLog_pat<and, or, C4_and_or>; +def: LogLog_pat<or, and, C4_or_and>; +def: LogLog_pat<or, or, C4_or_or>; + +def: LogLogNot_pat<and, and, C4_and_andn>; +def: LogLogNot_pat<and, or, C4_and_orn>; +def: LogLogNot_pat<or, and, C4_or_andn>; +def: LogLogNot_pat<or, or, C4_or_orn>; + +//===----------------------------------------------------------------------===// +// PIC: Support for PIC compilations. 
The patterns and SD nodes defined +// below are needed to support code generation for PIC +//===----------------------------------------------------------------------===// + +def SDT_HexagonAtGot + : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>; +def SDT_HexagonAtPcrel + : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; + +// AT_GOT address-of-GOT, address-of-global, offset-in-global +def HexagonAtGot : SDNode<"HexagonISD::AT_GOT", SDT_HexagonAtGot>; +// AT_PCREL address-of-global +def HexagonAtPcrel : SDNode<"HexagonISD::AT_PCREL", SDT_HexagonAtPcrel>; + +def: Pat<(HexagonAtGot I32:$got, I32:$addr, (i32 0)), + (L2_loadri_io I32:$got, imm:$addr)>; +def: Pat<(HexagonAtGot I32:$got, I32:$addr, s30_2ImmPred:$off), + (A2_addi (L2_loadri_io I32:$got, imm:$addr), imm:$off)>; +def: Pat<(HexagonAtPcrel I32:$addr), + (C4_addipc imm:$addr)>; + +//===----------------------------------------------------------------------===// +// CR - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// XTYPE/ALU + +//===----------------------------------------------------------------------===// + +// Logical with-not instructions. +def A4_andnp : T_ALU64_logical<"and", 0b001, 1, 0, 1>; +def A4_ornp : T_ALU64_logical<"or", 0b011, 1, 0, 1>; + +def: Pat<(i64 (and (i64 DoubleRegs:$Rs), (i64 (not (i64 DoubleRegs:$Rt))))), + (A4_andnp DoubleRegs:$Rs, DoubleRegs:$Rt)>; +def: Pat<(i64 (or (i64 DoubleRegs:$Rs), (i64 (not (i64 DoubleRegs:$Rt))))), + (A4_ornp DoubleRegs:$Rs, DoubleRegs:$Rt)>; + +let hasNewValue = 1, hasSideEffects = 0 in +def S4_parity: ALU64Inst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt), + "$Rd = parity($Rs, $Rt)", [], "", ALU64_tc_2_SLOT23> { + bits<5> Rd; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1101; + let Inst{27-21} = 0b0101111; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + let Inst{4-0} = Rd; +} + +// Add and accumulate. 
+// Rd=add(Rs,add(Ru,#s6)) +let isExtentSigned = 1, hasNewValue = 1, isExtendable = 1, opExtentBits = 6, + opExtendable = 3 in +def S4_addaddi : ALU64Inst <(outs IntRegs:$Rd), + (ins IntRegs:$Rs, IntRegs:$Ru, s6Ext:$s6), + "$Rd = add($Rs, add($Ru, #$s6))" , + [(set (i32 IntRegs:$Rd), (add (i32 IntRegs:$Rs), + (add (i32 IntRegs:$Ru), s32ImmPred:$s6)))], + "", ALU64_tc_2_SLOT23> { + bits<5> Rd; + bits<5> Rs; + bits<5> Ru; + bits<6> s6; + + let IClass = 0b1101; + + let Inst{27-23} = 0b10110; + let Inst{22-21} = s6{5-4}; + let Inst{20-16} = Rs; + let Inst{13} = s6{3}; + let Inst{12-8} = Rd; + let Inst{7-5} = s6{2-0}; + let Inst{4-0} = Ru; + } + +let isExtentSigned = 1, hasSideEffects = 0, hasNewValue = 1, isExtendable = 1, + opExtentBits = 6, opExtendable = 2 in +def S4_subaddi: ALU64Inst <(outs IntRegs:$Rd), + (ins IntRegs:$Rs, s6Ext:$s6, IntRegs:$Ru), + "$Rd = add($Rs, sub(#$s6, $Ru))", + [], "", ALU64_tc_2_SLOT23> { + bits<5> Rd; + bits<5> Rs; + bits<6> s6; + bits<5> Ru; + + let IClass = 0b1101; + + let Inst{27-23} = 0b10111; + let Inst{22-21} = s6{5-4}; + let Inst{20-16} = Rs; + let Inst{13} = s6{3}; + let Inst{12-8} = Rd; + let Inst{7-5} = s6{2-0}; + let Inst{4-0} = Ru; + } + +// Rd=add(Rs,sub(#s6,Ru)) +def: Pat<(add (i32 IntRegs:$src1), (sub s32ImmPred:$src2, + (i32 IntRegs:$src3))), + (S4_subaddi IntRegs:$src1, s32ImmPred:$src2, IntRegs:$src3)>; + +// Rd=sub(add(Rs,#s6),Ru) +def: Pat<(sub (add (i32 IntRegs:$src1), s32ImmPred:$src2), + (i32 IntRegs:$src3)), + (S4_subaddi IntRegs:$src1, s32ImmPred:$src2, IntRegs:$src3)>; + +// Rd=add(sub(Rs,Ru),#s6) +def: Pat<(add (sub (i32 IntRegs:$src1), (i32 IntRegs:$src3)), + (s32ImmPred:$src2)), + (S4_subaddi IntRegs:$src1, s32ImmPred:$src2, IntRegs:$src3)>; + + +// Add or subtract doublewords with carry. +//TODO: +// Rdd=add(Rss,Rtt,Px):carry +//TODO: +// Rdd=sub(Rss,Rtt,Px):carry + +// Extract bitfield +// Rdd=extract(Rss,#u6,#U6) +// Rdd=extract(Rss,Rtt) +// Rd=extract(Rs,Rtt) +// Rd=extract(Rs,#u5,#U5) + +def S4_extractp_rp : T_S3op_64 < "extract", 0b11, 0b100, 0>; +def S4_extractp : T_S2op_extract <"extract", 0b1010, DoubleRegs, u6Imm>; + +let hasNewValue = 1 in { + def S4_extract_rp : T_S3op_extract<"extract", 0b01>; + def S4_extract : T_S2op_extract <"extract", 0b1101, IntRegs, u5Imm>; +} + +// Complex add/sub halfwords/words +let Defs = [USR_OVF] in { + def S4_vxaddsubh : T_S3op_64 < "vxaddsubh", 0b01, 0b100, 0, 1>; + def S4_vxaddsubw : T_S3op_64 < "vxaddsubw", 0b01, 0b000, 0, 1>; + def S4_vxsubaddh : T_S3op_64 < "vxsubaddh", 0b01, 0b110, 0, 1>; + def S4_vxsubaddw : T_S3op_64 < "vxsubaddw", 0b01, 0b010, 0, 1>; +} + +let Defs = [USR_OVF] in { + def S4_vxaddsubhr : T_S3op_64 < "vxaddsubh", 0b11, 0b000, 0, 1, 1, 1>; + def S4_vxsubaddhr : T_S3op_64 < "vxsubaddh", 0b11, 0b010, 0, 1, 1, 1>; +} + +let Itinerary = M_tc_3x_SLOT23, Defs = [USR_OVF] in { + def M4_mac_up_s1_sat: T_MType_acc_rr<"+= mpy", 0b011, 0b000, 0, [], 0, 1, 1>; + def M4_nac_up_s1_sat: T_MType_acc_rr<"-= mpy", 0b011, 0b001, 0, [], 0, 1, 1>; +} + +// Logical xor with xor accumulation. 
+// Rxx^=xor(Rss,Rtt) +let hasSideEffects = 0 in +def M4_xor_xacc + : SInst <(outs DoubleRegs:$Rxx), + (ins DoubleRegs:$dst2, DoubleRegs:$Rss, DoubleRegs:$Rtt), + "$Rxx ^= xor($Rss, $Rtt)", + [(set (i64 DoubleRegs:$Rxx), + (xor (i64 DoubleRegs:$dst2), (xor (i64 DoubleRegs:$Rss), + (i64 DoubleRegs:$Rtt))))], + "$dst2 = $Rxx", S_3op_tc_1_SLOT23> { + bits<5> Rxx; + bits<5> Rss; + bits<5> Rtt; + + let IClass = 0b1100; + + let Inst{27-22} = 0b101010; + let Inst{20-16} = Rss; + let Inst{12-8} = Rtt; + let Inst{7-5} = 0b000; + let Inst{4-0} = Rxx; + } + +// Rotate and reduce bytes +// Rdd=vrcrotate(Rss,Rt,#u2) +let hasSideEffects = 0 in +def S4_vrcrotate + : SInst <(outs DoubleRegs:$Rdd), + (ins DoubleRegs:$Rss, IntRegs:$Rt, u2Imm:$u2), + "$Rdd = vrcrotate($Rss, $Rt, #$u2)", + [], "", S_3op_tc_3x_SLOT23> { + bits<5> Rdd; + bits<5> Rss; + bits<5> Rt; + bits<2> u2; + + let IClass = 0b1100; + + let Inst{27-22} = 0b001111; + let Inst{20-16} = Rss; + let Inst{13} = u2{1}; + let Inst{12-8} = Rt; + let Inst{7-6} = 0b11; + let Inst{5} = u2{0}; + let Inst{4-0} = Rdd; + } + +// Rotate and reduce bytes with accumulation +// Rxx+=vrcrotate(Rss,Rt,#u2) +let hasSideEffects = 0 in +def S4_vrcrotate_acc + : SInst <(outs DoubleRegs:$Rxx), + (ins DoubleRegs:$dst2, DoubleRegs:$Rss, IntRegs:$Rt, u2Imm:$u2), + "$Rxx += vrcrotate($Rss, $Rt, #$u2)", [], + "$dst2 = $Rxx", S_3op_tc_3x_SLOT23> { + bits<5> Rxx; + bits<5> Rss; + bits<5> Rt; + bits<2> u2; + + let IClass = 0b1100; + + let Inst{27-21} = 0b1011101; + let Inst{20-16} = Rss; + let Inst{13} = u2{1}; + let Inst{12-8} = Rt; + let Inst{5} = u2{0}; + let Inst{4-0} = Rxx; + } + +// Vector reduce conditional negate halfwords +let hasSideEffects = 0 in +def S2_vrcnegh + : SInst <(outs DoubleRegs:$Rxx), + (ins DoubleRegs:$dst2, DoubleRegs:$Rss, IntRegs:$Rt), + "$Rxx += vrcnegh($Rss, $Rt)", [], + "$dst2 = $Rxx", S_3op_tc_3x_SLOT23> { + bits<5> Rxx; + bits<5> Rss; + bits<5> Rt; + + let IClass = 0b1100; + + let Inst{27-21} = 0b1011001; + let Inst{20-16} = Rss; + let Inst{13} = 0b1; + let Inst{12-8} = Rt; + let Inst{7-5} = 0b111; + let Inst{4-0} = Rxx; + } + +// Split bitfield +def A4_bitspliti : T_S2op_2_di <"bitsplit", 0b110, 0b100>; + +// Arithmetic/Convergent round +def A4_cround_ri : T_S2op_2_ii <"cround", 0b111, 0b000>; + +def A4_round_ri : T_S2op_2_ii <"round", 0b111, 0b100>; + +let Defs = [USR_OVF] in +def A4_round_ri_sat : T_S2op_2_ii <"round", 0b111, 0b110, 1>; + +// Logical-logical words. +// Compound or-and -- Rx=or(Ru,and(Rx,#s10)) +let isExtentSigned = 1, hasNewValue = 1, isExtendable = 1, opExtentBits = 10, + opExtendable = 3 in +def S4_or_andix: + ALU64Inst<(outs IntRegs:$Rx), + (ins IntRegs:$Ru, IntRegs:$_src_, s10Ext:$s10), + "$Rx = or($Ru, and($_src_, #$s10))" , + [(set (i32 IntRegs:$Rx), + (or (i32 IntRegs:$Ru), (and (i32 IntRegs:$_src_), s32ImmPred:$s10)))] , + "$_src_ = $Rx", ALU64_tc_2_SLOT23> { + bits<5> Rx; + bits<5> Ru; + bits<10> s10; + + let IClass = 0b1101; + + let Inst{27-22} = 0b101001; + let Inst{20-16} = Rx; + let Inst{21} = s10{9}; + let Inst{13-5} = s10{8-0}; + let Inst{4-0} = Ru; + } + +// Miscellaneous ALU64 instructions. 
+// +let hasNewValue = 1, hasSideEffects = 0 in +def A4_modwrapu: ALU64Inst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt), + "$Rd = modwrap($Rs, $Rt)", [], "", ALU64_tc_2_SLOT23> { + bits<5> Rd; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1101; + let Inst{27-21} = 0b0011111; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + let Inst{7-5} = 0b111; + let Inst{4-0} = Rd; +} + +let hasSideEffects = 0 in +def A4_bitsplit: ALU64Inst<(outs DoubleRegs:$Rd), + (ins IntRegs:$Rs, IntRegs:$Rt), + "$Rd = bitsplit($Rs, $Rt)", [], "", ALU64_tc_1_SLOT23> { + bits<5> Rd; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1101; + let Inst{27-24} = 0b0100; + let Inst{21} = 0b1; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + let Inst{4-0} = Rd; +} + +let hasSideEffects = 0 in +def dep_S2_packhl: ALU64Inst<(outs DoubleRegs:$Rd), + (ins IntRegs:$Rs, IntRegs:$Rt), + "$Rd = packhl($Rs, $Rt):deprecated", [], "", ALU64_tc_1_SLOT23> { + bits<5> Rd; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1101; + let Inst{27-24} = 0b0100; + let Inst{21} = 0b0; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + let Inst{4-0} = Rd; +} + +let hasNewValue = 1, hasSideEffects = 0 in +def dep_A2_addsat: ALU64Inst<(outs IntRegs:$Rd), + (ins IntRegs:$Rs, IntRegs:$Rt), + "$Rd = add($Rs, $Rt):sat:deprecated", [], "", ALU64_tc_2_SLOT23> { + bits<5> Rd; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1101; + let Inst{27-21} = 0b0101100; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + let Inst{7} = 0b0; + let Inst{4-0} = Rd; +} + +let hasNewValue = 1, hasSideEffects = 0 in +def dep_A2_subsat: ALU64Inst<(outs IntRegs:$Rd), + (ins IntRegs:$Rs, IntRegs:$Rt), + "$Rd = sub($Rs, $Rt):sat:deprecated", [], "", ALU64_tc_2_SLOT23> { + bits<5> Rd; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1101; + let Inst{27-21} = 0b0101100; + let Inst{20-16} = Rt; + let Inst{12-8} = Rs; + let Inst{7} = 0b1; + let Inst{4-0} = Rd; +} + +// Rx[&|]=xor(Rs,Rt) +def M4_or_xor : T_MType_acc_rr < "|= xor", 0b110, 0b001, 0>; +def M4_and_xor : T_MType_acc_rr < "&= xor", 0b010, 0b010, 0>; + +// Rx[&|^]=or(Rs,Rt) +def M4_xor_or : T_MType_acc_rr < "^= or", 0b110, 0b011, 0>; + +let CextOpcode = "ORr_ORr" in +def M4_or_or : T_MType_acc_rr < "|= or", 0b110, 0b000, 0>; +def M4_and_or : T_MType_acc_rr < "&= or", 0b010, 0b001, 0>; + +// Rx[&|^]=and(Rs,Rt) +def M4_xor_and : T_MType_acc_rr < "^= and", 0b110, 0b010, 0>; + +let CextOpcode = "ORr_ANDr" in +def M4_or_and : T_MType_acc_rr < "|= and", 0b010, 0b011, 0>; +def M4_and_and : T_MType_acc_rr < "&= and", 0b010, 0b000, 0>; + +// Rx[&|^]=and(Rs,~Rt) +def M4_xor_andn : T_MType_acc_rr < "^= and", 0b001, 0b010, 0, [], 1>; +def M4_or_andn : T_MType_acc_rr < "|= and", 0b001, 0b000, 0, [], 1>; +def M4_and_andn : T_MType_acc_rr < "&= and", 0b001, 0b001, 0, [], 1>; + +def: T_MType_acc_pat2 <M4_or_xor, xor, or>; +def: T_MType_acc_pat2 <M4_and_xor, xor, and>; +def: T_MType_acc_pat2 <M4_or_and, and, or>; +def: T_MType_acc_pat2 <M4_and_and, and, and>; +def: T_MType_acc_pat2 <M4_xor_and, and, xor>; +def: T_MType_acc_pat2 <M4_or_or, or, or>; +def: T_MType_acc_pat2 <M4_and_or, or, and>; +def: T_MType_acc_pat2 <M4_xor_or, or, xor>; + +class T_MType_acc_pat3 <InstHexagon MI, SDNode firstOp, SDNode secOp> + : Pat <(i32 (secOp IntRegs:$src1, (firstOp IntRegs:$src2, + (not IntRegs:$src3)))), + (i32 (MI IntRegs:$src1, IntRegs:$src2, IntRegs:$src3))>; + +def: T_MType_acc_pat3 <M4_or_andn, and, or>; +def: T_MType_acc_pat3 <M4_and_andn, and, and>; +def: T_MType_acc_pat3 <M4_xor_andn, and, xor>; + +// Compound or-or and or-and +let isExtentSigned = 1, 
InputType = "imm", hasNewValue = 1, isExtendable = 1, + opExtentBits = 10, opExtendable = 3 in +class T_CompOR <string mnemonic, bits<2> MajOp, SDNode OpNode> + : MInst_acc <(outs IntRegs:$Rx), + (ins IntRegs:$src1, IntRegs:$Rs, s10Ext:$s10), + "$Rx |= "#mnemonic#"($Rs, #$s10)", + [(set (i32 IntRegs:$Rx), (or (i32 IntRegs:$src1), + (OpNode (i32 IntRegs:$Rs), s32ImmPred:$s10)))], + "$src1 = $Rx", ALU64_tc_2_SLOT23>, ImmRegRel { + bits<5> Rx; + bits<5> Rs; + bits<10> s10; + + let IClass = 0b1101; + + let Inst{27-24} = 0b1010; + let Inst{23-22} = MajOp; + let Inst{20-16} = Rs; + let Inst{21} = s10{9}; + let Inst{13-5} = s10{8-0}; + let Inst{4-0} = Rx; + } + +let CextOpcode = "ORr_ANDr" in +def S4_or_andi : T_CompOR <"and", 0b00, and>; + +let CextOpcode = "ORr_ORr" in +def S4_or_ori : T_CompOR <"or", 0b10, or>; + +// Modulo wrap +// Rd=modwrap(Rs,Rt) +// Round +// Rd=cround(Rs,#u5) +// Rd=cround(Rs,Rt) +// Rd=round(Rs,#u5)[:sat] +// Rd=round(Rs,Rt)[:sat] +// Vector reduce add unsigned halfwords +// Rd=vraddh(Rss,Rtt) +// Vector add bytes +// Rdd=vaddb(Rss,Rtt) +// Vector conditional negate +// Rdd=vcnegh(Rss,Rt) +// Rxx+=vrcnegh(Rss,Rt) +// Vector maximum bytes +// Rdd=vmaxb(Rtt,Rss) +// Vector reduce maximum halfwords +// Rxx=vrmaxh(Rss,Ru) +// Rxx=vrmaxuh(Rss,Ru) +// Vector reduce maximum words +// Rxx=vrmaxuw(Rss,Ru) +// Rxx=vrmaxw(Rss,Ru) +// Vector minimum bytes +// Rdd=vminb(Rtt,Rss) +// Vector reduce minimum halfwords +// Rxx=vrminh(Rss,Ru) +// Rxx=vrminuh(Rss,Ru) +// Vector reduce minimum words +// Rxx=vrminuw(Rss,Ru) +// Rxx=vrminw(Rss,Ru) +// Vector subtract bytes +// Rdd=vsubb(Rss,Rtt) + +//===----------------------------------------------------------------------===// +// XTYPE/ALU - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// XTYPE/BIT + +//===----------------------------------------------------------------------===// + +// Bit reverse +def S2_brevp : T_S2op_3 <"brev", 0b11, 0b110>; + +// Bit count +def S2_ct0p : T_COUNT_LEADING_64<"ct0", 0b111, 0b010>; +def S2_ct1p : T_COUNT_LEADING_64<"ct1", 0b111, 0b100>; +def S4_clbpnorm : T_COUNT_LEADING_64<"normamt", 0b011, 0b000>; + +// Count trailing zeros: 64-bit. +def: Pat<(i32 (trunc (cttz I64:$Rss))), (S2_ct0p I64:$Rss)>; +def: Pat<(i32 (trunc (cttz_zero_undef I64:$Rss))), (S2_ct0p I64:$Rss)>; + +// Count trailing ones: 64-bit. +def: Pat<(i32 (trunc (cttz (not I64:$Rss)))), (S2_ct1p I64:$Rss)>; +def: Pat<(i32 (trunc (cttz_zero_undef (not I64:$Rss)))), (S2_ct1p I64:$Rss)>; + +// Define leading/trailing patterns that require zero-extensions to 64 bits. 
+def: Pat<(i64 (ctlz I64:$Rss)), (Zext64 (S2_cl0p I64:$Rss))>; +def: Pat<(i64 (ctlz_zero_undef I64:$Rss)), (Zext64 (S2_cl0p I64:$Rss))>; +def: Pat<(i64 (cttz I64:$Rss)), (Zext64 (S2_ct0p I64:$Rss))>; +def: Pat<(i64 (cttz_zero_undef I64:$Rss)), (Zext64 (S2_ct0p I64:$Rss))>; +def: Pat<(i64 (ctlz (not I64:$Rss))), (Zext64 (S2_cl1p I64:$Rss))>; +def: Pat<(i64 (ctlz_zero_undef (not I64:$Rss))), (Zext64 (S2_cl1p I64:$Rss))>; +def: Pat<(i64 (cttz (not I64:$Rss))), (Zext64 (S2_ct1p I64:$Rss))>; +def: Pat<(i64 (cttz_zero_undef (not I64:$Rss))), (Zext64 (S2_ct1p I64:$Rss))>; + + +let hasSideEffects = 0, hasNewValue = 1 in +def S4_clbaddi : SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, s6Imm:$s6), + "$Rd = add(clb($Rs), #$s6)", [], "", S_2op_tc_2_SLOT23> { + bits<5> Rs; + bits<5> Rd; + bits<6> s6; + let IClass = 0b1000; + let Inst{27-24} = 0b1100; + let Inst{23-21} = 0b001; + let Inst{20-16} = Rs; + let Inst{13-8} = s6; + let Inst{7-5} = 0b000; + let Inst{4-0} = Rd; +} + +let hasSideEffects = 0, hasNewValue = 1 in +def S4_clbpaddi : SInst<(outs IntRegs:$Rd), (ins DoubleRegs:$Rs, s6Imm:$s6), + "$Rd = add(clb($Rs), #$s6)", [], "", S_2op_tc_2_SLOT23> { + bits<5> Rs; + bits<5> Rd; + bits<6> s6; + let IClass = 0b1000; + let Inst{27-24} = 0b1000; + let Inst{23-21} = 0b011; + let Inst{20-16} = Rs; + let Inst{13-8} = s6; + let Inst{7-5} = 0b010; + let Inst{4-0} = Rd; +} + + +// Bit test/set/clear +def S4_ntstbit_i : T_TEST_BIT_IMM<"!tstbit", 0b001>; +def S4_ntstbit_r : T_TEST_BIT_REG<"!tstbit", 1>; + +let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm. + def: Pat<(i1 (seteq (and (shl 1, u5ImmPred:$u5), (i32 IntRegs:$Rs)), 0)), + (S4_ntstbit_i (i32 IntRegs:$Rs), u5ImmPred:$u5)>; + def: Pat<(i1 (seteq (and (shl 1, (i32 IntRegs:$Rt)), (i32 IntRegs:$Rs)), 0)), + (S4_ntstbit_r (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))>; +} + +// Add extra complexity to prefer these instructions over bitsset/bitsclr. +// The reason is that tstbit/ntstbit can be folded into a compound instruction: +// if ([!]tstbit(...)) jump ... +let AddedComplexity = 100 in +def: Pat<(i1 (setne (and (i32 IntRegs:$Rs), (i32 Set5ImmPred:$u5)), (i32 0))), + (S2_tstbit_i (i32 IntRegs:$Rs), (BITPOS32 Set5ImmPred:$u5))>; + +let AddedComplexity = 100 in +def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), (i32 Set5ImmPred:$u5)), (i32 0))), + (S4_ntstbit_i (i32 IntRegs:$Rs), (BITPOS32 Set5ImmPred:$u5))>; + +def C4_nbitsset : T_TEST_BITS_REG<"!bitsset", 0b01, 1>; +def C4_nbitsclr : T_TEST_BITS_REG<"!bitsclr", 0b10, 1>; +def C4_nbitsclri : T_TEST_BITS_IMM<"!bitsclr", 0b10, 1>; + +// Do not increase complexity of these patterns. In the DAG, "cmp i8" may be +// represented as a compare against "value & 0xFF", which is an exact match +// for cmpb (same for cmph). The patterns below do not contain any additional +// complexity that would make them preferable, and if they were actually used +// instead of cmpb/cmph, they would result in a compare against register that +// is loaded with the byte/half mask (i.e. 0xFF or 0xFFFF). 
+def: Pat<(i1 (setne (and I32:$Rs, u6ImmPred:$u6), 0)), + (C4_nbitsclri I32:$Rs, u6ImmPred:$u6)>; +def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), 0)), + (C4_nbitsclr I32:$Rs, I32:$Rt)>; +def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), I32:$Rt)), + (C4_nbitsset I32:$Rs, I32:$Rt)>; + +//===----------------------------------------------------------------------===// +// XTYPE/BIT - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// XTYPE/MPY + +//===----------------------------------------------------------------------===// + +// Rd=add(#u6,mpyi(Rs,#U6)) -- Multiply by immed and add immed. + +let hasNewValue = 1, isExtendable = 1, opExtentBits = 6, opExtendable = 1 in +def M4_mpyri_addi : MInst<(outs IntRegs:$Rd), + (ins u6Ext:$u6, IntRegs:$Rs, u6Imm:$U6), + "$Rd = add(#$u6, mpyi($Rs, #$U6))" , + [(set (i32 IntRegs:$Rd), + (add (mul (i32 IntRegs:$Rs), u6ImmPred:$U6), + u32ImmPred:$u6))] ,"",ALU64_tc_3x_SLOT23> { + bits<5> Rd; + bits<6> u6; + bits<5> Rs; + bits<6> U6; + + let IClass = 0b1101; + + let Inst{27-24} = 0b1000; + let Inst{23} = U6{5}; + let Inst{22-21} = u6{5-4}; + let Inst{20-16} = Rs; + let Inst{13} = u6{3}; + let Inst{12-8} = Rd; + let Inst{7-5} = u6{2-0}; + let Inst{4-0} = U6{4-0}; + } + +// Rd=add(#u6,mpyi(Rs,Rt)) +let CextOpcode = "ADD_MPY", InputType = "imm", hasNewValue = 1, + isExtendable = 1, opExtentBits = 6, opExtendable = 1 in +def M4_mpyrr_addi : MInst <(outs IntRegs:$Rd), + (ins u6Ext:$u6, IntRegs:$Rs, IntRegs:$Rt), + "$Rd = add(#$u6, mpyi($Rs, $Rt))" , + [(set (i32 IntRegs:$Rd), + (add (mul (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), u32ImmPred:$u6))], + "", ALU64_tc_3x_SLOT23>, ImmRegRel { + bits<5> Rd; + bits<6> u6; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1101; + + let Inst{27-23} = 0b01110; + let Inst{22-21} = u6{5-4}; + let Inst{20-16} = Rs; + let Inst{13} = u6{3}; + let Inst{12-8} = Rt; + let Inst{7-5} = u6{2-0}; + let Inst{4-0} = Rd; + } + +let hasNewValue = 1 in +class T_AddMpy <bit MajOp, PatLeaf ImmPred, dag ins> + : ALU64Inst <(outs IntRegs:$dst), ins, + "$dst = add($src1, mpyi("#!if(MajOp,"$src3, #$src2))", + "#$src2, $src3))"), + [(set (i32 IntRegs:$dst), + (add (i32 IntRegs:$src1), (mul (i32 IntRegs:$src3), ImmPred:$src2)))], + "", ALU64_tc_3x_SLOT23> { + bits<5> dst; + bits<5> src1; + bits<8> src2; + bits<5> src3; + + let IClass = 0b1101; + + bits<6> ImmValue = !if(MajOp, src2{5-0}, src2{7-2}); + + let Inst{27-24} = 0b1111; + let Inst{23} = MajOp; + let Inst{22-21} = ImmValue{5-4}; + let Inst{20-16} = src3; + let Inst{13} = ImmValue{3}; + let Inst{12-8} = dst; + let Inst{7-5} = ImmValue{2-0}; + let Inst{4-0} = src1; + } + +def M4_mpyri_addr_u2 : T_AddMpy<0b0, u6_2ImmPred, + (ins IntRegs:$src1, u6_2Imm:$src2, IntRegs:$src3)>; + +let isExtendable = 1, opExtentBits = 6, opExtendable = 3, + CextOpcode = "ADD_MPY", InputType = "imm" in +def M4_mpyri_addr : T_AddMpy<0b1, u32ImmPred, + (ins IntRegs:$src1, IntRegs:$src3, u6Ext:$src2)>, ImmRegRel; + +// Rx=add(Ru,mpyi(Rx,Rs)) +let CextOpcode = "ADD_MPY", InputType = "reg", hasNewValue = 1 in +def M4_mpyrr_addr: MInst_acc <(outs IntRegs:$Rx), + (ins IntRegs:$Ru, IntRegs:$_src_, IntRegs:$Rs), + "$Rx = add($Ru, mpyi($_src_, $Rs))", + [(set (i32 IntRegs:$Rx), (add (i32 IntRegs:$Ru), + (mul (i32 IntRegs:$_src_), (i32 IntRegs:$Rs))))], + "$_src_ = $Rx", M_tc_3x_SLOT23>, ImmRegRel { + bits<5> Rx; + bits<5> Ru; + bits<5> Rs; + + let IClass = 0b1110; + + let Inst{27-21} = 0b0011000; + let Inst{12-8} = Rx; + 
let Inst{4-0} = Ru; + let Inst{20-16} = Rs; + } + + +// Vector reduce multiply word by signed half (32x16) +//Rdd=vrmpyweh(Rss,Rtt)[:<<1] +def M4_vrmpyeh_s0 : T_M2_vmpy<"vrmpyweh", 0b010, 0b100, 0, 0, 0>; +def M4_vrmpyeh_s1 : T_M2_vmpy<"vrmpyweh", 0b110, 0b100, 1, 0, 0>; + +//Rdd=vrmpywoh(Rss,Rtt)[:<<1] +def M4_vrmpyoh_s0 : T_M2_vmpy<"vrmpywoh", 0b001, 0b010, 0, 0, 0>; +def M4_vrmpyoh_s1 : T_M2_vmpy<"vrmpywoh", 0b101, 0b010, 1, 0, 0>; + +//Rdd+=vrmpyweh(Rss,Rtt)[:<<1] +def M4_vrmpyeh_acc_s0: T_M2_vmpy_acc<"vrmpyweh", 0b001, 0b110, 0, 0>; +def M4_vrmpyeh_acc_s1: T_M2_vmpy_acc<"vrmpyweh", 0b101, 0b110, 1, 0>; + +//Rdd=vrmpywoh(Rss,Rtt)[:<<1] +def M4_vrmpyoh_acc_s0: T_M2_vmpy_acc<"vrmpywoh", 0b011, 0b110, 0, 0>; +def M4_vrmpyoh_acc_s1: T_M2_vmpy_acc<"vrmpywoh", 0b111, 0b110, 1, 0>; + +// Vector multiply halfwords, signed by unsigned +// Rdd=vmpyhsu(Rs,Rt)[:<<]:sat +def M2_vmpy2su_s0 : T_XTYPE_mpy64 < "vmpyhsu", 0b000, 0b111, 1, 0, 0>; +def M2_vmpy2su_s1 : T_XTYPE_mpy64 < "vmpyhsu", 0b100, 0b111, 1, 1, 0>; + +// Rxx+=vmpyhsu(Rs,Rt)[:<<1]:sat +def M2_vmac2su_s0 : T_XTYPE_mpy64_acc < "vmpyhsu", "+", 0b011, 0b101, 1, 0, 0>; +def M2_vmac2su_s1 : T_XTYPE_mpy64_acc < "vmpyhsu", "+", 0b111, 0b101, 1, 1, 0>; + +// Vector polynomial multiply halfwords +// Rdd=vpmpyh(Rs,Rt) +def M4_vpmpyh : T_XTYPE_mpy64 < "vpmpyh", 0b110, 0b111, 0, 0, 0>; + +// Rxx^=vpmpyh(Rs,Rt) +def M4_vpmpyh_acc : T_XTYPE_mpy64_acc < "vpmpyh", "^", 0b101, 0b111, 0, 0, 0>; + +// Polynomial multiply words +// Rdd=pmpyw(Rs,Rt) +def M4_pmpyw : T_XTYPE_mpy64 < "pmpyw", 0b010, 0b111, 0, 0, 0>; + +// Rxx^=pmpyw(Rs,Rt) +def M4_pmpyw_acc : T_XTYPE_mpy64_acc < "pmpyw", "^", 0b001, 0b111, 0, 0, 0>; + +//===----------------------------------------------------------------------===// +// XTYPE/MPY - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// ALU64/Vector compare +//===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// Template class for vector compare +//===----------------------------------------------------------------------===// + +let hasSideEffects = 0 in +class T_vcmpImm <string Str, bits<2> cmpOp, bits<2> minOp, Operand ImmOprnd> + : ALU64_rr <(outs PredRegs:$Pd), + (ins DoubleRegs:$Rss, ImmOprnd:$Imm), + "$Pd = "#Str#"($Rss, #$Imm)", + [], "", ALU64_tc_2early_SLOT23> { + bits<2> Pd; + bits<5> Rss; + bits<32> Imm; + bits<8> ImmBits; + let ImmBits{6-0} = Imm{6-0}; + let ImmBits{7} = !if (!eq(cmpOp,0b10), 0b0, Imm{7}); // 0 for vcmp[bhw].gtu + + let IClass = 0b1101; + + let Inst{27-24} = 0b1100; + let Inst{22-21} = cmpOp; + let Inst{20-16} = Rss; + let Inst{12-5} = ImmBits; + let Inst{4-3} = minOp; + let Inst{1-0} = Pd; + } + +// Vector compare bytes +def A4_vcmpbgt : T_vcmp <"vcmpb.gt", 0b1010>; +def: T_vcmp_pat<A4_vcmpbgt, setgt, v8i8>; + +let AsmString = "$Pd = any8(vcmpb.eq($Rss, $Rtt))" in +def A4_vcmpbeq_any : T_vcmp <"any8(vcmpb.gt", 0b1000>; + +def A4_vcmpbeqi : T_vcmpImm <"vcmpb.eq", 0b00, 0b00, u8Imm>; +def A4_vcmpbgti : T_vcmpImm <"vcmpb.gt", 0b01, 0b00, s8Imm>; +def A4_vcmpbgtui : T_vcmpImm <"vcmpb.gtu", 0b10, 0b00, u7Imm>; + +// Vector compare halfwords +def A4_vcmpheqi : T_vcmpImm <"vcmph.eq", 0b00, 0b01, s8Imm>; +def A4_vcmphgti : T_vcmpImm <"vcmph.gt", 0b01, 0b01, s8Imm>; +def A4_vcmphgtui : T_vcmpImm <"vcmph.gtu", 0b10, 0b01, u7Imm>; + +// Vector compare words +def A4_vcmpweqi : T_vcmpImm 
<"vcmpw.eq", 0b00, 0b10, s8Imm>; +def A4_vcmpwgti : T_vcmpImm <"vcmpw.gt", 0b01, 0b10, s8Imm>; +def A4_vcmpwgtui : T_vcmpImm <"vcmpw.gtu", 0b10, 0b10, u7Imm>; + +//===----------------------------------------------------------------------===// +// XTYPE/SHIFT + +//===----------------------------------------------------------------------===// +// Shift by immediate and accumulate/logical. +// Rx=add(#u8,asl(Rx,#U5)) Rx=add(#u8,lsr(Rx,#U5)) +// Rx=sub(#u8,asl(Rx,#U5)) Rx=sub(#u8,lsr(Rx,#U5)) +// Rx=and(#u8,asl(Rx,#U5)) Rx=and(#u8,lsr(Rx,#U5)) +// Rx=or(#u8,asl(Rx,#U5)) Rx=or(#u8,lsr(Rx,#U5)) +let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8, + hasNewValue = 1, opNewValue = 0 in +class T_S4_ShiftOperate<string MnOp, string MnSh, SDNode Op, SDNode Sh, + bit asl_lsr, bits<2> MajOp, InstrItinClass Itin> + : MInst_acc<(outs IntRegs:$Rd), (ins u8Ext:$u8, IntRegs:$Rx, u5Imm:$U5), + "$Rd = "#MnOp#"(#$u8, "#MnSh#"($Rx, #$U5))", + [(set (i32 IntRegs:$Rd), + (Op (Sh I32:$Rx, u5ImmPred:$U5), u32ImmPred:$u8))], + "$Rd = $Rx", Itin> { + + bits<5> Rd; + bits<8> u8; + bits<5> Rx; + bits<5> U5; + + let IClass = 0b1101; + let Inst{27-24} = 0b1110; + let Inst{23-21} = u8{7-5}; + let Inst{20-16} = Rd; + let Inst{13} = u8{4}; + let Inst{12-8} = U5; + let Inst{7-5} = u8{3-1}; + let Inst{4} = asl_lsr; + let Inst{3} = u8{0}; + let Inst{2-1} = MajOp; +} + +multiclass T_ShiftOperate<string mnemonic, SDNode Op, bits<2> MajOp, + InstrItinClass Itin> { + def _asl_ri : T_S4_ShiftOperate<mnemonic, "asl", Op, shl, 0, MajOp, Itin>; + def _lsr_ri : T_S4_ShiftOperate<mnemonic, "lsr", Op, srl, 1, MajOp, Itin>; +} + +let AddedComplexity = 200 in { + defm S4_addi : T_ShiftOperate<"add", add, 0b10, ALU64_tc_2_SLOT23>; + defm S4_andi : T_ShiftOperate<"and", and, 0b00, ALU64_tc_2_SLOT23>; +} + +let AddedComplexity = 30 in +defm S4_ori : T_ShiftOperate<"or", or, 0b01, ALU64_tc_1_SLOT23>; + +defm S4_subi : T_ShiftOperate<"sub", sub, 0b11, ALU64_tc_1_SLOT23>; + +let AddedComplexity = 200 in { + def: Pat<(add addrga:$addr, (shl I32:$src2, u5ImmPred:$src3)), + (S4_addi_asl_ri addrga:$addr, IntRegs:$src2, u5ImmPred:$src3)>; + def: Pat<(add addrga:$addr, (srl I32:$src2, u5ImmPred:$src3)), + (S4_addi_lsr_ri addrga:$addr, IntRegs:$src2, u5ImmPred:$src3)>; + def: Pat<(sub addrga:$addr, (shl I32:$src2, u5ImmPred:$src3)), + (S4_subi_asl_ri addrga:$addr, IntRegs:$src2, u5ImmPred:$src3)>; + def: Pat<(sub addrga:$addr, (srl I32:$src2, u5ImmPred:$src3)), + (S4_subi_lsr_ri addrga:$addr, IntRegs:$src2, u5ImmPred:$src3)>; +} + +// Vector conditional negate +// Rdd=vcnegh(Rss,Rt) +let Defs = [USR_OVF], Itinerary = S_3op_tc_2_SLOT23 in +def S2_vcnegh : T_S3op_shiftVect < "vcnegh", 0b11, 0b01>; + +// Rd=[cround|round](Rs,Rt) +let hasNewValue = 1, Itinerary = S_3op_tc_2_SLOT23 in { + def A4_cround_rr : T_S3op_3 < "cround", IntRegs, 0b11, 0b00>; + def A4_round_rr : T_S3op_3 < "round", IntRegs, 0b11, 0b10>; +} + +// Rd=round(Rs,Rt):sat +let hasNewValue = 1, Defs = [USR_OVF], Itinerary = S_3op_tc_2_SLOT23 in +def A4_round_rr_sat : T_S3op_3 < "round", IntRegs, 0b11, 0b11, 1>; + +// Rd=[cmpyiwh|cmpyrwh](Rss,Rt):<<1:rnd:sat +let Defs = [USR_OVF], Itinerary = S_3op_tc_3x_SLOT23 in { + def M4_cmpyi_wh : T_S3op_8<"cmpyiwh", 0b100, 1, 1, 1>; + def M4_cmpyr_wh : T_S3op_8<"cmpyrwh", 0b110, 1, 1, 1>; +} + +// Rdd=[add|sub](Rss,Rtt,Px):carry +let isPredicateLate = 1, hasSideEffects = 0 in +class T_S3op_carry <string mnemonic, bits<3> MajOp> + : SInst < (outs DoubleRegs:$Rdd, PredRegs:$Px), + (ins DoubleRegs:$Rss, DoubleRegs:$Rtt, 
PredRegs:$Pu), + "$Rdd = "#mnemonic#"($Rss, $Rtt, $Pu):carry", + [], "$Px = $Pu", S_3op_tc_1_SLOT23 > { + bits<5> Rdd; + bits<5> Rss; + bits<5> Rtt; + bits<2> Pu; + + let IClass = 0b1100; + + let Inst{27-24} = 0b0010; + let Inst{23-21} = MajOp; + let Inst{20-16} = Rss; + let Inst{12-8} = Rtt; + let Inst{6-5} = Pu; + let Inst{4-0} = Rdd; + } + +def A4_addp_c : T_S3op_carry < "add", 0b110 >; +def A4_subp_c : T_S3op_carry < "sub", 0b111 >; + +let Itinerary = S_3op_tc_3_SLOT23, hasSideEffects = 0 in +class T_S3op_6 <string mnemonic, bits<3> MinOp, bit isUnsigned> + : SInst <(outs DoubleRegs:$Rxx), + (ins DoubleRegs:$dst2, DoubleRegs:$Rss, IntRegs:$Ru), + "$Rxx = "#mnemonic#"($Rss, $Ru)" , + [] , "$dst2 = $Rxx"> { + bits<5> Rxx; + bits<5> Rss; + bits<5> Ru; + + let IClass = 0b1100; + + let Inst{27-21} = 0b1011001; + let Inst{20-16} = Rss; + let Inst{13} = isUnsigned; + let Inst{12-8} = Rxx; + let Inst{7-5} = MinOp; + let Inst{4-0} = Ru; + } + +// Vector reduce maximum halfwords +// Rxx=vrmax[u]h(Rss,Ru) +def A4_vrmaxh : T_S3op_6 < "vrmaxh", 0b001, 0>; +def A4_vrmaxuh : T_S3op_6 < "vrmaxuh", 0b001, 1>; + +// Vector reduce maximum words +// Rxx=vrmax[u]w(Rss,Ru) +def A4_vrmaxw : T_S3op_6 < "vrmaxw", 0b010, 0>; +def A4_vrmaxuw : T_S3op_6 < "vrmaxuw", 0b010, 1>; + +// Vector reduce minimum halfwords +// Rxx=vrmin[u]h(Rss,Ru) +def A4_vrminh : T_S3op_6 < "vrminh", 0b101, 0>; +def A4_vrminuh : T_S3op_6 < "vrminuh", 0b101, 1>; + +// Vector reduce minimum words +// Rxx=vrmin[u]w(Rss,Ru) +def A4_vrminw : T_S3op_6 < "vrminw", 0b110, 0>; +def A4_vrminuw : T_S3op_6 < "vrminuw", 0b110, 1>; + +// Shift an immediate left by register amount. +let hasNewValue = 1, hasSideEffects = 0 in +def S4_lsli: SInst <(outs IntRegs:$Rd), (ins s6Imm:$s6, IntRegs:$Rt), + "$Rd = lsl(#$s6, $Rt)" , + [(set (i32 IntRegs:$Rd), (shl s6ImmPred:$s6, + (i32 IntRegs:$Rt)))], + "", S_3op_tc_1_SLOT23> { + bits<5> Rd; + bits<6> s6; + bits<5> Rt; + + let IClass = 0b1100; + + let Inst{27-22} = 0b011010; + let Inst{20-16} = s6{5-1}; + let Inst{12-8} = Rt; + let Inst{7-6} = 0b11; + let Inst{4-0} = Rd; + let Inst{5} = s6{0}; + } + +//===----------------------------------------------------------------------===// +// XTYPE/SHIFT - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MEMOP: Word, Half, Byte +//===----------------------------------------------------------------------===// + +def MEMOPIMM : SDNodeXForm<imm, [{ + // Call the transformation function XformM5ToU5Imm to get the negative + // immediate's positive counterpart. + int32_t imm = N->getSExtValue(); + return XformM5ToU5Imm(imm, SDLoc(N)); +}]>; + +def MEMOPIMM_HALF : SDNodeXForm<imm, [{ + // -1 .. -31 represented as 65535..65515 + // assigning to a short restores our desired signed value. + // Call the transformation function XformM5ToU5Imm to get the negative + // immediate's positive counterpart. + int16_t imm = N->getSExtValue(); + return XformM5ToU5Imm(imm, SDLoc(N)); +}]>; + +def MEMOPIMM_BYTE : SDNodeXForm<imm, [{ + // -1 .. -31 represented as 255..235 + // assigning to a char restores our desired signed value. + // Call the transformation function XformM5ToU5Imm to get the negative + // immediate's positive counterpart. + int8_t imm = N->getSExtValue(); + return XformM5ToU5Imm(imm, SDLoc(N)); +}]>; + +def SETMEMIMM : SDNodeXForm<imm, [{ + // Return the bit position we will set [0-31]. + // As an SDNode. 
+ int32_t imm = N->getSExtValue(); + return XformMskToBitPosU5Imm(imm, SDLoc(N)); +}]>; + +def CLRMEMIMM : SDNodeXForm<imm, [{ + // Return the bit position we will clear [0-31]. + // As an SDNode. + // we bit negate the value first + int32_t imm = ~(N->getSExtValue()); + return XformMskToBitPosU5Imm(imm, SDLoc(N)); +}]>; + +def SETMEMIMM_SHORT : SDNodeXForm<imm, [{ + // Return the bit position we will set [0-15]. + // As an SDNode. + int16_t imm = N->getSExtValue(); + return XformMskToBitPosU4Imm(imm, SDLoc(N)); +}]>; + +def CLRMEMIMM_SHORT : SDNodeXForm<imm, [{ + // Return the bit position we will clear [0-15]. + // As an SDNode. + // we bit negate the value first + int16_t imm = ~(N->getSExtValue()); + return XformMskToBitPosU4Imm(imm, SDLoc(N)); +}]>; + +def SETMEMIMM_BYTE : SDNodeXForm<imm, [{ + // Return the bit position we will set [0-7]. + // As an SDNode. + int8_t imm = N->getSExtValue(); + return XformMskToBitPosU3Imm(imm, SDLoc(N)); +}]>; + +def CLRMEMIMM_BYTE : SDNodeXForm<imm, [{ + // Return the bit position we will clear [0-7]. + // As an SDNode. + // we bit negate the value first + int8_t imm = ~(N->getSExtValue()); + return XformMskToBitPosU3Imm(imm, SDLoc(N)); +}]>; + +//===----------------------------------------------------------------------===// +// Template class for MemOp instructions with the register value. +//===----------------------------------------------------------------------===// +class MemOp_rr_base <string opc, bits<2> opcBits, Operand ImmOp, + string memOp, bits<2> memOpBits> : + MEMInst_V4<(outs), + (ins IntRegs:$base, ImmOp:$offset, IntRegs:$delta), + opc#"($base+#$offset)"#memOp#"$delta", + []>, + Requires<[UseMEMOP]> { + + bits<5> base; + bits<5> delta; + bits<32> offset; + bits<6> offsetBits; // memb - u6:0 , memh - u6:1, memw - u6:2 + + let offsetBits = !if (!eq(opcBits, 0b00), offset{5-0}, + !if (!eq(opcBits, 0b01), offset{6-1}, + !if (!eq(opcBits, 0b10), offset{7-2},0))); + + let opExtentAlign = opcBits; + let IClass = 0b0011; + let Inst{27-24} = 0b1110; + let Inst{22-21} = opcBits; + let Inst{20-16} = base; + let Inst{13} = 0b0; + let Inst{12-7} = offsetBits; + let Inst{6-5} = memOpBits; + let Inst{4-0} = delta; +} + +//===----------------------------------------------------------------------===// +// Template class for MemOp instructions with the immediate value. +//===----------------------------------------------------------------------===// +class MemOp_ri_base <string opc, bits<2> opcBits, Operand ImmOp, + string memOp, bits<2> memOpBits> : + MEMInst_V4 <(outs), + (ins IntRegs:$base, ImmOp:$offset, u5Imm:$delta), + opc#"($base+#$offset)"#memOp#"#$delta" + #!if(memOpBits{1},")", ""), // clrbit, setbit - include ')' + []>, + Requires<[UseMEMOP]> { + + bits<5> base; + bits<5> delta; + bits<32> offset; + bits<6> offsetBits; // memb - u6:0 , memh - u6:1, memw - u6:2 + + let offsetBits = !if (!eq(opcBits, 0b00), offset{5-0}, + !if (!eq(opcBits, 0b01), offset{6-1}, + !if (!eq(opcBits, 0b10), offset{7-2},0))); + + let opExtentAlign = opcBits; + let IClass = 0b0011; + let Inst{27-24} = 0b1111; + let Inst{22-21} = opcBits; + let Inst{20-16} = base; + let Inst{13} = 0b0; + let Inst{12-7} = offsetBits; + let Inst{6-5} = memOpBits; + let Inst{4-0} = delta; +} + +// multiclass to define MemOp instructions with register operand. 
+multiclass MemOp_rr<string opc, bits<2> opcBits, Operand ImmOp> { + def L4_add#NAME : MemOp_rr_base <opc, opcBits, ImmOp, " += ", 0b00>; // add + def L4_sub#NAME : MemOp_rr_base <opc, opcBits, ImmOp, " -= ", 0b01>; // sub + def L4_and#NAME : MemOp_rr_base <opc, opcBits, ImmOp, " &= ", 0b10>; // and + def L4_or#NAME : MemOp_rr_base <opc, opcBits, ImmOp, " |= ", 0b11>; // or +} + +// multiclass to define MemOp instructions with immediate Operand. +multiclass MemOp_ri<string opc, bits<2> opcBits, Operand ImmOp> { + def L4_iadd#NAME : MemOp_ri_base <opc, opcBits, ImmOp, " += ", 0b00 >; + def L4_isub#NAME : MemOp_ri_base <opc, opcBits, ImmOp, " -= ", 0b01 >; + def L4_iand#NAME : MemOp_ri_base<opc, opcBits, ImmOp, " = clrbit(", 0b10>; + def L4_ior#NAME : MemOp_ri_base<opc, opcBits, ImmOp, " = setbit(", 0b11>; +} + +multiclass MemOp_base <string opc, bits<2> opcBits, Operand ImmOp> { + defm _#NAME : MemOp_rr <opc, opcBits, ImmOp>; + defm _#NAME : MemOp_ri <opc, opcBits, ImmOp>; +} + +// Define MemOp instructions. +let isExtendable = 1, opExtendable = 1, isExtentSigned = 0 in { + let opExtentBits = 6, accessSize = ByteAccess in + defm memopb_io : MemOp_base <"memb", 0b00, u6_0Ext>; + + let opExtentBits = 7, accessSize = HalfWordAccess in + defm memoph_io : MemOp_base <"memh", 0b01, u6_1Ext>; + + let opExtentBits = 8, accessSize = WordAccess in + defm memopw_io : MemOp_base <"memw", 0b10, u6_2Ext>; +} + +//===----------------------------------------------------------------------===// +// Multiclass to define 'Def Pats' for ALU operations on the memory +// Here value used for the ALU operation is an immediate value. +// mem[bh](Rs+#0) += #U5 +// mem[bh](Rs+#u6) += #U5 +//===----------------------------------------------------------------------===// + +multiclass MemOpi_u5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf ImmPred, + InstHexagon MI, SDNode OpNode> { + let AddedComplexity = 180 in + def: Pat<(stOp (OpNode (ldOp IntRegs:$addr), u5ImmPred:$addend), + IntRegs:$addr), + (MI IntRegs:$addr, 0, u5ImmPred:$addend)>; + + let AddedComplexity = 190 in + def: Pat<(stOp (OpNode (ldOp (add IntRegs:$base, ImmPred:$offset)), + u5ImmPred:$addend), + (add IntRegs:$base, ImmPred:$offset)), + (MI IntRegs:$base, ImmPred:$offset, u5ImmPred:$addend)>; +} + +multiclass MemOpi_u5ALUOp<PatFrag ldOp, PatFrag stOp, PatLeaf ImmPred, + InstHexagon addMI, InstHexagon subMI> { + defm: MemOpi_u5Pats<ldOp, stOp, ImmPred, addMI, add>; + defm: MemOpi_u5Pats<ldOp, stOp, ImmPred, subMI, sub>; +} + +multiclass MemOpi_u5ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > { + // Half Word + defm: MemOpi_u5ALUOp <ldOpHalf, truncstorei16, u31_1ImmPred, + L4_iadd_memoph_io, L4_isub_memoph_io>; + // Byte + defm: MemOpi_u5ALUOp <ldOpByte, truncstorei8, u32ImmPred, + L4_iadd_memopb_io, L4_isub_memopb_io>; +} + +let Predicates = [UseMEMOP] in { + defm: MemOpi_u5ExtType<zextloadi8, zextloadi16>; // zero extend + defm: MemOpi_u5ExtType<sextloadi8, sextloadi16>; // sign extend + defm: MemOpi_u5ExtType<extloadi8, extloadi16>; // any extend + + // Word + defm: MemOpi_u5ALUOp <load, store, u30_2ImmPred, L4_iadd_memopw_io, + L4_isub_memopw_io>; +} + +//===----------------------------------------------------------------------===// +// multiclass to define 'Def Pats' for ALU operations on the memory. +// Here value used for the ALU operation is a negative value. 
+// mem[bh](Rs+#0) += #m5 +// mem[bh](Rs+#u6) += #m5 +//===----------------------------------------------------------------------===// + +multiclass MemOpi_m5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf ImmPred, + PatLeaf immPred, SDNodeXForm xformFunc, + InstHexagon MI> { + let AddedComplexity = 190 in + def: Pat<(stOp (add (ldOp IntRegs:$addr), immPred:$subend), IntRegs:$addr), + (MI IntRegs:$addr, 0, (xformFunc immPred:$subend))>; + + let AddedComplexity = 195 in + def: Pat<(stOp (add (ldOp (add IntRegs:$base, ImmPred:$offset)), + immPred:$subend), + (add IntRegs:$base, ImmPred:$offset)), + (MI IntRegs:$base, ImmPred:$offset, (xformFunc immPred:$subend))>; +} + +multiclass MemOpi_m5ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > { + // Half Word + defm: MemOpi_m5Pats <ldOpHalf, truncstorei16, u31_1ImmPred, m5HImmPred, + MEMOPIMM_HALF, L4_isub_memoph_io>; + // Byte + defm: MemOpi_m5Pats <ldOpByte, truncstorei8, u32ImmPred, m5BImmPred, + MEMOPIMM_BYTE, L4_isub_memopb_io>; +} + +let Predicates = [UseMEMOP] in { + defm: MemOpi_m5ExtType<zextloadi8, zextloadi16>; // zero extend + defm: MemOpi_m5ExtType<sextloadi8, sextloadi16>; // sign extend + defm: MemOpi_m5ExtType<extloadi8, extloadi16>; // any extend + + // Word + defm: MemOpi_m5Pats <load, store, u30_2ImmPred, m5ImmPred, + MEMOPIMM, L4_isub_memopw_io>; +} + +//===----------------------------------------------------------------------===// +// Multiclass to define 'def Pats' for bit operations on the memory. +// mem[bhw](Rs+#0) = [clrbit|setbit](#U5) +// mem[bhw](Rs+#u6) = [clrbit|setbit](#U5) +//===----------------------------------------------------------------------===// + +multiclass MemOpi_bitPats <PatFrag ldOp, PatFrag stOp, PatLeaf immPred, + PatLeaf extPred, SDNodeXForm xformFunc, InstHexagon MI, + SDNode OpNode> { + + // mem[bhw](Rs+#u6:[012]) = [clrbit|setbit](#U5) + let AddedComplexity = 250 in + def: Pat<(stOp (OpNode (ldOp (add IntRegs:$base, extPred:$offset)), + immPred:$bitend), + (add IntRegs:$base, extPred:$offset)), + (MI IntRegs:$base, extPred:$offset, (xformFunc immPred:$bitend))>; + + // mem[bhw](Rs+#0) = [clrbit|setbit](#U5) + let AddedComplexity = 225 in + def: Pat<(stOp (OpNode (ldOp IntRegs:$addr), immPred:$bitend), IntRegs:$addr), + (MI IntRegs:$addr, 0, (xformFunc immPred:$bitend))>; +} + +multiclass MemOpi_bitExtType<PatFrag ldOpByte, PatFrag ldOpHalf> { + // Byte - clrbit + defm: MemOpi_bitPats<ldOpByte, truncstorei8, Clr3ImmPred, u32ImmPred, + CLRMEMIMM_BYTE, L4_iand_memopb_io, and>; + // Byte - setbit + defm: MemOpi_bitPats<ldOpByte, truncstorei8, Set3ImmPred, u32ImmPred, + SETMEMIMM_BYTE, L4_ior_memopb_io, or>; + // Half Word - clrbit + defm: MemOpi_bitPats<ldOpHalf, truncstorei16, Clr4ImmPred, u31_1ImmPred, + CLRMEMIMM_SHORT, L4_iand_memoph_io, and>; + // Half Word - setbit + defm: MemOpi_bitPats<ldOpHalf, truncstorei16, Set4ImmPred, u31_1ImmPred, + SETMEMIMM_SHORT, L4_ior_memoph_io, or>; +} + +let Predicates = [UseMEMOP] in { + // mem[bh](Rs+#0) = [clrbit|setbit](#U5) + // mem[bh](Rs+#u6:[01]) = [clrbit|setbit](#U5) + defm: MemOpi_bitExtType<zextloadi8, zextloadi16>; // zero extend + defm: MemOpi_bitExtType<sextloadi8, sextloadi16>; // sign extend + defm: MemOpi_bitExtType<extloadi8, extloadi16>; // any extend + + // memw(Rs+#0) = [clrbit|setbit](#U5) + // memw(Rs+#u6:2) = [clrbit|setbit](#U5) + defm: MemOpi_bitPats<load, store, Clr5ImmPred, u30_2ImmPred, CLRMEMIMM, + L4_iand_memopw_io, and>; + defm: MemOpi_bitPats<load, store, Set5ImmPred, u30_2ImmPred, SETMEMIMM, + L4_ior_memopw_io, or>; +} + 
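The SDNodeXForm definitions above only describe their intended arithmetic in comments; the XformM5ToU5Imm / XformMskToBitPos* helpers live on the C++ side of the backend. Below is a minimal standalone C++ sketch of that arithmetic, assuming the helpers do nothing beyond what the comments state (negate an m5 addend to obtain its positive u5 counterpart, and turn a one-bit mask into a bit position, complementing the mask first for clrbit). The function names m5ToU5 and maskToBitPos are illustrative only, not backend APIs.

// Illustrative sketch only (not part of the backend): the arithmetic that the
// MEMOPIMM*/SETMEMIMM*/CLRMEMIMM* comments above describe.
#include <cassert>
#include <cstdint>

// A negative byte addend in [-31,-1] (seen as 255..235 when zero-extended)
// becomes the positive #U5 operand of the "-=" memop form.
std::uint8_t m5ToU5(std::int8_t m5) {
  assert(m5 >= -31 && m5 <= -1);
  return static_cast<std::uint8_t>(-m5);          // e.g. 0xFD (-3) -> 3
}

// A single-bit mask becomes the bit position used by setbit(#U5); for clrbit
// the AND mask is complemented first, as the CLRMEMIMM comments note.
std::uint8_t maskToBitPos(std::uint32_t mask) {
  assert(mask != 0 && (mask & (mask - 1)) == 0);  // must be a power of two
  std::uint8_t pos = 0;
  while ((mask & 1u) == 0) { mask >>= 1; ++pos; }
  return pos;
}

int main() {
  assert(m5ToU5(static_cast<std::int8_t>(0xFD)) == 3);  // x += -3   ->  x -= #3
  assert(maskToBitPos(0x10u) == 4);                     // x |= 0x10 ->  setbit(#4)
  assert(maskToBitPos(~0xFFFFFFEFu) == 4);              // x &= ~0x10 -> clrbit(#4)
  return 0;
}

This also shows why the m5 patterns above select the L4_isub_* memops rather than L4_iadd_*: the hardware memop immediate is unsigned, so a negative addend is flipped into a positive subtrahend before instruction selection.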
+//===----------------------------------------------------------------------===// +// Multiclass to define 'def Pats' for ALU operations on the memory +// where addend is a register. +// mem[bhw](Rs+#0) [+-&|]= Rt +// mem[bhw](Rs+#U6:[012]) [+-&|]= Rt +//===----------------------------------------------------------------------===// + +multiclass MemOpr_Pats <PatFrag ldOp, PatFrag stOp, PatLeaf extPred, + InstHexagon MI, SDNode OpNode> { + let AddedComplexity = 141 in + // mem[bhw](Rs+#0) [+-&|]= Rt + def: Pat<(stOp (OpNode (ldOp IntRegs:$addr), (i32 IntRegs:$addend)), + IntRegs:$addr), + (MI IntRegs:$addr, 0, (i32 IntRegs:$addend))>; + + // mem[bhw](Rs+#U6:[012]) [+-&|]= Rt + let AddedComplexity = 150 in + def: Pat<(stOp (OpNode (ldOp (add IntRegs:$base, extPred:$offset)), + (i32 IntRegs:$orend)), + (add IntRegs:$base, extPred:$offset)), + (MI IntRegs:$base, extPred:$offset, (i32 IntRegs:$orend))>; +} + +multiclass MemOPr_ALUOp<PatFrag ldOp, PatFrag stOp, PatLeaf extPred, + InstHexagon addMI, InstHexagon subMI, + InstHexagon andMI, InstHexagon orMI> { + defm: MemOpr_Pats <ldOp, stOp, extPred, addMI, add>; + defm: MemOpr_Pats <ldOp, stOp, extPred, subMI, sub>; + defm: MemOpr_Pats <ldOp, stOp, extPred, andMI, and>; + defm: MemOpr_Pats <ldOp, stOp, extPred, orMI, or>; +} + +multiclass MemOPr_ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > { + // Half Word + defm: MemOPr_ALUOp <ldOpHalf, truncstorei16, u31_1ImmPred, + L4_add_memoph_io, L4_sub_memoph_io, + L4_and_memoph_io, L4_or_memoph_io>; + // Byte + defm: MemOPr_ALUOp <ldOpByte, truncstorei8, u32ImmPred, + L4_add_memopb_io, L4_sub_memopb_io, + L4_and_memopb_io, L4_or_memopb_io>; +} + +// Define 'def Pats' for MemOps with register addend. +let Predicates = [UseMEMOP] in { + // Byte, Half Word + defm: MemOPr_ExtType<zextloadi8, zextloadi16>; // zero extend + defm: MemOPr_ExtType<sextloadi8, sextloadi16>; // sign extend + defm: MemOPr_ExtType<extloadi8, extloadi16>; // any extend + // Word + defm: MemOPr_ALUOp <load, store, u30_2ImmPred, L4_add_memopw_io, + L4_sub_memopw_io, L4_and_memopw_io, L4_or_memopw_io>; +} + +//===----------------------------------------------------------------------===// +// XTYPE/PRED + +//===----------------------------------------------------------------------===// + +// Hexagon V4 only supports these flavors of byte/half compare instructions: +// EQ/GT/GTU. Other flavors like GE/GEU/LT/LTU/LE/LEU are not supported by +// hardware. However, compiler can still implement these patterns through +// appropriate patterns combinations based on current implemented patterns. +// The implemented patterns are: EQ/GT/GTU. +// Missing patterns are: GE/GEU/LT/LTU/LE/LEU. + +// Following instruction is not being extended as it results into the +// incorrect code for negative numbers. +// Pd=cmpb.eq(Rs,#u8) + +// p=!cmp.eq(r1,#s10) +def C4_cmpneqi : T_CMP <"cmp.eq", 0b00, 1, s10Ext>; +def C4_cmpltei : T_CMP <"cmp.gt", 0b01, 1, s10Ext>; +def C4_cmplteui : T_CMP <"cmp.gtu", 0b10, 1, u9Ext>; + +def : T_CMP_pat <C4_cmpneqi, setne, s32ImmPred>; +def : T_CMP_pat <C4_cmpltei, setle, s32ImmPred>; +def : T_CMP_pat <C4_cmplteui, setule, u9ImmPred>; + +// rs <= rt -> !(rs > rt). +/* +def: Pat<(i1 (setle (i32 IntRegs:$src1), s32ImmPred:$src2)), + (C2_not (C2_cmpgti IntRegs:$src1, s32ImmPred:$src2))>; +// (C4_cmpltei IntRegs:$src1, s32ImmPred:$src2)>; +*/ +// Map cmplt(Rs, Imm) -> !cmpgt(Rs, Imm-1). 
+def: Pat<(i1 (setlt (i32 IntRegs:$src1), s32ImmPred:$src2)), + (C4_cmpltei IntRegs:$src1, (DEC_CONST_SIGNED s32ImmPred:$src2))>; + +// rs != rt -> !(rs == rt). +def: Pat<(i1 (setne (i32 IntRegs:$src1), s32ImmPred:$src2)), + (C4_cmpneqi IntRegs:$src1, s32ImmPred:$src2)>; + +// SDNode for converting immediate C to C-1. +def DEC_CONST_BYTE : SDNodeXForm<imm, [{ + // Return the byte immediate const-1 as an SDNode. + int32_t imm = N->getSExtValue(); + return XformU7ToU7M1Imm(imm, SDLoc(N)); +}]>; + +// For the sequence +// zext( setult ( and(Rs, 255), u8)) +// Use the isdigit transformation below + +// Generate code of the form 'C2_muxii(cmpbgtui(Rdd, C-1),0,1)' +// for C code of the form r = ((c>='0') & (c<='9')) ? 1 : 0;. +// The isdigit transformation relies on two 'clever' aspects: +// 1) The data type is unsigned which allows us to eliminate a zero test after +// biasing the expression by 48. We are depending on the representation of +// the unsigned types, and semantics. +// 2) The front end has converted <= 9 into < 10 on entry to LLVM +// +// For the C code: +// retval = ((c>='0') & (c<='9')) ? 1 : 0; +// The code is transformed upstream of llvm into +// retval = (c-48) < 10 ? 1 : 0; +let AddedComplexity = 139 in +def: Pat<(i32 (zext (i1 (setult (i32 (and (i32 IntRegs:$src1), 255)), + u7StrictPosImmPred:$src2)))), + (C2_muxii (A4_cmpbgtui IntRegs:$src1, + (DEC_CONST_BYTE u7StrictPosImmPred:$src2)), + 0, 1)>; + +//===----------------------------------------------------------------------===// +// XTYPE/PRED - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Multiclass for DeallocReturn +//===----------------------------------------------------------------------===// +class L4_RETURN<string mnemonic, bit isNot, bit isPredNew, bit isTak> + : LD0Inst<(outs), (ins PredRegs:$src), + !if(isNot, "if (!$src", "if ($src")# + !if(isPredNew, ".new) ", ") ")#mnemonic# + !if(isPredNew, #!if(isTak,":t", ":nt"),""), + [], "", LD_tc_3or4stall_SLOT0> { + + bits<2> src; + let BaseOpcode = "L4_RETURN"; + let isPredicatedFalse = isNot; + let isPredicatedNew = isPredNew; + let isTaken = isTak; + let IClass = 0b1001; + + let Inst{27-16} = 0b011000011110; + + let Inst{13} = isNot; + let Inst{12} = isTak; + let Inst{11} = isPredNew; + let Inst{10} = 0b0; + let Inst{9-8} = src; + let Inst{4-0} = 0b11110; + } + +// Produce all predicated forms, p, !p, p.new, !p.new, :t, :nt +multiclass L4_RETURN_PRED<string mnemonic, bit PredNot> { + let isPredicated = 1 in { + def _#NAME# : L4_RETURN <mnemonic, PredNot, 0, 1>; + def _#NAME#new_pnt : L4_RETURN <mnemonic, PredNot, 1, 0>; + def _#NAME#new_pt : L4_RETURN <mnemonic, PredNot, 1, 1>; + } +} + +multiclass LD_MISC_L4_RETURN<string mnemonic> { + let isBarrier = 1, isPredicable = 1 in + def NAME : LD0Inst <(outs), (ins), mnemonic, [], "", + LD_tc_3or4stall_SLOT0> { + let BaseOpcode = "L4_RETURN"; + let IClass = 0b1001; + let Inst{27-16} = 0b011000011110; + let Inst{13-10} = 0b0000; + let Inst{4-0} = 0b11110; + } + defm t : L4_RETURN_PRED<mnemonic, 0 >; + defm f : L4_RETURN_PRED<mnemonic, 1 >; +} + +let isReturn = 1, isTerminator = 1, + Defs = [R29, R30, R31, PC], Uses = [R30], hasSideEffects = 0 in +defm L4_return: LD_MISC_L4_RETURN <"dealloc_return">, PredNewRel; + +// Restore registers and dealloc return function call. 
+let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1, + Defs = [R29, R30, R31, PC], isPredicable = 0, isAsmParserOnly = 1 in { + def RESTORE_DEALLOC_RET_JMP_V4 : T_JMP<"">; + let isExtended = 1, opExtendable = 0 in + def RESTORE_DEALLOC_RET_JMP_V4_EXT : T_JMP<"">; +} + +// Restore registers and dealloc frame before a tail call. +let isCall = 1, Defs = [R29, R30, R31, PC], isAsmParserOnly = 1 in { + def RESTORE_DEALLOC_BEFORE_TAILCALL_V4 : T_Call<"">, PredRel; + let isExtended = 1, opExtendable = 0 in + def RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT : T_Call<"">, PredRel; +} + +// Save registers function call. +let isCall = 1, Uses = [R29, R31], isAsmParserOnly = 1 in { + def SAVE_REGISTERS_CALL_V4 : T_Call<"">, PredRel; + let isExtended = 1, opExtendable = 0 in + def SAVE_REGISTERS_CALL_V4_EXT : T_Call<"">, PredRel; +} + +//===----------------------------------------------------------------------===// +// Template class for non predicated store instructions with +// GP-Relative or absolute addressing. +//===----------------------------------------------------------------------===// +let hasSideEffects = 0, isPredicable = 1 in +class T_StoreAbsGP <string mnemonic, RegisterClass RC, Operand ImmOp, + bits<2>MajOp, bit isAbs, bit isHalf> + : STInst<(outs), (ins ImmOp:$addr, RC:$src), + mnemonic # "(#$addr) = $src"#!if(isHalf, ".h",""), + [], "", V2LDST_tc_st_SLOT01> { + bits<19> addr; + bits<5> src; + bits<16> offsetBits; + + string ImmOpStr = !cast<string>(ImmOp); + let offsetBits = !if (!eq(ImmOpStr, "u16_3Imm"), addr{18-3}, + !if (!eq(ImmOpStr, "u16_2Imm"), addr{17-2}, + !if (!eq(ImmOpStr, "u16_1Imm"), addr{16-1}, + /* u16_0Imm */ addr{15-0}))); + // Store upper-half and store doubleword cannot be NV. + let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isHalf,0,1)); + + let IClass = 0b0100; + let Inst{27} = 1; + let Inst{26-25} = offsetBits{15-14}; + let Inst{24} = 0b0; + let Inst{23-22} = MajOp; + let Inst{21} = isHalf; + let Inst{20-16} = offsetBits{13-9}; + let Inst{13} = offsetBits{8}; + let Inst{12-8} = src; + let Inst{7-0} = offsetBits{7-0}; + } + +//===----------------------------------------------------------------------===// +// Template class for predicated store instructions with +// GP-Relative or absolute addressing. +//===----------------------------------------------------------------------===// +let hasSideEffects = 0, isPredicated = 1, opExtentBits = 6, opExtendable = 1 in +class T_StoreAbs_Pred <string mnemonic, RegisterClass RC, bits<2> MajOp, + bit isHalf, bit isNot, bit isNew> + : STInst<(outs), (ins PredRegs:$src1, u32MustExt:$absaddr, RC: $src2), + !if(isNot, "if (!$src1", "if ($src1")#!if(isNew, ".new) ", + ") ")#mnemonic#"(#$absaddr) = $src2"#!if(isHalf, ".h",""), + [], "", ST_tc_st_SLOT01>, AddrModeRel { + bits<2> src1; + bits<6> absaddr; + bits<5> src2; + + let isPredicatedNew = isNew; + let isPredicatedFalse = isNot; + // Store upper-half and store doubleword cannot be NV. + let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isHalf,0,1)); + + let IClass = 0b1010; + + let Inst{27-24} = 0b1111; + let Inst{23-22} = MajOp; + let Inst{21} = isHalf; + let Inst{17-16} = absaddr{5-4}; + let Inst{13} = isNew; + let Inst{12-8} = src2; + let Inst{7} = 0b1; + let Inst{6-3} = absaddr{3-0}; + let Inst{2} = isNot; + let Inst{1-0} = src1; + } + +//===----------------------------------------------------------------------===// +// Template class for predicated store instructions with absolute addressing. 
+//===----------------------------------------------------------------------===// +class T_StoreAbs <string mnemonic, RegisterClass RC, Operand ImmOp, + bits<2> MajOp, bit isHalf> + : T_StoreAbsGP <mnemonic, RC, u32MustExt, MajOp, 1, isHalf>, + AddrModeRel { + string ImmOpStr = !cast<string>(ImmOp); + let opExtentBits = !if (!eq(ImmOpStr, "u16_3Imm"), 19, + !if (!eq(ImmOpStr, "u16_2Imm"), 18, + !if (!eq(ImmOpStr, "u16_1Imm"), 17, + /* u16_0Imm */ 16))); + + let opExtentAlign = !if (!eq(ImmOpStr, "u16_3Imm"), 3, + !if (!eq(ImmOpStr, "u16_2Imm"), 2, + !if (!eq(ImmOpStr, "u16_1Imm"), 1, + /* u16_0Imm */ 0))); +} + +//===----------------------------------------------------------------------===// +// Multiclass for store instructions with absolute addressing. +//===----------------------------------------------------------------------===// +let addrMode = Absolute, isExtended = 1 in +multiclass ST_Abs<string mnemonic, string CextOp, RegisterClass RC, + Operand ImmOp, bits<2> MajOp, bit isHalf = 0> { + let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in { + let opExtendable = 0, isPredicable = 1 in + def S2_#NAME#abs : T_StoreAbs <mnemonic, RC, ImmOp, MajOp, isHalf>; + + // Predicated + def S4_p#NAME#t_abs : T_StoreAbs_Pred<mnemonic, RC, MajOp, isHalf, 0, 0>; + def S4_p#NAME#f_abs : T_StoreAbs_Pred<mnemonic, RC, MajOp, isHalf, 1, 0>; + + // .new Predicated + def S4_p#NAME#tnew_abs : T_StoreAbs_Pred<mnemonic, RC, MajOp, isHalf, 0, 1>; + def S4_p#NAME#fnew_abs : T_StoreAbs_Pred<mnemonic, RC, MajOp, isHalf, 1, 1>; + } +} + +//===----------------------------------------------------------------------===// +// Template class for non predicated new-value store instructions with +// GP-Relative or absolute addressing. +//===----------------------------------------------------------------------===// +let hasSideEffects = 0, isPredicable = 1, mayStore = 1, isNVStore = 1, + isNewValue = 1, opNewValue = 1 in +class T_StoreAbsGP_NV <string mnemonic, Operand ImmOp, bits<2>MajOp, bit isAbs> + : NVInst_V4<(outs), (ins u32Imm:$addr, IntRegs:$src), + mnemonic # !if(isAbs, "(##", "(#")#"$addr) = $src.new", + [], "", V2LDST_tc_st_SLOT0> { + bits<19> addr; + bits<3> src; + bits<16> offsetBits; + + string ImmOpStr = !cast<string>(ImmOp); + let offsetBits = !if (!eq(ImmOpStr, "u16_3Imm"), addr{18-3}, + !if (!eq(ImmOpStr, "u16_2Imm"), addr{17-2}, + !if (!eq(ImmOpStr, "u16_1Imm"), addr{16-1}, + /* u16_0Imm */ addr{15-0}))); + let IClass = 0b0100; + + let Inst{27} = 1; + let Inst{26-25} = offsetBits{15-14}; + let Inst{24-21} = 0b0101; + let Inst{20-16} = offsetBits{13-9}; + let Inst{13} = offsetBits{8}; + let Inst{12-11} = MajOp; + let Inst{10-8} = src; + let Inst{7-0} = offsetBits{7-0}; + } + +//===----------------------------------------------------------------------===// +// Template class for predicated new-value store instructions with +// absolute addressing. 
+//===----------------------------------------------------------------------===// +let hasSideEffects = 0, isPredicated = 1, mayStore = 1, isNVStore = 1, + isNewValue = 1, opNewValue = 2, opExtentBits = 6, opExtendable = 1 in +class T_StoreAbs_NV_Pred <string mnemonic, bits<2> MajOp, bit isNot, bit isNew> + : NVInst_V4<(outs), (ins PredRegs:$src1, u6Ext:$absaddr, IntRegs:$src2), + !if(isNot, "if (!$src1", "if ($src1")#!if(isNew, ".new) ", + ") ")#mnemonic#"(#$absaddr) = $src2.new", + [], "", ST_tc_st_SLOT0>, AddrModeRel { + bits<2> src1; + bits<6> absaddr; + bits<3> src2; + + let isPredicatedNew = isNew; + let isPredicatedFalse = isNot; + + let IClass = 0b1010; + + let Inst{27-24} = 0b1111; + let Inst{23-21} = 0b101; + let Inst{17-16} = absaddr{5-4}; + let Inst{13} = isNew; + let Inst{12-11} = MajOp; + let Inst{10-8} = src2; + let Inst{7} = 0b1; + let Inst{6-3} = absaddr{3-0}; + let Inst{2} = isNot; + let Inst{1-0} = src1; +} + +//===----------------------------------------------------------------------===// +// Template class for non-predicated new-value store instructions with +// absolute addressing. +//===----------------------------------------------------------------------===// +class T_StoreAbs_NV <string mnemonic, Operand ImmOp, bits<2> MajOp> + : T_StoreAbsGP_NV <mnemonic, ImmOp, MajOp, 1>, AddrModeRel { + + string ImmOpStr = !cast<string>(ImmOp); + let opExtentBits = !if (!eq(ImmOpStr, "u16_3Imm"), 19, + !if (!eq(ImmOpStr, "u16_2Imm"), 18, + !if (!eq(ImmOpStr, "u16_1Imm"), 17, + /* u16_0Imm */ 16))); + + let opExtentAlign = !if (!eq(ImmOpStr, "u16_3Imm"), 3, + !if (!eq(ImmOpStr, "u16_2Imm"), 2, + !if (!eq(ImmOpStr, "u16_1Imm"), 1, + /* u16_0Imm */ 0))); +} + +//===----------------------------------------------------------------------===// +// Multiclass for new-value store instructions with absolute addressing. 
+//===----------------------------------------------------------------------===// +let addrMode = Absolute, isExtended = 1 in +multiclass ST_Abs_NV <string mnemonic, string CextOp, Operand ImmOp, + bits<2> MajOp> { + let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in { + let opExtendable = 0, isPredicable = 1 in + def S2_#NAME#newabs : T_StoreAbs_NV <mnemonic, ImmOp, MajOp>; + + // Predicated + def S4_p#NAME#newt_abs : T_StoreAbs_NV_Pred <mnemonic, MajOp, 0, 0>; + def S4_p#NAME#newf_abs : T_StoreAbs_NV_Pred <mnemonic, MajOp, 1, 0>; + + // .new Predicated + def S4_p#NAME#newtnew_abs : T_StoreAbs_NV_Pred <mnemonic, MajOp, 0, 1>; + def S4_p#NAME#newfnew_abs : T_StoreAbs_NV_Pred <mnemonic, MajOp, 1, 1>; + } +} + +//===----------------------------------------------------------------------===// +// Stores with absolute addressing +//===----------------------------------------------------------------------===// +let accessSize = ByteAccess in +defm storerb : ST_Abs <"memb", "STrib", IntRegs, u16_0Imm, 0b00>, + ST_Abs_NV <"memb", "STrib", u16_0Imm, 0b00>; + +let accessSize = HalfWordAccess in +defm storerh : ST_Abs <"memh", "STrih", IntRegs, u16_1Imm, 0b01>, + ST_Abs_NV <"memh", "STrih", u16_1Imm, 0b01>; + +let accessSize = WordAccess in +defm storeri : ST_Abs <"memw", "STriw", IntRegs, u16_2Imm, 0b10>, + ST_Abs_NV <"memw", "STriw", u16_2Imm, 0b10>; + +let isNVStorable = 0, accessSize = DoubleWordAccess in +defm storerd : ST_Abs <"memd", "STrid", DoubleRegs, u16_3Imm, 0b11>; + +let isNVStorable = 0, accessSize = HalfWordAccess in +defm storerf : ST_Abs <"memh", "STrif", IntRegs, u16_1Imm, 0b01, 1>; + +//===----------------------------------------------------------------------===// +// GP-relative stores. +// mem[bhwd](#global)=Rt +// Once predicated, these instructions map to absolute addressing mode. +// if ([!]Pv[.new]) mem[bhwd](##global)=Rt +//===----------------------------------------------------------------------===// + +let isAsmParserOnly = 1 in +class T_StoreGP <string mnemonic, string BaseOp, RegisterClass RC, + Operand ImmOp, bits<2> MajOp, bit isHalf = 0> + : T_StoreAbsGP <mnemonic, RC, ImmOp, MajOp, 0, isHalf> { + // Set BaseOpcode same as absolute addressing instructions so that + // non-predicated GP-Rel instructions can have relate with predicated + // Absolute instruction. + let BaseOpcode = BaseOp#_abs; + } + +let isAsmParserOnly = 1 in +multiclass ST_GP <string mnemonic, string BaseOp, Operand ImmOp, + bits<2> MajOp, bit isHalf = 0> { + // Set BaseOpcode same as absolute addressing instructions so that + // non-predicated GP-Rel instructions can have relate with predicated + // Absolute instruction. 
+ let BaseOpcode = BaseOp#_abs in { + def NAME#gp : T_StoreAbsGP <mnemonic, IntRegs, ImmOp, MajOp, + 0, isHalf>; + // New-value store + def NAME#newgp : T_StoreAbsGP_NV <mnemonic, ImmOp, MajOp, 0> ; + } +} + +let accessSize = ByteAccess in +defm S2_storerb : ST_GP<"memb", "STrib", u16_0Imm, 0b00>, NewValueRel; + +let accessSize = HalfWordAccess in +defm S2_storerh : ST_GP<"memh", "STrih", u16_1Imm, 0b01>, NewValueRel; + +let accessSize = WordAccess in +defm S2_storeri : ST_GP<"memw", "STriw", u16_2Imm, 0b10>, NewValueRel; + +let isNVStorable = 0, accessSize = DoubleWordAccess in +def S2_storerdgp : T_StoreGP <"memd", "STrid", DoubleRegs, + u16_3Imm, 0b11>, PredNewRel; + +let isNVStorable = 0, accessSize = HalfWordAccess in +def S2_storerfgp : T_StoreGP <"memh", "STrif", IntRegs, + u16_1Imm, 0b01, 1>, PredNewRel; + +class Loada_pat<PatFrag Load, ValueType VT, PatFrag Addr, InstHexagon MI> + : Pat<(VT (Load Addr:$addr)), (MI Addr:$addr)>; + +class Loadam_pat<PatFrag Load, ValueType VT, PatFrag Addr, PatFrag ValueMod, + InstHexagon MI> + : Pat<(VT (Load Addr:$addr)), (ValueMod (MI Addr:$addr))>; + +class Storea_pat<PatFrag Store, PatFrag Value, PatFrag Addr, InstHexagon MI> + : Pat<(Store Value:$val, Addr:$addr), (MI Addr:$addr, Value:$val)>; + +class Stoream_pat<PatFrag Store, PatFrag Value, PatFrag Addr, PatFrag ValueMod, + InstHexagon MI> + : Pat<(Store Value:$val, Addr:$addr), + (MI Addr:$addr, (ValueMod Value:$val))>; + +def: Storea_pat<SwapSt<atomic_store_8>, I32, addrgp, S2_storerbgp>; +def: Storea_pat<SwapSt<atomic_store_16>, I32, addrgp, S2_storerhgp>; +def: Storea_pat<SwapSt<atomic_store_32>, I32, addrgp, S2_storerigp>; +def: Storea_pat<SwapSt<atomic_store_64>, I64, addrgp, S2_storerdgp>; + +let AddedComplexity = 100 in { + def: Storea_pat<truncstorei8, I32, addrgp, S2_storerbgp>; + def: Storea_pat<truncstorei16, I32, addrgp, S2_storerhgp>; + def: Storea_pat<store, I32, addrgp, S2_storerigp>; + def: Storea_pat<store, I64, addrgp, S2_storerdgp>; + + // Map from "i1 = constant<-1>; memw(CONST32(#foo)) = i1" + // to "r0 = 1; memw(#foo) = r0" + let AddedComplexity = 100 in + def: Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)), + (S2_storerbgp tglobaladdr:$global, (A2_tfrsi 1))>; +} + +//===----------------------------------------------------------------------===// +// Template class for non predicated load instructions with +// absolute addressing mode. 
+//===----------------------------------------------------------------------===// +let isPredicable = 1, hasSideEffects = 0 in +class T_LoadAbsGP <string mnemonic, RegisterClass RC, Operand ImmOp, + bits<3> MajOp> + : LDInst <(outs RC:$dst), (ins ImmOp:$addr), + "$dst = "#mnemonic# "(#$addr)", + [], "", V2LDST_tc_ld_SLOT01> { + bits<5> dst; + bits<19> addr; + bits<16> offsetBits; + + string ImmOpStr = !cast<string>(ImmOp); + let offsetBits = !if (!eq(ImmOpStr, "u16_3Imm"), addr{18-3}, + !if (!eq(ImmOpStr, "u16_2Imm"), addr{17-2}, + !if (!eq(ImmOpStr, "u16_1Imm"), addr{16-1}, + /* u16_0Imm */ addr{15-0}))); + + let IClass = 0b0100; + + let Inst{27} = 0b1; + let Inst{26-25} = offsetBits{15-14}; + let Inst{24} = 0b1; + let Inst{23-21} = MajOp; + let Inst{20-16} = offsetBits{13-9}; + let Inst{13-5} = offsetBits{8-0}; + let Inst{4-0} = dst; + } + +class T_LoadAbs <string mnemonic, RegisterClass RC, Operand ImmOp, + bits<3> MajOp> + : T_LoadAbsGP <mnemonic, RC, u32MustExt, MajOp>, AddrModeRel { + + string ImmOpStr = !cast<string>(ImmOp); + let opExtentBits = !if (!eq(ImmOpStr, "u16_3Imm"), 19, + !if (!eq(ImmOpStr, "u16_2Imm"), 18, + !if (!eq(ImmOpStr, "u16_1Imm"), 17, + /* u16_0Imm */ 16))); + + let opExtentAlign = !if (!eq(ImmOpStr, "u16_3Imm"), 3, + !if (!eq(ImmOpStr, "u16_2Imm"), 2, + !if (!eq(ImmOpStr, "u16_1Imm"), 1, + /* u16_0Imm */ 0))); + } + +//===----------------------------------------------------------------------===// +// Template class for predicated load instructions with +// absolute addressing mode. +//===----------------------------------------------------------------------===// +let isPredicated = 1, hasSideEffects = 0, hasNewValue = 1, opExtentBits = 6, + opExtendable = 2 in +class T_LoadAbs_Pred <string mnemonic, RegisterClass RC, bits<3> MajOp, + bit isPredNot, bit isPredNew> + : LDInst <(outs RC:$dst), (ins PredRegs:$src1, u32MustExt:$absaddr), + !if(isPredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", + ") ")#"$dst = "#mnemonic#"(#$absaddr)">, AddrModeRel { + bits<5> dst; + bits<2> src1; + bits<6> absaddr; + + let isPredicatedNew = isPredNew; + let isPredicatedFalse = isPredNot; + let hasNewValue = !if (!eq(!cast<string>(RC), "DoubleRegs"), 0, 1); + + let IClass = 0b1001; + + let Inst{27-24} = 0b1111; + let Inst{23-21} = MajOp; + let Inst{20-16} = absaddr{5-1}; + let Inst{13} = 0b1; + let Inst{12} = isPredNew; + let Inst{11} = isPredNot; + let Inst{10-9} = src1; + let Inst{8} = absaddr{0}; + let Inst{7} = 0b1; + let Inst{4-0} = dst; + } + +//===----------------------------------------------------------------------===// +// Multiclass for the load instructions with absolute addressing mode. 
+//===----------------------------------------------------------------------===// +multiclass LD_Abs_Pred<string mnemonic, RegisterClass RC, bits<3> MajOp, + bit PredNot> { + def _abs : T_LoadAbs_Pred <mnemonic, RC, MajOp, PredNot, 0>; + // Predicate new + def new_abs : T_LoadAbs_Pred <mnemonic, RC, MajOp, PredNot, 1>; +} + +let addrMode = Absolute, isExtended = 1 in +multiclass LD_Abs<string mnemonic, string CextOp, RegisterClass RC, + Operand ImmOp, bits<3> MajOp> { + let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in { + let opExtendable = 1, isPredicable = 1 in + def L4_#NAME#_abs: T_LoadAbs <mnemonic, RC, ImmOp, MajOp>; + + // Predicated + defm L4_p#NAME#t : LD_Abs_Pred<mnemonic, RC, MajOp, 0>; + defm L4_p#NAME#f : LD_Abs_Pred<mnemonic, RC, MajOp, 1>; + } +} + +let accessSize = ByteAccess, hasNewValue = 1 in { + defm loadrb : LD_Abs<"memb", "LDrib", IntRegs, u16_0Imm, 0b000>; + defm loadrub : LD_Abs<"memub", "LDriub", IntRegs, u16_0Imm, 0b001>; +} + +let accessSize = HalfWordAccess, hasNewValue = 1 in { + defm loadrh : LD_Abs<"memh", "LDrih", IntRegs, u16_1Imm, 0b010>; + defm loadruh : LD_Abs<"memuh", "LDriuh", IntRegs, u16_1Imm, 0b011>; +} + +let accessSize = WordAccess, hasNewValue = 1 in +defm loadri : LD_Abs<"memw", "LDriw", IntRegs, u16_2Imm, 0b100>; + +let accessSize = DoubleWordAccess in +defm loadrd : LD_Abs<"memd", "LDrid", DoubleRegs, u16_3Imm, 0b110>; + +//===----------------------------------------------------------------------===// +// multiclass for load instructions with GP-relative addressing mode. +// Rx=mem[bhwd](##global) +// Once predicated, these instructions map to absolute addressing mode. +// if ([!]Pv[.new]) Rx=mem[bhwd](##global) +//===----------------------------------------------------------------------===// + +let isAsmParserOnly = 1 in +class T_LoadGP <string mnemonic, string BaseOp, RegisterClass RC, Operand ImmOp, + bits<3> MajOp> + : T_LoadAbsGP <mnemonic, RC, ImmOp, MajOp>, PredNewRel { + let BaseOpcode = BaseOp#_abs; + } + +let accessSize = ByteAccess, hasNewValue = 1 in { + def L2_loadrbgp : T_LoadGP<"memb", "LDrib", IntRegs, u16_0Imm, 0b000>; + def L2_loadrubgp : T_LoadGP<"memub", "LDriub", IntRegs, u16_0Imm, 0b001>; +} + +let accessSize = HalfWordAccess, hasNewValue = 1 in { + def L2_loadrhgp : T_LoadGP<"memh", "LDrih", IntRegs, u16_1Imm, 0b010>; + def L2_loadruhgp : T_LoadGP<"memuh", "LDriuh", IntRegs, u16_1Imm, 0b011>; +} + +let accessSize = WordAccess, hasNewValue = 1 in +def L2_loadrigp : T_LoadGP<"memw", "LDriw", IntRegs, u16_2Imm, 0b100>; + +let accessSize = DoubleWordAccess in +def L2_loadrdgp : T_LoadGP<"memd", "LDrid", DoubleRegs, u16_3Imm, 0b110>; + +def: Loada_pat<atomic_load_8, i32, addrgp, L2_loadrubgp>; +def: Loada_pat<atomic_load_16, i32, addrgp, L2_loadruhgp>; +def: Loada_pat<atomic_load_32, i32, addrgp, L2_loadrigp>; +def: Loada_pat<atomic_load_64, i64, addrgp, L2_loadrdgp>; + +// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress), Pd = Rd +def: Loadam_pat<load, i1, addrga, I32toI1, L4_loadrub_abs>; +def: Loadam_pat<load, i1, addrgp, I32toI1, L2_loadrubgp>; + +def: Stoream_pat<store, I1, addrga, I1toI32, S2_storerbabs>; +def: Stoream_pat<store, I1, addrgp, I1toI32, S2_storerbgp>; + +// Map from load(globaladdress) -> mem[u][bhwd](#foo) +class LoadGP_pats <PatFrag ldOp, InstHexagon MI, ValueType VT = i32> + : Pat <(VT (ldOp (HexagonCONST32_GP tglobaladdr:$global))), + (VT (MI tglobaladdr:$global))>; + +let AddedComplexity = 100 in { + def: LoadGP_pats <extloadi8, L2_loadrbgp>; + def: LoadGP_pats <sextloadi8, 
L2_loadrbgp>; + def: LoadGP_pats <zextloadi8, L2_loadrubgp>; + def: LoadGP_pats <extloadi16, L2_loadrhgp>; + def: LoadGP_pats <sextloadi16, L2_loadrhgp>; + def: LoadGP_pats <zextloadi16, L2_loadruhgp>; + def: LoadGP_pats <load, L2_loadrigp>; + def: LoadGP_pats <load, L2_loadrdgp, i64>; +} + +// When the Interprocedural Global Variable optimizer realizes that a certain +// global variable takes only two constant values, it shrinks the global to +// a boolean. Catch those loads here in the following 3 patterns. +let AddedComplexity = 100 in { + def: LoadGP_pats <extloadi1, L2_loadrubgp>; + def: LoadGP_pats <zextloadi1, L2_loadrubgp>; +} + +// Transfer global address into a register +def: Pat<(HexagonCONST32 tglobaladdr:$Rs), (A2_tfrsi s16Ext:$Rs)>; +def: Pat<(HexagonCONST32_GP tblockaddress:$Rs), (A2_tfrsi s16Ext:$Rs)>; +def: Pat<(HexagonCONST32_GP tglobaladdr:$Rs), (A2_tfrsi s16Ext:$Rs)>; + +let AddedComplexity = 30 in { + def: Storea_pat<truncstorei8, I32, u32ImmPred, S2_storerbabs>; + def: Storea_pat<truncstorei16, I32, u32ImmPred, S2_storerhabs>; + def: Storea_pat<store, I32, u32ImmPred, S2_storeriabs>; +} + +let AddedComplexity = 30 in { + def: Loada_pat<load, i32, u32ImmPred, L4_loadri_abs>; + def: Loada_pat<sextloadi8, i32, u32ImmPred, L4_loadrb_abs>; + def: Loada_pat<zextloadi8, i32, u32ImmPred, L4_loadrub_abs>; + def: Loada_pat<sextloadi16, i32, u32ImmPred, L4_loadrh_abs>; + def: Loada_pat<zextloadi16, i32, u32ImmPred, L4_loadruh_abs>; +} + +// Indexed store word - global address. +// memw(Rs+#u6:2)=#S8 +let AddedComplexity = 100 in +def: Storex_add_pat<store, addrga, u6_2ImmPred, S4_storeiri_io>; + +// Load from a global address that has only one use in the current basic block. +let AddedComplexity = 100 in { + def: Loada_pat<extloadi8, i32, addrga, L4_loadrub_abs>; + def: Loada_pat<sextloadi8, i32, addrga, L4_loadrb_abs>; + def: Loada_pat<zextloadi8, i32, addrga, L4_loadrub_abs>; + + def: Loada_pat<extloadi16, i32, addrga, L4_loadruh_abs>; + def: Loada_pat<sextloadi16, i32, addrga, L4_loadrh_abs>; + def: Loada_pat<zextloadi16, i32, addrga, L4_loadruh_abs>; + + def: Loada_pat<load, i32, addrga, L4_loadri_abs>; + def: Loada_pat<load, i64, addrga, L4_loadrd_abs>; +} + +// Store to a global address that has only one use in the current basic block. +let AddedComplexity = 100 in { + def: Storea_pat<truncstorei8, I32, addrga, S2_storerbabs>; + def: Storea_pat<truncstorei16, I32, addrga, S2_storerhabs>; + def: Storea_pat<store, I32, addrga, S2_storeriabs>; + def: Storea_pat<store, I64, addrga, S2_storerdabs>; + + def: Stoream_pat<truncstorei32, I64, addrga, LoReg, S2_storeriabs>; +} + +// i8/i16/i32 -> i64 loads +// We need a complexity of 120 here to override preceding handling of +// zextload. 
+let AddedComplexity = 120 in { + def: Loadam_pat<extloadi8, i64, addrga, Zext64, L4_loadrub_abs>; + def: Loadam_pat<sextloadi8, i64, addrga, Sext64, L4_loadrb_abs>; + def: Loadam_pat<zextloadi8, i64, addrga, Zext64, L4_loadrub_abs>; + + def: Loadam_pat<extloadi16, i64, addrga, Zext64, L4_loadruh_abs>; + def: Loadam_pat<sextloadi16, i64, addrga, Sext64, L4_loadrh_abs>; + def: Loadam_pat<zextloadi16, i64, addrga, Zext64, L4_loadruh_abs>; + + def: Loadam_pat<extloadi32, i64, addrga, Zext64, L4_loadri_abs>; + def: Loadam_pat<sextloadi32, i64, addrga, Sext64, L4_loadri_abs>; + def: Loadam_pat<zextloadi32, i64, addrga, Zext64, L4_loadri_abs>; +} + +let AddedComplexity = 100 in { + def: Loada_pat<extloadi8, i32, addrgp, L4_loadrub_abs>; + def: Loada_pat<sextloadi8, i32, addrgp, L4_loadrb_abs>; + def: Loada_pat<zextloadi8, i32, addrgp, L4_loadrub_abs>; + + def: Loada_pat<extloadi16, i32, addrgp, L4_loadruh_abs>; + def: Loada_pat<sextloadi16, i32, addrgp, L4_loadrh_abs>; + def: Loada_pat<zextloadi16, i32, addrgp, L4_loadruh_abs>; + + def: Loada_pat<load, i32, addrgp, L4_loadri_abs>; + def: Loada_pat<load, i64, addrgp, L4_loadrd_abs>; +} + +let AddedComplexity = 100 in { + def: Storea_pat<truncstorei8, I32, addrgp, S2_storerbabs>; + def: Storea_pat<truncstorei16, I32, addrgp, S2_storerhabs>; + def: Storea_pat<store, I32, addrgp, S2_storeriabs>; + def: Storea_pat<store, I64, addrgp, S2_storerdabs>; +} + +def: Loada_pat<atomic_load_8, i32, addrgp, L4_loadrub_abs>; +def: Loada_pat<atomic_load_16, i32, addrgp, L4_loadruh_abs>; +def: Loada_pat<atomic_load_32, i32, addrgp, L4_loadri_abs>; +def: Loada_pat<atomic_load_64, i64, addrgp, L4_loadrd_abs>; + +def: Storea_pat<SwapSt<atomic_store_8>, I32, addrgp, S2_storerbabs>; +def: Storea_pat<SwapSt<atomic_store_16>, I32, addrgp, S2_storerhabs>; +def: Storea_pat<SwapSt<atomic_store_32>, I32, addrgp, S2_storeriabs>; +def: Storea_pat<SwapSt<atomic_store_64>, I64, addrgp, S2_storerdabs>; + +let Constraints = "@earlyclobber $dst" in +def Insert4 : PseudoM<(outs DoubleRegs:$dst), (ins IntRegs:$a, IntRegs:$b, + IntRegs:$c, IntRegs:$d), + ".error \"Should never try to emit Insert4\"", + [(set (i64 DoubleRegs:$dst), + (or (or (or (shl (i64 (zext (i32 (and (i32 IntRegs:$b), (i32 65535))))), + (i32 16)), + (i64 (zext (i32 (and (i32 IntRegs:$a), (i32 65535)))))), + (shl (i64 (anyext (i32 (and (i32 IntRegs:$c), (i32 65535))))), + (i32 32))), + (shl (i64 (anyext (i32 IntRegs:$d))), (i32 48))))]>; + +//===----------------------------------------------------------------------===// +// :raw for of boundscheck:hi:lo insns +//===----------------------------------------------------------------------===// + +// A4_boundscheck_lo: Detect if a register is within bounds. +let hasSideEffects = 0 in +def A4_boundscheck_lo: ALU64Inst < + (outs PredRegs:$Pd), + (ins DoubleRegs:$Rss, DoubleRegs:$Rtt), + "$Pd = boundscheck($Rss, $Rtt):raw:lo"> { + bits<2> Pd; + bits<5> Rss; + bits<5> Rtt; + + let IClass = 0b1101; + + let Inst{27-23} = 0b00100; + let Inst{13} = 0b1; + let Inst{7-5} = 0b100; + let Inst{1-0} = Pd; + let Inst{20-16} = Rss; + let Inst{12-8} = Rtt; + } + +// A4_boundscheck_hi: Detect if a register is within bounds. 
+let hasSideEffects = 0 in +def A4_boundscheck_hi: ALU64Inst < + (outs PredRegs:$Pd), + (ins DoubleRegs:$Rss, DoubleRegs:$Rtt), + "$Pd = boundscheck($Rss, $Rtt):raw:hi"> { + bits<2> Pd; + bits<5> Rss; + bits<5> Rtt; + + let IClass = 0b1101; + + let Inst{27-23} = 0b00100; + let Inst{13} = 0b1; + let Inst{7-5} = 0b101; + let Inst{1-0} = Pd; + let Inst{20-16} = Rss; + let Inst{12-8} = Rtt; + } + +let hasSideEffects = 0, isAsmParserOnly = 1 in +def A4_boundscheck : MInst < + (outs PredRegs:$Pd), (ins IntRegs:$Rs, DoubleRegs:$Rtt), + "$Pd=boundscheck($Rs,$Rtt)">; + +// A4_tlbmatch: Detect if a VA/ASID matches a TLB entry. +let isPredicateLate = 1, hasSideEffects = 0 in +def A4_tlbmatch : ALU64Inst<(outs PredRegs:$Pd), + (ins DoubleRegs:$Rs, IntRegs:$Rt), + "$Pd = tlbmatch($Rs, $Rt)", + [], "", ALU64_tc_2early_SLOT23> { + bits<2> Pd; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1101; + let Inst{27-23} = 0b00100; + let Inst{20-16} = Rs; + let Inst{13} = 0b1; + let Inst{12-8} = Rt; + let Inst{7-5} = 0b011; + let Inst{1-0} = Pd; + } + +// We need custom lowering of ISD::PREFETCH into HexagonISD::DCFETCH +// because the SDNode ISD::PREFETCH has properties MayLoad and MayStore. +// We don't really want either one here. +def SDTHexagonDCFETCH : SDTypeProfile<0, 2, [SDTCisPtrTy<0>,SDTCisInt<1>]>; +def HexagonDCFETCH : SDNode<"HexagonISD::DCFETCH", SDTHexagonDCFETCH, + [SDNPHasChain]>; + +// Use LD0Inst for dcfetch, but set "mayLoad" to 0 because this doesn't +// really do a load. +let hasSideEffects = 1, mayLoad = 0 in +def Y2_dcfetchbo : LD0Inst<(outs), (ins IntRegs:$Rs, u11_3Imm:$u11_3), + "dcfetch($Rs + #$u11_3)", + [(HexagonDCFETCH IntRegs:$Rs, u11_3ImmPred:$u11_3)], + "", LD_tc_ld_SLOT0> { + bits<5> Rs; + bits<14> u11_3; + + let IClass = 0b1001; + let Inst{27-21} = 0b0100000; + let Inst{20-16} = Rs; + let Inst{13} = 0b0; + let Inst{10-0} = u11_3{13-3}; +} + +//===----------------------------------------------------------------------===// +// Compound instructions +//===----------------------------------------------------------------------===// + +let isBranch = 1, hasSideEffects = 0, isExtentSigned = 1, + isPredicated = 1, isPredicatedNew = 1, isExtendable = 1, + opExtentBits = 11, opExtentAlign = 2, opExtendable = 1, + isTerminator = 1 in +class CJInst_tstbit_R0<string px, bit np, string tnt> + : InstHexagon<(outs), (ins IntRegs:$Rs, brtarget:$r9_2), + ""#px#" = tstbit($Rs, #0); if (" + #!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2", + [], "", COMPOUND, TypeCOMPOUND>, OpcodeHexagon { + bits<4> Rs; + bits<11> r9_2; + + // np: !p[01] + let isPredicatedFalse = np; + // tnt: Taken/Not Taken + let isBrTaken = !if (!eq(tnt, "t"), "true", "false"); + let isTaken = !if (!eq(tnt, "t"), 1, 0); + + let IClass = 0b0001; + let Inst{27-26} = 0b00; + let Inst{25} = !if (!eq(px, "!p1"), 1, + !if (!eq(px, "p1"), 1, 0)); + let Inst{24-23} = 0b11; + let Inst{22} = np; + let Inst{21-20} = r9_2{10-9}; + let Inst{19-16} = Rs; + let Inst{13} = !if (!eq(tnt, "t"), 1, 0); + let Inst{9-8} = 0b11; + let Inst{7-1} = r9_2{8-2}; +} + +let Defs = [PC, P0], Uses = [P0] in { + def J4_tstbit0_tp0_jump_nt : CJInst_tstbit_R0<"p0", 0, "nt">; + def J4_tstbit0_tp0_jump_t : CJInst_tstbit_R0<"p0", 0, "t">; + def J4_tstbit0_fp0_jump_nt : CJInst_tstbit_R0<"p0", 1, "nt">; + def J4_tstbit0_fp0_jump_t : CJInst_tstbit_R0<"p0", 1, "t">; +} + +let Defs = [PC, P1], Uses = [P1] in { + def J4_tstbit0_tp1_jump_nt : CJInst_tstbit_R0<"p1", 0, "nt">; + def J4_tstbit0_tp1_jump_t : CJInst_tstbit_R0<"p1", 0, "t">; + def J4_tstbit0_fp1_jump_nt : 
CJInst_tstbit_R0<"p1", 1, "nt">; + def J4_tstbit0_fp1_jump_t : CJInst_tstbit_R0<"p1", 1, "t">; +} + + +let isBranch = 1, hasSideEffects = 0, + isExtentSigned = 1, isPredicated = 1, isPredicatedNew = 1, + isExtendable = 1, opExtentBits = 11, opExtentAlign = 2, + opExtendable = 2, isTerminator = 1 in +class CJInst_RR<string px, string op, bit np, string tnt> + : InstHexagon<(outs), (ins IntRegs:$Rs, IntRegs:$Rt, brtarget:$r9_2), + ""#px#" = cmp."#op#"($Rs, $Rt); if (" + #!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2", + [], "", COMPOUND, TypeCOMPOUND>, OpcodeHexagon { + bits<4> Rs; + bits<4> Rt; + bits<11> r9_2; + + // np: !p[01] + let isPredicatedFalse = np; + // tnt: Taken/Not Taken + let isBrTaken = !if (!eq(tnt, "t"), "true", "false"); + let isTaken = !if (!eq(tnt, "t"), 1, 0); + + let IClass = 0b0001; + let Inst{27-23} = !if (!eq(op, "eq"), 0b01000, + !if (!eq(op, "gt"), 0b01001, + !if (!eq(op, "gtu"), 0b01010, 0))); + let Inst{22} = np; + let Inst{21-20} = r9_2{10-9}; + let Inst{19-16} = Rs; + let Inst{13} = !if (!eq(tnt, "t"), 1, 0); + // px: Predicate reg 0/1 + let Inst{12} = !if (!eq(px, "!p1"), 1, + !if (!eq(px, "p1"), 1, 0)); + let Inst{11-8} = Rt; + let Inst{7-1} = r9_2{8-2}; +} + +// P[10] taken/not taken. +multiclass T_tnt_CJInst_RR<string op, bit np> { + let Defs = [PC, P0], Uses = [P0] in { + def NAME#p0_jump_nt : CJInst_RR<"p0", op, np, "nt">; + def NAME#p0_jump_t : CJInst_RR<"p0", op, np, "t">; + } + let Defs = [PC, P1], Uses = [P1] in { + def NAME#p1_jump_nt : CJInst_RR<"p1", op, np, "nt">; + def NAME#p1_jump_t : CJInst_RR<"p1", op, np, "t">; + } +} +// Predicate / !Predicate +multiclass T_pnp_CJInst_RR<string op>{ + defm J4_cmp#NAME#_t : T_tnt_CJInst_RR<op, 0>; + defm J4_cmp#NAME#_f : T_tnt_CJInst_RR<op, 1>; +} +// TypeCJ Instructions compare RR and jump +defm eq : T_pnp_CJInst_RR<"eq">; +defm gt : T_pnp_CJInst_RR<"gt">; +defm gtu : T_pnp_CJInst_RR<"gtu">; + +let isBranch = 1, hasSideEffects = 0, isExtentSigned = 1, + isPredicated = 1, isPredicatedNew = 1, isExtendable = 1, opExtentBits = 11, + opExtentAlign = 2, opExtendable = 2, isTerminator = 1 in +class CJInst_RU5<string px, string op, bit np, string tnt> + : InstHexagon<(outs), (ins IntRegs:$Rs, u5Imm:$U5, brtarget:$r9_2), + ""#px#" = cmp."#op#"($Rs, #$U5); if (" + #!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2", + [], "", COMPOUND, TypeCOMPOUND>, OpcodeHexagon { + bits<4> Rs; + bits<5> U5; + bits<11> r9_2; + + // np: !p[01] + let isPredicatedFalse = np; + // tnt: Taken/Not Taken + let isBrTaken = !if (!eq(tnt, "t"), "true", "false"); + let isTaken = !if (!eq(tnt, "t"), 1, 0); + + let IClass = 0b0001; + let Inst{27-26} = 0b00; + // px: Predicate reg 0/1 + let Inst{25} = !if (!eq(px, "!p1"), 1, + !if (!eq(px, "p1"), 1, 0)); + let Inst{24-23} = !if (!eq(op, "eq"), 0b00, + !if (!eq(op, "gt"), 0b01, + !if (!eq(op, "gtu"), 0b10, 0))); + let Inst{22} = np; + let Inst{21-20} = r9_2{10-9}; + let Inst{19-16} = Rs; + let Inst{13} = !if (!eq(tnt, "t"), 1, 0); + let Inst{12-8} = U5; + let Inst{7-1} = r9_2{8-2}; +} +// P[10] taken/not taken. 
+multiclass T_tnt_CJInst_RU5<string op, bit np> { + let Defs = [PC, P0], Uses = [P0] in { + def NAME#p0_jump_nt : CJInst_RU5<"p0", op, np, "nt">; + def NAME#p0_jump_t : CJInst_RU5<"p0", op, np, "t">; + } + let Defs = [PC, P1], Uses = [P1] in { + def NAME#p1_jump_nt : CJInst_RU5<"p1", op, np, "nt">; + def NAME#p1_jump_t : CJInst_RU5<"p1", op, np, "t">; + } +} +// Predicate / !Predicate +multiclass T_pnp_CJInst_RU5<string op>{ + defm J4_cmp#NAME#i_t : T_tnt_CJInst_RU5<op, 0>; + defm J4_cmp#NAME#i_f : T_tnt_CJInst_RU5<op, 1>; +} +// TypeCJ Instructions compare RI and jump +defm eq : T_pnp_CJInst_RU5<"eq">; +defm gt : T_pnp_CJInst_RU5<"gt">; +defm gtu : T_pnp_CJInst_RU5<"gtu">; + +let isBranch = 1, hasSideEffects = 0, isExtentSigned = 1, + isPredicated = 1, isPredicatedFalse = 1, isPredicatedNew = 1, + isExtendable = 1, opExtentBits = 11, opExtentAlign = 2, opExtendable = 1, + isTerminator = 1 in +class CJInst_Rn1<string px, string op, bit np, string tnt> + : InstHexagon<(outs), (ins IntRegs:$Rs, brtarget:$r9_2), + ""#px#" = cmp."#op#"($Rs,#-1); if (" + #!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2", + [], "", COMPOUND, TypeCOMPOUND>, OpcodeHexagon { + bits<4> Rs; + bits<11> r9_2; + + // np: !p[01] + let isPredicatedFalse = np; + // tnt: Taken/Not Taken + let isBrTaken = !if (!eq(tnt, "t"), "true", "false"); + let isTaken = !if (!eq(tnt, "t"), 1, 0); + + let IClass = 0b0001; + let Inst{27-26} = 0b00; + let Inst{25} = !if (!eq(px, "!p1"), 1, + !if (!eq(px, "p1"), 1, 0)); + + let Inst{24-23} = 0b11; + let Inst{22} = np; + let Inst{21-20} = r9_2{10-9}; + let Inst{19-16} = Rs; + let Inst{13} = !if (!eq(tnt, "t"), 1, 0); + let Inst{9-8} = !if (!eq(op, "eq"), 0b00, + !if (!eq(op, "gt"), 0b01, 0)); + let Inst{7-1} = r9_2{8-2}; +} + +// P[10] taken/not taken. +multiclass T_tnt_CJInst_Rn1<string op, bit np> { + let Defs = [PC, P0], Uses = [P0] in { + def NAME#p0_jump_nt : CJInst_Rn1<"p0", op, np, "nt">; + def NAME#p0_jump_t : CJInst_Rn1<"p0", op, np, "t">; + } + let Defs = [PC, P1], Uses = [P1] in { + def NAME#p1_jump_nt : CJInst_Rn1<"p1", op, np, "nt">; + def NAME#p1_jump_t : CJInst_Rn1<"p1", op, np, "t">; + } +} +// Predicate / !Predicate +multiclass T_pnp_CJInst_Rn1<string op>{ + defm J4_cmp#NAME#n1_t : T_tnt_CJInst_Rn1<op, 0>; + defm J4_cmp#NAME#n1_f : T_tnt_CJInst_Rn1<op, 1>; +} +// TypeCJ Instructions compare -1 and jump +defm eq : T_pnp_CJInst_Rn1<"eq">; +defm gt : T_pnp_CJInst_Rn1<"gt">; + +// J4_jumpseti: Direct unconditional jump and set register to immediate. +let Defs = [PC], isBranch = 1, hasSideEffects = 0, hasNewValue = 1, + isExtentSigned = 1, opNewValue = 0, isExtendable = 1, opExtentBits = 11, + opExtentAlign = 2, opExtendable = 2 in +def J4_jumpseti: CJInst < + (outs IntRegs:$Rd), + (ins u6Imm:$U6, brtarget:$r9_2), + "$Rd = #$U6 ; jump $r9_2"> { + bits<4> Rd; + bits<6> U6; + bits<11> r9_2; + + let IClass = 0b0001; + let Inst{27-24} = 0b0110; + let Inst{21-20} = r9_2{10-9}; + let Inst{19-16} = Rd; + let Inst{13-8} = U6; + let Inst{7-1} = r9_2{8-2}; + } + +// J4_jumpsetr: Direct unconditional jump and transfer register. 
+let Defs = [PC], isBranch = 1, hasSideEffects = 0, hasNewValue = 1, + isExtentSigned = 1, opNewValue = 0, isExtendable = 1, opExtentBits = 11, + opExtentAlign = 2, opExtendable = 2 in +def J4_jumpsetr: CJInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs, brtarget:$r9_2), + "$Rd = $Rs ; jump $r9_2"> { + bits<4> Rd; + bits<4> Rs; + bits<11> r9_2; + + let IClass = 0b0001; + let Inst{27-24} = 0b0111; + let Inst{21-20} = r9_2{10-9}; + let Inst{11-8} = Rd; + let Inst{19-16} = Rs; + let Inst{7-1} = r9_2{8-2}; + } + +// Duplex instructions +//===----------------------------------------------------------------------===// +include "HexagonIsetDx.td" diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV5.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV5.td new file mode 100644 index 0000000..823961f --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV5.td @@ -0,0 +1,937 @@ +//=- HexagonInstrInfoV5.td - Target Desc. for Hexagon Target -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Hexagon V5 instructions in TableGen format. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// XTYPE/MPY +//===----------------------------------------------------------------------===// + + //Rdd[+]=vrmpybsu(Rss,Rtt) +let Predicates = [HasV5T] in { + def M5_vrmpybsu: T_XTYPE_Vect<"vrmpybsu", 0b110, 0b001, 0>; + def M5_vrmacbsu: T_XTYPE_Vect_acc<"vrmpybsu", 0b110, 0b001, 0>; + + //Rdd[+]=vrmpybu(Rss,Rtt) + def M5_vrmpybuu: T_XTYPE_Vect<"vrmpybu", 0b100, 0b001, 0>; + def M5_vrmacbuu: T_XTYPE_Vect_acc<"vrmpybu", 0b100, 0b001, 0>; + + def M5_vdmpybsu: T_M2_vmpy<"vdmpybsu", 0b101, 0b001, 0, 0, 1>; + def M5_vdmacbsu: T_M2_vmpy_acc_sat <"vdmpybsu", 0b001, 0b001, 0, 0>; +} + +// Vector multiply bytes +// Rdd=vmpyb[s]u(Rs,Rt) +let Predicates = [HasV5T] in { + def M5_vmpybsu: T_XTYPE_mpy64 <"vmpybsu", 0b010, 0b001, 0, 0, 0>; + def M5_vmpybuu: T_XTYPE_mpy64 <"vmpybu", 0b100, 0b001, 0, 0, 0>; + + // Rxx+=vmpyb[s]u(Rs,Rt) + def M5_vmacbsu: T_XTYPE_mpy64_acc <"vmpybsu", "+", 0b110, 0b001, 0, 0, 0>; + def M5_vmacbuu: T_XTYPE_mpy64_acc <"vmpybu", "+", 0b100, 0b001, 0, 0, 0>; + + // Rd=vaddhub(Rss,Rtt):sat + let hasNewValue = 1, opNewValue = 0 in + def A5_vaddhubs: T_S3op_1 <"vaddhub", IntRegs, 0b01, 0b001, 0, 1>; +} + +def S2_asr_i_p_rnd : S_2OpInstImm<"asr", 0b110, 0b111, u6Imm, + [(set I64:$dst, + (sra (i64 (add (i64 (sra I64:$src1, u6ImmPred:$src2)), 1)), + (i32 1)))], 1>, + Requires<[HasV5T]> { + bits<6> src2; + let Inst{13-8} = src2; +} + +let isAsmParserOnly = 1 in +def S2_asr_i_p_rnd_goodsyntax + : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2), + "$dst = asrrnd($src1, #$src2)">; + +def C4_fastcorner9 : T_LOGICAL_2OP<"fastcorner9", 0b000, 0, 0>, + Requires<[HasV5T]> { + let Inst{13,7,4} = 0b111; +} + +def C4_fastcorner9_not : T_LOGICAL_2OP<"!fastcorner9", 0b000, 0, 0>, + Requires<[HasV5T]> { + let Inst{20,13,7,4} = 0b1111; +} + +def SDTHexagonFCONST32 : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, + SDTCisPtrTy<1>]>; +def HexagonFCONST32 : SDNode<"HexagonISD::FCONST32", SDTHexagonFCONST32>; + +let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in +def FCONST32_nsdata : LDInst<(outs IntRegs:$dst), (ins 
globaladdress:$global), + "$dst = CONST32(#$global)", + [(set F32:$dst, + (HexagonFCONST32 tglobaladdr:$global))]>, + Requires<[HasV5T]>; + +let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in +def CONST64_Float_Real : LDInst<(outs DoubleRegs:$dst), (ins f64imm:$src1), + "$dst = CONST64(#$src1)", + [(set F64:$dst, fpimm:$src1)]>, + Requires<[HasV5T]>; + +let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in +def CONST32_Float_Real : LDInst<(outs IntRegs:$dst), (ins f32imm:$src1), + "$dst = CONST32(#$src1)", + [(set F32:$dst, fpimm:$src1)]>, + Requires<[HasV5T]>; + +// Transfer immediate float. +// Only works with single precision fp value. +// For double precision, use CONST64_float_real, as 64bit transfer +// can only hold 40-bit values - 32 from const ext + 8 bit immediate. +// Make sure that complexity is more than the CONST32 pattern in +// HexagonInstrInfo.td patterns. +let isExtended = 1, opExtendable = 1, isMoveImm = 1, isReMaterializable = 1, + isPredicable = 1, AddedComplexity = 30, validSubTargets = HasV5SubT, + isCodeGenOnly = 1, isPseudo = 1 in +def TFRI_f : ALU32_ri<(outs IntRegs:$dst), (ins f32Ext:$src1), + "$dst = #$src1", + [(set F32:$dst, fpimm:$src1)]>, + Requires<[HasV5T]>; + +let isExtended = 1, opExtendable = 2, isPredicated = 1, hasSideEffects = 0, + validSubTargets = HasV5SubT, isCodeGenOnly = 1, isPseudo = 1 in +def TFRI_cPt_f : ALU32_ri<(outs IntRegs:$dst), + (ins PredRegs:$src1, f32Ext:$src2), + "if ($src1) $dst = #$src2", []>, + Requires<[HasV5T]>; + +let isExtended = 1, opExtendable = 2, isPredicated = 1, isPredicatedFalse = 1, + hasSideEffects = 0, validSubTargets = HasV5SubT, isPseudo = 1 in +def TFRI_cNotPt_f : ALU32_ri<(outs IntRegs:$dst), + (ins PredRegs:$src1, f32Ext:$src2), + "if (!$src1) $dst = #$src2", []>, + Requires<[HasV5T]>; + +def SDTHexagonI32I64: SDTypeProfile<1, 1, [SDTCisVT<0, i32>, + SDTCisVT<1, i64>]>; + +def HexagonPOPCOUNT: SDNode<"HexagonISD::POPCOUNT", SDTHexagonI32I64>; + +let hasNewValue = 1, validSubTargets = HasV5SubT in +def S5_popcountp : ALU64_rr<(outs IntRegs:$Rd), (ins DoubleRegs:$Rss), + "$Rd = popcount($Rss)", + [(set I32:$Rd, (HexagonPOPCOUNT I64:$Rss))], "", S_2op_tc_2_SLOT23>, + Requires<[HasV5T]> { + bits<5> Rd; + bits<5> Rss; + + let IClass = 0b1000; + + let Inst{27-21} = 0b1000011; + let Inst{7-5} = 0b011; + let Inst{4-0} = Rd; + let Inst{20-16} = Rss; + } + +defm: Loadx_pat<load, f32, s30_2ImmPred, L2_loadri_io>; +defm: Loadx_pat<load, f64, s29_3ImmPred, L2_loadrd_io>; + +defm: Storex_pat<store, F32, s30_2ImmPred, S2_storeri_io>; +defm: Storex_pat<store, F64, s29_3ImmPred, S2_storerd_io>; +def: Storex_simple_pat<store, F32, S2_storeri_io>; +def: Storex_simple_pat<store, F64, S2_storerd_io>; + +let isFP = 1, hasNewValue = 1, opNewValue = 0 in +class T_MInstFloat <string mnemonic, bits<3> MajOp, bits<3> MinOp> + : MInst<(outs IntRegs:$Rd), + (ins IntRegs:$Rs, IntRegs:$Rt), + "$Rd = "#mnemonic#"($Rs, $Rt)", [], + "" , M_tc_3or4x_SLOT23 > , + Requires<[HasV5T]> { + bits<5> Rd; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1110; + + let Inst{27-24} = 0b1011; + let Inst{23-21} = MajOp; + let Inst{20-16} = Rs; + let Inst{13} = 0b0; + let Inst{12-8} = Rt; + let Inst{7-5} = MinOp; + let Inst{4-0} = Rd; + } + +let isCommutable = 1 in { + def F2_sfadd : T_MInstFloat < "sfadd", 0b000, 0b000>; + def F2_sfmpy : T_MInstFloat < "sfmpy", 0b010, 0b000>; +} + +def F2_sfsub : T_MInstFloat < "sfsub", 0b000, 0b001>; + +def: Pat<(f32 (fadd F32:$src1, F32:$src2)), + (F2_sfadd F32:$src1, F32:$src2)>; + +def: Pat<(f32 
(fsub F32:$src1, F32:$src2)), + (F2_sfsub F32:$src1, F32:$src2)>; + +def: Pat<(f32 (fmul F32:$src1, F32:$src2)), + (F2_sfmpy F32:$src1, F32:$src2)>; + +let Itinerary = M_tc_3x_SLOT23 in { + def F2_sfmax : T_MInstFloat < "sfmax", 0b100, 0b000>; + def F2_sfmin : T_MInstFloat < "sfmin", 0b100, 0b001>; +} + +let AddedComplexity = 100, Predicates = [HasV5T] in { + def: Pat<(f32 (select (i1 (setolt F32:$src1, F32:$src2)), + F32:$src1, F32:$src2)), + (F2_sfmin F32:$src1, F32:$src2)>; + + def: Pat<(f32 (select (i1 (setogt F32:$src1, F32:$src2)), + F32:$src2, F32:$src1)), + (F2_sfmin F32:$src1, F32:$src2)>; + + def: Pat<(f32 (select (i1 (setogt F32:$src1, F32:$src2)), + F32:$src1, F32:$src2)), + (F2_sfmax F32:$src1, F32:$src2)>; + + def: Pat<(f32 (select (i1 (setolt F32:$src1, F32:$src2)), + F32:$src2, F32:$src1)), + (F2_sfmax F32:$src1, F32:$src2)>; +} + +def F2_sffixupn : T_MInstFloat < "sffixupn", 0b110, 0b000>; +def F2_sffixupd : T_MInstFloat < "sffixupd", 0b110, 0b001>; + +// F2_sfrecipa: Reciprocal approximation for division. +let isPredicateLate = 1, isFP = 1, +hasSideEffects = 0, hasNewValue = 1 in +def F2_sfrecipa: MInst < + (outs IntRegs:$Rd, PredRegs:$Pe), + (ins IntRegs:$Rs, IntRegs:$Rt), + "$Rd, $Pe = sfrecipa($Rs, $Rt)">, + Requires<[HasV5T]> { + bits<5> Rd; + bits<2> Pe; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1110; + let Inst{27-21} = 0b1011111; + let Inst{20-16} = Rs; + let Inst{13} = 0b0; + let Inst{12-8} = Rt; + let Inst{7} = 0b1; + let Inst{6-5} = Pe; + let Inst{4-0} = Rd; + } + +// F2_dfcmpeq: Floating point compare for equal. +let isCompare = 1, isFP = 1 in +class T_fcmp <string mnemonic, RegisterClass RC, bits<3> MinOp, + list<dag> pattern = [] > + : ALU64Inst <(outs PredRegs:$dst), (ins RC:$src1, RC:$src2), + "$dst = "#mnemonic#"($src1, $src2)", pattern, + "" , ALU64_tc_2early_SLOT23 > , + Requires<[HasV5T]> { + bits<2> dst; + bits<5> src1; + bits<5> src2; + + let IClass = 0b1101; + + let Inst{27-21} = 0b0010111; + let Inst{20-16} = src1; + let Inst{12-8} = src2; + let Inst{7-5} = MinOp; + let Inst{1-0} = dst; + } + +class T_fcmp64 <string mnemonic, PatFrag OpNode, bits<3> MinOp> + : T_fcmp <mnemonic, DoubleRegs, MinOp, + [(set I1:$dst, (OpNode F64:$src1, F64:$src2))]> { + let IClass = 0b1101; + let Inst{27-21} = 0b0010111; +} + +class T_fcmp32 <string mnemonic, PatFrag OpNode, bits<3> MinOp> + : T_fcmp <mnemonic, IntRegs, MinOp, + [(set I1:$dst, (OpNode F32:$src1, F32:$src2))]> { + let IClass = 0b1100; + let Inst{27-21} = 0b0111111; +} + +def F2_dfcmpeq : T_fcmp64<"dfcmp.eq", setoeq, 0b000>; +def F2_dfcmpgt : T_fcmp64<"dfcmp.gt", setogt, 0b001>; +def F2_dfcmpge : T_fcmp64<"dfcmp.ge", setoge, 0b010>; +def F2_dfcmpuo : T_fcmp64<"dfcmp.uo", setuo, 0b011>; + +def F2_sfcmpge : T_fcmp32<"sfcmp.ge", setoge, 0b000>; +def F2_sfcmpuo : T_fcmp32<"sfcmp.uo", setuo, 0b001>; +def F2_sfcmpeq : T_fcmp32<"sfcmp.eq", setoeq, 0b011>; +def F2_sfcmpgt : T_fcmp32<"sfcmp.gt", setogt, 0b100>; + +//===----------------------------------------------------------------------===// +// Multiclass to define 'Def Pats' for ordered gt, ge, eq operations. 
+//===----------------------------------------------------------------------===// + +let Predicates = [HasV5T] in +multiclass T_fcmp_pats<PatFrag cmpOp, InstHexagon IntMI, InstHexagon DoubleMI> { + // IntRegs + def: Pat<(i1 (cmpOp F32:$src1, F32:$src2)), + (IntMI F32:$src1, F32:$src2)>; + // DoubleRegs + def: Pat<(i1 (cmpOp F64:$src1, F64:$src2)), + (DoubleMI F64:$src1, F64:$src2)>; +} + +defm : T_fcmp_pats <seteq, F2_sfcmpeq, F2_dfcmpeq>; +defm : T_fcmp_pats <setgt, F2_sfcmpgt, F2_dfcmpgt>; +defm : T_fcmp_pats <setge, F2_sfcmpge, F2_dfcmpge>; + +//===----------------------------------------------------------------------===// +// Multiclass to define 'Def Pats' for unordered gt, ge, eq operations. +//===----------------------------------------------------------------------===// +let Predicates = [HasV5T] in +multiclass unord_Pats <PatFrag cmpOp, InstHexagon IntMI, InstHexagon DoubleMI> { + // IntRegs + def: Pat<(i1 (cmpOp F32:$src1, F32:$src2)), + (C2_or (F2_sfcmpuo F32:$src1, F32:$src2), + (IntMI F32:$src1, F32:$src2))>; + + // DoubleRegs + def: Pat<(i1 (cmpOp F64:$src1, F64:$src2)), + (C2_or (F2_dfcmpuo F64:$src1, F64:$src2), + (DoubleMI F64:$src1, F64:$src2))>; +} + +defm : unord_Pats <setuge, F2_sfcmpge, F2_dfcmpge>; +defm : unord_Pats <setugt, F2_sfcmpgt, F2_dfcmpgt>; +defm : unord_Pats <setueq, F2_sfcmpeq, F2_dfcmpeq>; + +//===----------------------------------------------------------------------===// +// Multiclass to define 'Def Pats' for the following dags: +// seteq(setoeq(op1, op2), 0) -> not(setoeq(op1, op2)) +// seteq(setoeq(op1, op2), 1) -> setoeq(op1, op2) +// setne(setoeq(op1, op2), 0) -> setoeq(op1, op2) +// setne(setoeq(op1, op2), 1) -> not(setoeq(op1, op2)) +//===----------------------------------------------------------------------===// +let Predicates = [HasV5T] in +multiclass eq_ordgePats <PatFrag cmpOp, InstHexagon IntMI, + InstHexagon DoubleMI> { + // IntRegs + def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 0)), + (C2_not (IntMI F32:$src1, F32:$src2))>; + def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 1)), + (IntMI F32:$src1, F32:$src2)>; + def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 0)), + (IntMI F32:$src1, F32:$src2)>; + def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 1)), + (C2_not (IntMI F32:$src1, F32:$src2))>; + + // DoubleRegs + def : Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 0)), + (C2_not (DoubleMI F64:$src1, F64:$src2))>; + def : Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 1)), + (DoubleMI F64:$src1, F64:$src2)>; + def : Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 0)), + (DoubleMI F64:$src1, F64:$src2)>; + def : Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 1)), + (C2_not (DoubleMI F64:$src1, F64:$src2))>; +} + +defm : eq_ordgePats<setoeq, F2_sfcmpeq, F2_dfcmpeq>; +defm : eq_ordgePats<setoge, F2_sfcmpge, F2_dfcmpge>; +defm : eq_ordgePats<setogt, F2_sfcmpgt, F2_dfcmpgt>; + +//===----------------------------------------------------------------------===// +// Multiclass to define 'Def Pats' for the following dags: +// seteq(setolt(op1, op2), 0) -> not(setogt(op2, op1)) +// seteq(setolt(op1, op2), 1) -> setogt(op2, op1) +// setne(setolt(op1, op2), 0) -> setogt(op2, op1) +// setne(setolt(op1, op2), 1) -> not(setogt(op2, op1)) +//===----------------------------------------------------------------------===// +let Predicates = [HasV5T] in +multiclass eq_ordltPats <PatFrag cmpOp, InstHexagon IntMI, + InstHexagon DoubleMI> { + // IntRegs + def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 0)), + (C2_not 
(IntMI F32:$src2, F32:$src1))>; + def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 1)), + (IntMI F32:$src2, F32:$src1)>; + def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 0)), + (IntMI F32:$src2, F32:$src1)>; + def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 1)), + (C2_not (IntMI F32:$src2, F32:$src1))>; + + // DoubleRegs + def: Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 0)), + (C2_not (DoubleMI F64:$src2, F64:$src1))>; + def: Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 1)), + (DoubleMI F64:$src2, F64:$src1)>; + def: Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 0)), + (DoubleMI F64:$src2, F64:$src1)>; + def: Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 0)), + (C2_not (DoubleMI F64:$src2, F64:$src1))>; +} + +defm : eq_ordltPats<setole, F2_sfcmpge, F2_dfcmpge>; +defm : eq_ordltPats<setolt, F2_sfcmpgt, F2_dfcmpgt>; + + +// o. seto inverse of setuo. http://llvm.org/docs/LangRef.html#i_fcmp +let Predicates = [HasV5T] in { + def: Pat<(i1 (seto F32:$src1, F32:$src2)), + (C2_not (F2_sfcmpuo F32:$src2, F32:$src1))>; + def: Pat<(i1 (seto F32:$src1, fpimm:$src2)), + (C2_not (F2_sfcmpuo (TFRI_f fpimm:$src2), F32:$src1))>; + def: Pat<(i1 (seto F64:$src1, F64:$src2)), + (C2_not (F2_dfcmpuo F64:$src2, F64:$src1))>; + def: Pat<(i1 (seto F64:$src1, fpimm:$src2)), + (C2_not (F2_dfcmpuo (CONST64_Float_Real fpimm:$src2), F64:$src1))>; +} + +// Ordered lt. +let Predicates = [HasV5T] in { + def: Pat<(i1 (setolt F32:$src1, F32:$src2)), + (F2_sfcmpgt F32:$src2, F32:$src1)>; + def: Pat<(i1 (setolt F32:$src1, fpimm:$src2)), + (F2_sfcmpgt (f32 (TFRI_f fpimm:$src2)), F32:$src1)>; + def: Pat<(i1 (setolt F64:$src1, F64:$src2)), + (F2_dfcmpgt F64:$src2, F64:$src1)>; + def: Pat<(i1 (setolt F64:$src1, fpimm:$src2)), + (F2_dfcmpgt (CONST64_Float_Real fpimm:$src2), F64:$src1)>; +} + +// Unordered lt. +let Predicates = [HasV5T] in { + def: Pat<(i1 (setult F32:$src1, F32:$src2)), + (C2_or (F2_sfcmpuo F32:$src1, F32:$src2), + (F2_sfcmpgt F32:$src2, F32:$src1))>; + def: Pat<(i1 (setult F32:$src1, fpimm:$src2)), + (C2_or (F2_sfcmpuo F32:$src1, (TFRI_f fpimm:$src2)), + (F2_sfcmpgt (TFRI_f fpimm:$src2), F32:$src1))>; + def: Pat<(i1 (setult F64:$src1, F64:$src2)), + (C2_or (F2_dfcmpuo F64:$src1, F64:$src2), + (F2_dfcmpgt F64:$src2, F64:$src1))>; + def: Pat<(i1 (setult F64:$src1, fpimm:$src2)), + (C2_or (F2_dfcmpuo F64:$src1, (CONST64_Float_Real fpimm:$src2)), + (F2_dfcmpgt (CONST64_Float_Real fpimm:$src2), F64:$src1))>; +} + +// Ordered le. +let Predicates = [HasV5T] in { + // rs <= rt -> rt >= rs. + def: Pat<(i1 (setole F32:$src1, F32:$src2)), + (F2_sfcmpge F32:$src2, F32:$src1)>; + def: Pat<(i1 (setole F32:$src1, fpimm:$src2)), + (F2_sfcmpge (TFRI_f fpimm:$src2), F32:$src1)>; + + // Rss <= Rtt -> Rtt >= Rss. + def: Pat<(i1 (setole F64:$src1, F64:$src2)), + (F2_dfcmpge F64:$src2, F64:$src1)>; + def: Pat<(i1 (setole F64:$src1, fpimm:$src2)), + (F2_dfcmpge (CONST64_Float_Real fpimm:$src2), F64:$src1)>; +} + +// Unordered le. +let Predicates = [HasV5T] in { +// rs <= rt -> rt >= rs. 
+ def: Pat<(i1 (setule F32:$src1, F32:$src2)), + (C2_or (F2_sfcmpuo F32:$src1, F32:$src2), + (F2_sfcmpge F32:$src2, F32:$src1))>; + def: Pat<(i1 (setule F32:$src1, fpimm:$src2)), + (C2_or (F2_sfcmpuo F32:$src1, (TFRI_f fpimm:$src2)), + (F2_sfcmpge (TFRI_f fpimm:$src2), F32:$src1))>; + def: Pat<(i1 (setule F64:$src1, F64:$src2)), + (C2_or (F2_dfcmpuo F64:$src1, F64:$src2), + (F2_dfcmpge F64:$src2, F64:$src1))>; + def: Pat<(i1 (setule F64:$src1, fpimm:$src2)), + (C2_or (F2_dfcmpuo F64:$src1, (CONST64_Float_Real fpimm:$src2)), + (F2_dfcmpge (CONST64_Float_Real fpimm:$src2), F64:$src1))>; +} + +// Ordered ne. +let Predicates = [HasV5T] in { + def: Pat<(i1 (setone F32:$src1, F32:$src2)), + (C2_not (F2_sfcmpeq F32:$src1, F32:$src2))>; + def: Pat<(i1 (setone F64:$src1, F64:$src2)), + (C2_not (F2_dfcmpeq F64:$src1, F64:$src2))>; + def: Pat<(i1 (setone F32:$src1, fpimm:$src2)), + (C2_not (F2_sfcmpeq F32:$src1, (TFRI_f fpimm:$src2)))>; + def: Pat<(i1 (setone F64:$src1, fpimm:$src2)), + (C2_not (F2_dfcmpeq F64:$src1, (CONST64_Float_Real fpimm:$src2)))>; +} + +// Unordered ne. +let Predicates = [HasV5T] in { + def: Pat<(i1 (setune F32:$src1, F32:$src2)), + (C2_or (F2_sfcmpuo F32:$src1, F32:$src2), + (C2_not (F2_sfcmpeq F32:$src1, F32:$src2)))>; + def: Pat<(i1 (setune F64:$src1, F64:$src2)), + (C2_or (F2_dfcmpuo F64:$src1, F64:$src2), + (C2_not (F2_dfcmpeq F64:$src1, F64:$src2)))>; + def: Pat<(i1 (setune F32:$src1, fpimm:$src2)), + (C2_or (F2_sfcmpuo F32:$src1, (TFRI_f fpimm:$src2)), + (C2_not (F2_sfcmpeq F32:$src1, (TFRI_f fpimm:$src2))))>; + def: Pat<(i1 (setune F64:$src1, fpimm:$src2)), + (C2_or (F2_dfcmpuo F64:$src1, (CONST64_Float_Real fpimm:$src2)), + (C2_not (F2_dfcmpeq F64:$src1, + (CONST64_Float_Real fpimm:$src2))))>; +} + +// Besides set[o|u][comparions], we also need set[comparisons]. +let Predicates = [HasV5T] in { + // lt. + def: Pat<(i1 (setlt F32:$src1, F32:$src2)), + (F2_sfcmpgt F32:$src2, F32:$src1)>; + def: Pat<(i1 (setlt F32:$src1, fpimm:$src2)), + (F2_sfcmpgt (TFRI_f fpimm:$src2), F32:$src1)>; + def: Pat<(i1 (setlt F64:$src1, F64:$src2)), + (F2_dfcmpgt F64:$src2, F64:$src1)>; + def: Pat<(i1 (setlt F64:$src1, fpimm:$src2)), + (F2_dfcmpgt (CONST64_Float_Real fpimm:$src2), F64:$src1)>; + + // le. + // rs <= rt -> rt >= rs. + def: Pat<(i1 (setle F32:$src1, F32:$src2)), + (F2_sfcmpge F32:$src2, F32:$src1)>; + def: Pat<(i1 (setle F32:$src1, fpimm:$src2)), + (F2_sfcmpge (TFRI_f fpimm:$src2), F32:$src1)>; + + // Rss <= Rtt -> Rtt >= Rss. + def: Pat<(i1 (setle F64:$src1, F64:$src2)), + (F2_dfcmpge F64:$src2, F64:$src1)>; + def: Pat<(i1 (setle F64:$src1, fpimm:$src2)), + (F2_dfcmpge (CONST64_Float_Real fpimm:$src2), F64:$src1)>; + + // ne. 
+ def: Pat<(i1 (setne F32:$src1, F32:$src2)), + (C2_not (F2_sfcmpeq F32:$src1, F32:$src2))>; + def: Pat<(i1 (setne F64:$src1, F64:$src2)), + (C2_not (F2_dfcmpeq F64:$src1, F64:$src2))>; + def: Pat<(i1 (setne F32:$src1, fpimm:$src2)), + (C2_not (F2_sfcmpeq F32:$src1, (TFRI_f fpimm:$src2)))>; + def: Pat<(i1 (setne F64:$src1, fpimm:$src2)), + (C2_not (F2_dfcmpeq F64:$src1, (CONST64_Float_Real fpimm:$src2)))>; +} + +// F2 convert template classes: +let isFP = 1 in +class F2_RDD_RSS_CONVERT<string mnemonic, bits<3> MinOp, + SDNode Op, PatLeaf RCOut, PatLeaf RCIn, + string chop =""> + : SInst <(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss), + "$Rdd = "#mnemonic#"($Rss)"#chop, + [(set RCOut:$Rdd, (Op RCIn:$Rss))], "", + S_2op_tc_3or4x_SLOT23> { + bits<5> Rdd; + bits<5> Rss; + + let IClass = 0b1000; + + let Inst{27-21} = 0b0000111; + let Inst{20-16} = Rss; + let Inst{7-5} = MinOp; + let Inst{4-0} = Rdd; + } + +let isFP = 1 in +class F2_RDD_RS_CONVERT<string mnemonic, bits<3> MinOp, + SDNode Op, PatLeaf RCOut, PatLeaf RCIn, + string chop =""> + : SInst <(outs DoubleRegs:$Rdd), (ins IntRegs:$Rs), + "$Rdd = "#mnemonic#"($Rs)"#chop, + [(set RCOut:$Rdd, (Op RCIn:$Rs))], "", + S_2op_tc_3or4x_SLOT23> { + bits<5> Rdd; + bits<5> Rs; + + let IClass = 0b1000; + + let Inst{27-21} = 0b0100100; + let Inst{20-16} = Rs; + let Inst{7-5} = MinOp; + let Inst{4-0} = Rdd; + } + +let isFP = 1, hasNewValue = 1 in +class F2_RD_RSS_CONVERT<string mnemonic, bits<3> MinOp, + SDNode Op, PatLeaf RCOut, PatLeaf RCIn, + string chop =""> + : SInst <(outs IntRegs:$Rd), (ins DoubleRegs:$Rss), + "$Rd = "#mnemonic#"($Rss)"#chop, + [(set RCOut:$Rd, (Op RCIn:$Rss))], "", + S_2op_tc_3or4x_SLOT23> { + bits<5> Rd; + bits<5> Rss; + + let IClass = 0b1000; + + let Inst{27-24} = 0b1000; + let Inst{23-21} = MinOp; + let Inst{20-16} = Rss; + let Inst{7-5} = 0b001; + let Inst{4-0} = Rd; + } + +let isFP = 1, hasNewValue = 1 in +class F2_RD_RS_CONVERT<string mnemonic, bits<3> MajOp, bits<3> MinOp, + SDNode Op, PatLeaf RCOut, PatLeaf RCIn, + string chop =""> + : SInst <(outs IntRegs:$Rd), (ins IntRegs:$Rs), + "$Rd = "#mnemonic#"($Rs)"#chop, + [(set RCOut:$Rd, (Op RCIn:$Rs))], "", + S_2op_tc_3or4x_SLOT23> { + bits<5> Rd; + bits<5> Rs; + + let IClass = 0b1000; + + let Inst{27-24} = 0b1011; + let Inst{23-21} = MajOp; + let Inst{20-16} = Rs; + let Inst{7-5} = MinOp; + let Inst{4-0} = Rd; + } + +// Convert single precision to double precision and vice-versa. +def F2_conv_sf2df : F2_RDD_RS_CONVERT <"convert_sf2df", 0b000, + fextend, F64, F32>; + +def F2_conv_df2sf : F2_RD_RSS_CONVERT <"convert_df2sf", 0b000, + fround, F32, F64>; + +// Convert Integer to Floating Point. +def F2_conv_d2sf : F2_RD_RSS_CONVERT <"convert_d2sf", 0b010, + sint_to_fp, F32, I64>; +def F2_conv_ud2sf : F2_RD_RSS_CONVERT <"convert_ud2sf", 0b001, + uint_to_fp, F32, I64>; +def F2_conv_uw2sf : F2_RD_RS_CONVERT <"convert_uw2sf", 0b001, 0b000, + uint_to_fp, F32, I32>; +def F2_conv_w2sf : F2_RD_RS_CONVERT <"convert_w2sf", 0b010, 0b000, + sint_to_fp, F32, I32>; +def F2_conv_d2df : F2_RDD_RSS_CONVERT <"convert_d2df", 0b011, + sint_to_fp, F64, I64>; +def F2_conv_ud2df : F2_RDD_RSS_CONVERT <"convert_ud2df", 0b010, + uint_to_fp, F64, I64>; +def F2_conv_uw2df : F2_RDD_RS_CONVERT <"convert_uw2df", 0b001, + uint_to_fp, F64, I32>; +def F2_conv_w2df : F2_RDD_RS_CONVERT <"convert_w2df", 0b010, + sint_to_fp, F64, I32>; + +// Convert Floating Point to Integer - default. 
+def F2_conv_df2uw_chop : F2_RD_RSS_CONVERT <"convert_df2uw", 0b101, + fp_to_uint, I32, F64, ":chop">; +def F2_conv_df2w_chop : F2_RD_RSS_CONVERT <"convert_df2w", 0b111, + fp_to_sint, I32, F64, ":chop">; +def F2_conv_sf2uw_chop : F2_RD_RS_CONVERT <"convert_sf2uw", 0b011, 0b001, + fp_to_uint, I32, F32, ":chop">; +def F2_conv_sf2w_chop : F2_RD_RS_CONVERT <"convert_sf2w", 0b100, 0b001, + fp_to_sint, I32, F32, ":chop">; +def F2_conv_df2d_chop : F2_RDD_RSS_CONVERT <"convert_df2d", 0b110, + fp_to_sint, I64, F64, ":chop">; +def F2_conv_df2ud_chop : F2_RDD_RSS_CONVERT <"convert_df2ud", 0b111, + fp_to_uint, I64, F64, ":chop">; +def F2_conv_sf2d_chop : F2_RDD_RS_CONVERT <"convert_sf2d", 0b110, + fp_to_sint, I64, F32, ":chop">; +def F2_conv_sf2ud_chop : F2_RDD_RS_CONVERT <"convert_sf2ud", 0b101, + fp_to_uint, I64, F32, ":chop">; + +// Convert Floating Point to Integer: non-chopped. +let AddedComplexity = 20, Predicates = [HasV5T, IEEERndNearV5T] in { + def F2_conv_df2d : F2_RDD_RSS_CONVERT <"convert_df2d", 0b000, + fp_to_sint, I64, F64>; + def F2_conv_df2ud : F2_RDD_RSS_CONVERT <"convert_df2ud", 0b001, + fp_to_uint, I64, F64>; + def F2_conv_sf2ud : F2_RDD_RS_CONVERT <"convert_sf2ud", 0b011, + fp_to_uint, I64, F32>; + def F2_conv_sf2d : F2_RDD_RS_CONVERT <"convert_sf2d", 0b100, + fp_to_sint, I64, F32>; + def F2_conv_df2uw : F2_RD_RSS_CONVERT <"convert_df2uw", 0b011, + fp_to_uint, I32, F64>; + def F2_conv_df2w : F2_RD_RSS_CONVERT <"convert_df2w", 0b100, + fp_to_sint, I32, F64>; + def F2_conv_sf2uw : F2_RD_RS_CONVERT <"convert_sf2uw", 0b011, 0b000, + fp_to_uint, I32, F32>; + def F2_conv_sf2w : F2_RD_RS_CONVERT <"convert_sf2w", 0b100, 0b000, + fp_to_sint, I32, F32>; +} + +// Fix up radicand. +let isFP = 1, hasNewValue = 1 in +def F2_sffixupr: SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs), + "$Rd = sffixupr($Rs)", + [], "" , S_2op_tc_3or4x_SLOT23>, Requires<[HasV5T]> { + bits<5> Rd; + bits<5> Rs; + + let IClass = 0b1000; + + let Inst{27-21} = 0b1011101; + let Inst{20-16} = Rs; + let Inst{7-5} = 0b000; + let Inst{4-0} = Rd; + } + +// Bitcast is different than [fp|sint|uint]_to_[sint|uint|fp]. +let Predicates = [HasV5T] in { + def: Pat <(i32 (bitconvert F32:$src)), (I32:$src)>; + def: Pat <(f32 (bitconvert I32:$src)), (F32:$src)>; + def: Pat <(i64 (bitconvert F64:$src)), (I64:$src)>; + def: Pat <(f64 (bitconvert I64:$src)), (F64:$src)>; +} + +// F2_sffma: Floating-point fused multiply add. +let isFP = 1, hasNewValue = 1 in +class T_sfmpy_acc <bit isSub, bit isLib> + : MInst<(outs IntRegs:$Rx), + (ins IntRegs:$dst2, IntRegs:$Rs, IntRegs:$Rt), + "$Rx "#!if(isSub, "-=","+=")#" sfmpy($Rs, $Rt)"#!if(isLib, ":lib",""), + [], "$dst2 = $Rx" , M_tc_3_SLOT23 > , + Requires<[HasV5T]> { + bits<5> Rx; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1110; + + let Inst{27-21} = 0b1111000; + let Inst{20-16} = Rs; + let Inst{13} = 0b0; + let Inst{12-8} = Rt; + let Inst{7} = 0b1; + let Inst{6} = isLib; + let Inst{5} = isSub; + let Inst{4-0} = Rx; + } + +def F2_sffma: T_sfmpy_acc <0, 0>; +def F2_sffms: T_sfmpy_acc <1, 0>; +def F2_sffma_lib: T_sfmpy_acc <0, 1>; +def F2_sffms_lib: T_sfmpy_acc <1, 1>; + +def : Pat <(f32 (fma F32:$src2, F32:$src3, F32:$src1)), + (F2_sffma F32:$src1, F32:$src2, F32:$src3)>; + +// Floating-point fused multiply add w/ additional scaling (2**pu). 
+let isFP = 1, hasNewValue = 1 in +def F2_sffma_sc: MInst < + (outs IntRegs:$Rx), + (ins IntRegs:$dst2, IntRegs:$Rs, IntRegs:$Rt, PredRegs:$Pu), + "$Rx += sfmpy($Rs, $Rt, $Pu):scale" , + [], "$dst2 = $Rx" , M_tc_3_SLOT23 > , + Requires<[HasV5T]> { + bits<5> Rx; + bits<5> Rs; + bits<5> Rt; + bits<2> Pu; + + let IClass = 0b1110; + + let Inst{27-21} = 0b1111011; + let Inst{20-16} = Rs; + let Inst{13} = 0b0; + let Inst{12-8} = Rt; + let Inst{7} = 0b1; + let Inst{6-5} = Pu; + let Inst{4-0} = Rx; + } + +let isExtended = 1, isExtentSigned = 1, opExtentBits = 8, opExtendable = 3, + isPseudo = 1, InputType = "imm" in +def MUX_ir_f : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, f32Ext:$src3), + "$dst = mux($src1, $src2, #$src3)", + [(set F32:$dst, (f32 (select I1:$src1, F32:$src2, fpimm:$src3)))]>, + Requires<[HasV5T]>; + +let isExtended = 1, isExtentSigned = 1, opExtentBits = 8, opExtendable = 2, + isPseudo = 1, InputType = "imm" in +def MUX_ri_f : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, f32Ext:$src2, IntRegs:$src3), + "$dst = mux($src1, #$src2, $src3)", + [(set F32:$dst, (f32 (select I1:$src1, fpimm:$src2, F32:$src3)))]>, + Requires<[HasV5T]>; + +def: Pat<(select I1:$src1, F32:$src2, F32:$src3), + (C2_mux I1:$src1, F32:$src2, F32:$src3)>, + Requires<[HasV5T]>; + +def: Pat<(select (i1 (setult F32:$src1, F32:$src2)), F32:$src3, F32:$src4), + (C2_mux (F2_sfcmpgt F32:$src2, F32:$src1), F32:$src4, F32:$src3)>, + Requires<[HasV5T]>; + +def: Pat<(select I1:$src1, F64:$src2, F64:$src3), + (C2_vmux I1:$src1, F64:$src2, F64:$src3)>, + Requires<[HasV5T]>; + +def: Pat<(select (i1 (setult F64:$src1, F64:$src2)), F64:$src3, F64:$src4), + (C2_vmux (F2_dfcmpgt F64:$src2, F64:$src1), F64:$src3, F64:$src4)>, + Requires<[HasV5T]>; + +// Map from p0 = pnot(p0); r0 = select(p0, #i, r1) +// => r0 = MUX_ir_f(p0, #i, r1) +def: Pat<(select (not I1:$src1), fpimm:$src2, F32:$src3), + (MUX_ir_f I1:$src1, F32:$src3, fpimm:$src2)>, + Requires<[HasV5T]>; + +// Map from p0 = pnot(p0); r0 = mux(p0, r1, #i) +// => r0 = MUX_ri_f(p0, r1, #i) +def: Pat<(select (not I1:$src1), F32:$src2, fpimm:$src3), + (MUX_ri_f I1:$src1, fpimm:$src3, F32:$src2)>, + Requires<[HasV5T]>; + +def: Pat<(i32 (fp_to_sint F64:$src1)), + (LoReg (F2_conv_df2d_chop F64:$src1))>, + Requires<[HasV5T]>; + +//===----------------------------------------------------------------------===// +// :natural forms of vasrh and vasrhub insns +//===----------------------------------------------------------------------===// +// S5_asrhub_rnd_sat: Vector arithmetic shift right by immediate with round, +// saturate, and pack. +let Defs = [USR_OVF], hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +class T_ASRHUB<bit isSat> + : SInst <(outs IntRegs:$Rd), + (ins DoubleRegs:$Rss, u4Imm:$u4), + "$Rd = vasrhub($Rss, #$u4):"#!if(isSat, "sat", "raw"), + [], "", S_2op_tc_2_SLOT23>, + Requires<[HasV5T]> { + bits<5> Rd; + bits<5> Rss; + bits<4> u4; + + let IClass = 0b1000; + + let Inst{27-21} = 0b1000011; + let Inst{20-16} = Rss; + let Inst{13-12} = 0b00; + let Inst{11-8} = u4; + let Inst{7-6} = 0b10; + let Inst{5} = isSat; + let Inst{4-0} = Rd; + } + +def S5_asrhub_rnd_sat : T_ASRHUB <0>; +def S5_asrhub_sat : T_ASRHUB <1>; + +let isAsmParserOnly = 1 in +def S5_asrhub_rnd_sat_goodsyntax + : SInst <(outs IntRegs:$Rd), (ins DoubleRegs:$Rss, u4Imm:$u4), + "$Rd = vasrhub($Rss, #$u4):rnd:sat">, Requires<[HasV5T]>; + +// S5_vasrhrnd: Vector arithmetic shift right by immediate with round. 
+let hasSideEffects = 0 in +def S5_vasrhrnd : SInst <(outs DoubleRegs:$Rdd), + (ins DoubleRegs:$Rss, u4Imm:$u4), + "$Rdd = vasrh($Rss, #$u4):raw">, + Requires<[HasV5T]> { + bits<5> Rdd; + bits<5> Rss; + bits<4> u4; + + let IClass = 0b1000; + + let Inst{27-21} = 0b0000001; + let Inst{20-16} = Rss; + let Inst{13-12} = 0b00; + let Inst{11-8} = u4; + let Inst{7-5} = 0b000; + let Inst{4-0} = Rdd; + } + +let isAsmParserOnly = 1 in +def S5_vasrhrnd_goodsyntax + : SInst <(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss, u4Imm:$u4), + "$Rdd = vasrh($Rss,#$u4):rnd">, Requires<[HasV5T]>; + +// Floating point reciprocal square root approximation +let Uses = [USR], isPredicateLate = 1, isFP = 1, + hasSideEffects = 0, hasNewValue = 1, opNewValue = 0, + validSubTargets = HasV5SubT in +def F2_sfinvsqrta: SInst < + (outs IntRegs:$Rd, PredRegs:$Pe), + (ins IntRegs:$Rs), + "$Rd, $Pe = sfinvsqrta($Rs)" > , + Requires<[HasV5T]> { + bits<5> Rd; + bits<2> Pe; + bits<5> Rs; + + let IClass = 0b1000; + + let Inst{27-21} = 0b1011111; + let Inst{20-16} = Rs; + let Inst{7} = 0b0; + let Inst{6-5} = Pe; + let Inst{4-0} = Rd; + } + +// Complex multiply 32x16 +let Defs = [USR_OVF], Itinerary = S_3op_tc_3x_SLOT23 in { + def M4_cmpyi_whc : T_S3op_8<"cmpyiwh", 0b101, 1, 1, 1, 1>; + def M4_cmpyr_whc : T_S3op_8<"cmpyrwh", 0b111, 1, 1, 1, 1>; +} + +// Classify floating-point value +let isFP = 1 in + def F2_sfclass : T_TEST_BIT_IMM<"sfclass", 0b111>; + +let isFP = 1 in +def F2_dfclass: ALU64Inst<(outs PredRegs:$Pd), (ins DoubleRegs:$Rss, u5Imm:$u5), + "$Pd = dfclass($Rss, #$u5)", + [], "" , ALU64_tc_2early_SLOT23 > , Requires<[HasV5T]> { + bits<2> Pd; + bits<5> Rss; + bits<5> u5; + + let IClass = 0b1101; + let Inst{27-21} = 0b1100100; + let Inst{20-16} = Rss; + let Inst{12-10} = 0b000; + let Inst{9-5} = u5; + let Inst{4-3} = 0b10; + let Inst{1-0} = Pd; + } + +// Instructions to create floating point constant +class T_fimm <string mnemonic, RegisterClass RC, bits<4> RegType, bit isNeg> + : ALU64Inst<(outs RC:$dst), (ins u10Imm:$src), + "$dst = "#mnemonic#"(#$src)"#!if(isNeg, ":neg", ":pos"), + [], "", ALU64_tc_3x_SLOT23>, Requires<[HasV5T]> { + bits<5> dst; + bits<10> src; + + let IClass = 0b1101; + let Inst{27-24} = RegType; + let Inst{23} = 0b0; + let Inst{22} = isNeg; + let Inst{21} = src{9}; + let Inst{13-5} = src{8-0}; + let Inst{4-0} = dst; + } + +let hasNewValue = 1, opNewValue = 0 in { +def F2_sfimm_p : T_fimm <"sfmake", IntRegs, 0b0110, 0>; +def F2_sfimm_n : T_fimm <"sfmake", IntRegs, 0b0110, 1>; +} + +def F2_dfimm_p : T_fimm <"dfmake", DoubleRegs, 0b1001, 0>; +def F2_dfimm_n : T_fimm <"dfmake", DoubleRegs, 0b1001, 1>; + +def : Pat <(fabs (f32 IntRegs:$src1)), + (S2_clrbit_i (f32 IntRegs:$src1), 31)>, + Requires<[HasV5T]>; + +def : Pat <(fneg (f32 IntRegs:$src1)), + (S2_togglebit_i (f32 IntRegs:$src1), 31)>, + Requires<[HasV5T]>; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV60.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV60.td new file mode 100644 index 0000000..897ada0 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV60.td @@ -0,0 +1,2241 @@ +//=- HexagonInstrInfoV60.td - Target Desc. for Hexagon Target -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Hexagon V60 instructions in TableGen format. 
+// +//===----------------------------------------------------------------------===// + + +// Vector store +let mayStore = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in +{ + class VSTInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = CVI_VM_ST, + IType type = TypeCVI_VM_ST> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, type>, OpcodeHexagon; + +} + +// Vector load +let Predicates = [HasV60T, UseHVX] in +let mayLoad = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in + class V6_LDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = CVI_VM_LD, + IType type = TypeCVI_VM_LD> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, type>; + +let Predicates = [HasV60T, UseHVX] in +let mayStore = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in +class V6_STInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = CVI_VM_ST, + IType type = TypeCVI_VM_ST> +: InstHexagon<outs, ins, asmstr, pattern, cstr, itin, type>; + +//===----------------------------------------------------------------------===// +// Vector loads with base + immediate offset +//===----------------------------------------------------------------------===// +let addrMode = BaseImmOffset, accessSize = Vector64Access in +class T_vload_ai<string asmStr> + : V6_LDInst <(outs VectorRegs:$dst), (ins IntRegs:$src1, s4_6Imm:$src2), + asmStr>; + +let isCodeGenOnly = 1, addrMode = BaseImmOffset, accessSize = Vector128Access in +class T_vload_ai_128B<string asmStr> + : V6_LDInst <(outs VectorRegs128B:$dst), (ins IntRegs:$src1, s4_7Imm:$src2), + asmStr>; + +let isCVLoadable = 1, hasNewValue = 1 in { + def V6_vL32b_ai : T_vload_ai <"$dst = vmem($src1+#$src2)">, + V6_vL32b_ai_enc; + def V6_vL32b_nt_ai : T_vload_ai <"$dst = vmem($src1+#$src2):nt">, + V6_vL32b_nt_ai_enc; + // 128B + def V6_vL32b_ai_128B : T_vload_ai_128B <"$dst = vmem($src1+#$src2)">, + V6_vL32b_ai_128B_enc; + def V6_vL32b_nt_ai_128B : T_vload_ai_128B <"$dst = vmem($src1+#$src2):nt">, + V6_vL32b_nt_ai_128B_enc; +} + +let Itinerary = CVI_VM_VP_LDU, Type = TypeCVI_VM_VP_LDU, hasNewValue = 1 in { + def V6_vL32Ub_ai : T_vload_ai <"$dst = vmemu($src1+#$src2)">, + V6_vL32Ub_ai_enc; + def V6_vL32Ub_ai_128B : T_vload_ai_128B <"$dst = vmemu($src1+#$src2)">, + V6_vL32Ub_ai_128B_enc; +} + +let Itinerary = CVI_VM_LD, Type = TypeCVI_VM_LD, isCVLoad = 1, + hasNewValue = 1 in { + def V6_vL32b_cur_ai : T_vload_ai <"$dst.cur = vmem($src1+#$src2)">, + V6_vL32b_cur_ai_enc; + def V6_vL32b_nt_cur_ai : T_vload_ai <"$dst.cur = vmem($src1+#$src2):nt">, + V6_vL32b_nt_cur_ai_enc; + // 128B + def V6_vL32b_cur_ai_128B : T_vload_ai_128B + <"$dst.cur = vmem($src1+#$src2)">, + V6_vL32b_cur_ai_128B_enc; + def V6_vL32b_nt_cur_ai_128B : T_vload_ai_128B + <"$dst.cur = vmem($src1+#$src2):nt">, + V6_vL32b_nt_cur_ai_128B_enc; +} + + +let Itinerary = CVI_VM_TMP_LD, Type = TypeCVI_VM_TMP_LD, hasNewValue = 1 in { + def V6_vL32b_tmp_ai : T_vload_ai <"$dst.tmp = vmem($src1+#$src2)">, + V6_vL32b_tmp_ai_enc; + def V6_vL32b_nt_tmp_ai : T_vload_ai <"$dst.tmp = vmem($src1+#$src2):nt">, + V6_vL32b_nt_tmp_ai_enc; + // 128B + def V6_vL32b_tmp_ai_128B : T_vload_ai_128B + <"$dst.tmp = vmem($src1+#$src2)">, + V6_vL32b_tmp_ai_128B_enc; + def V6_vL32b_nt_tmp_ai_128B : T_vload_ai_128B + <"$dst.tmp = vmem($src1+#$src2)">, + V6_vL32b_nt_tmp_ai_128B_enc; +} + +//===----------------------------------------------------------------------===// +// Vector stores with 
base + immediate offset - unconditional +//===----------------------------------------------------------------------===// +let addrMode = BaseImmOffset, accessSize = Vector64Access in +class T_vstore_ai <string mnemonic, string baseOp, Operand ImmOp, + RegisterClass RC, bit isNT> + : V6_STInst <(outs), (ins IntRegs:$src1, ImmOp:$src2, RC:$src3), + mnemonic#"($src1+#$src2)"#!if(isNT, ":nt", "")#" = $src3">, NewValueRel { + let BaseOpcode = baseOp; +} + +let accessSize = Vector64Access in +class T_vstore_ai_64B <string mnemonic, string baseOp, bit isNT = 0> + : T_vstore_ai <mnemonic, baseOp, s4_6Imm, VectorRegs, isNT>; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vstore_ai_128B <string mnemonic, string baseOp, bit isNT = 0> + : T_vstore_ai <mnemonic, baseOp#"128B", s4_7Imm, VectorRegs128B, isNT>; + +let isNVStorable = 1 in { + def V6_vS32b_ai : T_vstore_ai_64B <"vmem", "vS32b_ai">, + V6_vS32b_ai_enc; + def V6_vS32b_ai_128B : T_vstore_ai_128B <"vmem", "vS32b_ai">, + V6_vS32b_ai_128B_enc; +} + +let isNVStorable = 1, isNonTemporal = 1 in { + def V6_vS32b_nt_ai : T_vstore_ai_64B <"vmem", "vS32b_ai", 1>, + V6_vS32b_nt_ai_enc; + def V6_vS32b_nt_ai_128B : T_vstore_ai_128B <"vmem", "vS32b_ai", 1>, + V6_vS32b_nt_ai_128B_enc; +} + +let Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in { + def V6_vS32Ub_ai : T_vstore_ai_64B <"vmemu", "vs32Ub_ai">, + V6_vS32Ub_ai_enc; + def V6_vS32Ub_ai_128B : T_vstore_ai_128B <"vmemu", "vs32Ub_ai">, + V6_vS32Ub_ai_128B_enc; +} +//===----------------------------------------------------------------------===// +// Vector stores with base + immediate offset - unconditional new +//===----------------------------------------------------------------------===// +let addrMode = BaseImmOffset, isNewValue = 1, opNewValue = 2, isNVStore = 1, + Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST in +class T_vstore_new_ai <string baseOp, Operand ImmOp, RegisterClass RC, bit isNT> + : V6_STInst <(outs ), (ins IntRegs:$src1, ImmOp:$src2, RC:$src3), + "vmem($src1+#$src2)"#!if(isNT, ":nt", "")#" = $src3.new">, NewValueRel { + let BaseOpcode = baseOp; +} + +let accessSize = Vector64Access in +class T_vstore_new_ai_64B <string baseOp, bit isNT = 0> + : T_vstore_new_ai <baseOp, s4_6Imm, VectorRegs, isNT>; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vstore_new_ai_128B <string baseOp, bit isNT = 0> + : T_vstore_new_ai <baseOp#"128B", s4_7Imm, VectorRegs128B, isNT>; + +def V6_vS32b_new_ai : T_vstore_new_ai_64B <"vS32b_ai">, V6_vS32b_new_ai_enc; +def V6_vS32b_new_ai_128B : T_vstore_new_ai_128B <"vS32b_ai">, + V6_vS32b_new_ai_128B_enc; + +let isNonTemporal = 1 in { + def V6_vS32b_nt_new_ai : T_vstore_new_ai_64B<"vS32b_ai", 1>, + V6_vS32b_nt_new_ai_enc; + def V6_vS32b_nt_new_ai_128B : T_vstore_new_ai_128B<"vS32b_ai", 1>, + V6_vS32b_nt_new_ai_128B_enc; +} + +//===----------------------------------------------------------------------===// +// Vector stores with base + immediate offset - conditional +//===----------------------------------------------------------------------===// +let addrMode = BaseImmOffset, isPredicated = 1 in +class T_vstore_pred_ai <string mnemonic, string baseOp, Operand ImmOp, + RegisterClass RC, bit isPredNot = 0, bit isNT = 0> + : V6_STInst <(outs), + (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4), + "if ("#!if(isPredNot, "!", "")#"$src1) " + #mnemonic#"($src2+#$src3)"#!if(isNT, ":nt", "")#" = $src4">, NewValueRel { + let isPredicatedFalse = isPredNot; + let BaseOpcode = baseOp; +} + +let accessSize = Vector64Access 
in +class T_vstore_pred_ai_64B <string mnemonic, string baseOp, + bit isPredNot = 0, bit isNT = 0> + : T_vstore_pred_ai <mnemonic, baseOp, s4_6Imm, VectorRegs, isPredNot, isNT>; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vstore_pred_ai_128B <string mnemonic, string baseOp, + bit isPredNot = 0, bit isNT = 0> + : T_vstore_pred_ai <mnemonic, baseOp#"128B", s4_7Imm, VectorRegs128B, + isPredNot, isNT>; + +let isNVStorable = 1 in { + def V6_vS32b_pred_ai : T_vstore_pred_ai_64B <"vmem", "vS32b_ai">, + V6_vS32b_pred_ai_enc; + def V6_vS32b_npred_ai : T_vstore_pred_ai_64B <"vmem", "vS32b_ai", 1>, + V6_vS32b_npred_ai_enc; + // 128B + def V6_vS32b_pred_ai_128B : T_vstore_pred_ai_128B <"vmem", "vS32b_ai">, + V6_vS32b_pred_ai_128B_enc; + def V6_vS32b_npred_ai_128B : T_vstore_pred_ai_128B <"vmem", "vS32b_ai", 1>, + V6_vS32b_npred_ai_128B_enc; +} +let isNVStorable = 1, isNonTemporal = 1 in { + def V6_vS32b_nt_pred_ai : T_vstore_pred_ai_64B <"vmem", "vS32b_ai", 0, 1>, + V6_vS32b_nt_pred_ai_enc; + def V6_vS32b_nt_npred_ai : T_vstore_pred_ai_64B <"vmem", "vS32b_ai", 1, 1>, + V6_vS32b_nt_npred_ai_enc; + // 128B + def V6_vS32b_nt_pred_ai_128B : T_vstore_pred_ai_128B + <"vmem", "vS32b_ai", 0, 1>, + V6_vS32b_nt_pred_ai_128B_enc; + def V6_vS32b_nt_npred_ai_128B : T_vstore_pred_ai_128B + <"vmem", "vS32b_ai", 1, 1>, + V6_vS32b_nt_npred_ai_128B_enc; +} + +let Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in { + def V6_vS32Ub_pred_ai : T_vstore_pred_ai_64B <"vmemu", "vS32Ub_ai">, + V6_vS32Ub_pred_ai_enc; + def V6_vS32Ub_npred_ai : T_vstore_pred_ai_64B <"vmemu", "vS32Ub_ai", 1>, + V6_vS32Ub_npred_ai_enc; + // 128B + def V6_vS32Ub_pred_ai_128B :T_vstore_pred_ai_128B <"vmemu", "vS32Ub_ai">, + V6_vS32Ub_pred_ai_128B_enc; + def V6_vS32Ub_npred_ai_128B :T_vstore_pred_ai_128B <"vmemu", "vS32Ub_ai", 1>, + V6_vS32Ub_npred_ai_128B_enc; +} + +//===----------------------------------------------------------------------===// +// Vector stores with base + immediate offset - byte-enabled aligned +//===----------------------------------------------------------------------===// +let addrMode = BaseImmOffset in +class T_vstore_qpred_ai <Operand ImmOp, RegisterClass RC, + bit isPredNot = 0, bit isNT = 0> + : V6_STInst <(outs), + (ins VecPredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4), + "if ("#!if(isPredNot, "!", "")#"$src1) vmem($src2+#$src3)" + #!if(isNT, ":nt", "")#" = $src4"> { + let isPredicatedFalse = isPredNot; +} + +let accessSize = Vector64Access in +class T_vstore_qpred_ai_64B <bit isPredNot = 0, bit isNT = 0> + : T_vstore_qpred_ai <s4_6Imm, VectorRegs, isPredNot, isNT>; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vstore_qpred_ai_128B <bit isPredNot = 0, bit isNT = 0> + : T_vstore_qpred_ai <s4_7Imm, VectorRegs128B, isPredNot, isNT>; + +def V6_vS32b_qpred_ai : T_vstore_qpred_ai_64B, V6_vS32b_qpred_ai_enc; +def V6_vS32b_nqpred_ai : T_vstore_qpred_ai_64B <1>, + V6_vS32b_nqpred_ai_enc; +def V6_vS32b_nt_qpred_ai : T_vstore_qpred_ai_64B <0, 1>, + V6_vS32b_nt_qpred_ai_enc; +def V6_vS32b_nt_nqpred_ai : T_vstore_qpred_ai_64B <1, 1>, + V6_vS32b_nt_nqpred_ai_enc; +// 128B +def V6_vS32b_qpred_ai_128B : T_vstore_qpred_ai_128B, V6_vS32b_qpred_ai_128B_enc; +def V6_vS32b_nqpred_ai_128B : T_vstore_qpred_ai_128B<1>, + V6_vS32b_nqpred_ai_128B_enc; +def V6_vS32b_nt_qpred_ai_128B : T_vstore_qpred_ai_128B<0, 1>, + V6_vS32b_nt_qpred_ai_128B_enc; +def V6_vS32b_nt_nqpred_ai_128B : T_vstore_qpred_ai_128B<1, 1>, + V6_vS32b_nt_nqpred_ai_128B_enc; + + 
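// Editor's note (illustrative, not part of the upstream diff): the
// "byte-enabled" store classes above take a vector predicate register (Qv)
// rather than a scalar predicate, so the asm strings they build look like
//   if (q0) vmem(r1+#2) = v3            // V6_vS32b_qpred_ai
//   if (!q0) vmem(r1+#2):nt = v3        // V6_vS32b_nt_nqpred_ai
// Assuming the usual HVX semantics, each set bit in Qv enables one byte of
// the store; a scalar sketch of the effect for a 64-byte vector would be:
//   for (i = 0; i < 64; ++i)
//     if (q[i]) dst[i] = v[i];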
+//===----------------------------------------------------------------------===// +// Vector stores with base + immediate offset - conditional new +//===----------------------------------------------------------------------===// +let addrMode = BaseImmOffset, isPredicated = 1, isNewValue = 1, opNewValue = 3, + isNVStore = 1, Type = TypeCVI_VM_NEW_ST, Itinerary = CVI_VM_NEW_ST in +class T_vstore_new_pred_ai <string baseOp, Operand ImmOp, RegisterClass RC, + bit isPredNot, bit isNT> + : V6_STInst <(outs), + (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4), + "if("#!if(isPredNot, "!", "")#"$src1) vmem($src2+#$src3)" + #!if(isNT, ":nt", "")#" = $src4.new">, NewValueRel { + let isPredicatedFalse = isPredNot; + let BaseOpcode = baseOp; +} + +let accessSize = Vector64Access in +class T_vstore_new_pred_ai_64B <string baseOp, bit isPredNot = 0, bit isNT = 0> + : T_vstore_new_pred_ai <baseOp, s4_6Imm, VectorRegs, isPredNot, isNT>; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vstore_new_pred_ai_128B <string baseOp, bit isPredNot = 0, bit isNT = 0> + : T_vstore_new_pred_ai <baseOp#"128B", s4_7Imm, VectorRegs128B, + isPredNot, isNT>; + + +def V6_vS32b_new_pred_ai : T_vstore_new_pred_ai_64B <"vS32b_ai">, + V6_vS32b_new_pred_ai_enc; +def V6_vS32b_new_npred_ai : T_vstore_new_pred_ai_64B <"vS32b_ai", 1>, + V6_vS32b_new_npred_ai_enc; +// 128B +def V6_vS32b_new_pred_ai_128B : T_vstore_new_pred_ai_128B <"vS32b_ai">, + V6_vS32b_new_pred_ai_128B_enc; +def V6_vS32b_new_npred_ai_128B : T_vstore_new_pred_ai_128B <"vS32b_ai", 1>, + V6_vS32b_new_npred_ai_128B_enc; +let isNonTemporal = 1 in { + def V6_vS32b_nt_new_pred_ai : T_vstore_new_pred_ai_64B <"vS32b_ai", 0, 1>, + V6_vS32b_nt_new_pred_ai_enc; + def V6_vS32b_nt_new_npred_ai : T_vstore_new_pred_ai_64B <"vS32b_ai", 1, 1>, + V6_vS32b_nt_new_npred_ai_enc; + // 128B + def V6_vS32b_nt_new_pred_ai_128B : T_vstore_new_pred_ai_128B + <"vS32b_ai", 0, 1>, + V6_vS32b_nt_new_pred_ai_128B_enc; + def V6_vS32b_nt_new_npred_ai_128B : T_vstore_new_pred_ai_128B + <"vS32b_ai", 1, 1>, + V6_vS32b_nt_new_npred_ai_128B_enc; +} + +//===----------------------------------------------------------------------===// +// Post increment vector loads with immediate offset. 
+//===----------------------------------------------------------------------===// +let addrMode = PostInc, hasNewValue = 1 in +class T_vload_pi<string asmStr, Operand ImmOp, RegisterClass RC> + : V6_LDInst <(outs RC:$dst, IntRegs:$_dst_), + (ins IntRegs:$src1, ImmOp:$src2), asmStr, [], + "$src1 = $_dst_">; + +let accessSize = Vector64Access in +class T_vload_pi_64B <string asmStr> + : T_vload_pi <asmStr, s3_6Imm, VectorRegs>; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vload_pi_128B <string asmStr> + : T_vload_pi <asmStr, s3_7Imm, VectorRegs128B>; + +let isCVLoadable = 1 in { + def V6_vL32b_pi : T_vload_pi_64B <"$dst = vmem($src1++#$src2)">, + V6_vL32b_pi_enc; + def V6_vL32b_nt_pi : T_vload_pi_64B <"$dst = vmem($src1++#$src2):nt">, + V6_vL32b_nt_pi_enc; + // 128B + def V6_vL32b_pi_128B : T_vload_pi_128B <"$dst = vmem($src1++#$src2)">, + V6_vL32b_pi_128B_enc; + def V6_vL32b_nt_pi_128B : T_vload_pi_128B <"$dst = vmem($src1++#$src2):nt">, + V6_vL32b_nt_pi_128B_enc; +} + +let Itinerary = CVI_VM_VP_LDU, Type = TypeCVI_VM_VP_LDU in { + def V6_vL32Ub_pi : T_vload_pi_64B <"$dst = vmemu($src1++#$src2)">, + V6_vL32Ub_pi_enc; + // 128B + def V6_vL32Ub_pi_128B : T_vload_pi_128B <"$dst = vmemu($src1++#$src2)">, + V6_vL32Ub_pi_128B_enc; +} + +let isCVLoad = 1, Itinerary = CVI_VM_LD, Type = TypeCVI_VM_LD in { + def V6_vL32b_cur_pi : T_vload_pi_64B <"$dst.cur = vmem($src1++#$src2)">, + V6_vL32b_cur_pi_enc; + def V6_vL32b_nt_cur_pi : T_vload_pi_64B <"$dst.cur = vmem($src1++#$src2):nt">, + V6_vL32b_nt_cur_pi_enc; + // 128B + def V6_vL32b_cur_pi_128B : T_vload_pi_128B + <"$dst.cur = vmem($src1++#$src2)">, + V6_vL32b_cur_pi_128B_enc; + def V6_vL32b_nt_cur_pi_128B : T_vload_pi_128B + <"$dst.cur = vmem($src1++#$src2):nt">, + V6_vL32b_nt_cur_pi_128B_enc; +} + +let Itinerary = CVI_VM_TMP_LD, Type = TypeCVI_VM_TMP_LD in { + def V6_vL32b_tmp_pi : T_vload_pi_64B <"$dst.tmp = vmem($src1++#$src2)">, + V6_vL32b_tmp_pi_enc; + def V6_vL32b_nt_tmp_pi : T_vload_pi_64B <"$dst.tmp = vmem($src1++#$src2):nt">, + V6_vL32b_nt_tmp_pi_enc; + //128B + def V6_vL32b_tmp_pi_128B : T_vload_pi_128B + <"$dst.tmp = vmem($src1++#$src2)">, + V6_vL32b_tmp_pi_128B_enc; + def V6_vL32b_nt_tmp_pi_128B : T_vload_pi_128B + <"$dst.tmp = vmem($src1++#$src2):nt">, + V6_vL32b_nt_tmp_pi_128B_enc; +} + +//===----------------------------------------------------------------------===// +// Post increment vector stores with immediate offset. 
+//===----------------------------------------------------------------------===// +let addrMode = PostInc in +class T_vstore_pi <string mnemonic, string baseOp, Operand ImmOp, + RegisterClass RC, bit isNT> + : V6_STInst <(outs IntRegs:$_dst_), + (ins IntRegs:$src1, ImmOp:$src2, RC:$src3), + mnemonic#"($src1++#$src2)"#!if(isNT, ":nt", "")#" = $src3", [], + "$src1 = $_dst_">, NewValueRel; + +let accessSize = Vector64Access in +class T_vstore_pi_64B <string mnemonic, string baseOp, bit isNT = 0> + : T_vstore_pi <mnemonic, baseOp, s3_6Imm, VectorRegs, isNT>; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vstore_pi_128B <string mnemonic, string baseOp, bit isNT = 0> + : T_vstore_pi <mnemonic, baseOp, s3_7Imm, VectorRegs128B, isNT>; + +let isNVStorable = 1 in { + def V6_vS32b_pi : T_vstore_pi_64B <"vmem", "vS32b_pi">, V6_vS32b_pi_enc; + def V6_vS32b_pi_128B : T_vstore_pi_128B <"vmem", "vS32b_pi">, + V6_vS32b_pi_128B_enc; +} + +let isNVStorable = 1 , isNonTemporal = 1 in { + def V6_vS32b_nt_pi : T_vstore_pi_64B <"vmem", "vS32b_pi", 1>, + V6_vS32b_nt_pi_enc; + def V6_vS32b_nt_pi_128B : T_vstore_pi_128B <"vmem", "vS32b_pi", 1>, + V6_vS32b_nt_pi_128B_enc; +} + + +let Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in { + def V6_vS32Ub_pi : T_vstore_pi_64B <"vmemu", "vS32Ub_pi">, + V6_vS32Ub_pi_enc; + def V6_vS32Ub_pi_128B : T_vstore_pi_128B <"vmemu", "vS32Ub_pi">, + V6_vS32Ub_pi_128B_enc; +} + +//===----------------------------------------------------------------------===// +// Post increment unconditional .new vector stores with immediate offset. +//===----------------------------------------------------------------------===// +let addrMode = PostInc, isNVStore = 1 in +let Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST, isNewValue = 1, + opNewValue = 3, isNVStore = 1 in +class T_vstore_new_pi <string baseOp, Operand ImmOp, RegisterClass RC, bit isNT> + : V6_STInst <(outs IntRegs:$_dst_), + (ins IntRegs:$src1, ImmOp:$src2, RC:$src3), + "vmem($src1++#$src2)"#!if(isNT, ":nt", "")#" = $src3.new", [], + "$src1 = $_dst_">, NewValueRel { + let BaseOpcode = baseOp; +} + +let accessSize = Vector64Access in +class T_vstore_new_pi_64B <string baseOp, bit isNT = 0> + : T_vstore_new_pi <baseOp, s3_6Imm, VectorRegs, isNT>; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vstore_new_pi_128B <string baseOp, bit isNT = 0> + : T_vstore_new_pi <baseOp#"128B", s3_7Imm, VectorRegs128B, isNT>; + + +def V6_vS32b_new_pi : T_vstore_new_pi_64B <"vS32b_pi">, + V6_vS32b_new_pi_enc; +def V6_vS32b_new_pi_128B : T_vstore_new_pi_128B <"vS32b_pi">, + V6_vS32b_new_pi_128B_enc; + +let isNonTemporal = 1 in { + def V6_vS32b_nt_new_pi : T_vstore_new_pi_64B <"vS32b_pi", 1>, + V6_vS32b_nt_new_pi_enc; + def V6_vS32b_nt_new_pi_128B : T_vstore_new_pi_128B <"vS32b_pi", 1>, + V6_vS32b_nt_new_pi_128B_enc; +} + +//===----------------------------------------------------------------------===// +// Post increment conditional vector stores with immediate offset +//===----------------------------------------------------------------------===// +let isPredicated = 1, addrMode = PostInc in +class T_vstore_pred_pi <string mnemonic, string baseOp, Operand ImmOp, + RegisterClass RC, bit isPredNot, bit isNT> + : V6_STInst<(outs IntRegs:$_dst_), + (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4), + "if ("#!if(isPredNot, "!", "")#"$src1) "#mnemonic#"($src2++#$src3)" + #!if(isNT, ":nt", "")#" = $src4", [], + "$src2 = $_dst_">, NewValueRel { + let isPredicatedFalse = isPredNot; + let BaseOpcode = baseOp; +} + 
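+// Illustrative syntax for the class above (registers and immediate chosen
+// arbitrarily):
+//   if (p0) vmem(r0++#1) = v1
+//   if (!p0) vmem(r0++#1):nt = v1
+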
+let accessSize = Vector64Access in +class T_vstore_pred_pi_64B <string mnemonic, string baseOp, + bit isPredNot = 0, bit isNT = 0> + : T_vstore_pred_pi <mnemonic, baseOp, s3_6Imm, VectorRegs, isPredNot, isNT>; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vstore_pred_pi_128B <string mnemonic, string baseOp, + bit isPredNot = 0, bit isNT = 0> + : T_vstore_pred_pi <mnemonic, baseOp#"128B", s3_7Imm, VectorRegs128B, + isPredNot, isNT>; + +let isNVStorable = 1 in { + def V6_vS32b_pred_pi : T_vstore_pred_pi_64B <"vmem", "vS32b_pi">, + V6_vS32b_pred_pi_enc; + def V6_vS32b_npred_pi : T_vstore_pred_pi_64B <"vmem", "vS32b_pi", 1>, + V6_vS32b_npred_pi_enc; + // 128B + def V6_vS32b_pred_pi_128B : T_vstore_pred_pi_128B <"vmem", "vS32b_pi">, + V6_vS32b_pred_pi_128B_enc; + def V6_vS32b_npred_pi_128B : T_vstore_pred_pi_128B <"vmem", "vS32b_pi", 1>, + V6_vS32b_npred_pi_128B_enc; +} +let isNVStorable = 1, isNonTemporal = 1 in { + def V6_vS32b_nt_pred_pi : T_vstore_pred_pi_64B <"vmem", "vS32b_pi", 0, 1>, + V6_vS32b_nt_pred_pi_enc; + def V6_vS32b_nt_npred_pi : T_vstore_pred_pi_64B <"vmem", "vS32b_pi", 1, 1>, + V6_vS32b_nt_npred_pi_enc; + // 128B + def V6_vS32b_nt_pred_pi_128B : T_vstore_pred_pi_128B + <"vmem", "vS32b_pi", 0, 1>, + V6_vS32b_nt_pred_pi_128B_enc; + def V6_vS32b_nt_npred_pi_128B : T_vstore_pred_pi_128B + <"vmem", "vS32b_pi", 1, 1>, + V6_vS32b_nt_npred_pi_128B_enc; +} + +let Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in { + def V6_vS32Ub_pred_pi : T_vstore_pred_pi_64B <"vmemu", "vS32Ub_pi">, + V6_vS32Ub_pred_pi_enc; + def V6_vS32Ub_npred_pi : T_vstore_pred_pi_64B <"vmemu", "vS32Ub_pi", 1>, + V6_vS32Ub_npred_pi_enc; + // 128B + def V6_vS32Ub_pred_pi_128B : T_vstore_pred_pi_128B <"vmemu", "vS32Ub_pi">, + V6_vS32Ub_pred_pi_128B_enc; + def V6_vS32Ub_npred_pi_128B : T_vstore_pred_pi_128B <"vmemu", "vS32Ub_pi", 1>, + V6_vS32Ub_npred_pi_128B_enc; +} + +//===----------------------------------------------------------------------===// +// Post increment vector stores with immediate offset - byte-enabled aligned +//===----------------------------------------------------------------------===// +let addrMode = PostInc in +class T_vstore_qpred_pi <Operand ImmOp, RegisterClass RC, bit isPredNot = 0, + bit isNT = 0> + : V6_STInst <(outs IntRegs:$_dst_), + (ins VecPredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4), + "if ("#!if(isPredNot, "!", "")#"$src1) vmem($src2++#$src3)" + #!if(isNT, ":nt", "")#" = $src4", [], + "$src2 = $_dst_">; + +let accessSize = Vector64Access in +class T_vstore_qpred_pi_64B <bit isPredNot = 0, bit isNT = 0> + : T_vstore_qpred_pi <s3_6Imm, VectorRegs, isPredNot, isNT>; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vstore_qpred_pi_128B <bit isPredNot = 0, bit isNT = 0> + : T_vstore_qpred_pi <s3_7Imm, VectorRegs128B, isPredNot, isNT>; + +def V6_vS32b_qpred_pi : T_vstore_qpred_pi_64B, V6_vS32b_qpred_pi_enc; +def V6_vS32b_nqpred_pi : T_vstore_qpred_pi_64B <1>, V6_vS32b_nqpred_pi_enc; +// 128B +def V6_vS32b_qpred_pi_128B : T_vstore_qpred_pi_128B, + V6_vS32b_qpred_pi_128B_enc; +def V6_vS32b_nqpred_pi_128B : T_vstore_qpred_pi_128B<1>, + V6_vS32b_nqpred_pi_128B_enc; + +let isNonTemporal = 1 in { + def V6_vS32b_nt_qpred_pi : T_vstore_qpred_pi_64B <0, 1>, + V6_vS32b_nt_qpred_pi_enc; + def V6_vS32b_nt_nqpred_pi : T_vstore_qpred_pi_64B <1, 1>, + V6_vS32b_nt_nqpred_pi_enc; + // 128B + def V6_vS32b_nt_qpred_pi_128B : T_vstore_qpred_pi_128B<0, 1>, + V6_vS32b_nt_qpred_pi_128B_enc; + def V6_vS32b_nt_nqpred_pi_128B : T_vstore_qpred_pi_128B<1, 1>, + 
V6_vS32b_nt_nqpred_pi_128B_enc; +} + +//===----------------------------------------------------------------------===// +// Post increment conditional .new vector stores with immediate offset +//===----------------------------------------------------------------------===// +let Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST, isPredicated = 1, + isNewValue = 1, opNewValue = 4, addrMode = PostInc, isNVStore = 1 in +class T_vstore_new_pred_pi <string baseOp, Operand ImmOp, RegisterClass RC, + bit isPredNot, bit isNT> + : V6_STInst <(outs IntRegs:$_dst_), + (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4), + "if("#!if(isPredNot, "!", "")#"$src1) vmem($src2++#$src3)" + #!if(isNT, ":nt", "")#" = $src4.new", [], + "$src2 = $_dst_"> , NewValueRel { + let isPredicatedFalse = isPredNot; + let BaseOpcode = baseOp; +} + +let accessSize = Vector64Access in +class T_vstore_new_pred_pi_64B <string baseOp, bit isPredNot = 0, bit isNT = 0> + : T_vstore_new_pred_pi <baseOp, s3_6Imm, VectorRegs, isPredNot, isNT>; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vstore_new_pred_pi_128B <string baseOp, bit isPredNot = 0, bit isNT = 0> + : T_vstore_new_pred_pi <baseOp#"128B", s3_7Imm, VectorRegs128B, + isPredNot, isNT>; + +def V6_vS32b_new_pred_pi : T_vstore_new_pred_pi_64B <"vS32b_pi">, + V6_vS32b_new_pred_pi_enc; +def V6_vS32b_new_npred_pi : T_vstore_new_pred_pi_64B <"vS32b_pi", 1>, + V6_vS32b_new_npred_pi_enc; +// 128B +def V6_vS32b_new_pred_pi_128B : T_vstore_new_pred_pi_128B <"vS32b_pi">, + V6_vS32b_new_pred_pi_128B_enc; +def V6_vS32b_new_npred_pi_128B : T_vstore_new_pred_pi_128B <"vS32b_pi", 1>, + V6_vS32b_new_npred_pi_128B_enc; +let isNonTemporal = 1 in { + def V6_vS32b_nt_new_pred_pi : T_vstore_new_pred_pi_64B <"vS32b_pi", 0, 1>, + V6_vS32b_nt_new_pred_pi_enc; + def V6_vS32b_nt_new_npred_pi : T_vstore_new_pred_pi_64B <"vS32b_pi", 1, 1>, + V6_vS32b_nt_new_npred_pi_enc; + // 128B + def V6_vS32b_nt_new_pred_pi_128B : T_vstore_new_pred_pi_128B + <"vS32b_pi", 0, 1>, + V6_vS32b_nt_new_pred_pi_128B_enc; + def V6_vS32b_nt_new_npred_pi_128B : T_vstore_new_pred_pi_128B + <"vS32b_pi", 1, 1>, + V6_vS32b_nt_new_npred_pi_128B_enc; +} + +//===----------------------------------------------------------------------===// +// Post increment vector loads with register offset +//===----------------------------------------------------------------------===// +let hasNewValue = 1 in +class T_vload_ppu<string asmStr> + : V6_LDInst <(outs VectorRegs:$dst, IntRegs:$_dst_), + (ins IntRegs:$src1, ModRegs:$src2), asmStr, [], + "$src1 = $_dst_">, NewValueRel; + +let isCVLoadable = 1 in { + def V6_vL32b_ppu : T_vload_ppu <"$dst = vmem($src1++$src2)">, + V6_vL32b_ppu_enc; + def V6_vL32b_nt_ppu : T_vload_ppu <"$dst = vmem($src1++$src2):nt">, + V6_vL32b_nt_ppu_enc; +} + +let Itinerary = CVI_VM_VP_LDU, Type = TypeCVI_VM_VP_LDU in +def V6_vL32Ub_ppu : T_vload_ppu <"$dst = vmemu($src1++$src2)">, + V6_vL32Ub_ppu_enc; + +let isCVLoad = 1, Itinerary = CVI_VM_CUR_LD, Type = TypeCVI_VM_CUR_LD in { + def V6_vL32b_cur_ppu : T_vload_ppu <"$dst.cur = vmem($src1++$src2)">, + V6_vL32b_cur_ppu_enc; + def V6_vL32b_nt_cur_ppu : T_vload_ppu <"$dst.cur = vmem($src1++$src2):nt">, + V6_vL32b_nt_cur_ppu_enc; +} + +let Itinerary = CVI_VM_TMP_LD, Type = TypeCVI_VM_TMP_LD in { + def V6_vL32b_tmp_ppu : T_vload_ppu <"$dst.tmp = vmem($src1++$src2)">, + V6_vL32b_tmp_ppu_enc; + def V6_vL32b_nt_tmp_ppu : T_vload_ppu <"$dst.tmp = vmem($src1++$src2):nt">, + V6_vL32b_nt_tmp_ppu_enc; +} + 
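+// Illustrative syntax for the register-offset post-increment loads above,
+// with arbitrary registers (m0 is a modifier register from ModRegs):
+//   v0 = vmem(r1++m0)
+//   v0.cur = vmem(r1++m0):nt
+//   v0.tmp = vmem(r1++m0)
+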
+//===----------------------------------------------------------------------===//
+// Post increment vector stores with register offset
+//===----------------------------------------------------------------------===//
+class T_vstore_ppu <string mnemonic, bit isNT = 0>
+  : V6_STInst <(outs IntRegs:$_dst_),
+        (ins IntRegs:$src1, ModRegs:$src2, VectorRegs:$src3),
+      mnemonic#"($src1++$src2)"#!if(isNT, ":nt", "")#" = $src3", [],
+      "$src1 = $_dst_">, NewValueRel;
+
+let isNVStorable = 1, BaseOpcode = "vS32b_ppu" in {
+  def V6_vS32b_ppu : T_vstore_ppu <"vmem">,
+                     V6_vS32b_ppu_enc;
+  let isNonTemporal = 1, BaseOpcode = "vS32b_ppu" in
+  def V6_vS32b_nt_ppu : T_vstore_ppu <"vmem", 1>,
+                        V6_vS32b_nt_ppu_enc;
+}
+
+let BaseOpcode = "vS32Ub_ppu", Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in
+def V6_vS32Ub_ppu : T_vstore_ppu <"vmemu">, V6_vS32Ub_ppu_enc;
+
+//===----------------------------------------------------------------------===//
+// Post increment .new vector stores with register offset
+//===----------------------------------------------------------------------===//
+let Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST, isNewValue = 1,
+    opNewValue = 3, isNVStore = 1 in
+class T_vstore_new_ppu <bit isNT = 0>
+  : V6_STInst <(outs IntRegs:$_dst_),
+        (ins IntRegs:$src1, ModRegs:$src2, VectorRegs:$src3),
+      "vmem($src1++$src2)"#!if(isNT, ":nt", "")#" = $src3.new", [],
+      "$src1 = $_dst_">, NewValueRel;
+
+let BaseOpcode = "vS32b_ppu" in
+def V6_vS32b_new_ppu : T_vstore_new_ppu, V6_vS32b_new_ppu_enc;
+
+let BaseOpcode = "vS32b_ppu", isNonTemporal = 1 in
+def V6_vS32b_nt_new_ppu : T_vstore_new_ppu<1>, V6_vS32b_nt_new_ppu_enc;
+
+//===----------------------------------------------------------------------===//
+// Post increment conditional vector stores with register offset
+//===----------------------------------------------------------------------===//
+let isPredicated = 1 in
+class T_vstore_pred_ppu <string mnemonic, bit isPredNot = 0, bit isNT = 0>
+ : V6_STInst<(outs IntRegs:$_dst_),
+        (ins PredRegs:$src1, IntRegs:$src2, ModRegs:$src3, VectorRegs:$src4),
+      "if ("#!if(isPredNot, "!", "")#"$src1) "#mnemonic#"($src2++$src3)"
+        #!if(isNT, ":nt", "")#" = $src4", [],
+      "$src2 = $_dst_">, NewValueRel {
+  let isPredicatedFalse = isPredNot;
+}
+
+let isNVStorable = 1, BaseOpcode = "vS32b_ppu" in {
+  def V6_vS32b_pred_ppu : T_vstore_pred_ppu<"vmem">, V6_vS32b_pred_ppu_enc;
+  def V6_vS32b_npred_ppu: T_vstore_pred_ppu<"vmem", 1>, V6_vS32b_npred_ppu_enc;
+}
+
+let isNVStorable = 1, BaseOpcode = "vS32b_ppu", isNonTemporal = 1 in {
+  def V6_vS32b_nt_pred_ppu : T_vstore_pred_ppu <"vmem", 0, 1>,
+                             V6_vS32b_nt_pred_ppu_enc;
+  def V6_vS32b_nt_npred_ppu : T_vstore_pred_ppu <"vmem", 1, 1>,
+                              V6_vS32b_nt_npred_ppu_enc;
+}
+
+let BaseOpcode = "vS32Ub_ppu", Itinerary = CVI_VM_STU,
+    Type = TypeCVI_VM_STU in {
+  def V6_vS32Ub_pred_ppu : T_vstore_pred_ppu <"vmemu">,
+                           V6_vS32Ub_pred_ppu_enc;
+  def V6_vS32Ub_npred_ppu : T_vstore_pred_ppu <"vmemu", 1>,
+                            V6_vS32Ub_npred_ppu_enc;
+}
+
+//===----------------------------------------------------------------------===//
+// Post increment vector stores with register offset - byte-enabled aligned
+//===----------------------------------------------------------------------===//
+class T_vstore_qpred_ppu <bit isPredNot = 0, bit isNT = 0>
+  : V6_STInst <(outs IntRegs:$_dst_),
+        (ins VecPredRegs:$src1, IntRegs:$src2, ModRegs:$src3, VectorRegs:$src4),
+      "if ("#!if(isPredNot, "!", "")#"$src1) vmem($src2++$src3)"
+        #!if(isNT, ":nt", "")#" = $src4", [],
+      "$src2 = $_dst_">,
NewValueRel; + +def V6_vS32b_qpred_ppu : T_vstore_qpred_ppu, V6_vS32b_qpred_ppu_enc; +def V6_vS32b_nqpred_ppu : T_vstore_qpred_ppu<1>, V6_vS32b_nqpred_ppu_enc; +def V6_vS32b_nt_qpred_ppu : T_vstore_qpred_ppu<0, 1>, + V6_vS32b_nt_qpred_ppu_enc; +def V6_vS32b_nt_nqpred_ppu : T_vstore_qpred_ppu<1, 1>, + V6_vS32b_nt_nqpred_ppu_enc; + +//===----------------------------------------------------------------------===// +// Post increment conditional .new vector stores with register offset +//===----------------------------------------------------------------------===// +let Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST, isPredicated = 1, + isNewValue = 1, opNewValue = 4, isNVStore = 1 in +class T_vstore_new_pred_ppu <bit isPredNot = 0, bit isNT = 0> + : V6_STInst <(outs IntRegs:$_dst_), + (ins PredRegs:$src1, IntRegs:$src2, ModRegs:$src3, VectorRegs:$src4), + "if("#!if(isPredNot, "!", "")#"$src1) vmem($src2++$src3)" + #!if(isNT, ":nt", "")#" = $src4.new", [], + "$src2 = $_dst_">, NewValueRel { + let isPredicatedFalse = isPredNot; +} + +let BaseOpcode = "vS32b_ppu" in { + def V6_vS32b_new_pred_ppu : T_vstore_new_pred_ppu, + V6_vS32b_new_pred_ppu_enc; + def V6_vS32b_new_npred_ppu : T_vstore_new_pred_ppu<1>, + V6_vS32b_new_npred_ppu_enc; +} + +let BaseOpcode = "vS32b_ppu", isNonTemporal = 1 in { +def V6_vS32b_nt_new_pred_ppu : T_vstore_new_pred_ppu<0, 1>, + V6_vS32b_nt_new_pred_ppu_enc; +def V6_vS32b_nt_new_npred_ppu : T_vstore_new_pred_ppu<1, 1>, + V6_vS32b_nt_new_npred_ppu_enc; +} + +let isPseudo = 1, validSubTargets = HasV60SubT in +class STrivv_template<string mnemonic, Operand ImmOp, RegisterClass RC>: + VSTInst<(outs), (ins IntRegs:$addr, ImmOp:$off, RC:$src), + #mnemonic#"($addr+#$off) = $src", []>; + +def STrivv_indexed: STrivv_template<"vvmem", s4_6Imm, VecDblRegs>, + Requires<[HasV60T, UseHVXSgl]>; +def STrivv_indexed_128B: STrivv_template<"vvmem", s4_7Imm, VecDblRegs128B>, + Requires<[HasV60T, UseHVXDbl]>; + +multiclass STrivv_pats <ValueType VTSgl, ValueType VTDbl> { + def : Pat<(store (VTSgl VecDblRegs:$src1), IntRegs:$addr), + (STrivv_indexed IntRegs:$addr, #0, (VTSgl VecDblRegs:$src1))>, + Requires<[UseHVXSgl]>; + + def : Pat<(store (VTDbl VecDblRegs128B:$src1), IntRegs:$addr), + (STrivv_indexed_128B IntRegs:$addr, #0, + (VTDbl VecDblRegs128B:$src1))>, + Requires<[UseHVXDbl]>; +} + +defm : STrivv_pats <v128i8, v256i8>; +defm : STrivv_pats <v64i16, v128i16>; +defm : STrivv_pats <v32i32, v64i32>; +defm : STrivv_pats <v16i64, v32i64>; + + +multiclass vS32b_ai_pats <ValueType VTSgl, ValueType VTDbl> { + // Aligned stores + def : Pat<(store (VTSgl VectorRegs:$src1), IntRegs:$addr), + (V6_vS32b_ai IntRegs:$addr, #0, (VTSgl VectorRegs:$src1))>, + Requires<[UseHVXSgl]>; + + // 128B Aligned stores + def : Pat<(store (VTDbl VectorRegs128B:$src1), IntRegs:$addr), + (V6_vS32b_ai_128B IntRegs:$addr, #0, (VTDbl VectorRegs128B:$src1))>, + Requires<[UseHVXDbl]>; + + // Fold Add R+IFF into vector store. + let AddedComplexity = 10 in + def : Pat<(store (VTSgl VectorRegs:$src1), + (add IntRegs:$src2, s4_6ImmPred:$offset)), + (V6_vS32b_ai IntRegs:$src2, s4_6ImmPred:$offset, + (VTSgl VectorRegs:$src1))>, + Requires<[UseHVXSgl]>; + + // Fold Add R+IFF into vector store 128B. 
+ let AddedComplexity = 10 in + def : Pat<(store (VTDbl VectorRegs128B:$src1), + (add IntRegs:$src2, s4_7ImmPred:$offset)), + (V6_vS32b_ai_128B IntRegs:$src2, s4_7ImmPred:$offset, + (VTDbl VectorRegs128B:$src1))>, + Requires<[UseHVXDbl]>; +} + +defm : vS32b_ai_pats <v64i8, v128i8>; +defm : vS32b_ai_pats <v32i16, v64i16>; +defm : vS32b_ai_pats <v16i32, v32i32>; +defm : vS32b_ai_pats <v8i64, v16i64>; + +let isPseudo = 1, validSubTargets = HasV60SubT in +class LDrivv_template<string mnemonic, Operand ImmOp, RegisterClass RC> + : V6_LDInst <(outs RC:$dst), (ins IntRegs:$addr, ImmOp:$off), + "$dst="#mnemonic#"($addr+#$off)", + []>, + Requires<[HasV60T,UseHVXSgl]>; + +def LDrivv_indexed: LDrivv_template<"vvmem", s4_6Imm, VecDblRegs>; +def LDrivv_indexed_128B: LDrivv_template<"vvmem", s4_7Imm, VecDblRegs128B>; + +multiclass LDrivv_pats <ValueType VTSgl, ValueType VTDbl> { + def : Pat < (VTSgl (load IntRegs:$addr)), + (LDrivv_indexed IntRegs:$addr, #0) >, + Requires<[UseHVXSgl]>; + + def : Pat < (VTDbl (load IntRegs:$addr)), + (LDrivv_indexed_128B IntRegs:$addr, #0) >, + Requires<[UseHVXDbl]>; +} + +defm : LDrivv_pats <v128i8, v256i8>; +defm : LDrivv_pats <v64i16, v128i16>; +defm : LDrivv_pats <v32i32, v64i32>; +defm : LDrivv_pats <v16i64, v32i64>; + +multiclass vL32b_ai_pats <ValueType VTSgl, ValueType VTDbl> { + // Aligned loads + def : Pat < (VTSgl (load IntRegs:$addr)), + (V6_vL32b_ai IntRegs:$addr, #0) >, + Requires<[UseHVXSgl]>; + + // 128B Load + def : Pat < (VTDbl (load IntRegs:$addr)), + (V6_vL32b_ai_128B IntRegs:$addr, #0) >, + Requires<[UseHVXDbl]>; + + // Fold Add R+IFF into vector load. + let AddedComplexity = 10 in + def : Pat<(VTDbl (load (add IntRegs:$src2, s4_7ImmPred:$offset))), + (V6_vL32b_ai_128B IntRegs:$src2, s4_7ImmPred:$offset)>, + Requires<[UseHVXDbl]>; + + let AddedComplexity = 10 in + def : Pat<(VTSgl (load (add IntRegs:$src2, s4_6ImmPred:$offset))), + (V6_vL32b_ai IntRegs:$src2, s4_6ImmPred:$offset)>, + Requires<[UseHVXSgl]>; +} + +defm : vL32b_ai_pats <v64i8, v128i8>; +defm : vL32b_ai_pats <v32i16, v64i16>; +defm : vL32b_ai_pats <v16i32, v32i32>; +defm : vL32b_ai_pats <v8i64, v16i64>; + +// Store vector predicate pseudo. +let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13, + isCodeGenOnly = 1, isPseudo = 1, mayStore = 1, hasSideEffects = 0 in { +def STriq_pred_V6 : STInst<(outs), + (ins IntRegs:$base, s32Imm:$offset, VecPredRegs:$src1), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXSgl]>; + +def STriq_pred_vec_V6 : STInst<(outs), + (ins IntRegs:$base, s32Imm:$offset, VectorRegs:$src1), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXSgl]>; + +def STriq_pred_V6_128B : STInst<(outs), + (ins IntRegs:$base, s32Imm:$offset, VecPredRegs128B:$src1), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXDbl]>; + +def STriq_pred_vec_V6_128B : STInst<(outs), + (ins IntRegs:$base, s32Imm:$offset, VectorRegs128B:$src1), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXDbl]>; +} + +// Load vector predicate pseudo. 
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13, + opExtentAlign = 2, isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in { +def LDriq_pred_V6 : LDInst<(outs VecPredRegs:$dst), + (ins IntRegs:$base, s32Imm:$offset), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXSgl]>; +def LDriq_pred_vec_V6 : LDInst<(outs VectorRegs:$dst), + (ins IntRegs:$base, s32Imm:$offset), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXSgl]>; +def LDriq_pred_V6_128B : LDInst<(outs VecPredRegs128B:$dst), + (ins IntRegs:$base, s32Imm:$offset), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXDbl]>; +def LDriq_pred_vec_V6_128B : LDInst<(outs VectorRegs128B:$dst), + (ins IntRegs:$base, s32Imm:$offset), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXDbl]>; +} + +// Store vector pseudo. +let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13, + isCodeGenOnly = 1, isPseudo = 1, mayStore = 1, hasSideEffects = 0 in { +def STriv_pseudo_V6 : STInst<(outs), + (ins IntRegs:$base, s32Imm:$offset, VectorRegs:$src1), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXSgl]>; +def STriv_pseudo_V6_128B : STInst<(outs), + (ins IntRegs:$base, s32Imm:$offset, VectorRegs128B:$src1), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXDbl]>; +} + +let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13, + isCodeGenOnly = 1, isPseudo = 1, mayStore = 1, hasSideEffects = 0 in { +def STrivv_pseudo_V6 : STInst<(outs), + (ins IntRegs:$base, s32Imm:$offset, VecDblRegs:$src1), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXSgl]>; +def STrivv_pseudo_V6_128B : STInst<(outs), + (ins IntRegs:$base, s32Imm:$offset, VecDblRegs128B:$src1), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXDbl]>; +} + +// Load vector pseudo. 
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13, + opExtentAlign = 2, isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in { +def LDriv_pseudo_V6 : LDInst<(outs VectorRegs:$dst), + (ins IntRegs:$base, s32Imm:$offset), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXSgl]>; +def LDriv_pseudo_V6_128B : LDInst<(outs VectorRegs128B:$dst), + (ins IntRegs:$base, s32Imm:$offset), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXDbl]>; +} + +let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13, + opExtentAlign = 2, isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in { +def LDrivv_pseudo_V6 : LDInst<(outs VecDblRegs:$dst), + (ins IntRegs:$base, s32Imm:$offset), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXSgl]>; +def LDrivv_pseudo_V6_128B : LDInst<(outs VecDblRegs128B:$dst), + (ins IntRegs:$base, s32Imm:$offset), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXDbl]>; +} + +class VSELInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = CVI_VA_DV, + IType type = TypeCVI_VA_DV> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, type>; + +let isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in { +def VSelectPseudo_V6 : VSELInst<(outs VectorRegs:$dst), + (ins PredRegs:$src1, VectorRegs:$src2, VectorRegs:$src3), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXSgl]>; +def VSelectDblPseudo_V6 : VSELInst<(outs VecDblRegs:$dst), + (ins PredRegs:$src1, VecDblRegs:$src2, VecDblRegs:$src3), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXSgl]>; +} + +def : Pat <(v16i32 (selectcc (i32 IntRegs:$lhs), (i32 IntRegs:$rhs), + (v16i32 VectorRegs:$tval), + (v16i32 VectorRegs:$fval), SETEQ)), + (v16i32 (VSelectPseudo_V6 (i32 (C2_cmpeq (i32 IntRegs:$lhs), + (i32 IntRegs:$rhs))), + (v16i32 VectorRegs:$tval), + (v16i32 VectorRegs:$fval)))>; + + +let hasNewValue = 1 in +class T_vmpy <string asmString, RegisterClass RCout, RegisterClass RCin> + : CVI_VX_DV_Resource1<(outs RCout:$dst), (ins RCin:$src1, IntRegs:$src2), + asmString >; + +multiclass T_vmpy <string asmString, RegisterClass RCout, + RegisterClass RCin> { + def NAME : T_vmpy <asmString, RCout, RCin>; + let isCodeGenOnly = 1 in + def NAME#_128B : T_vmpy <asmString, !cast<RegisterClass>(RCout#"128B"), + !cast<RegisterClass>(RCin#"128B")>; +} + +multiclass T_vmpy_VV <string asmString>: + T_vmpy <asmString, VectorRegs, VectorRegs>; + +multiclass T_vmpy_WW <string asmString>: + T_vmpy <asmString, VecDblRegs, VecDblRegs>; + +multiclass T_vmpy_VW <string asmString>: + T_vmpy <asmString, VectorRegs, VecDblRegs>; + +multiclass T_vmpy_WV <string asmString>: + T_vmpy <asmString, VecDblRegs, VectorRegs>; + +defm V6_vtmpyb :T_vmpy_WW<"$dst.h = vtmpy($src1.b,$src2.b)">, V6_vtmpyb_enc; +defm V6_vtmpybus :T_vmpy_WW<"$dst.h = vtmpy($src1.ub,$src2.b)">, V6_vtmpybus_enc; +defm V6_vdsaduh :T_vmpy_WW<"$dst.uw = vdsad($src1.uh,$src2.uh)">, V6_vdsaduh_enc; +defm V6_vmpybus :T_vmpy_WV<"$dst.h = vmpy($src1.ub,$src2.b)">, V6_vmpybus_enc; +defm V6_vmpabus :T_vmpy_WW<"$dst.h = vmpa($src1.ub,$src2.b)">, V6_vmpabus_enc; +defm V6_vmpahb :T_vmpy_WW<"$dst.w = vmpa($src1.h,$src2.b)">, V6_vmpahb_enc; +defm V6_vmpyh :T_vmpy_WV<"$dst.w = vmpy($src1.h,$src2.h)">, V6_vmpyh_enc; +defm V6_vmpyuh :T_vmpy_WV<"$dst.uw = vmpy($src1.uh,$src2.uh)">, V6_vmpyuh_enc; +defm V6_vmpyiwh :T_vmpy_VV<"$dst.w = vmpyi($src1.w,$src2.h)">, V6_vmpyiwh_enc; +defm V6_vtmpyhb :T_vmpy_WW<"$dst.w = 
vtmpy($src1.h,$src2.b)">, V6_vtmpyhb_enc; +defm V6_vmpyub :T_vmpy_WV<"$dst.uh = vmpy($src1.ub,$src2.ub)">, V6_vmpyub_enc; + +let Itinerary = CVI_VX_LONG, Type = TypeCVI_VX in +defm V6_vmpyihb :T_vmpy_VV<"$dst.h = vmpyi($src1.h,$src2.b)">, V6_vmpyihb_enc; + +defm V6_vdmpybus_dv : + T_vmpy_WW <"$dst.h = vdmpy($src1.ub,$src2.b)">, V6_vdmpybus_dv_enc; +defm V6_vdmpyhsusat : + T_vmpy_VV <"$dst.w = vdmpy($src1.h,$src2.uh):sat">, V6_vdmpyhsusat_enc; +defm V6_vdmpyhsuisat : + T_vmpy_VW <"$dst.w = vdmpy($src1.h,$src2.uh,#1):sat">, V6_vdmpyhsuisat_enc; +defm V6_vdmpyhsat : + T_vmpy_VV <"$dst.w = vdmpy($src1.h,$src2.h):sat">, V6_vdmpyhsat_enc; +defm V6_vdmpyhisat : + T_vmpy_VW <"$dst.w = vdmpy($src1.h,$src2.h):sat">, V6_vdmpyhisat_enc; +defm V6_vdmpyhb_dv : + T_vmpy_WW <"$dst.w = vdmpy($src1.h,$src2.b)">, V6_vdmpyhb_dv_enc; +defm V6_vmpyhss : + T_vmpy_VV <"$dst.h = vmpy($src1.h,$src2.h):<<1:sat">, V6_vmpyhss_enc; +defm V6_vmpyhsrs : + T_vmpy_VV <"$dst.h = vmpy($src1.h,$src2.h):<<1:rnd:sat">, V6_vmpyhsrs_enc; + +let Itinerary = CVI_VP, Type = TypeCVI_VP in +defm V6_vror : T_vmpy_VV <"$dst = vror($src1,$src2)">, V6_vror_enc; + +let Itinerary = CVI_VX, Type = TypeCVI_VX in { +defm V6_vdmpyhb : T_vmpy_VV<"$dst.w = vdmpy($src1.h,$src2.b)">, V6_vdmpyhb_enc; +defm V6_vrmpybus : T_vmpy_VV<"$dst.w = vrmpy($src1.ub,$src2.b)">, V6_vrmpybus_enc; +defm V6_vdmpybus : T_vmpy_VV<"$dst.h = vdmpy($src1.ub,$src2.b)">, V6_vdmpybus_enc; +defm V6_vmpyiwb : T_vmpy_VV<"$dst.w = vmpyi($src1.w,$src2.b)">, V6_vmpyiwb_enc; +defm V6_vrmpyub : T_vmpy_VV<"$dst.uw = vrmpy($src1.ub,$src2.ub)">, V6_vrmpyub_enc; +} + +let Itinerary = CVI_VS, Type = TypeCVI_VS in { +defm V6_vasrw : T_vmpy_VV <"$dst.w = vasr($src1.w,$src2)">, V6_vasrw_enc; +defm V6_vasrh : T_vmpy_VV <"$dst.h = vasr($src1.h,$src2)">, V6_vasrh_enc; +defm V6_vaslw : T_vmpy_VV <"$dst.w = vasl($src1.w,$src2)">, V6_vaslw_enc; +defm V6_vaslh : T_vmpy_VV <"$dst.h = vasl($src1.h,$src2)">, V6_vaslh_enc; +defm V6_vlsrw : T_vmpy_VV <"$dst.uw = vlsr($src1.uw,$src2)">, V6_vlsrw_enc; +defm V6_vlsrh : T_vmpy_VV <"$dst.uh = vlsr($src1.uh,$src2)">, V6_vlsrh_enc; +} + +let hasNewValue = 1 in +class T_HVX_alu <string asmString, InstrItinClass itin, + RegisterClass RCout, RegisterClass RCin> + : CVI_VA_Resource1 <(outs RCout:$dst), (ins RCin:$src1, RCin:$src2), + asmString >{ + let Itinerary = itin; + let Type = !cast<IType>("Type"#itin); +} + +multiclass T_HVX_alu <string asmString, RegisterClass RCout, + RegisterClass RCin, InstrItinClass itin> { + def NAME : T_HVX_alu <asmString, itin, RCout, RCin>; + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_alu <asmString, itin, + !cast<RegisterClass>(RCout#"128B"), + !cast<RegisterClass>(RCin#"128B")>; +} + +multiclass T_HVX_alu_VV <string asmString>: + T_HVX_alu <asmString, VectorRegs, VectorRegs, CVI_VA>; + +multiclass T_HVX_alu_WW <string asmString>: + T_HVX_alu <asmString, VecDblRegs, VecDblRegs, CVI_VA_DV>; + +multiclass T_HVX_alu_WV <string asmString>: + T_HVX_alu <asmString, VecDblRegs, VectorRegs, CVI_VX_DV>; + + +let Itinerary = CVI_VX, Type = TypeCVI_VX in { +defm V6_vrmpyubv : + T_HVX_alu_VV <"$dst.uw = vrmpy($src1.ub,$src2.ub)">, V6_vrmpyubv_enc; +defm V6_vrmpybv : + T_HVX_alu_VV <"$dst.w = vrmpy($src1.b,$src2.b)">, V6_vrmpybv_enc; +defm V6_vrmpybusv : + T_HVX_alu_VV <"$dst.w = vrmpy($src1.ub,$src2.b)">, V6_vrmpybusv_enc; +defm V6_vabsdiffub : + T_HVX_alu_VV <"$dst.ub = vabsdiff($src1.ub,$src2.ub)">, V6_vabsdiffub_enc; +defm V6_vabsdiffh : + T_HVX_alu_VV <"$dst.uh = vabsdiff($src1.h,$src2.h)">, V6_vabsdiffh_enc; +defm V6_vabsdiffuh 
: + T_HVX_alu_VV <"$dst.uh = vabsdiff($src1.uh,$src2.uh)">, V6_vabsdiffuh_enc; +defm V6_vabsdiffw : + T_HVX_alu_VV <"$dst.uw = vabsdiff($src1.w,$src2.w)">, V6_vabsdiffw_enc; +} + +let Itinerary = CVI_VX_DV, Type = TypeCVI_VX_DV in { +defm V6_vdmpyhvsat : + T_HVX_alu_VV <"$dst.w = vdmpy($src1.h,$src2.h):sat">, V6_vdmpyhvsat_enc; +defm V6_vmpyhvsrs : + T_HVX_alu_VV<"$dst.h = vmpy($src1.h,$src2.h):<<1:rnd:sat">, V6_vmpyhvsrs_enc; +defm V6_vmpyih : + T_HVX_alu_VV <"$dst.h = vmpyi($src1.h,$src2.h)">, V6_vmpyih_enc; +} + +defm V6_vand : + T_HVX_alu_VV <"$dst = vand($src1,$src2)">, V6_vand_enc; +defm V6_vor : + T_HVX_alu_VV <"$dst = vor($src1,$src2)">, V6_vor_enc; +defm V6_vxor : + T_HVX_alu_VV <"$dst = vxor($src1,$src2)">, V6_vxor_enc; +defm V6_vaddw : + T_HVX_alu_VV <"$dst.w = vadd($src1.w,$src2.w)">, V6_vaddw_enc; +defm V6_vaddubsat : + T_HVX_alu_VV <"$dst.ub = vadd($src1.ub,$src2.ub):sat">, V6_vaddubsat_enc; +defm V6_vadduhsat : + T_HVX_alu_VV <"$dst.uh = vadd($src1.uh,$src2.uh):sat">, V6_vadduhsat_enc; +defm V6_vaddhsat : + T_HVX_alu_VV <"$dst.h = vadd($src1.h,$src2.h):sat">, V6_vaddhsat_enc; +defm V6_vaddwsat : + T_HVX_alu_VV <"$dst.w = vadd($src1.w,$src2.w):sat">, V6_vaddwsat_enc; +defm V6_vsubb : + T_HVX_alu_VV <"$dst.b = vsub($src1.b,$src2.b)">, V6_vsubb_enc; +defm V6_vsubh : + T_HVX_alu_VV <"$dst.h = vsub($src1.h,$src2.h)">, V6_vsubh_enc; +defm V6_vsubw : + T_HVX_alu_VV <"$dst.w = vsub($src1.w,$src2.w)">, V6_vsubw_enc; +defm V6_vsububsat : + T_HVX_alu_VV <"$dst.ub = vsub($src1.ub,$src2.ub):sat">, V6_vsububsat_enc; +defm V6_vsubuhsat : + T_HVX_alu_VV <"$dst.uh = vsub($src1.uh,$src2.uh):sat">, V6_vsubuhsat_enc; +defm V6_vsubhsat : + T_HVX_alu_VV <"$dst.h = vsub($src1.h,$src2.h):sat">, V6_vsubhsat_enc; +defm V6_vsubwsat : + T_HVX_alu_VV <"$dst.w = vsub($src1.w,$src2.w):sat">, V6_vsubwsat_enc; +defm V6_vavgub : + T_HVX_alu_VV <"$dst.ub = vavg($src1.ub,$src2.ub)">, V6_vavgub_enc; +defm V6_vavguh : + T_HVX_alu_VV <"$dst.uh = vavg($src1.uh,$src2.uh)">, V6_vavguh_enc; +defm V6_vavgh : + T_HVX_alu_VV <"$dst.h = vavg($src1.h,$src2.h)">, V6_vavgh_enc; +defm V6_vavgw : + T_HVX_alu_VV <"$dst.w = vavg($src1.w,$src2.w)">, V6_vavgw_enc; +defm V6_vnavgub : + T_HVX_alu_VV <"$dst.b = vnavg($src1.ub,$src2.ub)">, V6_vnavgub_enc; +defm V6_vnavgh : + T_HVX_alu_VV <"$dst.h = vnavg($src1.h,$src2.h)">, V6_vnavgh_enc; +defm V6_vnavgw : + T_HVX_alu_VV <"$dst.w = vnavg($src1.w,$src2.w)">, V6_vnavgw_enc; +defm V6_vavgubrnd : + T_HVX_alu_VV <"$dst.ub = vavg($src1.ub,$src2.ub):rnd">, V6_vavgubrnd_enc; +defm V6_vavguhrnd : + T_HVX_alu_VV <"$dst.uh = vavg($src1.uh,$src2.uh):rnd">, V6_vavguhrnd_enc; +defm V6_vavghrnd : + T_HVX_alu_VV <"$dst.h = vavg($src1.h,$src2.h):rnd">, V6_vavghrnd_enc; +defm V6_vavgwrnd : + T_HVX_alu_VV <"$dst.w = vavg($src1.w,$src2.w):rnd">, V6_vavgwrnd_enc; + +defm V6_vmpybv : + T_HVX_alu_WV <"$dst.h = vmpy($src1.b,$src2.b)">, V6_vmpybv_enc; +defm V6_vmpyubv : + T_HVX_alu_WV <"$dst.uh = vmpy($src1.ub,$src2.ub)">, V6_vmpyubv_enc; +defm V6_vmpybusv : + T_HVX_alu_WV <"$dst.h = vmpy($src1.ub,$src2.b)">, V6_vmpybusv_enc; +defm V6_vmpyhv : + T_HVX_alu_WV <"$dst.w = vmpy($src1.h,$src2.h)">, V6_vmpyhv_enc; +defm V6_vmpyuhv : + T_HVX_alu_WV <"$dst.uw = vmpy($src1.uh,$src2.uh)">, V6_vmpyuhv_enc; +defm V6_vmpyhus : + T_HVX_alu_WV <"$dst.w = vmpy($src1.h,$src2.uh)">, V6_vmpyhus_enc; +defm V6_vaddubh : + T_HVX_alu_WV <"$dst.h = vadd($src1.ub,$src2.ub)">, V6_vaddubh_enc; +defm V6_vadduhw : + T_HVX_alu_WV <"$dst.w = vadd($src1.uh,$src2.uh)">, V6_vadduhw_enc; +defm V6_vaddhw : + T_HVX_alu_WV <"$dst.w = 
vadd($src1.h,$src2.h)">, V6_vaddhw_enc; +defm V6_vsububh : + T_HVX_alu_WV <"$dst.h = vsub($src1.ub,$src2.ub)">, V6_vsububh_enc; +defm V6_vsubuhw : + T_HVX_alu_WV <"$dst.w = vsub($src1.uh,$src2.uh)">, V6_vsubuhw_enc; +defm V6_vsubhw : + T_HVX_alu_WV <"$dst.w = vsub($src1.h,$src2.h)">, V6_vsubhw_enc; + +defm V6_vaddb_dv : + T_HVX_alu_WW <"$dst.b = vadd($src1.b,$src2.b)">, V6_vaddb_dv_enc; +defm V6_vaddh_dv : + T_HVX_alu_WW <"$dst.h = vadd($src1.h,$src2.h)">, V6_vaddh_dv_enc; +defm V6_vaddw_dv : + T_HVX_alu_WW <"$dst.w = vadd($src1.w,$src2.w)">, V6_vaddw_dv_enc; +defm V6_vaddubsat_dv : + T_HVX_alu_WW <"$dst.ub = vadd($src1.ub,$src2.ub):sat">, V6_vaddubsat_dv_enc; +defm V6_vadduhsat_dv : + T_HVX_alu_WW <"$dst.uh = vadd($src1.uh,$src2.uh):sat">, V6_vadduhsat_dv_enc; +defm V6_vaddhsat_dv : + T_HVX_alu_WW <"$dst.h = vadd($src1.h,$src2.h):sat">, V6_vaddhsat_dv_enc; +defm V6_vaddwsat_dv : + T_HVX_alu_WW <"$dst.w = vadd($src1.w,$src2.w):sat">, V6_vaddwsat_dv_enc; +defm V6_vsubb_dv : + T_HVX_alu_WW <"$dst.b = vsub($src1.b,$src2.b)">, V6_vsubb_dv_enc; +defm V6_vsubh_dv : + T_HVX_alu_WW <"$dst.h = vsub($src1.h,$src2.h)">, V6_vsubh_dv_enc; +defm V6_vsubw_dv : + T_HVX_alu_WW <"$dst.w = vsub($src1.w,$src2.w)">, V6_vsubw_dv_enc; +defm V6_vsububsat_dv : + T_HVX_alu_WW <"$dst.ub = vsub($src1.ub,$src2.ub):sat">, V6_vsububsat_dv_enc; +defm V6_vsubuhsat_dv : + T_HVX_alu_WW <"$dst.uh = vsub($src1.uh,$src2.uh):sat">, V6_vsubuhsat_dv_enc; +defm V6_vsubhsat_dv : + T_HVX_alu_WW <"$dst.h = vsub($src1.h,$src2.h):sat">, V6_vsubhsat_dv_enc; +defm V6_vsubwsat_dv : + T_HVX_alu_WW <"$dst.w = vsub($src1.w,$src2.w):sat">, V6_vsubwsat_dv_enc; + +let Itinerary = CVI_VX_DV_LONG, Type = TypeCVI_VX_DV in { +defm V6_vmpabusv : + T_HVX_alu_WW <"$dst.h = vmpa($src1.ub,$src2.b)">, V6_vmpabusv_enc; +defm V6_vmpabuuv : + T_HVX_alu_WW <"$dst.h = vmpa($src1.ub,$src2.ub)">, V6_vmpabuuv_enc; +} + +let isAccumulator = 1, hasNewValue = 1 in +class T_HVX_vmpyacc <string asmString, InstrItinClass itin, RegisterClass RCout, + RegisterClass RCin1, RegisterClass RCin2> + : CVI_VA_Resource1 <(outs RCout:$dst), + (ins RCout:$_src_, RCin1:$src1, RCin2:$src2), asmString, + [], "$dst = $_src_" > { + let Itinerary = itin; + let Type = !cast<IType>("Type"#itin); +} + +multiclass T_HVX_vmpyacc_both <string asmString, RegisterClass RCout, + RegisterClass RCin1, RegisterClass RCin2, InstrItinClass itin > { + def NAME : T_HVX_vmpyacc <asmString, itin, RCout, RCin1, RCin2>; + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_vmpyacc <asmString, itin, + !cast<RegisterClass>(RCout#"128B"), + !cast<RegisterClass>(RCin1#"128B"), + !cast<RegisterClass>(RCin2# + !if(!eq (!cast<string>(RCin2), "IntRegs"), "", "128B"))>; +} + +multiclass T_HVX_vmpyacc_VVR <string asmString>: + T_HVX_vmpyacc_both <asmString, VectorRegs, VectorRegs, IntRegs, CVI_VX>; + +multiclass T_HVX_vmpyacc_VWR <string asmString>: + T_HVX_vmpyacc_both <asmString, VectorRegs, VecDblRegs, IntRegs, CVI_VX_DV>; + +multiclass T_HVX_vmpyacc_WVR <string asmString>: + T_HVX_vmpyacc_both <asmString, VecDblRegs, VectorRegs, IntRegs, CVI_VX_DV>; + +multiclass T_HVX_vmpyacc_WWR <string asmString>: + T_HVX_vmpyacc_both <asmString, VecDblRegs, VecDblRegs, IntRegs, CVI_VX_DV>; + +multiclass T_HVX_vmpyacc_VVV <string asmString>: + T_HVX_vmpyacc_both <asmString, VectorRegs, VectorRegs, VectorRegs, CVI_VX_DV>; + +multiclass T_HVX_vmpyacc_WVV <string asmString>: + T_HVX_vmpyacc_both <asmString, VecDblRegs, VectorRegs, VectorRegs, CVI_VX_DV>; + + +defm V6_vtmpyb_acc : + T_HVX_vmpyacc_WWR <"$dst.h += 
vtmpy($src1.b,$src2.b)">, + V6_vtmpyb_acc_enc; +defm V6_vtmpybus_acc : + T_HVX_vmpyacc_WWR <"$dst.h += vtmpy($src1.ub,$src2.b)">, + V6_vtmpybus_acc_enc; +defm V6_vtmpyhb_acc : + T_HVX_vmpyacc_WWR <"$dst.w += vtmpy($src1.h,$src2.b)">, + V6_vtmpyhb_acc_enc; +defm V6_vdmpyhb_acc : + T_HVX_vmpyacc_VVR <"$dst.w += vdmpy($src1.h,$src2.b)">, + V6_vdmpyhb_acc_enc; +defm V6_vrmpyub_acc : + T_HVX_vmpyacc_VVR <"$dst.uw += vrmpy($src1.ub,$src2.ub)">, + V6_vrmpyub_acc_enc; +defm V6_vrmpybus_acc : + T_HVX_vmpyacc_VVR <"$dst.w += vrmpy($src1.ub,$src2.b)">, + V6_vrmpybus_acc_enc; +defm V6_vdmpybus_acc : + T_HVX_vmpyacc_VVR <"$dst.h += vdmpy($src1.ub,$src2.b)">, + V6_vdmpybus_acc_enc; +defm V6_vdmpybus_dv_acc : + T_HVX_vmpyacc_WWR <"$dst.h += vdmpy($src1.ub,$src2.b)">, + V6_vdmpybus_dv_acc_enc; +defm V6_vdmpyhsuisat_acc : + T_HVX_vmpyacc_VWR <"$dst.w += vdmpy($src1.h,$src2.uh,#1):sat">, + V6_vdmpyhsuisat_acc_enc; +defm V6_vdmpyhisat_acc : + T_HVX_vmpyacc_VWR <"$dst.w += vdmpy($src1.h,$src2.h):sat">, + V6_vdmpyhisat_acc_enc; +defm V6_vdmpyhb_dv_acc : + T_HVX_vmpyacc_WWR <"$dst.w += vdmpy($src1.h,$src2.b)">, + V6_vdmpyhb_dv_acc_enc; +defm V6_vmpybus_acc : + T_HVX_vmpyacc_WVR <"$dst.h += vmpy($src1.ub,$src2.b)">, + V6_vmpybus_acc_enc; +defm V6_vmpabus_acc : + T_HVX_vmpyacc_WWR <"$dst.h += vmpa($src1.ub,$src2.b)">, + V6_vmpabus_acc_enc; +defm V6_vmpahb_acc : + T_HVX_vmpyacc_WWR <"$dst.w += vmpa($src1.h,$src2.b)">, + V6_vmpahb_acc_enc; +defm V6_vmpyhsat_acc : + T_HVX_vmpyacc_WVR <"$dst.w += vmpy($src1.h,$src2.h):sat">, + V6_vmpyhsat_acc_enc; +defm V6_vmpyuh_acc : + T_HVX_vmpyacc_WVR <"$dst.uw += vmpy($src1.uh,$src2.uh)">, + V6_vmpyuh_acc_enc; +defm V6_vmpyiwb_acc : + T_HVX_vmpyacc_VVR <"$dst.w += vmpyi($src1.w,$src2.b)">, + V6_vmpyiwb_acc_enc; +defm V6_vdsaduh_acc : + T_HVX_vmpyacc_WWR <"$dst.uw += vdsad($src1.uh,$src2.uh)">, + V6_vdsaduh_acc_enc; +defm V6_vmpyihb_acc : + T_HVX_vmpyacc_VVR <"$dst.h += vmpyi($src1.h,$src2.b)">, + V6_vmpyihb_acc_enc; +defm V6_vmpyub_acc : + T_HVX_vmpyacc_WVR <"$dst.uh += vmpy($src1.ub,$src2.ub)">, + V6_vmpyub_acc_enc; + +let Itinerary = CVI_VX_DV, Type = TypeCVI_VX_DV in { +defm V6_vdmpyhsusat_acc : + T_HVX_vmpyacc_VVR <"$dst.w += vdmpy($src1.h,$src2.uh):sat">, + V6_vdmpyhsusat_acc_enc; +defm V6_vdmpyhsat_acc : + T_HVX_vmpyacc_VVR <"$dst.w += vdmpy($src1.h,$src2.h):sat">, + V6_vdmpyhsat_acc_enc; +defm V6_vmpyiwh_acc : T_HVX_vmpyacc_VVR + <"$dst.w += vmpyi($src1.w,$src2.h)">, V6_vmpyiwh_acc_enc; +} + +let Itinerary = CVI_VS, Type = TypeCVI_VS in { +defm V6_vaslw_acc : + T_HVX_vmpyacc_VVR <"$dst.w += vasl($src1.w,$src2)">, V6_vaslw_acc_enc; +defm V6_vasrw_acc : + T_HVX_vmpyacc_VVR <"$dst.w += vasr($src1.w,$src2)">, V6_vasrw_acc_enc; +} + +defm V6_vdmpyhvsat_acc : + T_HVX_vmpyacc_VVV <"$dst.w += vdmpy($src1.h,$src2.h):sat">, + V6_vdmpyhvsat_acc_enc; +defm V6_vmpybusv_acc : + T_HVX_vmpyacc_WVV <"$dst.h += vmpy($src1.ub,$src2.b)">, + V6_vmpybusv_acc_enc; +defm V6_vmpybv_acc : + T_HVX_vmpyacc_WVV <"$dst.h += vmpy($src1.b,$src2.b)">, V6_vmpybv_acc_enc; +defm V6_vmpyhus_acc : + T_HVX_vmpyacc_WVV <"$dst.w += vmpy($src1.h,$src2.uh)">, V6_vmpyhus_acc_enc; +defm V6_vmpyhv_acc : + T_HVX_vmpyacc_WVV <"$dst.w += vmpy($src1.h,$src2.h)">, V6_vmpyhv_acc_enc; +defm V6_vmpyiewh_acc : + T_HVX_vmpyacc_VVV <"$dst.w += vmpyie($src1.w,$src2.h)">, + V6_vmpyiewh_acc_enc; +defm V6_vmpyiewuh_acc : + T_HVX_vmpyacc_VVV <"$dst.w += vmpyie($src1.w,$src2.uh)">, + V6_vmpyiewuh_acc_enc; +defm V6_vmpyih_acc : + T_HVX_vmpyacc_VVV <"$dst.h += vmpyi($src1.h,$src2.h)">, V6_vmpyih_acc_enc; +defm V6_vmpyowh_rnd_sacc : + 
T_HVX_vmpyacc_VVV <"$dst.w += vmpyo($src1.w,$src2.h):<<1:rnd:sat:shift">, + V6_vmpyowh_rnd_sacc_enc; +defm V6_vmpyowh_sacc : + T_HVX_vmpyacc_VVV <"$dst.w += vmpyo($src1.w,$src2.h):<<1:sat:shift">, + V6_vmpyowh_sacc_enc; +defm V6_vmpyubv_acc : + T_HVX_vmpyacc_WVV <"$dst.uh += vmpy($src1.ub,$src2.ub)">, + V6_vmpyubv_acc_enc; +defm V6_vmpyuhv_acc : + T_HVX_vmpyacc_WVV <"$dst.uw += vmpy($src1.uh,$src2.uh)">, + V6_vmpyuhv_acc_enc; +defm V6_vrmpybusv_acc : + T_HVX_vmpyacc_VVV <"$dst.w += vrmpy($src1.ub,$src2.b)">, + V6_vrmpybusv_acc_enc; +defm V6_vrmpybv_acc : + T_HVX_vmpyacc_VVV <"$dst.w += vrmpy($src1.b,$src2.b)">, V6_vrmpybv_acc_enc; +defm V6_vrmpyubv_acc : + T_HVX_vmpyacc_VVV <"$dst.uw += vrmpy($src1.ub,$src2.ub)">, + V6_vrmpyubv_acc_enc; + + +class T_HVX_vcmp <string asmString, RegisterClass RCout, RegisterClass RCin> + : CVI_VA_Resource1 <(outs RCout:$dst), + (ins RCout:$_src_, RCin:$src1, RCin:$src2), asmString, + [], "$dst = $_src_" > { + let Itinerary = CVI_VA; + let Type = TypeCVI_VA; +} + +multiclass T_HVX_vcmp <string asmString> { + def NAME : T_HVX_vcmp <asmString, VecPredRegs, VectorRegs>; + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_vcmp <asmString, VecPredRegs128B, VectorRegs128B>; +} + +defm V6_veqb_and : + T_HVX_vcmp <"$dst &= vcmp.eq($src1.b,$src2.b)">, V6_veqb_and_enc; +defm V6_veqh_and : + T_HVX_vcmp <"$dst &= vcmp.eq($src1.h,$src2.h)">, V6_veqh_and_enc; +defm V6_veqw_and : + T_HVX_vcmp <"$dst &= vcmp.eq($src1.w,$src2.w)">, V6_veqw_and_enc; +defm V6_vgtb_and : + T_HVX_vcmp <"$dst &= vcmp.gt($src1.b,$src2.b)">, V6_vgtb_and_enc; +defm V6_vgth_and : + T_HVX_vcmp <"$dst &= vcmp.gt($src1.h,$src2.h)">, V6_vgth_and_enc; +defm V6_vgtw_and : + T_HVX_vcmp <"$dst &= vcmp.gt($src1.w,$src2.w)">, V6_vgtw_and_enc; +defm V6_vgtub_and : + T_HVX_vcmp <"$dst &= vcmp.gt($src1.ub,$src2.ub)">, V6_vgtub_and_enc; +defm V6_vgtuh_and : + T_HVX_vcmp <"$dst &= vcmp.gt($src1.uh,$src2.uh)">, V6_vgtuh_and_enc; +defm V6_vgtuw_and : + T_HVX_vcmp <"$dst &= vcmp.gt($src1.uw,$src2.uw)">, V6_vgtuw_and_enc; +defm V6_veqb_or : + T_HVX_vcmp <"$dst |= vcmp.eq($src1.b,$src2.b)">, V6_veqb_or_enc; +defm V6_veqh_or : + T_HVX_vcmp <"$dst |= vcmp.eq($src1.h,$src2.h)">, V6_veqh_or_enc; +defm V6_veqw_or : + T_HVX_vcmp <"$dst |= vcmp.eq($src1.w,$src2.w)">, V6_veqw_or_enc; +defm V6_vgtb_or : + T_HVX_vcmp <"$dst |= vcmp.gt($src1.b,$src2.b)">, V6_vgtb_or_enc; +defm V6_vgth_or : + T_HVX_vcmp <"$dst |= vcmp.gt($src1.h,$src2.h)">, V6_vgth_or_enc; +defm V6_vgtw_or : + T_HVX_vcmp <"$dst |= vcmp.gt($src1.w,$src2.w)">, V6_vgtw_or_enc; +defm V6_vgtub_or : + T_HVX_vcmp <"$dst |= vcmp.gt($src1.ub,$src2.ub)">, V6_vgtub_or_enc; +defm V6_vgtuh_or : + T_HVX_vcmp <"$dst |= vcmp.gt($src1.uh,$src2.uh)">, V6_vgtuh_or_enc; +defm V6_vgtuw_or : + T_HVX_vcmp <"$dst |= vcmp.gt($src1.uw,$src2.uw)">, V6_vgtuw_or_enc; +defm V6_veqb_xor : + T_HVX_vcmp <"$dst ^= vcmp.eq($src1.b,$src2.b)">, V6_veqb_xor_enc; +defm V6_veqh_xor : + T_HVX_vcmp <"$dst ^= vcmp.eq($src1.h,$src2.h)">, V6_veqh_xor_enc; +defm V6_veqw_xor : + T_HVX_vcmp <"$dst ^= vcmp.eq($src1.w,$src2.w)">, V6_veqw_xor_enc; +defm V6_vgtb_xor : + T_HVX_vcmp <"$dst ^= vcmp.gt($src1.b,$src2.b)">, V6_vgtb_xor_enc; +defm V6_vgth_xor : + T_HVX_vcmp <"$dst ^= vcmp.gt($src1.h,$src2.h)">, V6_vgth_xor_enc; +defm V6_vgtw_xor : + T_HVX_vcmp <"$dst ^= vcmp.gt($src1.w,$src2.w)">, V6_vgtw_xor_enc; +defm V6_vgtub_xor : + T_HVX_vcmp <"$dst ^= vcmp.gt($src1.ub,$src2.ub)">, V6_vgtub_xor_enc; +defm V6_vgtuh_xor : + T_HVX_vcmp <"$dst ^= vcmp.gt($src1.uh,$src2.uh)">, V6_vgtuh_xor_enc; +defm V6_vgtuw_xor : + 
T_HVX_vcmp <"$dst ^= vcmp.gt($src1.uw,$src2.uw)">, V6_vgtuw_xor_enc; + +defm V6_vminub : + T_HVX_alu_VV <"$dst.ub = vmin($src1.ub,$src2.ub)">, V6_vminub_enc; +defm V6_vminuh : + T_HVX_alu_VV <"$dst.uh = vmin($src1.uh,$src2.uh)">, V6_vminuh_enc; +defm V6_vminh : + T_HVX_alu_VV <"$dst.h = vmin($src1.h,$src2.h)">, V6_vminh_enc; +defm V6_vminw : + T_HVX_alu_VV <"$dst.w = vmin($src1.w,$src2.w)">, V6_vminw_enc; +defm V6_vmaxub : + T_HVX_alu_VV <"$dst.ub = vmax($src1.ub,$src2.ub)">, V6_vmaxub_enc; +defm V6_vmaxuh : + T_HVX_alu_VV <"$dst.uh = vmax($src1.uh,$src2.uh)">, V6_vmaxuh_enc; +defm V6_vmaxh : + T_HVX_alu_VV <"$dst.h = vmax($src1.h,$src2.h)">, V6_vmaxh_enc; +defm V6_vmaxw : + T_HVX_alu_VV <"$dst.w = vmax($src1.w,$src2.w)">, V6_vmaxw_enc; +defm V6_vshuffeb : + T_HVX_alu_VV <"$dst.b = vshuffe($src1.b,$src2.b)">, V6_vshuffeb_enc; +defm V6_vshuffob : + T_HVX_alu_VV <"$dst.b = vshuffo($src1.b,$src2.b)">, V6_vshuffob_enc; +defm V6_vshufeh : + T_HVX_alu_VV <"$dst.h = vshuffe($src1.h,$src2.h)">, V6_vshufeh_enc; +defm V6_vshufoh : + T_HVX_alu_VV <"$dst.h = vshuffo($src1.h,$src2.h)">, V6_vshufoh_enc; + +let Itinerary = CVI_VX_DV, Type = TypeCVI_VX_DV in { +defm V6_vmpyowh_rnd : + T_HVX_alu_VV <"$dst.w = vmpyo($src1.w,$src2.h):<<1:rnd:sat">, + V6_vmpyowh_rnd_enc; +defm V6_vmpyiewuh : + T_HVX_alu_VV <"$dst.w = vmpyie($src1.w,$src2.uh)">, V6_vmpyiewuh_enc; +defm V6_vmpyewuh : + T_HVX_alu_VV <"$dst.w = vmpye($src1.w,$src2.uh)">, V6_vmpyewuh_enc; +defm V6_vmpyowh : + T_HVX_alu_VV <"$dst.w = vmpyo($src1.w,$src2.h):<<1:sat">, V6_vmpyowh_enc; +defm V6_vmpyiowh : + T_HVX_alu_VV <"$dst.w = vmpyio($src1.w,$src2.h)">, V6_vmpyiowh_enc; +} +let Itinerary = CVI_VX, Type = TypeCVI_VX in +defm V6_vmpyieoh : + T_HVX_alu_VV <"$dst.w = vmpyieo($src1.h,$src2.h)">, V6_vmpyieoh_enc; + +let Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV in { +defm V6_vshufoeh : + T_HVX_alu_WV <"$dst.h = vshuffoe($src1.h,$src2.h)">, V6_vshufoeh_enc; +defm V6_vshufoeb : + T_HVX_alu_WV <"$dst.b = vshuffoe($src1.b,$src2.b)">, V6_vshufoeb_enc; +} + +let isRegSequence = 1, Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV in +defm V6_vcombine : + T_HVX_alu_WV <"$dst = vcombine($src1,$src2)">, V6_vcombine_enc; + +def SDTHexagonVCOMBINE: SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>, + SDTCisSubVecOfVec<1, 0>]>; + +def HexagonVCOMBINE: SDNode<"HexagonISD::VCOMBINE", SDTHexagonVCOMBINE>; + +def: Pat<(v32i32 (HexagonVCOMBINE (v16i32 VectorRegs:$Vs), + (v16i32 VectorRegs:$Vt))), + (V6_vcombine VectorRegs:$Vs, VectorRegs:$Vt)>, + Requires<[UseHVXSgl]>; +def: Pat<(v64i32 (HexagonVCOMBINE (v32i32 VecDblRegs:$Vs), + (v32i32 VecDblRegs:$Vt))), + (V6_vcombine_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>, + Requires<[UseHVXDbl]>; + +let Itinerary = CVI_VINLANESAT, Type = TypeCVI_VINLANESAT in { +defm V6_vsathub : + T_HVX_alu_VV <"$dst.ub = vsat($src1.h,$src2.h)">, V6_vsathub_enc; +defm V6_vsatwh : + T_HVX_alu_VV <"$dst.h = vsat($src1.w,$src2.w)">, V6_vsatwh_enc; +} + +let Itinerary = CVI_VS, Type = TypeCVI_VS in { +defm V6_vroundwh : + T_HVX_alu_VV <"$dst.h = vround($src1.w,$src2.w):sat">, V6_vroundwh_enc; +defm V6_vroundwuh : + T_HVX_alu_VV <"$dst.uh = vround($src1.w,$src2.w):sat">, V6_vroundwuh_enc; +defm V6_vroundhb : + T_HVX_alu_VV <"$dst.b = vround($src1.h,$src2.h):sat">, V6_vroundhb_enc; +defm V6_vroundhub : + T_HVX_alu_VV <"$dst.ub = vround($src1.h,$src2.h):sat">, V6_vroundhub_enc; +defm V6_vasrwv : + T_HVX_alu_VV <"$dst.w = vasr($src1.w,$src2.w)">, V6_vasrwv_enc; +defm V6_vlsrwv : + T_HVX_alu_VV <"$dst.w = vlsr($src1.w,$src2.w)">, V6_vlsrwv_enc; +defm V6_vlsrhv : + 
T_HVX_alu_VV <"$dst.h = vlsr($src1.h,$src2.h)">, V6_vlsrhv_enc; +defm V6_vasrhv : + T_HVX_alu_VV <"$dst.h = vasr($src1.h,$src2.h)">, V6_vasrhv_enc; +defm V6_vaslwv : + T_HVX_alu_VV <"$dst.w = vasl($src1.w,$src2.w)">, V6_vaslwv_enc; +defm V6_vaslhv : + T_HVX_alu_VV <"$dst.h = vasl($src1.h,$src2.h)">, V6_vaslhv_enc; +} + +defm V6_vaddb : + T_HVX_alu_VV <"$dst.b = vadd($src1.b,$src2.b)">, V6_vaddb_enc; +defm V6_vaddh : + T_HVX_alu_VV <"$dst.h = vadd($src1.h,$src2.h)">, V6_vaddh_enc; + +let Itinerary = CVI_VP, Type = TypeCVI_VP in { +defm V6_vdelta : + T_HVX_alu_VV <"$dst = vdelta($src1,$src2)">, V6_vdelta_enc; +defm V6_vrdelta : + T_HVX_alu_VV <"$dst = vrdelta($src1,$src2)">, V6_vrdelta_enc; +defm V6_vdealb4w : + T_HVX_alu_VV <"$dst.b = vdeale($src1.b,$src2.b)">, V6_vdealb4w_enc; +defm V6_vpackeb : + T_HVX_alu_VV <"$dst.b = vpacke($src1.h,$src2.h)">, V6_vpackeb_enc; +defm V6_vpackeh : + T_HVX_alu_VV <"$dst.h = vpacke($src1.w,$src2.w)">, V6_vpackeh_enc; +defm V6_vpackhub_sat : + T_HVX_alu_VV <"$dst.ub = vpack($src1.h,$src2.h):sat">, V6_vpackhub_sat_enc; +defm V6_vpackhb_sat : + T_HVX_alu_VV <"$dst.b = vpack($src1.h,$src2.h):sat">, V6_vpackhb_sat_enc; +defm V6_vpackwuh_sat : + T_HVX_alu_VV <"$dst.uh = vpack($src1.w,$src2.w):sat">, V6_vpackwuh_sat_enc; +defm V6_vpackwh_sat : + T_HVX_alu_VV <"$dst.h = vpack($src1.w,$src2.w):sat">, V6_vpackwh_sat_enc; +defm V6_vpackob : + T_HVX_alu_VV <"$dst.b = vpacko($src1.h,$src2.h)">, V6_vpackob_enc; +defm V6_vpackoh : + T_HVX_alu_VV <"$dst.h = vpacko($src1.w,$src2.w)">, V6_vpackoh_enc; +} + +let hasNewValue = 1, hasSideEffects = 0 in +class T_HVX_condALU <string asmString, RegisterClass RC1, RegisterClass RC2> + : CVI_VA_Resource1 <(outs RC2:$dst), + (ins RC1:$src1, RC2:$_src_, RC2:$src2), asmString, + [], "$dst = $_src_" > { + let Itinerary = CVI_VA; + let Type = TypeCVI_VA; +} + +multiclass T_HVX_condALU <string asmString> { + def NAME : T_HVX_condALU <asmString, VecPredRegs, VectorRegs>; + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_condALU <asmString, VecPredRegs128B, VectorRegs128B>; +} + +defm V6_vaddbq : T_HVX_condALU <"if ($src1) $dst.b += $src2.b">, + V6_vaddbq_enc; +defm V6_vaddhq : T_HVX_condALU <"if ($src1) $dst.h += $src2.h">, + V6_vaddhq_enc; +defm V6_vaddwq : T_HVX_condALU <"if ($src1) $dst.w += $src2.w">, + V6_vaddwq_enc; +defm V6_vsubbq : T_HVX_condALU <"if ($src1) $dst.b -= $src2.b">, + V6_vsubbq_enc; +defm V6_vsubhq : T_HVX_condALU <"if ($src1) $dst.h -= $src2.h">, + V6_vsubhq_enc; +defm V6_vsubwq : T_HVX_condALU <"if ($src1) $dst.w -= $src2.w">, + V6_vsubwq_enc; +defm V6_vaddbnq : T_HVX_condALU <"if (!$src1) $dst.b += $src2.b">, + V6_vaddbnq_enc; +defm V6_vaddhnq : T_HVX_condALU <"if (!$src1) $dst.h += $src2.h">, + V6_vaddhnq_enc; +defm V6_vaddwnq : T_HVX_condALU <"if (!$src1) $dst.w += $src2.w">, + V6_vaddwnq_enc; +defm V6_vsubbnq : T_HVX_condALU <"if (!$src1) $dst.b -= $src2.b">, + V6_vsubbnq_enc; +defm V6_vsubhnq : T_HVX_condALU <"if (!$src1) $dst.h -= $src2.h">, + V6_vsubhnq_enc; +defm V6_vsubwnq : T_HVX_condALU <"if (!$src1) $dst.w -= $src2.w">, + V6_vsubwnq_enc; + +let hasNewValue = 1 in +class T_HVX_alu_2op <string asmString, InstrItinClass itin, + RegisterClass RCout, RegisterClass RCin> + : CVI_VA_Resource1 <(outs RCout:$dst), (ins RCin:$src1), + asmString >{ + let Itinerary = itin; + let Type = !cast<IType>("Type"#itin); +} + +multiclass T_HVX_alu_2op <string asmString, RegisterClass RCout, + RegisterClass RCin, InstrItinClass itin> { + def NAME : T_HVX_alu_2op <asmString, itin, RCout, RCin>; + let isCodeGenOnly = 1 in + 
def NAME#_128B : T_HVX_alu_2op <asmString, itin, + !cast<RegisterClass>(RCout#"128B"), + !cast<RegisterClass>(RCin#"128B")>; +} + +let hasNewValue = 1 in +multiclass T_HVX_alu_2op_VV <string asmString>: + T_HVX_alu_2op <asmString, VectorRegs, VectorRegs, CVI_VA>; + +multiclass T_HVX_alu_2op_WV <string asmString>: + T_HVX_alu_2op <asmString, VecDblRegs, VectorRegs, CVI_VA_DV>; + + +defm V6_vabsh : T_HVX_alu_2op_VV <"$dst.h = vabs($src1.h)">, + V6_vabsh_enc; +defm V6_vabsw : T_HVX_alu_2op_VV <"$dst.w = vabs($src1.w)">, + V6_vabsw_enc; +defm V6_vabsh_sat : T_HVX_alu_2op_VV <"$dst.h = vabs($src1.h):sat">, + V6_vabsh_sat_enc; +defm V6_vabsw_sat : T_HVX_alu_2op_VV <"$dst.w = vabs($src1.w):sat">, + V6_vabsw_sat_enc; +defm V6_vnot : T_HVX_alu_2op_VV <"$dst = vnot($src1)">, + V6_vnot_enc; +defm V6_vassign : T_HVX_alu_2op_VV <"$dst = $src1">, + V6_vassign_enc; + +defm V6_vzb : T_HVX_alu_2op_WV <"$dst.uh = vzxt($src1.ub)">, + V6_vzb_enc; +defm V6_vzh : T_HVX_alu_2op_WV <"$dst.uw = vzxt($src1.uh)">, + V6_vzh_enc; +defm V6_vsb : T_HVX_alu_2op_WV <"$dst.h = vsxt($src1.b)">, + V6_vsb_enc; +defm V6_vsh : T_HVX_alu_2op_WV <"$dst.w = vsxt($src1.h)">, + V6_vsh_enc; + +let Itinerary = CVI_VP, Type = TypeCVI_VP in { +defm V6_vdealh : T_HVX_alu_2op_VV <"$dst.h = vdeal($src1.h)">, + V6_vdealh_enc; +defm V6_vdealb : T_HVX_alu_2op_VV <"$dst.b = vdeal($src1.b)">, + V6_vdealb_enc; +defm V6_vshuffh : T_HVX_alu_2op_VV <"$dst.h = vshuff($src1.h)">, + V6_vshuffh_enc; +defm V6_vshuffb : T_HVX_alu_2op_VV <"$dst.b = vshuff($src1.b)">, + V6_vshuffb_enc; +} + +let Itinerary = CVI_VP_VS, Type = TypeCVI_VP_VS in { +defm V6_vunpackub : T_HVX_alu_2op_WV <"$dst.uh = vunpack($src1.ub)">, + V6_vunpackub_enc; +defm V6_vunpackuh : T_HVX_alu_2op_WV <"$dst.uw = vunpack($src1.uh)">, + V6_vunpackuh_enc; +defm V6_vunpackb : T_HVX_alu_2op_WV <"$dst.h = vunpack($src1.b)">, + V6_vunpackb_enc; +defm V6_vunpackh : T_HVX_alu_2op_WV <"$dst.w = vunpack($src1.h)">, + V6_vunpackh_enc; +} + +let Itinerary = CVI_VS, Type = TypeCVI_VS in { +defm V6_vcl0w : T_HVX_alu_2op_VV <"$dst.uw = vcl0($src1.uw)">, + V6_vcl0w_enc; +defm V6_vcl0h : T_HVX_alu_2op_VV <"$dst.uh = vcl0($src1.uh)">, + V6_vcl0h_enc; +defm V6_vnormamtw : T_HVX_alu_2op_VV <"$dst.w = vnormamt($src1.w)">, + V6_vnormamtw_enc; +defm V6_vnormamth : T_HVX_alu_2op_VV <"$dst.h = vnormamt($src1.h)">, + V6_vnormamth_enc; +defm V6_vpopcounth : T_HVX_alu_2op_VV <"$dst.h = vpopcount($src1.h)">, + V6_vpopcounth_enc; +} + +let isAccumulator = 1, hasNewValue = 1, Itinerary = CVI_VX_DV_LONG, + Type = TypeCVI_VX_DV in +class T_HVX_vmpyacc2 <string asmString, RegisterClass RC> + : CVI_VA_Resource1 <(outs RC:$dst), + (ins RC:$_src_, RC:$src1, IntRegs:$src2, u1Imm:$src3), + asmString, [], "$dst = $_src_" > ; + + +multiclass T_HVX_vmpyacc2 <string asmString> { + def NAME : T_HVX_vmpyacc2 <asmString, VecDblRegs>; + + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_vmpyacc2 <asmString, VecDblRegs128B>; +} + +defm V6_vrmpybusi_acc : + T_HVX_vmpyacc2<"$dst.w += vrmpy($src1.ub,$src2.b,#$src3)">, + V6_vrmpybusi_acc_enc; +defm V6_vrsadubi_acc : + T_HVX_vmpyacc2<"$dst.uw += vrsad($src1.ub,$src2.ub,#$src3)">, + V6_vrsadubi_acc_enc; +defm V6_vrmpyubi_acc : + T_HVX_vmpyacc2<"$dst.uw += vrmpy($src1.ub,$src2.ub,#$src3)">, + V6_vrmpyubi_acc_enc; + + +let Itinerary = CVI_VX_DV_LONG, Type = TypeCVI_VX_DV, hasNewValue = 1 in +class T_HVX_vmpy2 <string asmString, RegisterClass RC> + : CVI_VA_Resource1<(outs RC:$dst), (ins RC:$src1, IntRegs:$src2, u1Imm:$src3), + asmString>; + + +multiclass T_HVX_vmpy2 <string asmString> { + 
def NAME : T_HVX_vmpy2 <asmString, VecDblRegs>; + + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_vmpy2 <asmString, VecDblRegs128B>; +} + +defm V6_vrmpybusi : + T_HVX_vmpy2 <"$dst.w = vrmpy($src1.ub,$src2.b,#$src3)">, V6_vrmpybusi_enc; +defm V6_vrsadubi : + T_HVX_vmpy2 <"$dst.uw = vrsad($src1.ub,$src2.ub,#$src3)">, V6_vrsadubi_enc; +defm V6_vrmpyubi : + T_HVX_vmpy2 <"$dst.uw = vrmpy($src1.ub,$src2.ub,#$src3)">, V6_vrmpyubi_enc; + + +let Itinerary = CVI_VP_VS_LONG_EARLY, Type = TypeCVI_VP_VS, + hasSideEffects = 0, hasNewValue2 = 1, opNewValue2 = 1 in +class T_HVX_perm <string asmString, RegisterClass RC> + : CVI_VA_Resource1 <(outs RC:$_dst1_, RC:$_dst2_), + (ins RC:$src1, RC:$src2, IntRegs:$src3), + asmString, [], "$_dst1_ = $src1, $_dst2_ = $src2" >; + +multiclass T_HVX_perm <string asmString> { + def NAME : T_HVX_perm <asmString, VectorRegs>; + + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_perm <asmString, VectorRegs128B>; +} + +let hasNewValue = 1, opNewValue = 0, hasNewValue2 = 1, opNewValue2 = 1 in { + defm V6_vshuff : T_HVX_perm <"vshuff($src1,$src2,$src3)">, V6_vshuff_enc; + defm V6_vdeal : T_HVX_perm <"vdeal($src1,$src2,$src3)">, V6_vdeal_enc; +} + +// Conditional vector move. +let isPredicated = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +class T_HVX_cmov <bit isPredNot, RegisterClass RC> + : CVI_VA_Resource1 <(outs RC:$dst), (ins PredRegs:$src1, RC:$src2), + "if ("#!if(isPredNot, "!", "")#"$src1) $dst = $src2"> { + let isPredicatedFalse = isPredNot; +} + +multiclass T_HVX_cmov <bit isPredNot = 0> { + def NAME : T_HVX_cmov <isPredNot, VectorRegs>; + + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_cmov <isPredNot, VectorRegs128B>; +} + +defm V6_vcmov : T_HVX_cmov, V6_vcmov_enc; +defm V6_vncmov : T_HVX_cmov<1>, V6_vncmov_enc; + +// Conditional vector combine. 
+let Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV, isPredicated = 1, + hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +class T_HVX_ccombine <bit isPredNot, RegisterClass RCout, RegisterClass RCin> + : CVI_VA_Resource1 < (outs RCout:$dst), + (ins PredRegs:$src1, RCin:$src2, RCin:$src3), + "if ("#!if(isPredNot, "!", "")#"$src1) $dst = vcombine($src2,$src3)"> { + let isPredicatedFalse = isPredNot; +} + +multiclass T_HVX_ccombine <bit isPredNot = 0> { + def NAME : T_HVX_ccombine <isPredNot, VecDblRegs, VectorRegs>; + + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_ccombine <isPredNot, VecDblRegs128B, VectorRegs128B>; +} + +defm V6_vccombine : T_HVX_ccombine, V6_vccombine_enc; +defm V6_vnccombine : T_HVX_ccombine<1>, V6_vnccombine_enc; + +let hasNewValue = 1 in +class T_HVX_shift <string asmString, RegisterClass RCout, RegisterClass RCin> + : CVI_VX_DV_Resource1<(outs RCout:$dst), + (ins RCin:$src1, RCin:$src2, IntRegsLow8:$src3), + asmString >; + +multiclass T_HVX_shift <string asmString, RegisterClass RCout, + RegisterClass RCin> { + def NAME : T_HVX_shift <asmString, RCout, RCin>; + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_shift <asmString, !cast<RegisterClass>(RCout#"128B"), + !cast<RegisterClass>(RCin#"128B")>; +} + +multiclass T_HVX_shift_VV <string asmString>: + T_HVX_shift <asmString, VectorRegs, VectorRegs>; + +multiclass T_HVX_shift_WV <string asmString>: + T_HVX_shift <asmString, VecDblRegs, VectorRegs>; + +let Itinerary = CVI_VP_LONG, Type = TypeCVI_VP in { +defm V6_valignb : + T_HVX_shift_VV <"$dst = valign($src1,$src2,$src3)">, V6_valignb_enc; +defm V6_vlalignb : + T_HVX_shift_VV <"$dst = vlalign($src1,$src2,$src3)">, V6_vlalignb_enc; +} + +let Itinerary = CVI_VS, Type = TypeCVI_VS in { +defm V6_vasrwh : + T_HVX_shift_VV <"$dst.h = vasr($src1.w,$src2.w,$src3)">, V6_vasrwh_enc; +defm V6_vasrwhsat : + T_HVX_shift_VV <"$dst.h = vasr($src1.w,$src2.w,$src3):sat">, + V6_vasrwhsat_enc; +defm V6_vasrwhrndsat : + T_HVX_shift_VV <"$dst.h = vasr($src1.w,$src2.w,$src3):rnd:sat">, + V6_vasrwhrndsat_enc; +defm V6_vasrwuhsat : + T_HVX_shift_VV <"$dst.uh = vasr($src1.w,$src2.w,$src3):sat">, + V6_vasrwuhsat_enc; +defm V6_vasrhubsat : + T_HVX_shift_VV <"$dst.ub = vasr($src1.h,$src2.h,$src3):sat">, + V6_vasrhubsat_enc; +defm V6_vasrhubrndsat : + T_HVX_shift_VV <"$dst.ub = vasr($src1.h,$src2.h,$src3):rnd:sat">, + V6_vasrhubrndsat_enc; +defm V6_vasrhbrndsat : + T_HVX_shift_VV <"$dst.b = vasr($src1.h,$src2.h,$src3):rnd:sat">, + V6_vasrhbrndsat_enc; +} + +// Assembler mapped -- alias? 
+//defm V6_vtran2x2vdd : T_HVX_shift_VV <"">, V6_vtran2x2vdd_enc; +let Itinerary = CVI_VP_VS_LONG, Type = TypeCVI_VP_VS in { +defm V6_vshuffvdd : + T_HVX_shift_WV <"$dst = vshuff($src1,$src2,$src3)">, V6_vshuffvdd_enc; +defm V6_vdealvdd : + T_HVX_shift_WV <"$dst = vdeal($src1,$src2,$src3)">, V6_vdealvdd_enc; +} + +let hasNewValue = 1, Itinerary = CVI_VP_VS_LONG, Type = TypeCVI_VP_VS in +class T_HVX_unpack <string asmString, RegisterClass RCout, RegisterClass RCin> + : CVI_VX_DV_Resource1<(outs RCout:$dst), (ins RCout:$_src_, RCin:$src1), + asmString, [], "$dst = $_src_">; + +multiclass T_HVX_unpack <string asmString> { + def NAME : T_HVX_unpack <asmString, VecDblRegs, VectorRegs>; + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_unpack <asmString, VecDblRegs128B, VectorRegs128B>; +} + +defm V6_vunpackob : T_HVX_unpack <"$dst.h |= vunpacko($src1.b)">, V6_vunpackob_enc; +defm V6_vunpackoh : T_HVX_unpack <"$dst.w |= vunpacko($src1.h)">, V6_vunpackoh_enc; + +let Itinerary = CVI_VP_LONG, Type = TypeCVI_VP, hasNewValue = 1, + hasSideEffects = 0 in +class T_HVX_valign <string asmString, RegisterClass RC> + : CVI_VA_Resource1<(outs RC:$dst), (ins RC:$src1, RC:$src2, u3Imm:$src3), + asmString>; + +multiclass T_HVX_valign <string asmString> { + def NAME : T_HVX_valign <asmString, VectorRegs>; + + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_valign <asmString, VectorRegs128B>; +} + +defm V6_valignbi : + T_HVX_valign <"$dst = valign($src1,$src2,#$src3)">, V6_valignbi_enc; +defm V6_vlalignbi : + T_HVX_valign <"$dst = vlalign($src1,$src2,#$src3)">, V6_vlalignbi_enc; + +let Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV in +class T_HVX_predAlu <string asmString, RegisterClass RC> + : CVI_VA_Resource1<(outs RC:$dst), (ins RC:$src1, RC:$src2), + asmString>; + +multiclass T_HVX_predAlu <string asmString> { + def NAME : T_HVX_predAlu <asmString, VecPredRegs>; + + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_predAlu <asmString, VecPredRegs128B>; +} + +defm V6_pred_and : T_HVX_predAlu <"$dst = and($src1,$src2)">, V6_pred_and_enc; +defm V6_pred_or : T_HVX_predAlu <"$dst = or($src1,$src2)">, V6_pred_or_enc; +defm V6_pred_xor : T_HVX_predAlu <"$dst = xor($src1,$src2)">, V6_pred_xor_enc; +defm V6_pred_or_n : T_HVX_predAlu <"$dst = or($src1,!$src2)">, V6_pred_or_n_enc; +defm V6_pred_and_n : + T_HVX_predAlu <"$dst = and($src1,!$src2)">, V6_pred_and_n_enc; + +let Itinerary = CVI_VA, Type = TypeCVI_VA in +class T_HVX_prednot <RegisterClass RC> + : CVI_VA_Resource1<(outs RC:$dst), (ins RC:$src1), + "$dst = not($src1)">, V6_pred_not_enc; + +def V6_pred_not : T_HVX_prednot <VecPredRegs>; +let isCodeGenOnly = 1 in +def V6_pred_not_128B : T_HVX_prednot <VecPredRegs128B>; + +let Itinerary = CVI_VA, Type = TypeCVI_VA in +class T_HVX_vcmp2 <string asmString, RegisterClass RCout, RegisterClass RCin> + : CVI_VA_Resource1 <(outs RCout:$dst), (ins RCin:$src1, RCin:$src2), + asmString >; + +multiclass T_HVX_vcmp2 <string asmString> { + def NAME : T_HVX_vcmp2 <asmString, VecPredRegs, VectorRegs>; + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_vcmp2 <asmString, VecPredRegs128B, VectorRegs128B>; +} + +defm V6_veqb : T_HVX_vcmp2 <"$dst = vcmp.eq($src1.b,$src2.b)">, V6_veqb_enc; +defm V6_veqh : T_HVX_vcmp2 <"$dst = vcmp.eq($src1.h,$src2.h)">, V6_veqh_enc; +defm V6_veqw : T_HVX_vcmp2 <"$dst = vcmp.eq($src1.w,$src2.w)">, V6_veqw_enc; +defm V6_vgtb : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.b,$src2.b)">, V6_vgtb_enc; +defm V6_vgth : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.h,$src2.h)">, V6_vgth_enc; +defm V6_vgtw : T_HVX_vcmp2 <"$dst = 
vcmp.gt($src1.w,$src2.w)">, V6_vgtw_enc; +defm V6_vgtub : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.ub,$src2.ub)">, V6_vgtub_enc; +defm V6_vgtuh : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.uh,$src2.uh)">, V6_vgtuh_enc; +defm V6_vgtuw : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.uw,$src2.uw)">, V6_vgtuw_enc; + +let isAccumulator = 1, hasNewValue = 1, hasSideEffects = 0 in +class T_V6_vandqrt_acc <RegisterClass RCout, RegisterClass RCin> + : CVI_VX_Resource_late<(outs RCout:$dst), + (ins RCout:$_src_, RCin:$src1, IntRegs:$src2), + "$dst |= vand($src1,$src2)", [], "$dst = $_src_">, V6_vandqrt_acc_enc; + +def V6_vandqrt_acc : T_V6_vandqrt_acc <VectorRegs, VecPredRegs>; +let isCodeGenOnly = 1 in +def V6_vandqrt_acc_128B : T_V6_vandqrt_acc <VectorRegs128B, VecPredRegs128B>; + +let isAccumulator = 1 in +class T_V6_vandvrt_acc <RegisterClass RCout, RegisterClass RCin> + : CVI_VX_Resource_late<(outs RCout:$dst), + (ins RCout:$_src_, RCin:$src1, IntRegs:$src2), + "$dst |= vand($src1,$src2)", [], "$dst = $_src_">, V6_vandvrt_acc_enc; + +def V6_vandvrt_acc : T_V6_vandvrt_acc <VecPredRegs, VectorRegs>; +let isCodeGenOnly = 1 in +def V6_vandvrt_acc_128B : T_V6_vandvrt_acc <VecPredRegs128B, VectorRegs128B>; + +let hasNewValue = 1, hasSideEffects = 0 in +class T_V6_vandqrt <RegisterClass RCout, RegisterClass RCin> + : CVI_VX_Resource_late<(outs RCout:$dst), + (ins RCin:$src1, IntRegs:$src2), + "$dst = vand($src1,$src2)" >, V6_vandqrt_enc; + +def V6_vandqrt : T_V6_vandqrt <VectorRegs, VecPredRegs>; +let isCodeGenOnly = 1 in +def V6_vandqrt_128B : T_V6_vandqrt <VectorRegs128B, VecPredRegs128B>; + +let hasNewValue = 1, hasSideEffects = 0 in +class T_V6_lvsplatw <RegisterClass RC> + : CVI_VX_Resource_late<(outs RC:$dst), (ins IntRegs:$src1), + "$dst = vsplat($src1)" >, V6_lvsplatw_enc; + +def V6_lvsplatw : T_V6_lvsplatw <VectorRegs>; +let isCodeGenOnly = 1 in +def V6_lvsplatw_128B : T_V6_lvsplatw <VectorRegs128B>; + + +let hasNewValue = 1 in +class T_V6_vinsertwr <RegisterClass RC> + : CVI_VX_Resource_late<(outs RC:$dst), (ins RC:$_src_, IntRegs:$src1), + "$dst.w = vinsert($src1)", [], "$dst = $_src_">, + V6_vinsertwr_enc; + +def V6_vinsertwr : T_V6_vinsertwr <VectorRegs>; +let isCodeGenOnly = 1 in +def V6_vinsertwr_128B : T_V6_vinsertwr <VectorRegs128B>; + + +let Itinerary = CVI_VP_LONG, Type = TypeCVI_VP in +class T_V6_pred_scalar2 <RegisterClass RC> + : CVI_VA_Resource1<(outs RC:$dst), (ins IntRegs:$src1), + "$dst = vsetq($src1)">, V6_pred_scalar2_enc; + +def V6_pred_scalar2 : T_V6_pred_scalar2 <VecPredRegs>; +let isCodeGenOnly = 1 in +def V6_pred_scalar2_128B : T_V6_pred_scalar2 <VecPredRegs128B>; + +class T_V6_vandvrt <RegisterClass RCout, RegisterClass RCin> + : CVI_VX_Resource_late<(outs RCout:$dst), (ins RCin:$src1, IntRegs:$src2), + "$dst = vand($src1,$src2)">, V6_vandvrt_enc; + +def V6_vandvrt : T_V6_vandvrt <VecPredRegs, VectorRegs>; +let isCodeGenOnly = 1 in +def V6_vandvrt_128B : T_V6_vandvrt <VecPredRegs128B, VectorRegs128B>; + +let validSubTargets = HasV60SubT in +class T_HVX_rol <string asmString, RegisterClass RC, Operand ImmOp > + : SInst2 <(outs RC:$dst), (ins RC:$src1, ImmOp:$src2), asmString>; + +class T_HVX_rol_R <string asmString> + : T_HVX_rol <asmString, IntRegs, u5Imm>; +class T_HVX_rol_P <string asmString> + : T_HVX_rol <asmString, DoubleRegs, u6Imm>; + +def S6_rol_i_p : T_HVX_rol_P <"$dst = rol($src1,#$src2)">, S6_rol_i_p_enc; +let hasNewValue = 1, opNewValue = 0 in +def S6_rol_i_r : T_HVX_rol_R <"$dst = rol($src1,#$src2)">, S6_rol_i_r_enc; + +let validSubTargets = HasV60SubT in +class T_HVX_rol_acc 
<string asmString, RegisterClass RC, Operand ImmOp> + : SInst2 <(outs RC:$dst), (ins RC:$_src_, RC:$src1, ImmOp:$src2), + asmString, [], "$dst = $_src_" >; + +class T_HVX_rol_acc_P <string asmString> + : T_HVX_rol_acc <asmString, DoubleRegs, u6Imm>; + +class T_HVX_rol_acc_R <string asmString> + : T_HVX_rol_acc <asmString, IntRegs, u5Imm>; + +def S6_rol_i_p_nac : + T_HVX_rol_acc_P <"$dst -= rol($src1,#$src2)">, S6_rol_i_p_nac_enc; +def S6_rol_i_p_acc : + T_HVX_rol_acc_P <"$dst += rol($src1,#$src2)">, S6_rol_i_p_acc_enc; +def S6_rol_i_p_and : + T_HVX_rol_acc_P <"$dst &= rol($src1,#$src2)">, S6_rol_i_p_and_enc; +def S6_rol_i_p_or : + T_HVX_rol_acc_P <"$dst |= rol($src1,#$src2)">, S6_rol_i_p_or_enc; +def S6_rol_i_p_xacc : + T_HVX_rol_acc_P<"$dst ^= rol($src1,#$src2)">, S6_rol_i_p_xacc_enc; + +let hasNewValue = 1, opNewValue = 0 in { +def S6_rol_i_r_nac : + T_HVX_rol_acc_R <"$dst -= rol($src1,#$src2)">, S6_rol_i_r_nac_enc; +def S6_rol_i_r_acc : + T_HVX_rol_acc_R <"$dst += rol($src1,#$src2)">, S6_rol_i_r_acc_enc; +def S6_rol_i_r_and : + T_HVX_rol_acc_R <"$dst &= rol($src1,#$src2)">, S6_rol_i_r_and_enc; +def S6_rol_i_r_or : + T_HVX_rol_acc_R <"$dst |= rol($src1,#$src2)">, S6_rol_i_r_or_enc; +def S6_rol_i_r_xacc : + T_HVX_rol_acc_R <"$dst ^= rol($src1,#$src2)">, S6_rol_i_r_xacc_enc; +} + +let isSolo = 1, Itinerary = LD_tc_ld_SLOT0, Type = TypeLD in +class T_V6_extractw <RegisterClass RC> + : LD1Inst <(outs IntRegs:$dst), (ins RC:$src1, IntRegs:$src2), + "$dst = vextract($src1,$src2)">, V6_extractw_enc; + +def V6_extractw : T_V6_extractw <VectorRegs>; +let isCodeGenOnly = 1 in +def V6_extractw_128B : T_V6_extractw <VectorRegs128B>; + +let Itinerary = ST_tc_st_SLOT0, validSubTargets = HasV55SubT in +class T_sys0op <string asmString> + : ST1Inst <(outs), (ins), asmString>; + +let isSolo = 1, validSubTargets = HasV55SubT in { +def Y5_l2gunlock : T_sys0op <"l2gunlock">, Y5_l2gunlock_enc; +def Y5_l2gclean : T_sys0op <"l2gclean">, Y5_l2gclean_enc; +def Y5_l2gcleaninv : T_sys0op <"l2gcleaninv">, Y5_l2gcleaninv_enc; +} + +class T_sys1op <string asmString, RegisterClass RC> + : ST1Inst <(outs), (ins RC:$src1), asmString>; + +class T_sys1op_R <string asmString> : T_sys1op <asmString, IntRegs>; +class T_sys1op_P <string asmString> : T_sys1op <asmString, DoubleRegs>; + +let isSoloAX = 1, validSubTargets = HasV55SubT in +def Y5_l2unlocka : T_sys1op_R <"l2unlocka($src1)">, Y5_l2unlocka_enc; + +let isSolo = 1, validSubTargets = HasV60SubT in { +def Y6_l2gcleanpa : T_sys1op_P <"l2gclean($src1)">, Y6_l2gcleanpa_enc; +def Y6_l2gcleaninvpa : T_sys1op_P <"l2gcleaninv($src1)">, Y6_l2gcleaninvpa_enc; +} + +let Itinerary = ST_tc_3stall_SLOT0, isPredicateLate = 1, isSoloAX = 1, + validSubTargets = HasV55SubT in +def Y5_l2locka : ST1Inst <(outs PredRegs:$dst), (ins IntRegs:$src1), + "$dst = l2locka($src1)">, Y5_l2locka_enc; + +// not defined on etc side. why? 
+// defm S2_cabacencbin : _VV <"Rdd=encbin(Rss,$src2,Pu)">, S2_cabacencbin_enc; + +let Defs = [USR_OVF], Itinerary = M_tc_3stall_SLOT23, isPredicateLate = 1, + hasSideEffects = 0, +validSubTargets = HasV55SubT in +def A5_ACS : MInst2 <(outs DoubleRegs:$dst1, PredRegs:$dst2), + (ins DoubleRegs:$_src_, DoubleRegs:$src1, DoubleRegs:$src2), + "$dst1,$dst2 = vacsh($src1,$src2)", [], + "$dst1 = $_src_" >, Requires<[HasV55T]>, A5_ACS_enc; + +let Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV, hasNewValue = 1, + hasSideEffects = 0 in +class T_HVX_alu2 <string asmString, RegisterClass RCout, RegisterClass RCin1, + RegisterClass RCin2> + : CVI_VA_Resource1<(outs RCout:$dst), + (ins RCin1:$src1, RCin2:$src2, RCin2:$src3), asmString>; + +multiclass T_HVX_alu2 <string asmString, RegisterClass RC > { + def NAME : T_HVX_alu2 <asmString, RC, VecPredRegs, VectorRegs>; + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_alu2 <asmString, !cast<RegisterClass>(RC#"128B"), + VecPredRegs128B, VectorRegs128B>; +} + +multiclass T_HVX_alu2_V <string asmString> : + T_HVX_alu2 <asmString, VectorRegs>; + +multiclass T_HVX_alu2_W <string asmString> : + T_HVX_alu2 <asmString, VecDblRegs>; + +defm V6_vswap : T_HVX_alu2_W <"$dst = vswap($src1,$src2,$src3)">, V6_vswap_enc; + +let Itinerary = CVI_VA, Type = TypeCVI_VA, hasNewValue = 1, + hasSideEffects = 0 in +defm V6_vmux : T_HVX_alu2_V <"$dst = vmux($src1,$src2,$src3)">, V6_vmux_enc; + +class T_HVX_vlutb <string asmString, RegisterClass RCout, RegisterClass RCin> + : CVI_VA_Resource1<(outs RCout:$dst), + (ins RCin:$src1, RCin:$src2, IntRegsLow8:$src3), asmString>; + +multiclass T_HVX_vlutb <string asmString, RegisterClass RCout, + RegisterClass RCin> { + def NAME : T_HVX_vlutb <asmString, RCout, RCin>; + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_vlutb <asmString, !cast<RegisterClass>(RCout#"128B"), + !cast<RegisterClass>(RCin#"128B")>; +} + +multiclass T_HVX_vlutb_V <string asmString> : + T_HVX_vlutb <asmString, VectorRegs, VectorRegs>; + +multiclass T_HVX_vlutb_W <string asmString> : + T_HVX_vlutb <asmString, VecDblRegs, VectorRegs>; + +let Itinerary = CVI_VP_VS_LONG, Type = TypeCVI_VP_VS, isAccumulator = 1 in +class T_HVX_vlutb_acc <string asmString, RegisterClass RCout, + RegisterClass RCin> + : CVI_VA_Resource1<(outs RCout:$dst), + (ins RCout:$_src_, RCin:$src1, RCin:$src2, IntRegsLow8:$src3), + asmString, [], "$dst = $_src_">; + +multiclass T_HVX_vlutb_acc <string asmString, RegisterClass RCout, + RegisterClass RCin> { + def NAME : T_HVX_vlutb_acc <asmString, RCout, RCin>; + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_vlutb_acc<asmString, + !cast<RegisterClass>(RCout#"128B"), + !cast<RegisterClass>(RCin#"128B")>; +} + +multiclass T_HVX_vlutb_acc_V <string asmString> : + T_HVX_vlutb_acc <asmString, VectorRegs, VectorRegs>; + +multiclass T_HVX_vlutb_acc_W <string asmString> : + T_HVX_vlutb_acc <asmString, VecDblRegs, VectorRegs>; + + +let Itinerary = CVI_VP_LONG, Type = TypeCVI_VP, hasNewValue = 1 in +defm V6_vlutvvb: + T_HVX_vlutb_V <"$dst.b = vlut32($src1.b,$src2.b,$src3)">, V6_vlutvvb_enc; + +let Itinerary = CVI_VP_VS_LONG, Type = TypeCVI_VP_VS, hasNewValue = 1 in +defm V6_vlutvwh: + T_HVX_vlutb_W <"$dst.h = vlut16($src1.b,$src2.h,$src3)">, V6_vlutvwh_enc; + +let hasNewValue = 1 in { + defm V6_vlutvvb_oracc: + T_HVX_vlutb_acc_V <"$dst.b |= vlut32($src1.b,$src2.b,$src3)">, + V6_vlutvvb_oracc_enc; + defm V6_vlutvwh_oracc: + T_HVX_vlutb_acc_W <"$dst.h |= vlut16($src1.b,$src2.h,$src3)">, + V6_vlutvwh_oracc_enc; +} + +// It's a fake instruction and should not 
be defined? +def S2_cabacencbin + : SInst2<(outs DoubleRegs:$dst), + (ins DoubleRegs:$src1, DoubleRegs:$src2, PredRegs:$src3), + "$dst = encbin($src1,$src2,$src3)">, S2_cabacencbin_enc; + +// Vhist instructions +def V6_vhistq + : CVI_HIST_Resource1 <(outs), (ins VecPredRegs:$src1), + "vhist($src1)">, V6_vhistq_enc; + +def V6_vhist + : CVI_HIST_Resource1 <(outs), (ins), + "vhist" >, V6_vhist_enc; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoVector.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoVector.td new file mode 100644 index 0000000..96dd531 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoVector.td @@ -0,0 +1,526 @@ +//===- HexagonInstrInfoVector.td - Hexagon Vector Patterns -*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Hexagon Vector instructions in TableGen format. +// +//===----------------------------------------------------------------------===// + +def V2I1: PatLeaf<(v2i1 PredRegs:$R)>; +def V4I1: PatLeaf<(v4i1 PredRegs:$R)>; +def V8I1: PatLeaf<(v8i1 PredRegs:$R)>; +def V4I8: PatLeaf<(v4i8 IntRegs:$R)>; +def V2I16: PatLeaf<(v2i16 IntRegs:$R)>; +def V8I8: PatLeaf<(v8i8 DoubleRegs:$R)>; +def V4I16: PatLeaf<(v4i16 DoubleRegs:$R)>; +def V2I32: PatLeaf<(v2i32 DoubleRegs:$R)>; + + +multiclass bitconvert_32<ValueType a, ValueType b> { + def : Pat <(b (bitconvert (a IntRegs:$src))), + (b IntRegs:$src)>; + def : Pat <(a (bitconvert (b IntRegs:$src))), + (a IntRegs:$src)>; +} + +multiclass bitconvert_64<ValueType a, ValueType b> { + def : Pat <(b (bitconvert (a DoubleRegs:$src))), + (b DoubleRegs:$src)>; + def : Pat <(a (bitconvert (b DoubleRegs:$src))), + (a DoubleRegs:$src)>; +} + +multiclass bitconvert_vec<ValueType a, ValueType b> { + def : Pat <(b (bitconvert (a VectorRegs:$src))), + (b VectorRegs:$src)>; + def : Pat <(a (bitconvert (b VectorRegs:$src))), + (a VectorRegs:$src)>; +} + +multiclass bitconvert_dblvec<ValueType a, ValueType b> { + def : Pat <(b (bitconvert (a VecDblRegs:$src))), + (b VecDblRegs:$src)>; + def : Pat <(a (bitconvert (b VecDblRegs:$src))), + (a VecDblRegs:$src)>; +} + +multiclass bitconvert_predvec<ValueType a, ValueType b> { + def : Pat <(b (bitconvert (a VecPredRegs:$src))), + (b VectorRegs:$src)>; + def : Pat <(a (bitconvert (b VectorRegs:$src))), + (a VecPredRegs:$src)>; +} + +multiclass bitconvert_dblvec128B<ValueType a, ValueType b> { + def : Pat <(b (bitconvert (a VecDblRegs128B:$src))), + (b VecDblRegs128B:$src)>; + def : Pat <(a (bitconvert (b VecDblRegs128B:$src))), + (a VecDblRegs128B:$src)>; +} + +// Bit convert vector types. 
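+// Illustrative note (not part of the original file): a bitconvert between
+// same-sized vector types is a pure reinterpretation of the register bits,
+// which is why every pattern instantiated below folds the cast into a plain
+// register reuse. A C sketch (assuming <string.h> and hypothetical v8i8 /
+// v4i16 typedefs for the 64-bit payload):
+//
+//   v4i16 bitcast_v8i8_to_v4i16(v8i8 x) {
+//     v4i16 y;
+//     memcpy(&y, &x, sizeof y);   // same bits, no instruction needed
+//     return y;
+//   }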
+defm : bitconvert_32<v4i8, i32>; +defm : bitconvert_32<v2i16, i32>; +defm : bitconvert_32<v2i16, v4i8>; + +defm : bitconvert_64<v8i8, i64>; +defm : bitconvert_64<v4i16, i64>; +defm : bitconvert_64<v2i32, i64>; +defm : bitconvert_64<v8i8, v4i16>; +defm : bitconvert_64<v8i8, v2i32>; +defm : bitconvert_64<v4i16, v2i32>; + +defm : bitconvert_vec<v64i8, v16i32>; +defm : bitconvert_vec<v8i64 , v16i32>; +defm : bitconvert_vec<v32i16, v16i32>; + +defm : bitconvert_dblvec<v16i64, v128i8>; +defm : bitconvert_dblvec<v32i32, v128i8>; +defm : bitconvert_dblvec<v64i16, v128i8>; + +defm : bitconvert_dblvec128B<v64i32, v128i16>; +defm : bitconvert_dblvec128B<v256i8, v128i16>; +defm : bitconvert_dblvec128B<v32i64, v128i16>; + +defm : bitconvert_dblvec128B<v64i32, v256i8>; +defm : bitconvert_dblvec128B<v32i64, v256i8>; +defm : bitconvert_dblvec128B<v128i16, v256i8>; + +// Vector shift support. Vector shifting in Hexagon is rather different +// from internal representation of LLVM. +// LLVM assumes all shifts (in vector case) will have the form +// <VT> = SHL/SRA/SRL <VT> by <VT> +// while Hexagon has the following format: +// <VT> = SHL/SRA/SRL <VT> by <IT/i32> +// As a result, special care is needed to guarantee correctness and +// performance. +class vshift_v4i16<SDNode Op, string Str, bits<3>MajOp, bits<3>MinOp> + : S_2OpInstImm<Str, MajOp, MinOp, u4Imm, + [(set (v4i16 DoubleRegs:$dst), + (Op (v4i16 DoubleRegs:$src1), u4ImmPred:$src2))]> { + bits<4> src2; + let Inst{11-8} = src2; +} + +class vshift_v2i32<SDNode Op, string Str, bits<3>MajOp, bits<3>MinOp> + : S_2OpInstImm<Str, MajOp, MinOp, u5Imm, + [(set (v2i32 DoubleRegs:$dst), + (Op (v2i32 DoubleRegs:$src1), u5ImmPred:$src2))]> { + bits<5> src2; + let Inst{12-8} = src2; +} + +def : Pat<(v2i16 (add (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))), + (A2_svaddh IntRegs:$src1, IntRegs:$src2)>; + +def : Pat<(v2i16 (sub (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))), + (A2_svsubh IntRegs:$src1, IntRegs:$src2)>; + +def S2_asr_i_vw : vshift_v2i32<sra, "vasrw", 0b010, 0b000>; +def S2_lsr_i_vw : vshift_v2i32<srl, "vlsrw", 0b010, 0b001>; +def S2_asl_i_vw : vshift_v2i32<shl, "vaslw", 0b010, 0b010>; + +def S2_asr_i_vh : vshift_v4i16<sra, "vasrh", 0b100, 0b000>; +def S2_lsr_i_vh : vshift_v4i16<srl, "vlsrh", 0b100, 0b001>; +def S2_asl_i_vh : vshift_v4i16<shl, "vaslh", 0b100, 0b010>; + + +def HexagonVSPLATB: SDNode<"HexagonISD::VSPLATB", SDTUnaryOp>; +def HexagonVSPLATH: SDNode<"HexagonISD::VSPLATH", SDTUnaryOp>; + +// Replicate the low 8-bits from 32-bits input register into each of the +// four bytes of 32-bits destination register. +def: Pat<(v4i8 (HexagonVSPLATB I32:$Rs)), (S2_vsplatrb I32:$Rs)>; + +// Replicate the low 16-bits from 32-bits input register into each of the +// four halfwords of 64-bits destination register. 
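+// Illustrative note (not part of the original file): in plain C the two
+// splat nodes behave roughly like these helpers (assuming <stdint.h>, with
+// uint32_t/uint64_t modelling the 32-bit and 64-bit register contents):
+//
+//   uint32_t vsplatb(uint32_t r) {            // HexagonVSPLATB -> S2_vsplatrb
+//     uint32_t b = r & 0xFF;
+//     return b | (b << 8) | (b << 16) | (b << 24);
+//   }
+//   uint64_t vsplath(uint32_t r) {            // HexagonVSPLATH -> S2_vsplatrh
+//     uint64_t h = r & 0xFFFF;
+//     return h | (h << 16) | (h << 32) | (h << 48);
+//   }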
+def: Pat<(v4i16 (HexagonVSPLATH I32:$Rs)), (S2_vsplatrh I32:$Rs)>; + + +class VArith_pat <InstHexagon MI, SDNode Op, PatFrag Type> + : Pat <(Op Type:$Rss, Type:$Rtt), + (MI Type:$Rss, Type:$Rtt)>; + +def: VArith_pat <A2_vaddub, add, V8I8>; +def: VArith_pat <A2_vaddh, add, V4I16>; +def: VArith_pat <A2_vaddw, add, V2I32>; +def: VArith_pat <A2_vsubub, sub, V8I8>; +def: VArith_pat <A2_vsubh, sub, V4I16>; +def: VArith_pat <A2_vsubw, sub, V2I32>; + +def: VArith_pat <A2_and, and, V2I16>; +def: VArith_pat <A2_xor, xor, V2I16>; +def: VArith_pat <A2_or, or, V2I16>; + +def: VArith_pat <A2_andp, and, V8I8>; +def: VArith_pat <A2_andp, and, V4I16>; +def: VArith_pat <A2_andp, and, V2I32>; +def: VArith_pat <A2_orp, or, V8I8>; +def: VArith_pat <A2_orp, or, V4I16>; +def: VArith_pat <A2_orp, or, V2I32>; +def: VArith_pat <A2_xorp, xor, V8I8>; +def: VArith_pat <A2_xorp, xor, V4I16>; +def: VArith_pat <A2_xorp, xor, V2I32>; + +def: Pat<(v2i32 (sra V2I32:$b, (i64 (HexagonCOMBINE (i32 u5ImmPred:$c), + (i32 u5ImmPred:$c))))), + (S2_asr_i_vw V2I32:$b, imm:$c)>; +def: Pat<(v2i32 (srl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5ImmPred:$c), + (i32 u5ImmPred:$c))))), + (S2_lsr_i_vw V2I32:$b, imm:$c)>; +def: Pat<(v2i32 (shl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5ImmPred:$c), + (i32 u5ImmPred:$c))))), + (S2_asl_i_vw V2I32:$b, imm:$c)>; + +def: Pat<(v4i16 (sra V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4ImmPred:$c)))))), + (S2_asr_i_vh V4I16:$b, imm:$c)>; +def: Pat<(v4i16 (srl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4ImmPred:$c)))))), + (S2_lsr_i_vh V4I16:$b, imm:$c)>; +def: Pat<(v4i16 (shl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4ImmPred:$c)))))), + (S2_asl_i_vh V4I16:$b, imm:$c)>; + + +def SDTHexagon_v2i32_v2i32_i32 : SDTypeProfile<1, 2, + [SDTCisSameAs<0, 1>, SDTCisVT<0, v2i32>, SDTCisInt<2>]>; +def SDTHexagon_v4i16_v4i16_i32 : SDTypeProfile<1, 2, + [SDTCisSameAs<0, 1>, SDTCisVT<0, v4i16>, SDTCisInt<2>]>; + +def HexagonVSRAW: SDNode<"HexagonISD::VSRAW", SDTHexagon_v2i32_v2i32_i32>; +def HexagonVSRAH: SDNode<"HexagonISD::VSRAH", SDTHexagon_v4i16_v4i16_i32>; +def HexagonVSRLW: SDNode<"HexagonISD::VSRLW", SDTHexagon_v2i32_v2i32_i32>; +def HexagonVSRLH: SDNode<"HexagonISD::VSRLH", SDTHexagon_v4i16_v4i16_i32>; +def HexagonVSHLW: SDNode<"HexagonISD::VSHLW", SDTHexagon_v2i32_v2i32_i32>; +def HexagonVSHLH: SDNode<"HexagonISD::VSHLH", SDTHexagon_v4i16_v4i16_i32>; + +def: Pat<(v2i32 (HexagonVSRAW V2I32:$Rs, u5ImmPred:$u5)), + (S2_asr_i_vw V2I32:$Rs, imm:$u5)>; +def: Pat<(v4i16 (HexagonVSRAH V4I16:$Rs, u4ImmPred:$u4)), + (S2_asr_i_vh V4I16:$Rs, imm:$u4)>; +def: Pat<(v2i32 (HexagonVSRLW V2I32:$Rs, u5ImmPred:$u5)), + (S2_lsr_i_vw V2I32:$Rs, imm:$u5)>; +def: Pat<(v4i16 (HexagonVSRLH V4I16:$Rs, u4ImmPred:$u4)), + (S2_lsr_i_vh V4I16:$Rs, imm:$u4)>; +def: Pat<(v2i32 (HexagonVSHLW V2I32:$Rs, u5ImmPred:$u5)), + (S2_asl_i_vw V2I32:$Rs, imm:$u5)>; +def: Pat<(v4i16 (HexagonVSHLH V4I16:$Rs, u4ImmPred:$u4)), + (S2_asl_i_vh V4I16:$Rs, imm:$u4)>; + +// Vector shift words by register +def S2_asr_r_vw : T_S3op_shiftVect < "vasrw", 0b00, 0b00>; +def S2_lsr_r_vw : T_S3op_shiftVect < "vlsrw", 0b00, 0b01>; +def S2_asl_r_vw : T_S3op_shiftVect < "vaslw", 0b00, 0b10>; +def S2_lsl_r_vw : T_S3op_shiftVect < "vlslw", 0b00, 0b11>; + +// Vector shift halfwords by register +def S2_asr_r_vh : T_S3op_shiftVect < "vasrh", 0b01, 0b00>; +def S2_lsr_r_vh : T_S3op_shiftVect < "vlsrh", 0b01, 0b01>; +def S2_asl_r_vh : T_S3op_shiftVect < "vaslh", 0b01, 0b10>; +def S2_lsl_r_vh : T_S3op_shiftVect < "vlslh", 0b01, 0b11>; + +class vshift_rr_pat<InstHexagon MI, SDNode Op, PatFrag 
Value> + : Pat <(Op Value:$Rs, I32:$Rt), + (MI Value:$Rs, I32:$Rt)>; + +def: vshift_rr_pat <S2_asr_r_vw, HexagonVSRAW, V2I32>; +def: vshift_rr_pat <S2_asr_r_vh, HexagonVSRAH, V4I16>; +def: vshift_rr_pat <S2_lsr_r_vw, HexagonVSRLW, V2I32>; +def: vshift_rr_pat <S2_lsr_r_vh, HexagonVSRLH, V4I16>; +def: vshift_rr_pat <S2_asl_r_vw, HexagonVSHLW, V2I32>; +def: vshift_rr_pat <S2_asl_r_vh, HexagonVSHLH, V4I16>; + + +def SDTHexagonVecCompare_v8i8 : SDTypeProfile<1, 2, + [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v8i8>]>; +def SDTHexagonVecCompare_v4i16 : SDTypeProfile<1, 2, + [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v4i16>]>; +def SDTHexagonVecCompare_v2i32 : SDTypeProfile<1, 2, + [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v2i32>]>; + +def HexagonVCMPBEQ: SDNode<"HexagonISD::VCMPBEQ", SDTHexagonVecCompare_v8i8>; +def HexagonVCMPBGT: SDNode<"HexagonISD::VCMPBGT", SDTHexagonVecCompare_v8i8>; +def HexagonVCMPBGTU: SDNode<"HexagonISD::VCMPBGTU", SDTHexagonVecCompare_v8i8>; +def HexagonVCMPHEQ: SDNode<"HexagonISD::VCMPHEQ", SDTHexagonVecCompare_v4i16>; +def HexagonVCMPHGT: SDNode<"HexagonISD::VCMPHGT", SDTHexagonVecCompare_v4i16>; +def HexagonVCMPHGTU: SDNode<"HexagonISD::VCMPHGTU", SDTHexagonVecCompare_v4i16>; +def HexagonVCMPWEQ: SDNode<"HexagonISD::VCMPWEQ", SDTHexagonVecCompare_v2i32>; +def HexagonVCMPWGT: SDNode<"HexagonISD::VCMPWGT", SDTHexagonVecCompare_v2i32>; +def HexagonVCMPWGTU: SDNode<"HexagonISD::VCMPWGTU", SDTHexagonVecCompare_v2i32>; + + +class vcmp_i1_pat<InstHexagon MI, SDNode Op, PatFrag Value> + : Pat <(i1 (Op Value:$Rs, Value:$Rt)), + (MI Value:$Rs, Value:$Rt)>; + +def: vcmp_i1_pat<A2_vcmpbeq, HexagonVCMPBEQ, V8I8>; +def: vcmp_i1_pat<A4_vcmpbgt, HexagonVCMPBGT, V8I8>; +def: vcmp_i1_pat<A2_vcmpbgtu, HexagonVCMPBGTU, V8I8>; + +def: vcmp_i1_pat<A2_vcmpheq, HexagonVCMPHEQ, V4I16>; +def: vcmp_i1_pat<A2_vcmphgt, HexagonVCMPHGT, V4I16>; +def: vcmp_i1_pat<A2_vcmphgtu, HexagonVCMPHGTU, V4I16>; + +def: vcmp_i1_pat<A2_vcmpweq, HexagonVCMPWEQ, V2I32>; +def: vcmp_i1_pat<A2_vcmpwgt, HexagonVCMPWGT, V2I32>; +def: vcmp_i1_pat<A2_vcmpwgtu, HexagonVCMPWGTU, V2I32>; + + +class vcmp_vi1_pat<InstHexagon MI, PatFrag Op, PatFrag InVal, ValueType OutTy> + : Pat <(OutTy (Op InVal:$Rs, InVal:$Rt)), + (MI InVal:$Rs, InVal:$Rt)>; + +def: vcmp_vi1_pat<A2_vcmpweq, seteq, V2I32, v2i1>; +def: vcmp_vi1_pat<A2_vcmpwgt, setgt, V2I32, v2i1>; +def: vcmp_vi1_pat<A2_vcmpwgtu, setugt, V2I32, v2i1>; + +def: vcmp_vi1_pat<A2_vcmpheq, seteq, V4I16, v4i1>; +def: vcmp_vi1_pat<A2_vcmphgt, setgt, V4I16, v4i1>; +def: vcmp_vi1_pat<A2_vcmphgtu, setugt, V4I16, v4i1>; + + +// Hexagon doesn't have a vector multiply with C semantics. +// Instead, generate a pseudo instruction that gets expaneded into two +// scalar MPYI instructions. +// This is expanded by ExpandPostRAPseudos. +let isPseudo = 1 in +def VMULW : PseudoM<(outs DoubleRegs:$Rd), + (ins DoubleRegs:$Rs, DoubleRegs:$Rt), + ".error \"Should never try to emit VMULW\"", + [(set V2I32:$Rd, (mul V2I32:$Rs, V2I32:$Rt))]>; + +let isPseudo = 1 in +def VMULW_ACC : PseudoM<(outs DoubleRegs:$Rd), + (ins DoubleRegs:$Rx, DoubleRegs:$Rs, DoubleRegs:$Rt), + ".error \"Should never try to emit VMULW_ACC\"", + [(set V2I32:$Rd, (add V2I32:$Rx, (mul V2I32:$Rs, V2I32:$Rt)))], + "$Rd = $Rx">; + +// Adds two v4i8: Hexagon does not have an insn for this one, so we +// use the double add v8i8, and use only the low part of the result. 
+def: Pat<(v4i8 (add (v4i8 IntRegs:$Rs), (v4i8 IntRegs:$Rt))), + (LoReg (A2_vaddub (Zext64 $Rs), (Zext64 $Rt)))>; + +// Subtract two v4i8: Hexagon does not have an insn for this one, so we +// use the double sub v8i8, and use only the low part of the result. +def: Pat<(v4i8 (sub (v4i8 IntRegs:$Rs), (v4i8 IntRegs:$Rt))), + (LoReg (A2_vsubub (Zext64 $Rs), (Zext64 $Rt)))>; + +// +// No 32 bit vector mux. +// +def: Pat<(v4i8 (select I1:$Pu, V4I8:$Rs, V4I8:$Rt)), + (LoReg (C2_vmux I1:$Pu, (Zext64 $Rs), (Zext64 $Rt)))>; +def: Pat<(v2i16 (select I1:$Pu, V2I16:$Rs, V2I16:$Rt)), + (LoReg (C2_vmux I1:$Pu, (Zext64 $Rs), (Zext64 $Rt)))>; + +// +// 64-bit vector mux. +// +def: Pat<(v8i8 (vselect V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)), + (C2_vmux V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)>; +def: Pat<(v4i16 (vselect V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)), + (C2_vmux V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)>; +def: Pat<(v2i32 (vselect V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)), + (C2_vmux V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)>; + +// +// No 32 bit vector compare. +// +def: Pat<(i1 (seteq V4I8:$Rs, V4I8:$Rt)), + (A2_vcmpbeq (Zext64 $Rs), (Zext64 $Rt))>; +def: Pat<(i1 (setgt V4I8:$Rs, V4I8:$Rt)), + (A4_vcmpbgt (Zext64 $Rs), (Zext64 $Rt))>; +def: Pat<(i1 (setugt V4I8:$Rs, V4I8:$Rt)), + (A2_vcmpbgtu (Zext64 $Rs), (Zext64 $Rt))>; + +def: Pat<(i1 (seteq V2I16:$Rs, V2I16:$Rt)), + (A2_vcmpheq (Zext64 $Rs), (Zext64 $Rt))>; +def: Pat<(i1 (setgt V2I16:$Rs, V2I16:$Rt)), + (A2_vcmphgt (Zext64 $Rs), (Zext64 $Rt))>; +def: Pat<(i1 (setugt V2I16:$Rs, V2I16:$Rt)), + (A2_vcmphgtu (Zext64 $Rs), (Zext64 $Rt))>; + + +class InvertCmp_pat<InstHexagon InvMI, PatFrag CmpOp, PatFrag Value, + ValueType CmpTy> + : Pat<(CmpTy (CmpOp Value:$Rs, Value:$Rt)), + (InvMI Value:$Rt, Value:$Rs)>; + +// Map from a compare operation to the corresponding instruction with the +// order of operands reversed, e.g. x > y --> cmp.lt(y,x). +def: InvertCmp_pat<A4_vcmpbgt, setlt, V8I8, i1>; +def: InvertCmp_pat<A4_vcmpbgt, setlt, V8I8, v8i1>; +def: InvertCmp_pat<A2_vcmphgt, setlt, V4I16, i1>; +def: InvertCmp_pat<A2_vcmphgt, setlt, V4I16, v4i1>; +def: InvertCmp_pat<A2_vcmpwgt, setlt, V2I32, i1>; +def: InvertCmp_pat<A2_vcmpwgt, setlt, V2I32, v2i1>; + +def: InvertCmp_pat<A2_vcmpbgtu, setult, V8I8, i1>; +def: InvertCmp_pat<A2_vcmpbgtu, setult, V8I8, v8i1>; +def: InvertCmp_pat<A2_vcmphgtu, setult, V4I16, i1>; +def: InvertCmp_pat<A2_vcmphgtu, setult, V4I16, v4i1>; +def: InvertCmp_pat<A2_vcmpwgtu, setult, V2I32, i1>; +def: InvertCmp_pat<A2_vcmpwgtu, setult, V2I32, v2i1>; + +// Map from vcmpne(Rss) -> !vcmpew(Rss). +// rs != rt -> !(rs == rt). 
+def: Pat<(v2i1 (setne V2I32:$Rs, V2I32:$Rt)), + (C2_not (v2i1 (A2_vcmpbeq V2I32:$Rs, V2I32:$Rt)))>; + + +// Truncate: from vector B copy all 'E'ven 'B'yte elements: +// A[0] = B[0]; A[1] = B[2]; A[2] = B[4]; A[3] = B[6]; +def: Pat<(v4i8 (trunc V4I16:$Rs)), + (S2_vtrunehb V4I16:$Rs)>; + +// Truncate: from vector B copy all 'O'dd 'B'yte elements: +// A[0] = B[1]; A[1] = B[3]; A[2] = B[5]; A[3] = B[7]; +// S2_vtrunohb + +// Truncate: from vectors B and C copy all 'E'ven 'H'alf-word elements: +// A[0] = B[0]; A[1] = B[2]; A[2] = C[0]; A[3] = C[2]; +// S2_vtruneh + +def: Pat<(v2i16 (trunc V2I32:$Rs)), + (LoReg (S2_packhl (HiReg $Rs), (LoReg $Rs)))>; + + +def HexagonVSXTBH : SDNode<"HexagonISD::VSXTBH", SDTUnaryOp>; +def HexagonVSXTBW : SDNode<"HexagonISD::VSXTBW", SDTUnaryOp>; + +def: Pat<(i64 (HexagonVSXTBH I32:$Rs)), (S2_vsxtbh I32:$Rs)>; +def: Pat<(i64 (HexagonVSXTBW I32:$Rs)), (S2_vsxthw I32:$Rs)>; + +def: Pat<(v4i16 (zext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>; +def: Pat<(v2i32 (zext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>; +def: Pat<(v4i16 (anyext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>; +def: Pat<(v2i32 (anyext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>; +def: Pat<(v4i16 (sext V4I8:$Rs)), (S2_vsxtbh V4I8:$Rs)>; +def: Pat<(v2i32 (sext V2I16:$Rs)), (S2_vsxthw V2I16:$Rs)>; + +// Sign extends a v2i8 into a v2i32. +def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i8)), + (A2_combinew (A2_sxtb (HiReg $Rs)), (A2_sxtb (LoReg $Rs)))>; + +// Sign extends a v2i16 into a v2i32. +def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i16)), + (A2_combinew (A2_sxth (HiReg $Rs)), (A2_sxth (LoReg $Rs)))>; + + +// Multiplies two v2i16 and returns a v2i32. We are using here the +// saturating multiply, as hexagon does not provide a non saturating +// vector multiply, and saturation does not impact the result that is +// in double precision of the operands. + +// Multiplies two v2i16 vectors: as Hexagon does not have a multiply +// with the C semantics for this one, this pattern uses the half word +// multiply vmpyh that takes two v2i16 and returns a v2i32. This is +// then truncated to fit this back into a v2i16 and to simulate the +// wrap around semantics for unsigned in C. +def vmpyh: OutPatFrag<(ops node:$Rs, node:$Rt), + (M2_vmpy2s_s0 (i32 $Rs), (i32 $Rt))>; + +def: Pat<(v2i16 (mul V2I16:$Rs, V2I16:$Rt)), + (LoReg (S2_vtrunewh (v2i32 (A2_combineii 0, 0)), + (v2i32 (vmpyh V2I16:$Rs, V2I16:$Rt))))>; + +// Multiplies two v4i16 vectors. +def: Pat<(v4i16 (mul V4I16:$Rs, V4I16:$Rt)), + (S2_vtrunewh (vmpyh (HiReg $Rs), (HiReg $Rt)), + (vmpyh (LoReg $Rs), (LoReg $Rt)))>; + +def VMPYB_no_V5: OutPatFrag<(ops node:$Rs, node:$Rt), + (S2_vtrunewh (vmpyh (HiReg (S2_vsxtbh $Rs)), (HiReg (S2_vsxtbh $Rt))), + (vmpyh (LoReg (S2_vsxtbh $Rs)), (LoReg (S2_vsxtbh $Rt))))>; + +// Multiplies two v4i8 vectors. +def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)), + (S2_vtrunehb (M5_vmpybsu V4I8:$Rs, V4I8:$Rt))>, + Requires<[HasV5T]>; + +def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)), + (S2_vtrunehb (VMPYB_no_V5 V4I8:$Rs, V4I8:$Rt))>; + +// Multiplies two v8i8 vectors. 
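+// Illustrative note (not part of the original file): the vector multiply
+// patterns in this block (v2i16, v4i16, v4i8, v8i8) all emulate C
+// wrap-around semantics; for the v8i8 case below, modelled as a uint64_t
+// (assuming <stdint.h>), the intended per-byte result is:
+//
+//   uint64_t mul_v8i8(uint64_t a, uint64_t b) {
+//     uint64_t r = 0;
+//     for (int i = 0; i < 8; ++i) {
+//       uint8_t p = (uint8_t)((a >> (8 * i)) * (b >> (8 * i)));
+//       r |= (uint64_t)p << (8 * i);
+//     }
+//     return r;
+//   }
+//
+// The widening multiplies (vmpyh, M5_vmpybsu) followed by the truncating
+// shuffles (S2_vtrunewh, S2_vtrunehb) keep exactly those low bits.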
+def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)), + (A2_combinew (S2_vtrunehb (M5_vmpybsu (HiReg $Rs), (HiReg $Rt))), + (S2_vtrunehb (M5_vmpybsu (LoReg $Rs), (LoReg $Rt))))>, + Requires<[HasV5T]>; + +def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)), + (A2_combinew (S2_vtrunehb (VMPYB_no_V5 (HiReg $Rs), (HiReg $Rt))), + (S2_vtrunehb (VMPYB_no_V5 (LoReg $Rs), (LoReg $Rt))))>; + + +class shuffler<SDNode Op, string Str> + : SInst<(outs DoubleRegs:$a), (ins DoubleRegs:$b, DoubleRegs:$c), + "$a = " # Str # "($b, $c)", + [(set (i64 DoubleRegs:$a), + (i64 (Op (i64 DoubleRegs:$b), (i64 DoubleRegs:$c))))], + "", S_3op_tc_1_SLOT23>; + +def SDTHexagonBinOp64 : SDTypeProfile<1, 2, + [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<0, i64>]>; + +def HexagonSHUFFEB: SDNode<"HexagonISD::SHUFFEB", SDTHexagonBinOp64>; +def HexagonSHUFFEH: SDNode<"HexagonISD::SHUFFEH", SDTHexagonBinOp64>; +def HexagonSHUFFOB: SDNode<"HexagonISD::SHUFFOB", SDTHexagonBinOp64>; +def HexagonSHUFFOH: SDNode<"HexagonISD::SHUFFOH", SDTHexagonBinOp64>; + +class ShufflePat<InstHexagon MI, SDNode Op> + : Pat<(i64 (Op DoubleRegs:$src1, DoubleRegs:$src2)), + (i64 (MI DoubleRegs:$src1, DoubleRegs:$src2))>; + +// Shuffles even bytes for i=0..3: A[2*i].b = C[2*i].b; A[2*i+1].b = B[2*i].b +def: ShufflePat<S2_shuffeb, HexagonSHUFFEB>; + +// Shuffles odd bytes for i=0..3: A[2*i].b = C[2*i+1].b; A[2*i+1].b = B[2*i+1].b +def: ShufflePat<S2_shuffob, HexagonSHUFFOB>; + +// Shuffles even half for i=0,1: A[2*i].h = C[2*i].h; A[2*i+1].h = B[2*i].h +def: ShufflePat<S2_shuffeh, HexagonSHUFFEH>; + +// Shuffles odd half for i=0,1: A[2*i].h = C[2*i+1].h; A[2*i+1].h = B[2*i+1].h +def: ShufflePat<S2_shuffoh, HexagonSHUFFOH>; + + +// Truncated store from v4i16 to v4i8. +def truncstorev4i8: PatFrag<(ops node:$val, node:$ptr), + (truncstore node:$val, node:$ptr), + [{ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4i8; }]>; + +// Truncated store from v2i32 to v2i16. +def truncstorev2i16: PatFrag<(ops node:$val, node:$ptr), + (truncstore node:$val, node:$ptr), + [{ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v2i16; }]>; + +def: Pat<(truncstorev2i16 V2I32:$Rs, I32:$Rt), + (S2_storeri_io I32:$Rt, 0, (LoReg (S2_packhl (HiReg $Rs), + (LoReg $Rs))))>; + +def: Pat<(truncstorev4i8 V4I16:$Rs, I32:$Rt), + (S2_storeri_io I32:$Rt, 0, (S2_vtrunehb V4I16:$Rs))>; + + +// Zero and sign extended load from v2i8 into v2i16. +def zextloadv2i8: PatFrag<(ops node:$ptr), (zextload node:$ptr), + [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8; }]>; + +def sextloadv2i8: PatFrag<(ops node:$ptr), (sextload node:$ptr), + [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8; }]>; + +def: Pat<(v2i16 (zextloadv2i8 I32:$Rs)), + (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Rs, 0))))>; + +def: Pat<(v2i16 (sextloadv2i8 I32:$Rs)), + (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0))))>; + +def: Pat<(v2i32 (zextloadv2i8 I32:$Rs)), + (S2_vzxthw (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Rs, 0)))))>; + +def: Pat<(v2i32 (sextloadv2i8 I32:$Rs)), + (S2_vsxthw (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0)))))>; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td new file mode 100644 index 0000000..b207aaf --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td @@ -0,0 +1,1293 @@ +//===-- HexagonIntrinsics.td - Instruction intrinsics ------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This is populated based on the following specs: +// Hexagon V2 Architecture +// Application-Level Specification +// 80-V9418-8 Rev. B +// March 4, 2008 +//===----------------------------------------------------------------------===// + +class T_I_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID imm:$Is), + (MI imm:$Is)>; + +class T_R_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID I32:$Rs), + (MI I32:$Rs)>; + +class T_P_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID I64:$Rs), + (MI DoubleRegs:$Rs)>; + +class T_II_pat <InstHexagon MI, Intrinsic IntID, PatFrag Imm1, PatFrag Imm2> + : Pat<(IntID Imm1:$Is, Imm2:$It), + (MI Imm1:$Is, Imm2:$It)>; + +class T_RI_pat <InstHexagon MI, Intrinsic IntID, PatLeaf ImmPred = PatLeaf<(i32 imm)>> + : Pat<(IntID I32:$Rs, ImmPred:$It), + (MI I32:$Rs, ImmPred:$It)>; + +class T_IR_pat <InstHexagon MI, Intrinsic IntID, PatFrag ImmPred = PatLeaf<(i32 imm)>> + : Pat<(IntID ImmPred:$Is, I32:$Rt), + (MI ImmPred:$Is, I32:$Rt)>; + +class T_PI_pat <InstHexagon MI, Intrinsic IntID> + : Pat<(IntID I64:$Rs, imm:$It), + (MI DoubleRegs:$Rs, imm:$It)>; + +class T_RP_pat <InstHexagon MI, Intrinsic IntID> + : Pat<(IntID I32:$Rs, I64:$Rt), + (MI I32:$Rs, DoubleRegs:$Rt)>; + +class T_RR_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID I32:$Rs, I32:$Rt), + (MI I32:$Rs, I32:$Rt)>; + +class T_PP_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID I64:$Rs, I64:$Rt), + (MI DoubleRegs:$Rs, DoubleRegs:$Rt)>; + +class T_QII_pat <InstHexagon MI, Intrinsic IntID, PatFrag Imm1, PatFrag Imm2> + : Pat <(IntID (i32 PredRegs:$Ps), Imm1:$Is, Imm2:$It), + (MI PredRegs:$Ps, Imm1:$Is, Imm2:$It)>; + +class T_QRI_pat <InstHexagon MI, Intrinsic IntID, PatFrag ImmPred> + : Pat <(IntID (i32 PredRegs:$Ps), I32:$Rs, ImmPred:$Is), + (MI PredRegs:$Ps, I32:$Rs, ImmPred:$Is)>; + +class T_QIR_pat <InstHexagon MI, Intrinsic IntID, PatFrag ImmPred> + : Pat <(IntID (i32 PredRegs:$Ps), ImmPred:$Is, I32:$Rs), + (MI PredRegs:$Ps, ImmPred:$Is, I32:$Rs)>; + +class T_RRI_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID I32:$Rs, I32:$Rt, imm:$Iu), + (MI I32:$Rs, I32:$Rt, imm:$Iu)>; + +class T_RII_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID I32:$Rs, imm:$It, imm:$Iu), + (MI I32:$Rs, imm:$It, imm:$Iu)>; + +class T_IRI_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID imm:$It, I32:$Rs, imm:$Iu), + (MI imm:$It, I32:$Rs, imm:$Iu)>; + +class T_IRR_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID imm:$Is, I32:$Rs, I32:$Rt), + (MI imm:$Is, I32:$Rs, I32:$Rt)>; + +class T_RIR_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID I32:$Rs, imm:$Is, I32:$Rt), + (MI I32:$Rs, imm:$Is, I32:$Rt)>; + +class T_RRR_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID I32:$Rs, I32:$Rt, I32:$Ru), + (MI I32:$Rs, I32:$Rt, I32:$Ru)>; + +class T_PPI_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID I64:$Rs, I64:$Rt, imm:$Iu), + (MI DoubleRegs:$Rs, DoubleRegs:$Rt, imm:$Iu)>; + +class T_PII_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID I64:$Rs, imm:$It, imm:$Iu), + (MI DoubleRegs:$Rs, imm:$It, imm:$Iu)>; + +class T_PPP_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID I64:$Rs, I64:$Rt, I64:$Ru), + (MI DoubleRegs:$Rs, DoubleRegs:$Rt, DoubleRegs:$Ru)>; + +class T_PPR_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID I64:$Rs, I64:$Rt, I32:$Ru), + (MI DoubleRegs:$Rs, DoubleRegs:$Rt, I32:$Ru)>; + +class T_PRR_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID 
I64:$Rs, I32:$Rt, I32:$Ru), + (MI DoubleRegs:$Rs, I32:$Rt, I32:$Ru)>; + +class T_PPQ_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID I64:$Rs, I64:$Rt, (i32 PredRegs:$Ru)), + (MI DoubleRegs:$Rs, DoubleRegs:$Rt, PredRegs:$Ru)>; + +class T_PR_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID I64:$Rs, I32:$Rt), + (MI DoubleRegs:$Rs, I32:$Rt)>; + +class T_D_pat <InstHexagon MI, Intrinsic IntID> + : Pat<(IntID (F64:$Rs)), + (MI (F64:$Rs))>; + +class T_DI_pat <InstHexagon MI, Intrinsic IntID, + PatLeaf ImmPred = PatLeaf<(i32 imm)>> + : Pat<(IntID F64:$Rs, ImmPred:$It), + (MI F64:$Rs, ImmPred:$It)>; + +class T_F_pat <InstHexagon MI, Intrinsic IntID> + : Pat<(IntID F32:$Rs), + (MI F32:$Rs)>; + +class T_FI_pat <InstHexagon MI, Intrinsic IntID, + PatLeaf ImmPred = PatLeaf<(i32 imm)>> + : Pat<(IntID F32:$Rs, ImmPred:$It), + (MI F32:$Rs, ImmPred:$It)>; + +class T_FF_pat <InstHexagon MI, Intrinsic IntID> + : Pat<(IntID F32:$Rs, F32:$Rt), + (MI F32:$Rs, F32:$Rt)>; + +class T_DD_pat <InstHexagon MI, Intrinsic IntID> + : Pat<(IntID F64:$Rs, F64:$Rt), + (MI F64:$Rs, F64:$Rt)>; + +class T_FFF_pat <InstHexagon MI, Intrinsic IntID> + : Pat<(IntID F32:$Rs, F32:$Rt, F32:$Ru), + (MI F32:$Rs, F32:$Rt, F32:$Ru)>; + +class T_FFFQ_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID F32:$Rs, F32:$Rt, F32:$Ru, (i32 PredRegs:$Rx)), + (MI F32:$Rs, F32:$Rt, F32:$Ru, PredRegs:$Rx)>; + +//===----------------------------------------------------------------------===// +// MPYS / Multipy signed/unsigned halfwords +//Rd=mpy[u](Rs.[H|L],Rt.[H|L])[:<<1][:rnd][:sat] +//===----------------------------------------------------------------------===// + +def : T_RR_pat <M2_mpy_ll_s1, int_hexagon_M2_mpy_ll_s1>; +def : T_RR_pat <M2_mpy_ll_s0, int_hexagon_M2_mpy_ll_s0>; +def : T_RR_pat <M2_mpy_lh_s1, int_hexagon_M2_mpy_lh_s1>; +def : T_RR_pat <M2_mpy_lh_s0, int_hexagon_M2_mpy_lh_s0>; +def : T_RR_pat <M2_mpy_hl_s1, int_hexagon_M2_mpy_hl_s1>; +def : T_RR_pat <M2_mpy_hl_s0, int_hexagon_M2_mpy_hl_s0>; +def : T_RR_pat <M2_mpy_hh_s1, int_hexagon_M2_mpy_hh_s1>; +def : T_RR_pat <M2_mpy_hh_s0, int_hexagon_M2_mpy_hh_s0>; + +def : T_RR_pat <M2_mpyu_ll_s1, int_hexagon_M2_mpyu_ll_s1>; +def : T_RR_pat <M2_mpyu_ll_s0, int_hexagon_M2_mpyu_ll_s0>; +def : T_RR_pat <M2_mpyu_lh_s1, int_hexagon_M2_mpyu_lh_s1>; +def : T_RR_pat <M2_mpyu_lh_s0, int_hexagon_M2_mpyu_lh_s0>; +def : T_RR_pat <M2_mpyu_hl_s1, int_hexagon_M2_mpyu_hl_s1>; +def : T_RR_pat <M2_mpyu_hl_s0, int_hexagon_M2_mpyu_hl_s0>; +def : T_RR_pat <M2_mpyu_hh_s1, int_hexagon_M2_mpyu_hh_s1>; +def : T_RR_pat <M2_mpyu_hh_s0, int_hexagon_M2_mpyu_hh_s0>; + +def : T_RR_pat <M2_mpy_sat_ll_s1, int_hexagon_M2_mpy_sat_ll_s1>; +def : T_RR_pat <M2_mpy_sat_ll_s0, int_hexagon_M2_mpy_sat_ll_s0>; +def : T_RR_pat <M2_mpy_sat_lh_s1, int_hexagon_M2_mpy_sat_lh_s1>; +def : T_RR_pat <M2_mpy_sat_lh_s0, int_hexagon_M2_mpy_sat_lh_s0>; +def : T_RR_pat <M2_mpy_sat_hl_s1, int_hexagon_M2_mpy_sat_hl_s1>; +def : T_RR_pat <M2_mpy_sat_hl_s0, int_hexagon_M2_mpy_sat_hl_s0>; +def : T_RR_pat <M2_mpy_sat_hh_s1, int_hexagon_M2_mpy_sat_hh_s1>; +def : T_RR_pat <M2_mpy_sat_hh_s0, int_hexagon_M2_mpy_sat_hh_s0>; + +def : T_RR_pat <M2_mpy_rnd_ll_s1, int_hexagon_M2_mpy_rnd_ll_s1>; +def : T_RR_pat <M2_mpy_rnd_ll_s0, int_hexagon_M2_mpy_rnd_ll_s0>; +def : T_RR_pat <M2_mpy_rnd_lh_s1, int_hexagon_M2_mpy_rnd_lh_s1>; +def : T_RR_pat <M2_mpy_rnd_lh_s0, int_hexagon_M2_mpy_rnd_lh_s0>; +def : T_RR_pat <M2_mpy_rnd_hl_s1, int_hexagon_M2_mpy_rnd_hl_s1>; +def : T_RR_pat <M2_mpy_rnd_hl_s0, int_hexagon_M2_mpy_rnd_hl_s0>; +def : T_RR_pat 
<M2_mpy_rnd_hh_s1, int_hexagon_M2_mpy_rnd_hh_s1>; +def : T_RR_pat <M2_mpy_rnd_hh_s0, int_hexagon_M2_mpy_rnd_hh_s0>; + +def : T_RR_pat <M2_mpy_sat_rnd_ll_s1, int_hexagon_M2_mpy_sat_rnd_ll_s1>; +def : T_RR_pat <M2_mpy_sat_rnd_ll_s0, int_hexagon_M2_mpy_sat_rnd_ll_s0>; +def : T_RR_pat <M2_mpy_sat_rnd_lh_s1, int_hexagon_M2_mpy_sat_rnd_lh_s1>; +def : T_RR_pat <M2_mpy_sat_rnd_lh_s0, int_hexagon_M2_mpy_sat_rnd_lh_s0>; +def : T_RR_pat <M2_mpy_sat_rnd_hl_s1, int_hexagon_M2_mpy_sat_rnd_hl_s1>; +def : T_RR_pat <M2_mpy_sat_rnd_hl_s0, int_hexagon_M2_mpy_sat_rnd_hl_s0>; +def : T_RR_pat <M2_mpy_sat_rnd_hh_s1, int_hexagon_M2_mpy_sat_rnd_hh_s1>; +def : T_RR_pat <M2_mpy_sat_rnd_hh_s0, int_hexagon_M2_mpy_sat_rnd_hh_s0>; + + +//===----------------------------------------------------------------------===// +// MPYS / Multipy signed/unsigned halfwords and add/subtract the +// result from the accumulator. +//Rx [-+]= mpy[u](Rs.[H|L],Rt.[H|L])[:<<1][:sat] +//===----------------------------------------------------------------------===// + +def : T_RRR_pat <M2_mpy_acc_ll_s1, int_hexagon_M2_mpy_acc_ll_s1>; +def : T_RRR_pat <M2_mpy_acc_ll_s0, int_hexagon_M2_mpy_acc_ll_s0>; +def : T_RRR_pat <M2_mpy_acc_lh_s1, int_hexagon_M2_mpy_acc_lh_s1>; +def : T_RRR_pat <M2_mpy_acc_lh_s0, int_hexagon_M2_mpy_acc_lh_s0>; +def : T_RRR_pat <M2_mpy_acc_hl_s1, int_hexagon_M2_mpy_acc_hl_s1>; +def : T_RRR_pat <M2_mpy_acc_hl_s0, int_hexagon_M2_mpy_acc_hl_s0>; +def : T_RRR_pat <M2_mpy_acc_hh_s1, int_hexagon_M2_mpy_acc_hh_s1>; +def : T_RRR_pat <M2_mpy_acc_hh_s0, int_hexagon_M2_mpy_acc_hh_s0>; + +def : T_RRR_pat <M2_mpyu_acc_ll_s1, int_hexagon_M2_mpyu_acc_ll_s1>; +def : T_RRR_pat <M2_mpyu_acc_ll_s0, int_hexagon_M2_mpyu_acc_ll_s0>; +def : T_RRR_pat <M2_mpyu_acc_lh_s1, int_hexagon_M2_mpyu_acc_lh_s1>; +def : T_RRR_pat <M2_mpyu_acc_lh_s0, int_hexagon_M2_mpyu_acc_lh_s0>; +def : T_RRR_pat <M2_mpyu_acc_hl_s1, int_hexagon_M2_mpyu_acc_hl_s1>; +def : T_RRR_pat <M2_mpyu_acc_hl_s0, int_hexagon_M2_mpyu_acc_hl_s0>; +def : T_RRR_pat <M2_mpyu_acc_hh_s1, int_hexagon_M2_mpyu_acc_hh_s1>; +def : T_RRR_pat <M2_mpyu_acc_hh_s0, int_hexagon_M2_mpyu_acc_hh_s0>; + +def : T_RRR_pat <M2_mpy_nac_ll_s1, int_hexagon_M2_mpy_nac_ll_s1>; +def : T_RRR_pat <M2_mpy_nac_ll_s0, int_hexagon_M2_mpy_nac_ll_s0>; +def : T_RRR_pat <M2_mpy_nac_lh_s1, int_hexagon_M2_mpy_nac_lh_s1>; +def : T_RRR_pat <M2_mpy_nac_lh_s0, int_hexagon_M2_mpy_nac_lh_s0>; +def : T_RRR_pat <M2_mpy_nac_hl_s1, int_hexagon_M2_mpy_nac_hl_s1>; +def : T_RRR_pat <M2_mpy_nac_hl_s0, int_hexagon_M2_mpy_nac_hl_s0>; +def : T_RRR_pat <M2_mpy_nac_hh_s1, int_hexagon_M2_mpy_nac_hh_s1>; +def : T_RRR_pat <M2_mpy_nac_hh_s0, int_hexagon_M2_mpy_nac_hh_s0>; + +def : T_RRR_pat <M2_mpyu_nac_ll_s1, int_hexagon_M2_mpyu_nac_ll_s1>; +def : T_RRR_pat <M2_mpyu_nac_ll_s0, int_hexagon_M2_mpyu_nac_ll_s0>; +def : T_RRR_pat <M2_mpyu_nac_lh_s1, int_hexagon_M2_mpyu_nac_lh_s1>; +def : T_RRR_pat <M2_mpyu_nac_lh_s0, int_hexagon_M2_mpyu_nac_lh_s0>; +def : T_RRR_pat <M2_mpyu_nac_hl_s1, int_hexagon_M2_mpyu_nac_hl_s1>; +def : T_RRR_pat <M2_mpyu_nac_hl_s0, int_hexagon_M2_mpyu_nac_hl_s0>; +def : T_RRR_pat <M2_mpyu_nac_hh_s1, int_hexagon_M2_mpyu_nac_hh_s1>; +def : T_RRR_pat <M2_mpyu_nac_hh_s0, int_hexagon_M2_mpyu_nac_hh_s0>; + +def : T_RRR_pat <M2_mpy_acc_sat_ll_s1, int_hexagon_M2_mpy_acc_sat_ll_s1>; +def : T_RRR_pat <M2_mpy_acc_sat_ll_s0, int_hexagon_M2_mpy_acc_sat_ll_s0>; +def : T_RRR_pat <M2_mpy_acc_sat_lh_s1, int_hexagon_M2_mpy_acc_sat_lh_s1>; +def : T_RRR_pat <M2_mpy_acc_sat_lh_s0, int_hexagon_M2_mpy_acc_sat_lh_s0>; +def : T_RRR_pat <M2_mpy_acc_sat_hl_s1, 
int_hexagon_M2_mpy_acc_sat_hl_s1>; +def : T_RRR_pat <M2_mpy_acc_sat_hl_s0, int_hexagon_M2_mpy_acc_sat_hl_s0>; +def : T_RRR_pat <M2_mpy_acc_sat_hh_s1, int_hexagon_M2_mpy_acc_sat_hh_s1>; +def : T_RRR_pat <M2_mpy_acc_sat_hh_s0, int_hexagon_M2_mpy_acc_sat_hh_s0>; + +def : T_RRR_pat <M2_mpy_nac_sat_ll_s1, int_hexagon_M2_mpy_nac_sat_ll_s1>; +def : T_RRR_pat <M2_mpy_nac_sat_ll_s0, int_hexagon_M2_mpy_nac_sat_ll_s0>; +def : T_RRR_pat <M2_mpy_nac_sat_lh_s1, int_hexagon_M2_mpy_nac_sat_lh_s1>; +def : T_RRR_pat <M2_mpy_nac_sat_lh_s0, int_hexagon_M2_mpy_nac_sat_lh_s0>; +def : T_RRR_pat <M2_mpy_nac_sat_hl_s1, int_hexagon_M2_mpy_nac_sat_hl_s1>; +def : T_RRR_pat <M2_mpy_nac_sat_hl_s0, int_hexagon_M2_mpy_nac_sat_hl_s0>; +def : T_RRR_pat <M2_mpy_nac_sat_hh_s1, int_hexagon_M2_mpy_nac_sat_hh_s1>; +def : T_RRR_pat <M2_mpy_nac_sat_hh_s0, int_hexagon_M2_mpy_nac_sat_hh_s0>; + + +//===----------------------------------------------------------------------===// +// Multiply signed/unsigned halfwords with and without saturation and rounding +// into a 64-bit destination register. +//===----------------------------------------------------------------------===// + +def : T_RR_pat <M2_mpyd_hh_s0, int_hexagon_M2_mpyd_hh_s0>; +def : T_RR_pat <M2_mpyd_hl_s0, int_hexagon_M2_mpyd_hl_s0>; +def : T_RR_pat <M2_mpyd_lh_s0, int_hexagon_M2_mpyd_lh_s0>; +def : T_RR_pat <M2_mpyd_ll_s0, int_hexagon_M2_mpyd_ll_s0>; +def : T_RR_pat <M2_mpyd_hh_s1, int_hexagon_M2_mpyd_hh_s1>; +def : T_RR_pat <M2_mpyd_hl_s1, int_hexagon_M2_mpyd_hl_s1>; +def : T_RR_pat <M2_mpyd_lh_s1, int_hexagon_M2_mpyd_lh_s1>; +def : T_RR_pat <M2_mpyd_ll_s1, int_hexagon_M2_mpyd_ll_s1>; + +def : T_RR_pat <M2_mpyd_rnd_hh_s0, int_hexagon_M2_mpyd_rnd_hh_s0>; +def : T_RR_pat <M2_mpyd_rnd_hl_s0, int_hexagon_M2_mpyd_rnd_hl_s0>; +def : T_RR_pat <M2_mpyd_rnd_lh_s0, int_hexagon_M2_mpyd_rnd_lh_s0>; +def : T_RR_pat <M2_mpyd_rnd_ll_s0, int_hexagon_M2_mpyd_rnd_ll_s0>; +def : T_RR_pat <M2_mpyd_rnd_hh_s1, int_hexagon_M2_mpyd_rnd_hh_s1>; +def : T_RR_pat <M2_mpyd_rnd_hl_s1, int_hexagon_M2_mpyd_rnd_hl_s1>; +def : T_RR_pat <M2_mpyd_rnd_lh_s1, int_hexagon_M2_mpyd_rnd_lh_s1>; +def : T_RR_pat <M2_mpyd_rnd_ll_s1, int_hexagon_M2_mpyd_rnd_ll_s1>; + +def : T_RR_pat <M2_mpyud_hh_s0, int_hexagon_M2_mpyud_hh_s0>; +def : T_RR_pat <M2_mpyud_hl_s0, int_hexagon_M2_mpyud_hl_s0>; +def : T_RR_pat <M2_mpyud_lh_s0, int_hexagon_M2_mpyud_lh_s0>; +def : T_RR_pat <M2_mpyud_ll_s0, int_hexagon_M2_mpyud_ll_s0>; +def : T_RR_pat <M2_mpyud_hh_s1, int_hexagon_M2_mpyud_hh_s1>; +def : T_RR_pat <M2_mpyud_hl_s1, int_hexagon_M2_mpyud_hl_s1>; +def : T_RR_pat <M2_mpyud_lh_s1, int_hexagon_M2_mpyud_lh_s1>; +def : T_RR_pat <M2_mpyud_ll_s1, int_hexagon_M2_mpyud_ll_s1>; + +//===----------------------------------------------------------------------===// +// MPYS / Multiply signed/unsigned halfwords and add/subtract the +// result from the 64-bit destination register. 
+//Rxx [-+]= mpy[u](Rs.[H|L],Rt.[H|L])[:<<1][:sat] +//===----------------------------------------------------------------------===// + +def : T_PRR_pat <M2_mpyd_acc_hh_s0, int_hexagon_M2_mpyd_acc_hh_s0>; +def : T_PRR_pat <M2_mpyd_acc_hl_s0, int_hexagon_M2_mpyd_acc_hl_s0>; +def : T_PRR_pat <M2_mpyd_acc_lh_s0, int_hexagon_M2_mpyd_acc_lh_s0>; +def : T_PRR_pat <M2_mpyd_acc_ll_s0, int_hexagon_M2_mpyd_acc_ll_s0>; + +def : T_PRR_pat <M2_mpyd_acc_hh_s1, int_hexagon_M2_mpyd_acc_hh_s1>; +def : T_PRR_pat <M2_mpyd_acc_hl_s1, int_hexagon_M2_mpyd_acc_hl_s1>; +def : T_PRR_pat <M2_mpyd_acc_lh_s1, int_hexagon_M2_mpyd_acc_lh_s1>; +def : T_PRR_pat <M2_mpyd_acc_ll_s1, int_hexagon_M2_mpyd_acc_ll_s1>; + +def : T_PRR_pat <M2_mpyd_nac_hh_s0, int_hexagon_M2_mpyd_nac_hh_s0>; +def : T_PRR_pat <M2_mpyd_nac_hl_s0, int_hexagon_M2_mpyd_nac_hl_s0>; +def : T_PRR_pat <M2_mpyd_nac_lh_s0, int_hexagon_M2_mpyd_nac_lh_s0>; +def : T_PRR_pat <M2_mpyd_nac_ll_s0, int_hexagon_M2_mpyd_nac_ll_s0>; + +def : T_PRR_pat <M2_mpyd_nac_hh_s1, int_hexagon_M2_mpyd_nac_hh_s1>; +def : T_PRR_pat <M2_mpyd_nac_hl_s1, int_hexagon_M2_mpyd_nac_hl_s1>; +def : T_PRR_pat <M2_mpyd_nac_lh_s1, int_hexagon_M2_mpyd_nac_lh_s1>; +def : T_PRR_pat <M2_mpyd_nac_ll_s1, int_hexagon_M2_mpyd_nac_ll_s1>; + +def : T_PRR_pat <M2_mpyud_acc_hh_s0, int_hexagon_M2_mpyud_acc_hh_s0>; +def : T_PRR_pat <M2_mpyud_acc_hl_s0, int_hexagon_M2_mpyud_acc_hl_s0>; +def : T_PRR_pat <M2_mpyud_acc_lh_s0, int_hexagon_M2_mpyud_acc_lh_s0>; +def : T_PRR_pat <M2_mpyud_acc_ll_s0, int_hexagon_M2_mpyud_acc_ll_s0>; + +def : T_PRR_pat <M2_mpyud_acc_hh_s1, int_hexagon_M2_mpyud_acc_hh_s1>; +def : T_PRR_pat <M2_mpyud_acc_hl_s1, int_hexagon_M2_mpyud_acc_hl_s1>; +def : T_PRR_pat <M2_mpyud_acc_lh_s1, int_hexagon_M2_mpyud_acc_lh_s1>; +def : T_PRR_pat <M2_mpyud_acc_ll_s1, int_hexagon_M2_mpyud_acc_ll_s1>; + +def : T_PRR_pat <M2_mpyud_nac_hh_s0, int_hexagon_M2_mpyud_nac_hh_s0>; +def : T_PRR_pat <M2_mpyud_nac_hl_s0, int_hexagon_M2_mpyud_nac_hl_s0>; +def : T_PRR_pat <M2_mpyud_nac_lh_s0, int_hexagon_M2_mpyud_nac_lh_s0>; +def : T_PRR_pat <M2_mpyud_nac_ll_s0, int_hexagon_M2_mpyud_nac_ll_s0>; + +def : T_PRR_pat <M2_mpyud_nac_hh_s1, int_hexagon_M2_mpyud_nac_hh_s1>; +def : T_PRR_pat <M2_mpyud_nac_hl_s1, int_hexagon_M2_mpyud_nac_hl_s1>; +def : T_PRR_pat <M2_mpyud_nac_lh_s1, int_hexagon_M2_mpyud_nac_lh_s1>; +def : T_PRR_pat <M2_mpyud_nac_ll_s1, int_hexagon_M2_mpyud_nac_ll_s1>; + +// Vector complex multiply imaginary: Rdd=vcmpyi(Rss,Rtt)[:<<1]:sat +def : T_PP_pat <M2_vcmpy_s1_sat_i, int_hexagon_M2_vcmpy_s1_sat_i>; +def : T_PP_pat <M2_vcmpy_s0_sat_i, int_hexagon_M2_vcmpy_s0_sat_i>; + +// Vector complex multiply real: Rdd=vcmpyr(Rss,Rtt)[:<<1]:sat +def : T_PP_pat <M2_vcmpy_s1_sat_r, int_hexagon_M2_vcmpy_s1_sat_r>; +def : T_PP_pat <M2_vcmpy_s0_sat_r, int_hexagon_M2_vcmpy_s0_sat_r>; + +// Vector dual multiply: Rdd=vdmpy(Rss,Rtt)[:<<1]:sat +def : T_PP_pat <M2_vdmpys_s1, int_hexagon_M2_vdmpys_s1>; +def : T_PP_pat <M2_vdmpys_s0, int_hexagon_M2_vdmpys_s0>; + +// Vector multiply even halfwords: Rdd=vmpyeh(Rss,Rtt)[:<<1]:sat +def : T_PP_pat <M2_vmpy2es_s1, int_hexagon_M2_vmpy2es_s1>; +def : T_PP_pat <M2_vmpy2es_s0, int_hexagon_M2_vmpy2es_s0>; + +//Rdd=vmpywoh(Rss,Rtt)[:<<1][:rnd]:sat +def : T_PP_pat <M2_mmpyh_s0, int_hexagon_M2_mmpyh_s0>; +def : T_PP_pat <M2_mmpyh_s1, int_hexagon_M2_mmpyh_s1>; +def : T_PP_pat <M2_mmpyh_rs0, int_hexagon_M2_mmpyh_rs0>; +def : T_PP_pat <M2_mmpyh_rs1, int_hexagon_M2_mmpyh_rs1>; + +//Rdd=vmpyweh(Rss,Rtt)[:<<1][:rnd]:sat +def : T_PP_pat <M2_mmpyl_s0, int_hexagon_M2_mmpyl_s0>; +def : T_PP_pat 
<M2_mmpyl_s1, int_hexagon_M2_mmpyl_s1>; +def : T_PP_pat <M2_mmpyl_rs0, int_hexagon_M2_mmpyl_rs0>; +def : T_PP_pat <M2_mmpyl_rs1, int_hexagon_M2_mmpyl_rs1>; + +//Rdd=vmpywouh(Rss,Rtt)[:<<1][:rnd]:sat +def : T_PP_pat <M2_mmpyuh_s0, int_hexagon_M2_mmpyuh_s0>; +def : T_PP_pat <M2_mmpyuh_s1, int_hexagon_M2_mmpyuh_s1>; +def : T_PP_pat <M2_mmpyuh_rs0, int_hexagon_M2_mmpyuh_rs0>; +def : T_PP_pat <M2_mmpyuh_rs1, int_hexagon_M2_mmpyuh_rs1>; + +//Rdd=vmpyweuh(Rss,Rtt)[:<<1][:rnd]:sat +def : T_PP_pat <M2_mmpyul_s0, int_hexagon_M2_mmpyul_s0>; +def : T_PP_pat <M2_mmpyul_s1, int_hexagon_M2_mmpyul_s1>; +def : T_PP_pat <M2_mmpyul_rs0, int_hexagon_M2_mmpyul_rs0>; +def : T_PP_pat <M2_mmpyul_rs1, int_hexagon_M2_mmpyul_rs1>; + +// Vector reduce add unsigned bytes: Rdd32[+]=vrmpybu(Rss32,Rtt32) +def : T_PP_pat <A2_vraddub, int_hexagon_A2_vraddub>; +def : T_PPP_pat <A2_vraddub_acc, int_hexagon_A2_vraddub_acc>; + +// Vector sum of absolute differences unsigned bytes: Rdd=vrsadub(Rss,Rtt) +def : T_PP_pat <A2_vrsadub, int_hexagon_A2_vrsadub>; +def : T_PPP_pat <A2_vrsadub_acc, int_hexagon_A2_vrsadub_acc>; + +// Vector absolute difference: Rdd=vabsdiffh(Rtt,Rss) +def : T_PP_pat <M2_vabsdiffh, int_hexagon_M2_vabsdiffh>; + +// Vector absolute difference words: Rdd=vabsdiffw(Rtt,Rss) +def : T_PP_pat <M2_vabsdiffw, int_hexagon_M2_vabsdiffw>; + +// Vector reduce complex multiply real or imaginary: +// Rdd[+]=vrcmpy[ir](Rss,Rtt[*]) +def : T_PP_pat <M2_vrcmpyi_s0, int_hexagon_M2_vrcmpyi_s0>; +def : T_PP_pat <M2_vrcmpyi_s0c, int_hexagon_M2_vrcmpyi_s0c>; +def : T_PPP_pat <M2_vrcmaci_s0, int_hexagon_M2_vrcmaci_s0>; +def : T_PPP_pat <M2_vrcmaci_s0c, int_hexagon_M2_vrcmaci_s0c>; + +def : T_PP_pat <M2_vrcmpyr_s0, int_hexagon_M2_vrcmpyr_s0>; +def : T_PP_pat <M2_vrcmpyr_s0c, int_hexagon_M2_vrcmpyr_s0c>; +def : T_PPP_pat <M2_vrcmacr_s0, int_hexagon_M2_vrcmacr_s0>; +def : T_PPP_pat <M2_vrcmacr_s0c, int_hexagon_M2_vrcmacr_s0c>; + +// Vector reduce halfwords +// Rdd[+]=vrmpyh(Rss,Rtt) +def : T_PP_pat <M2_vrmpy_s0, int_hexagon_M2_vrmpy_s0>; +def : T_PPP_pat <M2_vrmac_s0, int_hexagon_M2_vrmac_s0>; + +//===----------------------------------------------------------------------===// +// Vector Multipy with accumulation +//===----------------------------------------------------------------------===// + +// Vector multiply word by signed half with accumulation +// Rxx+=vmpyw[eo]h(Rss,Rtt)[:<<1][:rnd]:sat +def : T_PPP_pat <M2_mmacls_s1, int_hexagon_M2_mmacls_s1>; +def : T_PPP_pat <M2_mmacls_s0, int_hexagon_M2_mmacls_s0>; +def : T_PPP_pat <M2_mmacls_rs1, int_hexagon_M2_mmacls_rs1>; +def : T_PPP_pat <M2_mmacls_rs0, int_hexagon_M2_mmacls_rs0>; +def : T_PPP_pat <M2_mmachs_s1, int_hexagon_M2_mmachs_s1>; +def : T_PPP_pat <M2_mmachs_s0, int_hexagon_M2_mmachs_s0>; +def : T_PPP_pat <M2_mmachs_rs1, int_hexagon_M2_mmachs_rs1>; +def : T_PPP_pat <M2_mmachs_rs0, int_hexagon_M2_mmachs_rs0>; + +// Vector multiply word by unsigned half with accumulation +// Rxx+=vmpyw[eo]uh(Rss,Rtt)[:<<1][:rnd]:sat +def : T_PPP_pat <M2_mmaculs_s1, int_hexagon_M2_mmaculs_s1>; +def : T_PPP_pat <M2_mmaculs_s0, int_hexagon_M2_mmaculs_s0>; +def : T_PPP_pat <M2_mmaculs_rs1, int_hexagon_M2_mmaculs_rs1>; +def : T_PPP_pat <M2_mmaculs_rs0, int_hexagon_M2_mmaculs_rs0>; +def : T_PPP_pat <M2_mmacuhs_s1, int_hexagon_M2_mmacuhs_s1>; +def : T_PPP_pat <M2_mmacuhs_s0, int_hexagon_M2_mmacuhs_s0>; +def : T_PPP_pat <M2_mmacuhs_rs1, int_hexagon_M2_mmacuhs_rs1>; +def : T_PPP_pat <M2_mmacuhs_rs0, int_hexagon_M2_mmacuhs_rs0>; + +// Vector multiply even halfwords with accumulation +// 
Rxx+=vmpyeh(Rss,Rtt)[:<<1][:sat] +def : T_PPP_pat <M2_vmac2es, int_hexagon_M2_vmac2es>; +def : T_PPP_pat <M2_vmac2es_s1, int_hexagon_M2_vmac2es_s1>; +def : T_PPP_pat <M2_vmac2es_s0, int_hexagon_M2_vmac2es_s0>; + +// Vector dual multiply with accumulation +// Rxx+=vdmpy(Rss,Rtt)[:sat] +def : T_PPP_pat <M2_vdmacs_s1, int_hexagon_M2_vdmacs_s1>; +def : T_PPP_pat <M2_vdmacs_s0, int_hexagon_M2_vdmacs_s0>; + +// Vector complex multiply real or imaginary with accumulation +// Rxx+=vcmpy[ir](Rss,Rtt):sat +def : T_PPP_pat <M2_vcmac_s0_sat_r, int_hexagon_M2_vcmac_s0_sat_r>; +def : T_PPP_pat <M2_vcmac_s0_sat_i, int_hexagon_M2_vcmac_s0_sat_i>; + +//===----------------------------------------------------------------------===// +// Add/Subtract halfword +// Rd=add(Rt.L,Rs.[HL])[:sat] +// Rd=sub(Rt.L,Rs.[HL])[:sat] +// Rd=add(Rt.[LH],Rs.[HL])[:sat][:<16] +// Rd=sub(Rt.[LH],Rs.[HL])[:sat][:<16] +//===----------------------------------------------------------------------===// + +//Rd=add(Rt.L,Rs.[LH]) +def : T_RR_pat <A2_addh_l16_ll, int_hexagon_A2_addh_l16_ll>; +def : T_RR_pat <A2_addh_l16_hl, int_hexagon_A2_addh_l16_hl>; + +//Rd=add(Rt.L,Rs.[LH]):sat +def : T_RR_pat <A2_addh_l16_sat_ll, int_hexagon_A2_addh_l16_sat_ll>; +def : T_RR_pat <A2_addh_l16_sat_hl, int_hexagon_A2_addh_l16_sat_hl>; + +//Rd=sub(Rt.L,Rs.[LH]) +def : T_RR_pat <A2_subh_l16_ll, int_hexagon_A2_subh_l16_ll>; +def : T_RR_pat <A2_subh_l16_hl, int_hexagon_A2_subh_l16_hl>; + +//Rd=sub(Rt.L,Rs.[LH]):sat +def : T_RR_pat <A2_subh_l16_sat_ll, int_hexagon_A2_subh_l16_sat_ll>; +def : T_RR_pat <A2_subh_l16_sat_hl, int_hexagon_A2_subh_l16_sat_hl>; + +//Rd=add(Rt.[LH],Rs.[LH]):<<16 +def : T_RR_pat <A2_addh_h16_ll, int_hexagon_A2_addh_h16_ll>; +def : T_RR_pat <A2_addh_h16_lh, int_hexagon_A2_addh_h16_lh>; +def : T_RR_pat <A2_addh_h16_hl, int_hexagon_A2_addh_h16_hl>; +def : T_RR_pat <A2_addh_h16_hh, int_hexagon_A2_addh_h16_hh>; + +//Rd=sub(Rt.[LH],Rs.[LH]):<<16 +def : T_RR_pat <A2_subh_h16_ll, int_hexagon_A2_subh_h16_ll>; +def : T_RR_pat <A2_subh_h16_lh, int_hexagon_A2_subh_h16_lh>; +def : T_RR_pat <A2_subh_h16_hl, int_hexagon_A2_subh_h16_hl>; +def : T_RR_pat <A2_subh_h16_hh, int_hexagon_A2_subh_h16_hh>; + +//Rd=add(Rt.[LH],Rs.[LH]):sat:<<16 +def : T_RR_pat <A2_addh_h16_sat_ll, int_hexagon_A2_addh_h16_sat_ll>; +def : T_RR_pat <A2_addh_h16_sat_lh, int_hexagon_A2_addh_h16_sat_lh>; +def : T_RR_pat <A2_addh_h16_sat_hl, int_hexagon_A2_addh_h16_sat_hl>; +def : T_RR_pat <A2_addh_h16_sat_hh, int_hexagon_A2_addh_h16_sat_hh>; + +//Rd=sub(Rt.[LH],Rs.[LH]):sat:<<16 +def : T_RR_pat <A2_subh_h16_sat_ll, int_hexagon_A2_subh_h16_sat_ll>; +def : T_RR_pat <A2_subh_h16_sat_lh, int_hexagon_A2_subh_h16_sat_lh>; +def : T_RR_pat <A2_subh_h16_sat_hl, int_hexagon_A2_subh_h16_sat_hl>; +def : T_RR_pat <A2_subh_h16_sat_hh, int_hexagon_A2_subh_h16_sat_hh>; + +// ALU64 / ALU / min max +def : T_RR_pat<A2_max, int_hexagon_A2_max>; +def : T_RR_pat<A2_min, int_hexagon_A2_min>; +def : T_RR_pat<A2_maxu, int_hexagon_A2_maxu>; +def : T_RR_pat<A2_minu, int_hexagon_A2_minu>; + +// Shift and accumulate +def : T_RRI_pat <S2_asr_i_r_nac, int_hexagon_S2_asr_i_r_nac>; +def : T_RRI_pat <S2_lsr_i_r_nac, int_hexagon_S2_lsr_i_r_nac>; +def : T_RRI_pat <S2_asl_i_r_nac, int_hexagon_S2_asl_i_r_nac>; +def : T_RRI_pat <S2_asr_i_r_acc, int_hexagon_S2_asr_i_r_acc>; +def : T_RRI_pat <S2_lsr_i_r_acc, int_hexagon_S2_lsr_i_r_acc>; +def : T_RRI_pat <S2_asl_i_r_acc, int_hexagon_S2_asl_i_r_acc>; + +def : T_RRI_pat <S2_asr_i_r_and, int_hexagon_S2_asr_i_r_and>; +def : T_RRI_pat <S2_lsr_i_r_and, 
int_hexagon_S2_lsr_i_r_and>; +def : T_RRI_pat <S2_asl_i_r_and, int_hexagon_S2_asl_i_r_and>; +def : T_RRI_pat <S2_asr_i_r_or, int_hexagon_S2_asr_i_r_or>; +def : T_RRI_pat <S2_lsr_i_r_or, int_hexagon_S2_lsr_i_r_or>; +def : T_RRI_pat <S2_asl_i_r_or, int_hexagon_S2_asl_i_r_or>; +def : T_RRI_pat <S2_lsr_i_r_xacc, int_hexagon_S2_lsr_i_r_xacc>; +def : T_RRI_pat <S2_asl_i_r_xacc, int_hexagon_S2_asl_i_r_xacc>; + +def : T_PPI_pat <S2_asr_i_p_nac, int_hexagon_S2_asr_i_p_nac>; +def : T_PPI_pat <S2_lsr_i_p_nac, int_hexagon_S2_lsr_i_p_nac>; +def : T_PPI_pat <S2_asl_i_p_nac, int_hexagon_S2_asl_i_p_nac>; +def : T_PPI_pat <S2_asr_i_p_acc, int_hexagon_S2_asr_i_p_acc>; +def : T_PPI_pat <S2_lsr_i_p_acc, int_hexagon_S2_lsr_i_p_acc>; +def : T_PPI_pat <S2_asl_i_p_acc, int_hexagon_S2_asl_i_p_acc>; + +def : T_PPI_pat <S2_asr_i_p_and, int_hexagon_S2_asr_i_p_and>; +def : T_PPI_pat <S2_lsr_i_p_and, int_hexagon_S2_lsr_i_p_and>; +def : T_PPI_pat <S2_asl_i_p_and, int_hexagon_S2_asl_i_p_and>; +def : T_PPI_pat <S2_asr_i_p_or, int_hexagon_S2_asr_i_p_or>; +def : T_PPI_pat <S2_lsr_i_p_or, int_hexagon_S2_lsr_i_p_or>; +def : T_PPI_pat <S2_asl_i_p_or, int_hexagon_S2_asl_i_p_or>; +def : T_PPI_pat <S2_lsr_i_p_xacc, int_hexagon_S2_lsr_i_p_xacc>; +def : T_PPI_pat <S2_asl_i_p_xacc, int_hexagon_S2_asl_i_p_xacc>; + +def : T_RRR_pat <S2_asr_r_r_nac, int_hexagon_S2_asr_r_r_nac>; +def : T_RRR_pat <S2_lsr_r_r_nac, int_hexagon_S2_lsr_r_r_nac>; +def : T_RRR_pat <S2_asl_r_r_nac, int_hexagon_S2_asl_r_r_nac>; +def : T_RRR_pat <S2_lsl_r_r_nac, int_hexagon_S2_lsl_r_r_nac>; +def : T_RRR_pat <S2_asr_r_r_acc, int_hexagon_S2_asr_r_r_acc>; +def : T_RRR_pat <S2_lsr_r_r_acc, int_hexagon_S2_lsr_r_r_acc>; +def : T_RRR_pat <S2_asl_r_r_acc, int_hexagon_S2_asl_r_r_acc>; +def : T_RRR_pat <S2_lsl_r_r_acc, int_hexagon_S2_lsl_r_r_acc>; + +def : T_RRR_pat <S2_asr_r_r_and, int_hexagon_S2_asr_r_r_and>; +def : T_RRR_pat <S2_lsr_r_r_and, int_hexagon_S2_lsr_r_r_and>; +def : T_RRR_pat <S2_asl_r_r_and, int_hexagon_S2_asl_r_r_and>; +def : T_RRR_pat <S2_lsl_r_r_and, int_hexagon_S2_lsl_r_r_and>; +def : T_RRR_pat <S2_asr_r_r_or, int_hexagon_S2_asr_r_r_or>; +def : T_RRR_pat <S2_lsr_r_r_or, int_hexagon_S2_lsr_r_r_or>; +def : T_RRR_pat <S2_asl_r_r_or, int_hexagon_S2_asl_r_r_or>; +def : T_RRR_pat <S2_lsl_r_r_or, int_hexagon_S2_lsl_r_r_or>; + +def : T_PPR_pat <S2_asr_r_p_nac, int_hexagon_S2_asr_r_p_nac>; +def : T_PPR_pat <S2_lsr_r_p_nac, int_hexagon_S2_lsr_r_p_nac>; +def : T_PPR_pat <S2_asl_r_p_nac, int_hexagon_S2_asl_r_p_nac>; +def : T_PPR_pat <S2_lsl_r_p_nac, int_hexagon_S2_lsl_r_p_nac>; +def : T_PPR_pat <S2_asr_r_p_acc, int_hexagon_S2_asr_r_p_acc>; +def : T_PPR_pat <S2_lsr_r_p_acc, int_hexagon_S2_lsr_r_p_acc>; +def : T_PPR_pat <S2_asl_r_p_acc, int_hexagon_S2_asl_r_p_acc>; +def : T_PPR_pat <S2_lsl_r_p_acc, int_hexagon_S2_lsl_r_p_acc>; + +def : T_PPR_pat <S2_asr_r_p_and, int_hexagon_S2_asr_r_p_and>; +def : T_PPR_pat <S2_lsr_r_p_and, int_hexagon_S2_lsr_r_p_and>; +def : T_PPR_pat <S2_asl_r_p_and, int_hexagon_S2_asl_r_p_and>; +def : T_PPR_pat <S2_lsl_r_p_and, int_hexagon_S2_lsl_r_p_and>; +def : T_PPR_pat <S2_asr_r_p_or, int_hexagon_S2_asr_r_p_or>; +def : T_PPR_pat <S2_lsr_r_p_or, int_hexagon_S2_lsr_r_p_or>; +def : T_PPR_pat <S2_asl_r_p_or, int_hexagon_S2_asl_r_p_or>; +def : T_PPR_pat <S2_lsl_r_p_or, int_hexagon_S2_lsl_r_p_or>; + +def : T_RRI_pat <S2_asr_i_r_nac, int_hexagon_S2_asr_i_r_nac>; +def : T_RRI_pat <S2_lsr_i_r_nac, int_hexagon_S2_lsr_i_r_nac>; +def : T_RRI_pat <S2_asl_i_r_nac, int_hexagon_S2_asl_i_r_nac>; +def : T_RRI_pat <S2_asr_i_r_acc, int_hexagon_S2_asr_i_r_acc>; 
+def : T_RRI_pat <S2_lsr_i_r_acc, int_hexagon_S2_lsr_i_r_acc>; +def : T_RRI_pat <S2_asl_i_r_acc, int_hexagon_S2_asl_i_r_acc>; + +def : T_RRI_pat <S2_asr_i_r_and, int_hexagon_S2_asr_i_r_and>; +def : T_RRI_pat <S2_lsr_i_r_and, int_hexagon_S2_lsr_i_r_and>; +def : T_RRI_pat <S2_asl_i_r_and, int_hexagon_S2_asl_i_r_and>; +def : T_RRI_pat <S2_asr_i_r_or, int_hexagon_S2_asr_i_r_or>; +def : T_RRI_pat <S2_lsr_i_r_or, int_hexagon_S2_lsr_i_r_or>; +def : T_RRI_pat <S2_asl_i_r_or, int_hexagon_S2_asl_i_r_or>; +def : T_RRI_pat <S2_lsr_i_r_xacc, int_hexagon_S2_lsr_i_r_xacc>; +def : T_RRI_pat <S2_asl_i_r_xacc, int_hexagon_S2_asl_i_r_xacc>; + +def : T_PPI_pat <S2_asr_i_p_nac, int_hexagon_S2_asr_i_p_nac>; +def : T_PPI_pat <S2_lsr_i_p_nac, int_hexagon_S2_lsr_i_p_nac>; +def : T_PPI_pat <S2_asl_i_p_nac, int_hexagon_S2_asl_i_p_nac>; +def : T_PPI_pat <S2_asr_i_p_acc, int_hexagon_S2_asr_i_p_acc>; +def : T_PPI_pat <S2_lsr_i_p_acc, int_hexagon_S2_lsr_i_p_acc>; +def : T_PPI_pat <S2_asl_i_p_acc, int_hexagon_S2_asl_i_p_acc>; + +def : T_PPI_pat <S2_asr_i_p_and, int_hexagon_S2_asr_i_p_and>; +def : T_PPI_pat <S2_lsr_i_p_and, int_hexagon_S2_lsr_i_p_and>; +def : T_PPI_pat <S2_asl_i_p_and, int_hexagon_S2_asl_i_p_and>; +def : T_PPI_pat <S2_asr_i_p_or, int_hexagon_S2_asr_i_p_or>; +def : T_PPI_pat <S2_lsr_i_p_or, int_hexagon_S2_lsr_i_p_or>; +def : T_PPI_pat <S2_asl_i_p_or, int_hexagon_S2_asl_i_p_or>; +def : T_PPI_pat <S2_lsr_i_p_xacc, int_hexagon_S2_lsr_i_p_xacc>; +def : T_PPI_pat <S2_asl_i_p_xacc, int_hexagon_S2_asl_i_p_xacc>; + +def : T_RRR_pat <S2_asr_r_r_nac, int_hexagon_S2_asr_r_r_nac>; +def : T_RRR_pat <S2_lsr_r_r_nac, int_hexagon_S2_lsr_r_r_nac>; +def : T_RRR_pat <S2_asl_r_r_nac, int_hexagon_S2_asl_r_r_nac>; +def : T_RRR_pat <S2_lsl_r_r_nac, int_hexagon_S2_lsl_r_r_nac>; +def : T_RRR_pat <S2_asr_r_r_acc, int_hexagon_S2_asr_r_r_acc>; +def : T_RRR_pat <S2_lsr_r_r_acc, int_hexagon_S2_lsr_r_r_acc>; +def : T_RRR_pat <S2_asl_r_r_acc, int_hexagon_S2_asl_r_r_acc>; +def : T_RRR_pat <S2_lsl_r_r_acc, int_hexagon_S2_lsl_r_r_acc>; + +def : T_RRR_pat <S2_asr_r_r_and, int_hexagon_S2_asr_r_r_and>; +def : T_RRR_pat <S2_lsr_r_r_and, int_hexagon_S2_lsr_r_r_and>; +def : T_RRR_pat <S2_asl_r_r_and, int_hexagon_S2_asl_r_r_and>; +def : T_RRR_pat <S2_lsl_r_r_and, int_hexagon_S2_lsl_r_r_and>; +def : T_RRR_pat <S2_asr_r_r_or, int_hexagon_S2_asr_r_r_or>; +def : T_RRR_pat <S2_lsr_r_r_or, int_hexagon_S2_lsr_r_r_or>; +def : T_RRR_pat <S2_asl_r_r_or, int_hexagon_S2_asl_r_r_or>; +def : T_RRR_pat <S2_lsl_r_r_or, int_hexagon_S2_lsl_r_r_or>; + +def : T_PPR_pat <S2_asr_r_p_nac, int_hexagon_S2_asr_r_p_nac>; +def : T_PPR_pat <S2_lsr_r_p_nac, int_hexagon_S2_lsr_r_p_nac>; +def : T_PPR_pat <S2_asl_r_p_nac, int_hexagon_S2_asl_r_p_nac>; +def : T_PPR_pat <S2_lsl_r_p_nac, int_hexagon_S2_lsl_r_p_nac>; +def : T_PPR_pat <S2_asr_r_p_acc, int_hexagon_S2_asr_r_p_acc>; +def : T_PPR_pat <S2_lsr_r_p_acc, int_hexagon_S2_lsr_r_p_acc>; +def : T_PPR_pat <S2_asl_r_p_acc, int_hexagon_S2_asl_r_p_acc>; +def : T_PPR_pat <S2_lsl_r_p_acc, int_hexagon_S2_lsl_r_p_acc>; + +def : T_PPR_pat <S2_asr_r_p_and, int_hexagon_S2_asr_r_p_and>; +def : T_PPR_pat <S2_lsr_r_p_and, int_hexagon_S2_lsr_r_p_and>; +def : T_PPR_pat <S2_asl_r_p_and, int_hexagon_S2_asl_r_p_and>; +def : T_PPR_pat <S2_lsl_r_p_and, int_hexagon_S2_lsl_r_p_and>; +def : T_PPR_pat <S2_asr_r_p_or, int_hexagon_S2_asr_r_p_or>; +def : T_PPR_pat <S2_lsr_r_p_or, int_hexagon_S2_lsr_r_p_or>; +def : T_PPR_pat <S2_asl_r_p_or, int_hexagon_S2_asl_r_p_or>; +def : T_PPR_pat <S2_lsl_r_p_or, int_hexagon_S2_lsl_r_p_or>; + 
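+// Note on the accumulation suffixes used in the shift patterns above: _acc adds
+// the shifted value into the destination, _nac subtracts it, _and/_or fold it in
+// bitwise, and _xacc xors it in. A rough scalar sketch of a few of these in C
+// (function names are illustrative only, not part of this file):
+//   uint32_t asr_i_r_acc (uint32_t rx, int32_t  rs, unsigned u) { return rx + (rs >> u); }
+//   uint32_t lsr_i_r_nac (uint32_t rx, uint32_t rs, unsigned u) { return rx - (rs >> u); }
+//   uint32_t asl_i_r_xacc(uint32_t rx, uint32_t rs, unsigned u) { return rx ^ (rs << u); }
+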
+/******************************************************************** +* ALU32/ALU * +*********************************************************************/ +def : T_RR_pat<A2_add, int_hexagon_A2_add>; +def : T_RI_pat<A2_addi, int_hexagon_A2_addi>; +def : T_RR_pat<A2_sub, int_hexagon_A2_sub>; +def : T_IR_pat<A2_subri, int_hexagon_A2_subri>; +def : T_RR_pat<A2_and, int_hexagon_A2_and>; +def : T_RI_pat<A2_andir, int_hexagon_A2_andir>; +def : T_RR_pat<A2_or, int_hexagon_A2_or>; +def : T_RI_pat<A2_orir, int_hexagon_A2_orir>; +def : T_RR_pat<A2_xor, int_hexagon_A2_xor>; +def : T_RR_pat<A2_combinew, int_hexagon_A2_combinew>; + +// Assembler mapped from Rd32=not(Rs32) to Rd32=sub(#-1,Rs32) +def : Pat <(int_hexagon_A2_not (I32:$Rs)), + (A2_subri -1, IntRegs:$Rs)>; + +// Assembler mapped from Rd32=neg(Rs32) to Rd32=sub(#0,Rs32) +def : Pat <(int_hexagon_A2_neg IntRegs:$Rs), + (A2_subri 0, IntRegs:$Rs)>; + +// Transfer immediate +def : Pat <(int_hexagon_A2_tfril (I32:$Rs), u16_0ImmPred:$Is), + (A2_tfril IntRegs:$Rs, u16_0ImmPred:$Is)>; +def : Pat <(int_hexagon_A2_tfrih (I32:$Rs), u16_0ImmPred:$Is), + (A2_tfrih IntRegs:$Rs, u16_0ImmPred:$Is)>; + +// Transfer Register/immediate. +def : T_R_pat <A2_tfr, int_hexagon_A2_tfr>; +def : T_I_pat <A2_tfrsi, int_hexagon_A2_tfrsi>; +def : T_I_pat <A2_tfrpi, int_hexagon_A2_tfrpi>; + +// Assembler mapped from Rdd32=Rss32 to Rdd32=combine(Rss.H32,Rss.L32) +def : Pat<(int_hexagon_A2_tfrp DoubleRegs:$src), + (A2_combinew (HiReg DoubleRegs:$src), (LoReg DoubleRegs:$src))>; + +/******************************************************************** +* ALU32/PERM * +*********************************************************************/ +// Combine +def: T_RR_pat<A2_combine_hh, int_hexagon_A2_combine_hh>; +def: T_RR_pat<A2_combine_hl, int_hexagon_A2_combine_hl>; +def: T_RR_pat<A2_combine_lh, int_hexagon_A2_combine_lh>; +def: T_RR_pat<A2_combine_ll, int_hexagon_A2_combine_ll>; + +def: T_II_pat<A2_combineii, int_hexagon_A2_combineii, s32ImmPred, s8ImmPred>; + +def: Pat<(i32 (int_hexagon_C2_mux (I32:$Rp), (I32:$Rs), (I32:$Rt))), + (i32 (C2_mux (C2_tfrrp IntRegs:$Rp), IntRegs:$Rs, IntRegs:$Rt))>; + +// Mux +def : T_QRI_pat<C2_muxir, int_hexagon_C2_muxir, s32ImmPred>; +def : T_QIR_pat<C2_muxri, int_hexagon_C2_muxri, s32ImmPred>; +def : T_QII_pat<C2_muxii, int_hexagon_C2_muxii, s32ImmPred, s8ImmPred>; + +// Shift halfword +def : T_R_pat<A2_aslh, int_hexagon_A2_aslh>; +def : T_R_pat<A2_asrh, int_hexagon_A2_asrh>; +def : T_R_pat<A2_asrh, int_hexagon_SI_to_SXTHI_asrh>; + +// Sign/zero extend +def : T_R_pat<A2_sxth, int_hexagon_A2_sxth>; +def : T_R_pat<A2_sxtb, int_hexagon_A2_sxtb>; +def : T_R_pat<A2_zxth, int_hexagon_A2_zxth>; +def : T_R_pat<A2_zxtb, int_hexagon_A2_zxtb>; + +/******************************************************************** +* ALU32/PRED * +*********************************************************************/ +// Compare +def : T_RR_pat<C2_cmpeq, int_hexagon_C2_cmpeq>; +def : T_RR_pat<C2_cmpgt, int_hexagon_C2_cmpgt>; +def : T_RR_pat<C2_cmpgtu, int_hexagon_C2_cmpgtu>; + +def : T_RI_pat<C2_cmpeqi, int_hexagon_C2_cmpeqi, s32ImmPred>; +def : T_RI_pat<C2_cmpgti, int_hexagon_C2_cmpgti, s32ImmPred>; +def : T_RI_pat<C2_cmpgtui, int_hexagon_C2_cmpgtui, u32ImmPred>; + +def : Pat <(i32 (int_hexagon_C2_cmpgei (I32:$src1), s32ImmPred:$src2)), + (i32 (C2_cmpgti (I32:$src1), + (DEC_CONST_SIGNED s32ImmPred:$src2)))>; + +def : Pat <(i32 (int_hexagon_C2_cmpgeui (I32:$src1), u32ImmPred:$src2)), + (i32 (C2_cmpgtui (I32:$src1), + (DEC_CONST_UNSIGNED u32ImmPred:$src2)))>; + +// The 
instruction, Pd=cmp.geu(Rs, #u8) -> Pd=cmp.eq(Rs,Rs) when #u8 == 0. +def : Pat <(i32 (int_hexagon_C2_cmpgeui (I32:$src1), 0)), + (i32 (C2_cmpeq (I32:$src1), (I32:$src1)))>; + +def : Pat <(i32 (int_hexagon_C2_cmplt (I32:$src1), + (I32:$src2))), + (i32 (C2_cmpgt (I32:$src2), (I32:$src1)))>; + +def : Pat <(i32 (int_hexagon_C2_cmpltu (I32:$src1), + (I32:$src2))), + (i32 (C2_cmpgtu (I32:$src2), (I32:$src1)))>; + +/******************************************************************** +* ALU32/VH * +*********************************************************************/ +// Vector add, subtract, average halfwords +def: T_RR_pat<A2_svaddh, int_hexagon_A2_svaddh>; +def: T_RR_pat<A2_svaddhs, int_hexagon_A2_svaddhs>; +def: T_RR_pat<A2_svadduhs, int_hexagon_A2_svadduhs>; + +def: T_RR_pat<A2_svsubh, int_hexagon_A2_svsubh>; +def: T_RR_pat<A2_svsubhs, int_hexagon_A2_svsubhs>; +def: T_RR_pat<A2_svsubuhs, int_hexagon_A2_svsubuhs>; + +def: T_RR_pat<A2_svavgh, int_hexagon_A2_svavgh>; +def: T_RR_pat<A2_svavghs, int_hexagon_A2_svavghs>; +def: T_RR_pat<A2_svnavgh, int_hexagon_A2_svnavgh>; + +/******************************************************************** +* ALU64/ALU * +*********************************************************************/ +def: T_RR_pat<A2_addsat, int_hexagon_A2_addsat>; +def: T_RR_pat<A2_subsat, int_hexagon_A2_subsat>; +def: T_PP_pat<A2_addp, int_hexagon_A2_addp>; +def: T_PP_pat<A2_subp, int_hexagon_A2_subp>; + +def: T_PP_pat<A2_andp, int_hexagon_A2_andp>; +def: T_PP_pat<A2_orp, int_hexagon_A2_orp>; +def: T_PP_pat<A2_xorp, int_hexagon_A2_xorp>; + +def: T_PP_pat<C2_cmpeqp, int_hexagon_C2_cmpeqp>; +def: T_PP_pat<C2_cmpgtp, int_hexagon_C2_cmpgtp>; +def: T_PP_pat<C2_cmpgtup, int_hexagon_C2_cmpgtup>; + +def: T_PP_pat<S2_parityp, int_hexagon_S2_parityp>; +def: T_RR_pat<S2_packhl, int_hexagon_S2_packhl>; + +/******************************************************************** +* ALU64/VB * +*********************************************************************/ +// ALU64 - Vector add +def : T_PP_pat <A2_vaddub, int_hexagon_A2_vaddub>; +def : T_PP_pat <A2_vaddubs, int_hexagon_A2_vaddubs>; +def : T_PP_pat <A2_vaddh, int_hexagon_A2_vaddh>; +def : T_PP_pat <A2_vaddhs, int_hexagon_A2_vaddhs>; +def : T_PP_pat <A2_vadduhs, int_hexagon_A2_vadduhs>; +def : T_PP_pat <A2_vaddw, int_hexagon_A2_vaddw>; +def : T_PP_pat <A2_vaddws, int_hexagon_A2_vaddws>; + +// ALU64 - Vector average +def : T_PP_pat <A2_vavgub, int_hexagon_A2_vavgub>; +def : T_PP_pat <A2_vavgubr, int_hexagon_A2_vavgubr>; +def : T_PP_pat <A2_vavgh, int_hexagon_A2_vavgh>; +def : T_PP_pat <A2_vavghr, int_hexagon_A2_vavghr>; +def : T_PP_pat <A2_vavghcr, int_hexagon_A2_vavghcr>; +def : T_PP_pat <A2_vavguh, int_hexagon_A2_vavguh>; +def : T_PP_pat <A2_vavguhr, int_hexagon_A2_vavguhr>; + +def : T_PP_pat <A2_vavgw, int_hexagon_A2_vavgw>; +def : T_PP_pat <A2_vavgwr, int_hexagon_A2_vavgwr>; +def : T_PP_pat <A2_vavgwcr, int_hexagon_A2_vavgwcr>; +def : T_PP_pat <A2_vavguw, int_hexagon_A2_vavguw>; +def : T_PP_pat <A2_vavguwr, int_hexagon_A2_vavguwr>; + +// ALU64 - Vector negative average +def : T_PP_pat <A2_vnavgh, int_hexagon_A2_vnavgh>; +def : T_PP_pat <A2_vnavghr, int_hexagon_A2_vnavghr>; +def : T_PP_pat <A2_vnavghcr, int_hexagon_A2_vnavghcr>; +def : T_PP_pat <A2_vnavgw, int_hexagon_A2_vnavgw>; +def : T_PP_pat <A2_vnavgwr, int_hexagon_A2_vnavgwr>; +def : T_PP_pat <A2_vnavgwcr, int_hexagon_A2_vnavgwcr>; + +// ALU64 - Vector max +def : T_PP_pat <A2_vmaxh, int_hexagon_A2_vmaxh>; +def : T_PP_pat <A2_vmaxw, int_hexagon_A2_vmaxw>; +def : T_PP_pat <A2_vmaxub, 
int_hexagon_A2_vmaxub>; +def : T_PP_pat <A2_vmaxuh, int_hexagon_A2_vmaxuh>; +def : T_PP_pat <A2_vmaxuw, int_hexagon_A2_vmaxuw>; + +// ALU64 - Vector min +def : T_PP_pat <A2_vminh, int_hexagon_A2_vminh>; +def : T_PP_pat <A2_vminw, int_hexagon_A2_vminw>; +def : T_PP_pat <A2_vminub, int_hexagon_A2_vminub>; +def : T_PP_pat <A2_vminuh, int_hexagon_A2_vminuh>; +def : T_PP_pat <A2_vminuw, int_hexagon_A2_vminuw>; + +// ALU64 - Vector sub +def : T_PP_pat <A2_vsubub, int_hexagon_A2_vsubub>; +def : T_PP_pat <A2_vsububs, int_hexagon_A2_vsububs>; +def : T_PP_pat <A2_vsubh, int_hexagon_A2_vsubh>; +def : T_PP_pat <A2_vsubhs, int_hexagon_A2_vsubhs>; +def : T_PP_pat <A2_vsubuhs, int_hexagon_A2_vsubuhs>; +def : T_PP_pat <A2_vsubw, int_hexagon_A2_vsubw>; +def : T_PP_pat <A2_vsubws, int_hexagon_A2_vsubws>; + +// ALU64 - Vector compare bytes +def : T_PP_pat <A2_vcmpbeq, int_hexagon_A2_vcmpbeq>; +def : T_PP_pat <A4_vcmpbgt, int_hexagon_A4_vcmpbgt>; +def : T_PP_pat <A2_vcmpbgtu, int_hexagon_A2_vcmpbgtu>; + +// ALU64 - Vector compare halfwords +def : T_PP_pat <A2_vcmpheq, int_hexagon_A2_vcmpheq>; +def : T_PP_pat <A2_vcmphgt, int_hexagon_A2_vcmphgt>; +def : T_PP_pat <A2_vcmphgtu, int_hexagon_A2_vcmphgtu>; + +// ALU64 - Vector compare words +def : T_PP_pat <A2_vcmpweq, int_hexagon_A2_vcmpweq>; +def : T_PP_pat <A2_vcmpwgt, int_hexagon_A2_vcmpwgt>; +def : T_PP_pat <A2_vcmpwgtu, int_hexagon_A2_vcmpwgtu>; + +// ALU64 / VB / Vector mux. +def : Pat<(int_hexagon_C2_vmux PredRegs:$Pu, DoubleRegs:$Rs, DoubleRegs:$Rt), + (C2_vmux PredRegs:$Pu, DoubleRegs:$Rs, DoubleRegs:$Rt)>; + +// MPY - Multiply and use full result +// Rdd = mpy[u](Rs, Rt) +def : T_RR_pat <M2_dpmpyss_s0, int_hexagon_M2_dpmpyss_s0>; +def : T_RR_pat <M2_dpmpyuu_s0, int_hexagon_M2_dpmpyuu_s0>; + +// Complex multiply real or imaginary +def : T_RR_pat <M2_cmpyi_s0, int_hexagon_M2_cmpyi_s0>; +def : T_RR_pat <M2_cmpyr_s0, int_hexagon_M2_cmpyr_s0>; + +// Complex multiply +def : T_RR_pat <M2_cmpys_s0, int_hexagon_M2_cmpys_s0>; +def : T_RR_pat <M2_cmpysc_s0, int_hexagon_M2_cmpysc_s0>; +def : T_RR_pat <M2_cmpys_s1, int_hexagon_M2_cmpys_s1>; +def : T_RR_pat <M2_cmpysc_s1, int_hexagon_M2_cmpysc_s1>; + +// Vector multiply halfwords +// Rdd=vmpyh(Rs,Rt)[:<<1]:sat +def : T_RR_pat <M2_vmpy2s_s0, int_hexagon_M2_vmpy2s_s0>; +def : T_RR_pat <M2_vmpy2s_s1, int_hexagon_M2_vmpy2s_s1>; + +// Rxx[+-]= mpy[u](Rs,Rt) +def : T_PRR_pat <M2_dpmpyss_acc_s0, int_hexagon_M2_dpmpyss_acc_s0>; +def : T_PRR_pat <M2_dpmpyss_nac_s0, int_hexagon_M2_dpmpyss_nac_s0>; +def : T_PRR_pat <M2_dpmpyuu_acc_s0, int_hexagon_M2_dpmpyuu_acc_s0>; +def : T_PRR_pat <M2_dpmpyuu_nac_s0, int_hexagon_M2_dpmpyuu_nac_s0>; + +// Rxx[-+]=cmpy(Rs,Rt)[:<<1]:sat +def : T_PRR_pat <M2_cmacs_s0, int_hexagon_M2_cmacs_s0>; +def : T_PRR_pat <M2_cnacs_s0, int_hexagon_M2_cnacs_s0>; +def : T_PRR_pat <M2_cmacs_s1, int_hexagon_M2_cmacs_s1>; +def : T_PRR_pat <M2_cnacs_s1, int_hexagon_M2_cnacs_s1>; + +// Rxx[-+]=cmpy(Rs,Rt*)[:<<1]:sat +def : T_PRR_pat <M2_cmacsc_s0, int_hexagon_M2_cmacsc_s0>; +def : T_PRR_pat <M2_cnacsc_s0, int_hexagon_M2_cnacsc_s0>; +def : T_PRR_pat <M2_cmacsc_s1, int_hexagon_M2_cmacsc_s1>; +def : T_PRR_pat <M2_cnacsc_s1, int_hexagon_M2_cnacsc_s1>; + +// Rxx+=cmpy[ir](Rs,Rt) +def : T_PRR_pat <M2_cmaci_s0, int_hexagon_M2_cmaci_s0>; +def : T_PRR_pat <M2_cmacr_s0, int_hexagon_M2_cmacr_s0>; + +// Rxx+=vmpyh(Rs,Rt)[:<<1][:sat] +def : T_PRR_pat <M2_vmac2, int_hexagon_M2_vmac2>; +def : T_PRR_pat <M2_vmac2s_s0, int_hexagon_M2_vmac2s_s0>; +def : T_PRR_pat <M2_vmac2s_s1, int_hexagon_M2_vmac2s_s1>; + 
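+// Usage sketch for the multiply-accumulate intrinsics above, assuming Clang's
+// __builtin_HEXAGON_* builtin naming (illustrative C, not part of this file):
+//   int a, b, c, d;            // 32-bit multiplicands
+//   long long acc = 0;
+//   acc = __builtin_HEXAGON_M2_dpmpyss_acc_s0(acc, a, b); // acc += (long long)a * b
+//   acc = __builtin_HEXAGON_M2_dpmpyss_nac_s0(acc, c, d); // acc -= (long long)c * d
+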
+/******************************************************************** +* CR * +*********************************************************************/ +class qi_CRInst_qi_pat<InstHexagon Inst, Intrinsic IntID> : + Pat<(i32 (IntID IntRegs:$Rs)), + (i32 (C2_tfrpr (Inst (C2_tfrrp IntRegs:$Rs))))>; + +class qi_CRInst_qiqi_pat<InstHexagon Inst, Intrinsic IntID> : + Pat<(i32 (IntID IntRegs:$Rs, IntRegs:$Rt)), + (i32 (C2_tfrpr (Inst (C2_tfrrp IntRegs:$Rs), (C2_tfrrp IntRegs:$Rt))))>; + +def: qi_CRInst_qi_pat<C2_not, int_hexagon_C2_not>; +def: qi_CRInst_qi_pat<C2_all8, int_hexagon_C2_all8>; +def: qi_CRInst_qi_pat<C2_any8, int_hexagon_C2_any8>; + +def: qi_CRInst_qiqi_pat<C2_and, int_hexagon_C2_and>; +def: qi_CRInst_qiqi_pat<C2_andn, int_hexagon_C2_andn>; +def: qi_CRInst_qiqi_pat<C2_or, int_hexagon_C2_or>; +def: qi_CRInst_qiqi_pat<C2_orn, int_hexagon_C2_orn>; +def: qi_CRInst_qiqi_pat<C2_xor, int_hexagon_C2_xor>; + +// Assembler mapped from Pd4=Ps4 to Pd4=or(Ps4,Ps4) +def : Pat<(int_hexagon_C2_pxfer_map PredRegs:$src), + (C2_pxfer_map PredRegs:$src)>; + +// Multiply 32x32 and use lower result +def : T_RRI_pat <M2_macsip, int_hexagon_M2_macsip>; +def : T_RRI_pat <M2_macsin, int_hexagon_M2_macsin>; +def : T_RRR_pat <M2_maci, int_hexagon_M2_maci>; + +// Subtract and accumulate +def : T_RRR_pat <M2_subacc, int_hexagon_M2_subacc>; + +// Add and accumulate +def : T_RRR_pat <M2_acci, int_hexagon_M2_acci>; +def : T_RRR_pat <M2_nacci, int_hexagon_M2_nacci>; +def : T_RRI_pat <M2_accii, int_hexagon_M2_accii>; +def : T_RRI_pat <M2_naccii, int_hexagon_M2_naccii>; + +// XOR and XOR with destination +def : T_RRR_pat <M2_xor_xacc, int_hexagon_M2_xor_xacc>; + +class MType_R32_pat <Intrinsic IntID, InstHexagon OutputInst> : + Pat <(IntID IntRegs:$src1, IntRegs:$src2), + (OutputInst IntRegs:$src1, IntRegs:$src2)>; + +// Vector dual multiply with round and pack + +def : Pat <(int_hexagon_M2_vdmpyrs_s0 DoubleRegs:$src1, DoubleRegs:$src2), + (M2_vdmpyrs_s0 DoubleRegs:$src1, DoubleRegs:$src2)>; + +def : Pat <(int_hexagon_M2_vdmpyrs_s1 DoubleRegs:$src1, DoubleRegs:$src2), + (M2_vdmpyrs_s1 DoubleRegs:$src1, DoubleRegs:$src2)>; + +// Vector multiply halfwords with round and pack + +def : MType_R32_pat <int_hexagon_M2_vmpy2s_s0pack, M2_vmpy2s_s0pack>; +def : MType_R32_pat <int_hexagon_M2_vmpy2s_s1pack, M2_vmpy2s_s1pack>; + +// Multiply and use lower result +def : MType_R32_pat <int_hexagon_M2_mpyi, M2_mpyi>; +def : T_RI_pat<M2_mpysmi, int_hexagon_M2_mpysmi>; + +// Assembler mapped from Rd32=mpyui(Rs32,Rt32) to Rd32=mpyi(Rs32,Rt32) +def : MType_R32_pat <int_hexagon_M2_mpyui, M2_mpyi>; + +// Multiply and use upper result +def : MType_R32_pat <int_hexagon_M2_mpy_up, M2_mpy_up>; +def : MType_R32_pat <int_hexagon_M2_mpyu_up, M2_mpyu_up>; +def : MType_R32_pat <int_hexagon_M2_hmmpyh_rs1, M2_hmmpyh_rs1>; +def : MType_R32_pat <int_hexagon_M2_hmmpyl_rs1, M2_hmmpyl_rs1>; +def : MType_R32_pat <int_hexagon_M2_dpmpyss_rnd_s0, M2_dpmpyss_rnd_s0>; + +// Complex multiply with round and pack +// Rxx32+=cmpy(Rs32,[*]Rt32:<<1]:rnd:sat +def : MType_R32_pat <int_hexagon_M2_cmpyrs_s0, M2_cmpyrs_s0>; +def : MType_R32_pat <int_hexagon_M2_cmpyrs_s1, M2_cmpyrs_s1>; +def : MType_R32_pat <int_hexagon_M2_cmpyrsc_s0, M2_cmpyrsc_s0>; +def : MType_R32_pat <int_hexagon_M2_cmpyrsc_s1, M2_cmpyrsc_s1>; + +/******************************************************************** +* STYPE/ALU * +*********************************************************************/ +def : T_P_pat <A2_absp, int_hexagon_A2_absp>; +def : T_P_pat <A2_negp, int_hexagon_A2_negp>; +def : 
T_P_pat <A2_notp, int_hexagon_A2_notp>; + +/******************************************************************** +* STYPE/BIT * +*********************************************************************/ + +// Count leading/trailing +def: T_R_pat<S2_cl0, int_hexagon_S2_cl0>; +def: T_P_pat<S2_cl0p, int_hexagon_S2_cl0p>; +def: T_R_pat<S2_cl1, int_hexagon_S2_cl1>; +def: T_P_pat<S2_cl1p, int_hexagon_S2_cl1p>; +def: T_R_pat<S2_clb, int_hexagon_S2_clb>; +def: T_P_pat<S2_clbp, int_hexagon_S2_clbp>; +def: T_R_pat<S2_clbnorm, int_hexagon_S2_clbnorm>; +def: T_R_pat<S2_ct0, int_hexagon_S2_ct0>; +def: T_R_pat<S2_ct1, int_hexagon_S2_ct1>; + +// Compare bit mask +def: T_RR_pat<C2_bitsclr, int_hexagon_C2_bitsclr>; +def: T_RI_pat<C2_bitsclri, int_hexagon_C2_bitsclri>; +def: T_RR_pat<C2_bitsset, int_hexagon_C2_bitsset>; + +// Vector shuffle +def : T_PP_pat <S2_shuffeb, int_hexagon_S2_shuffeb>; +def : T_PP_pat <S2_shuffob, int_hexagon_S2_shuffob>; +def : T_PP_pat <S2_shuffeh, int_hexagon_S2_shuffeh>; +def : T_PP_pat <S2_shuffoh, int_hexagon_S2_shuffoh>; + +// Vector truncate +def : T_PP_pat <S2_vtrunewh, int_hexagon_S2_vtrunewh>; +def : T_PP_pat <S2_vtrunowh, int_hexagon_S2_vtrunowh>; + +// Linear feedback-shift Iteration. +def : T_PP_pat <S2_lfsp, int_hexagon_S2_lfsp>; + +// Vector splice +def : T_PPQ_pat <S2_vsplicerb, int_hexagon_S2_vsplicerb>; +def : T_PPI_pat <S2_vspliceib, int_hexagon_S2_vspliceib>; + +// Shift by immediate and add +def : T_RRI_pat<S2_addasl_rrri, int_hexagon_S2_addasl_rrri>; + +// Extract bitfield +def : T_PII_pat<S2_extractup, int_hexagon_S2_extractup>; +def : T_RII_pat<S2_extractu, int_hexagon_S2_extractu>; +def : T_RP_pat <S2_extractu_rp, int_hexagon_S2_extractu_rp>; +def : T_PP_pat <S2_extractup_rp, int_hexagon_S2_extractup_rp>; + +// Insert bitfield +def : Pat <(int_hexagon_S2_insert_rp IntRegs:$src1, IntRegs:$src2, + DoubleRegs:$src3), + (S2_insert_rp IntRegs:$src1, IntRegs:$src2, DoubleRegs:$src3)>; + +def : Pat<(i64 (int_hexagon_S2_insertp_rp (I64:$src1), + (I64:$src2), (I64:$src3))), + (i64 (S2_insertp_rp (I64:$src1), (I64:$src2), + (I64:$src3)))>; + +def : Pat<(int_hexagon_S2_insert IntRegs:$src1, IntRegs:$src2, + u5ImmPred:$src3, u5ImmPred:$src4), + (S2_insert IntRegs:$src1, IntRegs:$src2, + u5ImmPred:$src3, u5ImmPred:$src4)>; + +def : Pat<(i64 (int_hexagon_S2_insertp (I64:$src1), + (I64:$src2), u6ImmPred:$src3, u6ImmPred:$src4)), + (i64 (S2_insertp (I64:$src1), (I64:$src2), + u6ImmPred:$src3, u6ImmPred:$src4))>; + + +// Innterleave/deinterleave +def : T_P_pat <S2_interleave, int_hexagon_S2_interleave>; +def : T_P_pat <S2_deinterleave, int_hexagon_S2_deinterleave>; + +// Set/Clear/Toggle Bit +def: T_RI_pat<S2_setbit_i, int_hexagon_S2_setbit_i>; +def: T_RI_pat<S2_clrbit_i, int_hexagon_S2_clrbit_i>; +def: T_RI_pat<S2_togglebit_i, int_hexagon_S2_togglebit_i>; + +def: T_RR_pat<S2_setbit_r, int_hexagon_S2_setbit_r>; +def: T_RR_pat<S2_clrbit_r, int_hexagon_S2_clrbit_r>; +def: T_RR_pat<S2_togglebit_r, int_hexagon_S2_togglebit_r>; + +// Test Bit +def: T_RI_pat<S2_tstbit_i, int_hexagon_S2_tstbit_i>; +def: T_RR_pat<S2_tstbit_r, int_hexagon_S2_tstbit_r>; + +/******************************************************************** +* STYPE/COMPLEX * +*********************************************************************/ +// Vector Complex conjugate +def : T_P_pat <A2_vconj, int_hexagon_A2_vconj>; + +// Vector Complex rotate +def : T_PR_pat <S2_vcrotate, int_hexagon_S2_vcrotate>; + +/******************************************************************** +* STYPE/PERM * 
+*********************************************************************/ + +// Vector saturate without pack +def : T_P_pat <S2_vsathb_nopack, int_hexagon_S2_vsathb_nopack>; +def : T_P_pat <S2_vsathub_nopack, int_hexagon_S2_vsathub_nopack>; +def : T_P_pat <S2_vsatwh_nopack, int_hexagon_S2_vsatwh_nopack>; +def : T_P_pat <S2_vsatwuh_nopack, int_hexagon_S2_vsatwuh_nopack>; + +/******************************************************************** +* STYPE/PRED * +*********************************************************************/ + +// Predicate transfer +def: Pat<(i32 (int_hexagon_C2_tfrpr (I32:$Rs))), + (i32 (C2_tfrpr (C2_tfrrp (I32:$Rs))))>; +def: Pat<(i32 (int_hexagon_C2_tfrrp (I32:$Rs))), + (i32 (C2_tfrpr (C2_tfrrp (I32:$Rs))))>; + +// Mask generate from predicate +def: Pat<(i64 (int_hexagon_C2_mask (I32:$Rs))), + (i64 (C2_mask (C2_tfrrp (I32:$Rs))))>; + +// Viterbi pack even and odd predicate bits +def: Pat<(i32 (int_hexagon_C2_vitpack (I32:$Rs), (I32:$Rt))), + (i32 (C2_vitpack (C2_tfrrp (I32:$Rs)), + (C2_tfrrp (I32:$Rt))))>; + +/******************************************************************** +* STYPE/SHIFT * +*********************************************************************/ + +def : T_PI_pat <S2_asr_i_p, int_hexagon_S2_asr_i_p>; +def : T_PI_pat <S2_lsr_i_p, int_hexagon_S2_lsr_i_p>; +def : T_PI_pat <S2_asl_i_p, int_hexagon_S2_asl_i_p>; + +def : T_PR_pat <S2_asr_r_p, int_hexagon_S2_asr_r_p>; +def : T_PR_pat <S2_lsr_r_p, int_hexagon_S2_lsr_r_p>; +def : T_PR_pat <S2_asl_r_p, int_hexagon_S2_asl_r_p>; +def : T_PR_pat <S2_lsl_r_p, int_hexagon_S2_lsl_r_p>; + +def : T_RR_pat <S2_asr_r_r, int_hexagon_S2_asr_r_r>; +def : T_RR_pat <S2_lsr_r_r, int_hexagon_S2_lsr_r_r>; +def : T_RR_pat <S2_asl_r_r, int_hexagon_S2_asl_r_r>; +def : T_RR_pat <S2_lsl_r_r, int_hexagon_S2_lsl_r_r>; + +def : T_RR_pat <S2_asr_r_r_sat, int_hexagon_S2_asr_r_r_sat>; +def : T_RR_pat <S2_asl_r_r_sat, int_hexagon_S2_asl_r_r_sat>; + +def : T_R_pat <S2_vsxtbh, int_hexagon_S2_vsxtbh>; +def : T_R_pat <S2_vzxtbh, int_hexagon_S2_vzxtbh>; +def : T_R_pat <S2_vsxthw, int_hexagon_S2_vsxthw>; +def : T_R_pat <S2_vzxthw, int_hexagon_S2_vzxthw>; +def : T_R_pat <S2_vsplatrh, int_hexagon_S2_vsplatrh>; +def : T_R_pat <A2_sxtw, int_hexagon_A2_sxtw>; + +// Vector saturate and pack +def : T_R_pat <S2_svsathb, int_hexagon_S2_svsathb>; +def : T_R_pat <S2_svsathub, int_hexagon_S2_svsathub>; +def : T_P_pat <S2_vsathub, int_hexagon_S2_vsathub>; +def : T_P_pat <S2_vsatwh, int_hexagon_S2_vsatwh>; +def : T_P_pat <S2_vsatwuh, int_hexagon_S2_vsatwuh>; +def : T_P_pat <S2_vsathb, int_hexagon_S2_vsathb>; + +def : T_P_pat <S2_vtrunohb, int_hexagon_S2_vtrunohb>; +def : T_P_pat <S2_vtrunehb, int_hexagon_S2_vtrunehb>; +def : T_P_pat <S2_vrndpackwh, int_hexagon_S2_vrndpackwh>; +def : T_P_pat <S2_vrndpackwhs, int_hexagon_S2_vrndpackwhs>; +def : T_R_pat <S2_brev, int_hexagon_S2_brev>; +def : T_R_pat <S2_vsplatrb, int_hexagon_S2_vsplatrb>; + +def : T_R_pat <A2_abs, int_hexagon_A2_abs>; +def : T_R_pat <A2_abssat, int_hexagon_A2_abssat>; +def : T_R_pat <A2_negsat, int_hexagon_A2_negsat>; + +def : T_R_pat <A2_swiz, int_hexagon_A2_swiz>; + +def : T_P_pat <A2_sat, int_hexagon_A2_sat>; +def : T_R_pat <A2_sath, int_hexagon_A2_sath>; +def : T_R_pat <A2_satuh, int_hexagon_A2_satuh>; +def : T_R_pat <A2_satub, int_hexagon_A2_satub>; +def : T_R_pat <A2_satb, int_hexagon_A2_satb>; + +// Vector arithmetic shift right by immediate with truncate and pack. 
+def : T_PI_pat<S2_asr_i_svw_trun, int_hexagon_S2_asr_i_svw_trun>; + +def : T_RI_pat <S2_asr_i_r, int_hexagon_S2_asr_i_r>; +def : T_RI_pat <S2_lsr_i_r, int_hexagon_S2_lsr_i_r>; +def : T_RI_pat <S2_asl_i_r, int_hexagon_S2_asl_i_r>; +def : T_RI_pat <S2_asr_i_r_rnd, int_hexagon_S2_asr_i_r_rnd>; +def : T_RI_pat <S2_asr_i_r_rnd_goodsyntax, + int_hexagon_S2_asr_i_r_rnd_goodsyntax>; + +// Shift left by immediate with saturation. +def : T_RI_pat <S2_asl_i_r_sat, int_hexagon_S2_asl_i_r_sat>; + +//===----------------------------------------------------------------------===// +// Template 'def pat' to map tableidx[bhwd] intrinsics to :raw instructions. +//===----------------------------------------------------------------------===// +class S2op_tableidx_pat <Intrinsic IntID, InstHexagon OutputInst, + SDNodeXForm XformImm> + : Pat <(IntID IntRegs:$src1, IntRegs:$src2, u4ImmPred:$src3, u5ImmPred:$src4), + (OutputInst IntRegs:$src1, IntRegs:$src2, u4ImmPred:$src3, + (XformImm u5ImmPred:$src4))>; + + +// Table Index : Extract and insert bits. +// Map to the real hardware instructions after subtracting appropriate +// values from the 4th input operand. Please note that subtraction is not +// needed for int_hexagon_S2_tableidxb_goodsyntax. + +def : Pat <(int_hexagon_S2_tableidxb_goodsyntax IntRegs:$src1, IntRegs:$src2, + u4ImmPred:$src3, u5ImmPred:$src4), + (S2_tableidxb IntRegs:$src1, IntRegs:$src2, + u4ImmPred:$src3, u5ImmPred:$src4)>; + +def : S2op_tableidx_pat <int_hexagon_S2_tableidxh_goodsyntax, S2_tableidxh, + DEC_CONST_SIGNED>; +def : S2op_tableidx_pat <int_hexagon_S2_tableidxw_goodsyntax, S2_tableidxw, + DEC2_CONST_SIGNED>; +def : S2op_tableidx_pat <int_hexagon_S2_tableidxd_goodsyntax, S2_tableidxd, + DEC3_CONST_SIGNED>; + +/******************************************************************** +* STYPE/VH * +*********************************************************************/ + +// Vector absolute value halfwords with and without saturation +// Rdd64=vabsh(Rss64)[:sat] +def : T_P_pat <A2_vabsh, int_hexagon_A2_vabsh>; +def : T_P_pat <A2_vabshsat, int_hexagon_A2_vabshsat>; + +// Vector shift halfwords by immediate +// Rdd64=[vaslh/vasrh/vlsrh](Rss64,u4) +def : T_PI_pat <S2_asr_i_vh, int_hexagon_S2_asr_i_vh>; +def : T_PI_pat <S2_lsr_i_vh, int_hexagon_S2_lsr_i_vh>; +def : T_PI_pat <S2_asl_i_vh, int_hexagon_S2_asl_i_vh>; + +// Vector shift halfwords by register +// Rdd64=[vaslw/vasrw/vlslw/vlsrw](Rss64,Rt32) +def : T_PR_pat <S2_asr_r_vh, int_hexagon_S2_asr_r_vh>; +def : T_PR_pat <S2_lsr_r_vh, int_hexagon_S2_lsr_r_vh>; +def : T_PR_pat <S2_asl_r_vh, int_hexagon_S2_asl_r_vh>; +def : T_PR_pat <S2_lsl_r_vh, int_hexagon_S2_lsl_r_vh>; + +/******************************************************************** +* STYPE/VW * +*********************************************************************/ + +// Vector absolute value words with and without saturation +def : T_P_pat <A2_vabsw, int_hexagon_A2_vabsw>; +def : T_P_pat <A2_vabswsat, int_hexagon_A2_vabswsat>; + +// Vector shift words by immediate. +// Rdd64=[vasrw/vlsrw|vaslw](Rss64,u5) +def : T_PI_pat <S2_asr_i_vw, int_hexagon_S2_asr_i_vw>; +def : T_PI_pat <S2_lsr_i_vw, int_hexagon_S2_lsr_i_vw>; +def : T_PI_pat <S2_asl_i_vw, int_hexagon_S2_asl_i_vw>; + +// Vector shift words by register. 
+// Rdd64=[vasrw/vlsrw|vaslw|vlslw](Rss64,Rt32) +def : T_PR_pat <S2_asr_r_vw, int_hexagon_S2_asr_r_vw>; +def : T_PR_pat <S2_lsr_r_vw, int_hexagon_S2_lsr_r_vw>; +def : T_PR_pat <S2_asl_r_vw, int_hexagon_S2_asl_r_vw>; +def : T_PR_pat <S2_lsl_r_vw, int_hexagon_S2_lsl_r_vw>; + +// Vector shift words with truncate and pack + +def : T_PR_pat <S2_asr_r_svw_trun, int_hexagon_S2_asr_r_svw_trun>; + +def : T_R_pat<L2_loadw_locked, int_hexagon_L2_loadw_locked>; +def : T_R_pat<L4_loadd_locked, int_hexagon_L4_loadd_locked>; + +def: Pat<(i32 (int_hexagon_S2_storew_locked (I32:$Rs), (I32:$Rt))), + (i32 (C2_tfrpr (S2_storew_locked (I32:$Rs), (I32:$Rt))))>; +def: Pat<(i32 (int_hexagon_S4_stored_locked (I32:$Rs), (I64:$Rt))), + (i32 (C2_tfrpr (S4_stored_locked (I32:$Rs), (I64:$Rt))))>; + +/******************************************************************** +* ST +*********************************************************************/ + +class T_stb_pat <InstHexagon MI, Intrinsic IntID, PatLeaf Val> + : Pat<(IntID I32:$Rs, Val:$Rt, I32:$Ru), + (MI I32:$Rs, Val:$Rt, I32:$Ru)>; + +def : T_stb_pat <S2_storerh_pbr_pseudo, int_hexagon_brev_sth, I32>; +def : T_stb_pat <S2_storerb_pbr_pseudo, int_hexagon_brev_stb, I32>; +def : T_stb_pat <S2_storeri_pbr_pseudo, int_hexagon_brev_stw, I32>; +def : T_stb_pat <S2_storerf_pbr_pseudo, int_hexagon_brev_sthhi, I32>; +def : T_stb_pat <S2_storerd_pbr_pseudo, int_hexagon_brev_std, I64>; + +class T_stc_pat <InstHexagon MI, Intrinsic IntID, PatLeaf Imm, PatLeaf Val> + : Pat<(IntID I32:$Rs, Val:$Rt, I32:$Ru, Imm:$s), + (MI I32:$Rs, Val:$Rt, I32:$Ru, Imm:$s)>; + +def: T_stc_pat<S2_storerb_pci_pseudo, int_hexagon_circ_stb, s4_0ImmPred, I32>; +def: T_stc_pat<S2_storerh_pci_pseudo, int_hexagon_circ_sth, s4_1ImmPred, I32>; +def: T_stc_pat<S2_storeri_pci_pseudo, int_hexagon_circ_stw, s4_2ImmPred, I32>; +def: T_stc_pat<S2_storerd_pci_pseudo, int_hexagon_circ_std, s4_3ImmPred, I64>; +def: T_stc_pat<S2_storerf_pci_pseudo, int_hexagon_circ_sthhi, s4_1ImmPred, I32>; + +include "HexagonIntrinsicsV3.td" +include "HexagonIntrinsicsV4.td" +include "HexagonIntrinsicsV5.td" +include "HexagonIntrinsicsV60.td" + diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsDerived.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsDerived.td new file mode 100644 index 0000000..4c28b28 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsDerived.td @@ -0,0 +1,40 @@ +//===-- HexagonIntrinsicsDerived.td - Derived intrinsics ---*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// Multiply 64-bit and use lower result +// +// Optimized with intrinisics accumulates +// +def : Pat <(mul DoubleRegs:$src1, DoubleRegs:$src2), + (i64 + (A2_combinew + (M2_maci + (M2_maci + (i32 + (EXTRACT_SUBREG + (i64 + (M2_dpmpyuu_s0 (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), + subreg_loreg)), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), + subreg_loreg)))), + subreg_hireg)), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_loreg)), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_hireg))), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_loreg)), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_hireg))), + (i32 + (EXTRACT_SUBREG + (i64 + (M2_dpmpyuu_s0 + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_loreg)), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), + subreg_loreg)))), subreg_loreg))))>; + + + diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV3.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV3.td new file mode 100644 index 0000000..6152cb0 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV3.td @@ -0,0 +1,27 @@ +//=- HexagonIntrinsicsV3.td - Target Description for Hexagon -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Hexagon V3 Compiler Intrinsics in TableGen format. +// +//===----------------------------------------------------------------------===// + +// Vector reduce complex multiply real or imaginary +def : T_PR_pat <M2_vrcmpys_s1, int_hexagon_M2_vrcmpys_s1>; +def : T_PPR_pat<M2_vrcmpys_acc_s1, int_hexagon_M2_vrcmpys_acc_s1>; +def : T_PR_pat <M2_vrcmpys_s1rp, int_hexagon_M2_vrcmpys_s1rp>; + +// Vector reduce add unsigned halfwords +def : T_PP_pat<M2_vradduh, int_hexagon_M2_vradduh>; + +def: T_RP_pat<A2_addsp, int_hexagon_A2_addsp>; +def: T_PP_pat<A2_addpsat, int_hexagon_A2_addpsat>; +def: T_PP_pat<A2_minp, int_hexagon_A2_minp>; +def: T_PP_pat<A2_minup, int_hexagon_A2_minup>; +def: T_PP_pat<A2_maxp, int_hexagon_A2_maxp>; +def: T_PP_pat<A2_maxup, int_hexagon_A2_maxup>; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV4.td new file mode 100644 index 0000000..c80a188 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV4.td @@ -0,0 +1,318 @@ +//===- HexagonIntrinsicsV4.td - V4 Instruction intrinsics --*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This is populated based on the following specs: +// Hexagon V4 Architecture Extensions +// Application-Level Specification +// 80-V9418-12 Rev. 
A +// June 15, 2010 + +// Vector reduce multiply word by signed half (32x16) +//Rdd=vrmpyweh(Rss,Rtt)[:<<1] +def : T_PP_pat <M4_vrmpyeh_s0, int_hexagon_M4_vrmpyeh_s0>; +def : T_PP_pat <M4_vrmpyeh_s1, int_hexagon_M4_vrmpyeh_s1>; + +//Rdd=vrmpywoh(Rss,Rtt)[:<<1] +def : T_PP_pat <M4_vrmpyoh_s0, int_hexagon_M4_vrmpyoh_s0>; +def : T_PP_pat <M4_vrmpyoh_s1, int_hexagon_M4_vrmpyoh_s1>; + +//Rdd+=vrmpyweh(Rss,Rtt)[:<<1] +def : T_PPP_pat <M4_vrmpyeh_acc_s0, int_hexagon_M4_vrmpyeh_acc_s0>; +def : T_PPP_pat <M4_vrmpyeh_acc_s1, int_hexagon_M4_vrmpyeh_acc_s1>; + +//Rdd=vrmpywoh(Rss,Rtt)[:<<1] +def : T_PPP_pat <M4_vrmpyoh_acc_s0, int_hexagon_M4_vrmpyoh_acc_s0>; +def : T_PPP_pat <M4_vrmpyoh_acc_s1, int_hexagon_M4_vrmpyoh_acc_s1>; + +// Vector multiply halfwords, signed by unsigned +// Rdd=vmpyhsu(Rs,Rt)[:<<1]:sat +def : T_RR_pat <M2_vmpy2su_s0, int_hexagon_M2_vmpy2su_s0>; +def : T_RR_pat <M2_vmpy2su_s1, int_hexagon_M2_vmpy2su_s1>; + +// Rxx+=vmpyhsu(Rs,Rt)[:<<1]:sat +def : T_PRR_pat <M2_vmac2su_s0, int_hexagon_M2_vmac2su_s0>; +def : T_PRR_pat <M2_vmac2su_s1, int_hexagon_M2_vmac2su_s1>; + +// Vector polynomial multiply halfwords +// Rdd=vpmpyh(Rs,Rt) +def : T_RR_pat <M4_vpmpyh, int_hexagon_M4_vpmpyh>; +// Rxx[^]=vpmpyh(Rs,Rt) +def : T_PRR_pat <M4_vpmpyh_acc, int_hexagon_M4_vpmpyh_acc>; + +// Polynomial multiply words +// Rdd=pmpyw(Rs,Rt) +def : T_RR_pat <M4_pmpyw, int_hexagon_M4_pmpyw>; +// Rxx^=pmpyw(Rs,Rt) +def : T_PRR_pat <M4_pmpyw_acc, int_hexagon_M4_pmpyw_acc>; + +//Rxx^=asr(Rss,Rt) +def : T_PPR_pat <S2_asr_r_p_xor, int_hexagon_S2_asr_r_p_xor>; +//Rxx^=asl(Rss,Rt) +def : T_PPR_pat <S2_asl_r_p_xor, int_hexagon_S2_asl_r_p_xor>; +//Rxx^=lsr(Rss,Rt) +def : T_PPR_pat <S2_lsr_r_p_xor, int_hexagon_S2_lsr_r_p_xor>; +//Rxx^=lsl(Rss,Rt) +def : T_PPR_pat <S2_lsl_r_p_xor, int_hexagon_S2_lsl_r_p_xor>; + +// Multiply and use upper result +def : MType_R32_pat <int_hexagon_M2_mpysu_up, M2_mpysu_up>; +def : MType_R32_pat <int_hexagon_M2_mpy_up_s1, M2_mpy_up_s1>; +def : MType_R32_pat <int_hexagon_M2_hmmpyh_s1, M2_hmmpyh_s1>; +def : MType_R32_pat <int_hexagon_M2_hmmpyl_s1, M2_hmmpyl_s1>; +def : MType_R32_pat <int_hexagon_M2_mpy_up_s1_sat, M2_mpy_up_s1_sat>; + +// Vector reduce add unsigned halfwords +def : Pat <(int_hexagon_M2_vraddh DoubleRegs:$src1, DoubleRegs:$src2), + (M2_vraddh DoubleRegs:$src1, DoubleRegs:$src2)>; + +def : T_P_pat <S2_brevp, int_hexagon_S2_brevp>; + +def: T_P_pat <S2_ct0p, int_hexagon_S2_ct0p>; +def: T_P_pat <S2_ct1p, int_hexagon_S2_ct1p>; +def: T_RR_pat<C4_nbitsset, int_hexagon_C4_nbitsset>; +def: T_RR_pat<C4_nbitsclr, int_hexagon_C4_nbitsclr>; +def: T_RI_pat<C4_nbitsclri, int_hexagon_C4_nbitsclri>; + + +class vcmpImm_pat <InstHexagon MI, Intrinsic IntID, PatLeaf immPred> : + Pat <(IntID (i64 DoubleRegs:$src1), immPred:$src2), + (MI (i64 DoubleRegs:$src1), immPred:$src2)>; + +def : vcmpImm_pat <A4_vcmpbeqi, int_hexagon_A4_vcmpbeqi, u8ImmPred>; +def : vcmpImm_pat <A4_vcmpbgti, int_hexagon_A4_vcmpbgti, s8ImmPred>; +def : vcmpImm_pat <A4_vcmpbgtui, int_hexagon_A4_vcmpbgtui, u7ImmPred>; + +def : vcmpImm_pat <A4_vcmpheqi, int_hexagon_A4_vcmpheqi, s8ImmPred>; +def : vcmpImm_pat <A4_vcmphgti, int_hexagon_A4_vcmphgti, s8ImmPred>; +def : vcmpImm_pat <A4_vcmphgtui, int_hexagon_A4_vcmphgtui, u7ImmPred>; + +def : vcmpImm_pat <A4_vcmpweqi, int_hexagon_A4_vcmpweqi, s8ImmPred>; +def : vcmpImm_pat <A4_vcmpwgti, int_hexagon_A4_vcmpwgti, s8ImmPred>; +def : vcmpImm_pat <A4_vcmpwgtui, int_hexagon_A4_vcmpwgtui, u7ImmPred>; + +def : T_PP_pat<A4_vcmpbeq_any, int_hexagon_A4_vcmpbeq_any>; + +def : T_RR_pat<A4_cmpbeq, 
int_hexagon_A4_cmpbeq>; +def : T_RR_pat<A4_cmpbgt, int_hexagon_A4_cmpbgt>; +def : T_RR_pat<A4_cmpbgtu, int_hexagon_A4_cmpbgtu>; +def : T_RR_pat<A4_cmpheq, int_hexagon_A4_cmpheq>; +def : T_RR_pat<A4_cmphgt, int_hexagon_A4_cmphgt>; +def : T_RR_pat<A4_cmphgtu, int_hexagon_A4_cmphgtu>; + +def : T_RI_pat<A4_cmpbeqi, int_hexagon_A4_cmpbeqi>; +def : T_RI_pat<A4_cmpbgti, int_hexagon_A4_cmpbgti>; +def : T_RI_pat<A4_cmpbgtui, int_hexagon_A4_cmpbgtui>; + +def : T_RI_pat<A4_cmpheqi, int_hexagon_A4_cmpheqi>; +def : T_RI_pat<A4_cmphgti, int_hexagon_A4_cmphgti>; +def : T_RI_pat<A4_cmphgtui, int_hexagon_A4_cmphgtui>; + +def : T_RP_pat <A4_boundscheck, int_hexagon_A4_boundscheck>; + +def : T_PR_pat<A4_tlbmatch, int_hexagon_A4_tlbmatch>; + +def : Pat <(int_hexagon_M4_mpyrr_addr IntRegs:$src1, IntRegs:$src2, + IntRegs:$src3), + (M4_mpyrr_addr IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; + +def : T_IRR_pat <M4_mpyrr_addi, int_hexagon_M4_mpyrr_addi>; +def : T_IRI_pat <M4_mpyri_addi, int_hexagon_M4_mpyri_addi>; +def : T_RIR_pat <M4_mpyri_addr_u2, int_hexagon_M4_mpyri_addr_u2>; +def : T_RRI_pat <M4_mpyri_addr, int_hexagon_M4_mpyri_addr>; +// Multiply 32x32 and use upper result +def : T_RRR_pat <M4_mac_up_s1_sat, int_hexagon_M4_mac_up_s1_sat>; +def : T_RRR_pat <M4_nac_up_s1_sat, int_hexagon_M4_nac_up_s1_sat>; + +// Complex multiply 32x16 +def : T_PR_pat <M4_cmpyi_wh, int_hexagon_M4_cmpyi_wh>; +def : T_PR_pat <M4_cmpyr_wh, int_hexagon_M4_cmpyr_wh>; + +def : T_PR_pat <M4_cmpyi_whc, int_hexagon_M4_cmpyi_whc>; +def : T_PR_pat <M4_cmpyr_whc, int_hexagon_M4_cmpyr_whc>; + +def : T_PP_pat<A4_andnp, int_hexagon_A4_andnp>; +def : T_PP_pat<A4_ornp, int_hexagon_A4_ornp>; + +// Complex add/sub halfwords/words +def : T_PP_pat <S4_vxaddsubw, int_hexagon_S4_vxaddsubw>; +def : T_PP_pat <S4_vxsubaddw, int_hexagon_S4_vxsubaddw>; +def : T_PP_pat <S4_vxaddsubh, int_hexagon_S4_vxaddsubh>; +def : T_PP_pat <S4_vxsubaddh, int_hexagon_S4_vxsubaddh>; + +def : T_PP_pat <S4_vxaddsubhr, int_hexagon_S4_vxaddsubhr>; +def : T_PP_pat <S4_vxsubaddhr, int_hexagon_S4_vxsubaddhr>; + +// Extract bitfield +def : T_PP_pat <S4_extractp_rp, int_hexagon_S4_extractp_rp>; +def : T_RP_pat <S4_extract_rp, int_hexagon_S4_extract_rp>; +def : T_PII_pat <S4_extractp, int_hexagon_S4_extractp>; +def : T_RII_pat <S4_extract, int_hexagon_S4_extract>; + +// Vector conditional negate +// Rdd=vcnegh(Rss,Rt) +def : T_PR_pat <S2_vcnegh, int_hexagon_S2_vcnegh>; + +// Shift an immediate left by register amount +def : T_IR_pat<S4_lsli, int_hexagon_S4_lsli>; + +// Vector reduce maximum halfwords +def : T_PPR_pat <A4_vrmaxh, int_hexagon_A4_vrmaxh>; +def : T_PPR_pat <A4_vrmaxuh, int_hexagon_A4_vrmaxuh>; + +// Vector reduce maximum words +def : T_PPR_pat <A4_vrmaxw, int_hexagon_A4_vrmaxw>; +def : T_PPR_pat <A4_vrmaxuw, int_hexagon_A4_vrmaxuw>; + +// Vector reduce minimum halfwords +def : T_PPR_pat <A4_vrminh, int_hexagon_A4_vrminh>; +def : T_PPR_pat <A4_vrminuh, int_hexagon_A4_vrminuh>; + +// Vector reduce minimum words +def : T_PPR_pat <A4_vrminw, int_hexagon_A4_vrminw>; +def : T_PPR_pat <A4_vrminuw, int_hexagon_A4_vrminuw>; + +// Rotate and reduce bytes +def : Pat <(int_hexagon_S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2, + u2ImmPred:$src3), + (S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2, u2ImmPred:$src3)>; + +// Rotate and reduce bytes with accumulation +// Rxx+=vrcrotate(Rss,Rt,#u2) +def : Pat <(int_hexagon_S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2, + IntRegs:$src3, u2ImmPred:$src4), + (S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2, + IntRegs:$src3, 
u2ImmPred:$src4)>; + +// Vector conditional negate +def : T_PPR_pat<S2_vrcnegh, int_hexagon_S2_vrcnegh>; + +// Logical xor with xor accumulation +def : T_PPP_pat<M4_xor_xacc, int_hexagon_M4_xor_xacc>; + +// ALU64 - Vector min/max byte +def : T_PP_pat <A2_vminb, int_hexagon_A2_vminb>; +def : T_PP_pat <A2_vmaxb, int_hexagon_A2_vmaxb>; + +// Shift and add/sub/and/or +def : T_IRI_pat <S4_andi_asl_ri, int_hexagon_S4_andi_asl_ri>; +def : T_IRI_pat <S4_ori_asl_ri, int_hexagon_S4_ori_asl_ri>; +def : T_IRI_pat <S4_addi_asl_ri, int_hexagon_S4_addi_asl_ri>; +def : T_IRI_pat <S4_subi_asl_ri, int_hexagon_S4_subi_asl_ri>; +def : T_IRI_pat <S4_andi_lsr_ri, int_hexagon_S4_andi_lsr_ri>; +def : T_IRI_pat <S4_ori_lsr_ri, int_hexagon_S4_ori_lsr_ri>; +def : T_IRI_pat <S4_addi_lsr_ri, int_hexagon_S4_addi_lsr_ri>; +def : T_IRI_pat <S4_subi_lsr_ri, int_hexagon_S4_subi_lsr_ri>; + +// Split bitfield +def : T_RI_pat <A4_bitspliti, int_hexagon_A4_bitspliti>; +def : T_RR_pat <A4_bitsplit, int_hexagon_A4_bitsplit>; + +def: T_RR_pat<S4_parity, int_hexagon_S4_parity>; + +def: T_RI_pat<S4_ntstbit_i, int_hexagon_S4_ntstbit_i>; +def: T_RR_pat<S4_ntstbit_r, int_hexagon_S4_ntstbit_r>; + +def: T_RI_pat<S4_clbaddi, int_hexagon_S4_clbaddi>; +def: T_PI_pat<S4_clbpaddi, int_hexagon_S4_clbpaddi>; +def: T_P_pat <S4_clbpnorm, int_hexagon_S4_clbpnorm>; + +/******************************************************************** +* ALU32/ALU * +*********************************************************************/ + +// ALU32 / ALU / Logical Operations. +def: T_RR_pat<A4_andn, int_hexagon_A4_andn>; +def: T_RR_pat<A4_orn, int_hexagon_A4_orn>; + +/******************************************************************** +* ALU32/PERM * +*********************************************************************/ + +// Combine Words Into Doublewords. +def: T_RI_pat<A4_combineri, int_hexagon_A4_combineri, s32ImmPred>; +def: T_IR_pat<A4_combineir, int_hexagon_A4_combineir, s32ImmPred>; + +/******************************************************************** +* ALU32/PRED * +*********************************************************************/ + +// Compare +def : T_RI_pat<C4_cmpneqi, int_hexagon_C4_cmpneqi, s32ImmPred>; +def : T_RI_pat<C4_cmpltei, int_hexagon_C4_cmpltei, s32ImmPred>; +def : T_RI_pat<C4_cmplteui, int_hexagon_C4_cmplteui, u32ImmPred>; + +def: T_RR_pat<A4_rcmpeq, int_hexagon_A4_rcmpeq>; +def: T_RR_pat<A4_rcmpneq, int_hexagon_A4_rcmpneq>; + +def: T_RI_pat<A4_rcmpeqi, int_hexagon_A4_rcmpeqi>; +def: T_RI_pat<A4_rcmpneqi, int_hexagon_A4_rcmpneqi>; + +/******************************************************************** +* CR * +*********************************************************************/ + +// CR / Logical Operations On Predicates. 
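+// At the intrinsic level predicate values are modelled as i32, so the class
+// below wraps each i32 operand in C2_tfrrp and the predicate result in
+// C2_tfrpr to move between integer and predicate registers. A rough C-level
+// view, assuming Clang's __builtin_HEXAGON_* naming (illustrative only):
+//   int p = __builtin_HEXAGON_C2_cmpgt(a, b);      // predicate held in an i32
+//   int r = __builtin_HEXAGON_C4_and_and(p, q, s); // maps to C4_and_and below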
+ +class qi_CRInst_qiqiqi_pat<Intrinsic IntID, InstHexagon Inst> : + Pat<(i32 (IntID IntRegs:$Rs, IntRegs:$Rt, IntRegs:$Ru)), + (i32 (C2_tfrpr (Inst (C2_tfrrp IntRegs:$Rs), + (C2_tfrrp IntRegs:$Rt), + (C2_tfrrp IntRegs:$Ru))))>; + +def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_and_and, C4_and_and>; +def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_and_andn, C4_and_andn>; +def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_and_or, C4_and_or>; +def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_and_orn, C4_and_orn>; +def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_or_and, C4_or_and>; +def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_or_andn, C4_or_andn>; +def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_or_or, C4_or_or>; +def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_or_orn, C4_or_orn>; + +/******************************************************************** +* XTYPE/ALU * +*********************************************************************/ + +// Add And Accumulate. + +def : T_RRI_pat <S4_addaddi, int_hexagon_S4_addaddi>; +def : T_RIR_pat <S4_subaddi, int_hexagon_S4_subaddi>; + + +// XTYPE / ALU / Logical-logical Words. +def : T_RRR_pat <M4_or_xor, int_hexagon_M4_or_xor>; +def : T_RRR_pat <M4_and_xor, int_hexagon_M4_and_xor>; +def : T_RRR_pat <M4_or_and, int_hexagon_M4_or_and>; +def : T_RRR_pat <M4_and_and, int_hexagon_M4_and_and>; +def : T_RRR_pat <M4_xor_and, int_hexagon_M4_xor_and>; +def : T_RRR_pat <M4_or_or, int_hexagon_M4_or_or>; +def : T_RRR_pat <M4_and_or, int_hexagon_M4_and_or>; +def : T_RRR_pat <M4_xor_or, int_hexagon_M4_xor_or>; +def : T_RRR_pat <M4_or_andn, int_hexagon_M4_or_andn>; +def : T_RRR_pat <M4_and_andn, int_hexagon_M4_and_andn>; +def : T_RRR_pat <M4_xor_andn, int_hexagon_M4_xor_andn>; + +def : T_RRI_pat <S4_or_andi, int_hexagon_S4_or_andi>; +def : T_RRI_pat <S4_or_andix, int_hexagon_S4_or_andix>; +def : T_RRI_pat <S4_or_ori, int_hexagon_S4_or_ori>; + +// Modulo wrap. +def : T_RR_pat <A4_modwrapu, int_hexagon_A4_modwrapu>; + +// Arithmetic/Convergent round +// Rd=[cround|round](Rs,Rt)[:sat] +// Rd=[cround|round](Rs,#u5)[:sat] +def : T_RI_pat <A4_cround_ri, int_hexagon_A4_cround_ri>; +def : T_RR_pat <A4_cround_rr, int_hexagon_A4_cround_rr>; + +def : T_RI_pat <A4_round_ri, int_hexagon_A4_round_ri>; +def : T_RR_pat <A4_round_rr, int_hexagon_A4_round_rr>; + +def : T_RI_pat <A4_round_ri_sat, int_hexagon_A4_round_ri_sat>; +def : T_RR_pat <A4_round_rr_sat, int_hexagon_A4_round_rr_sat>; + +def : T_P_pat <A2_roundsat, int_hexagon_A2_roundsat>; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV5.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV5.td new file mode 100644 index 0000000..60e6b1e --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV5.td @@ -0,0 +1,111 @@ +//===- HexagonIntrinsicsV5.td - V5 Instruction intrinsics --*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +//Rdd[+]=vrmpybsu(Rss,Rtt) +//Rdd[+]=vrmpybuu(Rss,Rtt) +let Predicates = [HasV5T] in { +def : T_PP_pat <M5_vrmpybsu, int_hexagon_M5_vrmpybsu>; +def : T_PP_pat <M5_vrmpybuu, int_hexagon_M5_vrmpybuu>; + +def : T_PP_pat <M5_vdmpybsu, int_hexagon_M5_vdmpybsu>; + +def : T_PPP_pat <M5_vrmacbsu, int_hexagon_M5_vrmacbsu>; +def : T_PPP_pat <M5_vrmacbuu, int_hexagon_M5_vrmacbuu>; +//Rxx+=vdmpybsu(Rss,Rtt):sat +def : T_PPP_pat <M5_vdmacbsu, int_hexagon_M5_vdmacbsu>; + +// Vector multiply bytes +// Rdd=vmpyb[s]u(Rs,Rt) +def : T_RR_pat <M5_vmpybsu, int_hexagon_M5_vmpybsu>; +def : T_RR_pat <M5_vmpybuu, int_hexagon_M5_vmpybuu>; + +// Rxx+=vmpyb[s]u(Rs,Rt) +def : T_PRR_pat <M5_vmacbsu, int_hexagon_M5_vmacbsu>; +def : T_PRR_pat <M5_vmacbuu, int_hexagon_M5_vmacbuu>; + +// Rd=vaddhub(Rss,Rtt):sat +def : T_PP_pat <A5_vaddhubs, int_hexagon_A5_vaddhubs>; +} + +def : T_FF_pat<F2_sfadd, int_hexagon_F2_sfadd>; +def : T_FF_pat<F2_sfsub, int_hexagon_F2_sfsub>; +def : T_FF_pat<F2_sfmpy, int_hexagon_F2_sfmpy>; +def : T_FF_pat<F2_sfmax, int_hexagon_F2_sfmax>; +def : T_FF_pat<F2_sfmin, int_hexagon_F2_sfmin>; + +def : T_FF_pat<F2_sffixupn, int_hexagon_F2_sffixupn>; +def : T_FF_pat<F2_sffixupd, int_hexagon_F2_sffixupd>; +def : T_F_pat <F2_sffixupr, int_hexagon_F2_sffixupr>; + +def: qi_CRInst_qiqi_pat<C4_fastcorner9, int_hexagon_C4_fastcorner9>; +def: qi_CRInst_qiqi_pat<C4_fastcorner9_not, int_hexagon_C4_fastcorner9_not>; + +def : T_P_pat <S5_popcountp, int_hexagon_S5_popcountp>; +def : T_PI_pat <S5_asrhub_sat, int_hexagon_S5_asrhub_sat>; + +def : T_PI_pat <S2_asr_i_p_rnd, int_hexagon_S2_asr_i_p_rnd>; +def : T_PI_pat <S2_asr_i_p_rnd_goodsyntax, + int_hexagon_S2_asr_i_p_rnd_goodsyntax>; + +def : T_PI_pat <S5_asrhub_rnd_sat_goodsyntax, + int_hexagon_S5_asrhub_rnd_sat_goodsyntax>; + +def : T_PI_pat <S5_vasrhrnd_goodsyntax, int_hexagon_S5_vasrhrnd_goodsyntax>; + +def : T_FFF_pat <F2_sffma, int_hexagon_F2_sffma>; +def : T_FFF_pat <F2_sffms, int_hexagon_F2_sffms>; +def : T_FFF_pat <F2_sffma_lib, int_hexagon_F2_sffma_lib>; +def : T_FFF_pat <F2_sffms_lib, int_hexagon_F2_sffms_lib>; +def : T_FFFQ_pat <F2_sffma_sc, int_hexagon_F2_sffma_sc>; + +// Compare floating-point value +def : T_FF_pat <F2_sfcmpge, int_hexagon_F2_sfcmpge>; +def : T_FF_pat <F2_sfcmpuo, int_hexagon_F2_sfcmpuo>; +def : T_FF_pat <F2_sfcmpeq, int_hexagon_F2_sfcmpeq>; +def : T_FF_pat <F2_sfcmpgt, int_hexagon_F2_sfcmpgt>; + +def : T_DD_pat <F2_dfcmpeq, int_hexagon_F2_dfcmpeq>; +def : T_DD_pat <F2_dfcmpgt, int_hexagon_F2_dfcmpgt>; +def : T_DD_pat <F2_dfcmpge, int_hexagon_F2_dfcmpge>; +def : T_DD_pat <F2_dfcmpuo, int_hexagon_F2_dfcmpuo>; + +// Create floating-point value +def : T_I_pat <F2_sfimm_p, int_hexagon_F2_sfimm_p>; +def : T_I_pat <F2_sfimm_n, int_hexagon_F2_sfimm_n>; +def : T_I_pat <F2_dfimm_p, int_hexagon_F2_dfimm_p>; +def : T_I_pat <F2_dfimm_n, int_hexagon_F2_dfimm_n>; + +def : T_DI_pat <F2_dfclass, int_hexagon_F2_dfclass>; +def : T_FI_pat <F2_sfclass, int_hexagon_F2_sfclass>; +def : T_F_pat <F2_conv_sf2df, int_hexagon_F2_conv_sf2df>; +def : T_D_pat <F2_conv_df2sf, int_hexagon_F2_conv_df2sf>; +def : T_R_pat <F2_conv_uw2sf, int_hexagon_F2_conv_uw2sf>; +def : T_R_pat <F2_conv_uw2df, int_hexagon_F2_conv_uw2df>; +def : T_R_pat <F2_conv_w2sf, int_hexagon_F2_conv_w2sf>; +def : T_R_pat <F2_conv_w2df, int_hexagon_F2_conv_w2df>; +def : T_P_pat <F2_conv_ud2sf, int_hexagon_F2_conv_ud2sf>; +def : T_P_pat <F2_conv_ud2df, int_hexagon_F2_conv_ud2df>; +def : T_P_pat <F2_conv_d2sf, 
int_hexagon_F2_conv_d2sf>; +def : T_P_pat <F2_conv_d2df, int_hexagon_F2_conv_d2df>; +def : T_F_pat <F2_conv_sf2uw, int_hexagon_F2_conv_sf2uw>; +def : T_F_pat <F2_conv_sf2w, int_hexagon_F2_conv_sf2w>; +def : T_F_pat <F2_conv_sf2ud, int_hexagon_F2_conv_sf2ud>; +def : T_F_pat <F2_conv_sf2d, int_hexagon_F2_conv_sf2d>; +def : T_D_pat <F2_conv_df2uw, int_hexagon_F2_conv_df2uw>; +def : T_D_pat <F2_conv_df2w, int_hexagon_F2_conv_df2w>; +def : T_D_pat <F2_conv_df2ud, int_hexagon_F2_conv_df2ud>; +def : T_D_pat <F2_conv_df2d, int_hexagon_F2_conv_df2d>; +def : T_F_pat <F2_conv_sf2uw_chop, int_hexagon_F2_conv_sf2uw_chop>; +def : T_F_pat <F2_conv_sf2w_chop, int_hexagon_F2_conv_sf2w_chop>; +def : T_F_pat <F2_conv_sf2ud_chop, int_hexagon_F2_conv_sf2ud_chop>; +def : T_F_pat <F2_conv_sf2d_chop, int_hexagon_F2_conv_sf2d_chop>; +def : T_D_pat <F2_conv_df2uw_chop, int_hexagon_F2_conv_df2uw_chop>; +def : T_D_pat <F2_conv_df2w_chop, int_hexagon_F2_conv_df2w_chop>; +def : T_D_pat <F2_conv_df2ud_chop, int_hexagon_F2_conv_df2ud_chop>; +def : T_D_pat <F2_conv_df2d_chop, int_hexagon_F2_conv_df2d_chop>; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV60.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV60.td new file mode 100644 index 0000000..24a3e4d --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV60.td @@ -0,0 +1,836 @@ +//=- HexagonIntrinsicsV60.td - Target Description for Hexagon -*- tablegen *-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Hexagon V60 Compiler Intrinsics in TableGen format. +// +//===----------------------------------------------------------------------===// + +let isCodeGenOnly = 1 in { +def HEXAGON_V6_vd0_pseudo : CVI_VA_Resource<(outs VectorRegs:$dst), + (ins ), + "$dst=#0", + [(set VectorRegs:$dst, (int_hexagon_V6_vd0 ))]>; + +def HEXAGON_V6_vd0_pseudo_128B : CVI_VA_Resource<(outs VectorRegs128B:$dst), + (ins ), + "$dst=#0", + [(set VectorRegs128B:$dst, (int_hexagon_V6_vd0_128B ))]>; +} +let isPseudo = 1 in +def HEXAGON_V6_vassignp : CVI_VA_Resource<(outs VecDblRegs:$dst), + (ins VecDblRegs:$src1), + "$dst=vassignp_W($src1)", + [(set VecDblRegs:$dst, (int_hexagon_V6_vassignp VecDblRegs:$src1))]>; + +let isPseudo = 1 in +def HEXAGON_V6_vassignp_128B : CVI_VA_Resource<(outs VecDblRegs128B:$dst), + (ins VecDblRegs128B:$src1), + "$dst=vassignp_W_128B($src1)", + [(set VecDblRegs128B:$dst, (int_hexagon_V6_vassignp_128B + VecDblRegs128B:$src1))]>; + +let isPseudo = 1 in +def HEXAGON_V6_lo : CVI_VA_Resource<(outs VectorRegs:$dst), + (ins VecDblRegs:$src1), + "$dst=lo_W($src1)", + [(set VectorRegs:$dst, (int_hexagon_V6_lo VecDblRegs:$src1))]>; + +let isPseudo = 1 in +def HEXAGON_V6_hi : CVI_VA_Resource<(outs VectorRegs:$dst), + (ins VecDblRegs:$src1), + "$dst=hi_W($src1)", + [(set VectorRegs:$dst, (int_hexagon_V6_hi VecDblRegs:$src1))]>; + +let isPseudo = 1 in +def HEXAGON_V6_lo_128B : CVI_VA_Resource<(outs VectorRegs128B:$dst), + (ins VecDblRegs128B:$src1), + "$dst=lo_W($src1)", + [(set VectorRegs128B:$dst, (int_hexagon_V6_lo_128B VecDblRegs128B:$src1))]>; + +let isPseudo = 1 in +def HEXAGON_V6_hi_128B : CVI_VA_Resource<(outs VectorRegs128B:$dst), + (ins VecDblRegs128B:$src1), + "$dst=hi_W($src1)", + [(set VectorRegs128B:$dst, (int_hexagon_V6_hi_128B VecDblRegs128B:$src1))]>; + +let AddedComplexity = 100 in { +def : Pat < 
(v16i32 (int_hexagon_V6_lo (v32i32 VecDblRegs:$src1))), + (v16i32 (EXTRACT_SUBREG (v32i32 VecDblRegs:$src1), subreg_loreg)) >, + Requires<[UseHVXSgl]>; + +def : Pat < (v16i32 (int_hexagon_V6_hi (v32i32 VecDblRegs:$src1))), + (v16i32 (EXTRACT_SUBREG (v32i32 VecDblRegs:$src1), subreg_hireg)) >, + Requires<[UseHVXSgl]>; + +def : Pat < (v32i32 (int_hexagon_V6_lo_128B (v64i32 VecDblRegs128B:$src1))), + (v32i32 (EXTRACT_SUBREG (v64i32 VecDblRegs128B:$src1), + subreg_loreg)) >, + Requires<[UseHVXDbl]>; + +def : Pat < (v32i32 (int_hexagon_V6_hi_128B (v64i32 VecDblRegs128B:$src1))), + (v32i32 (EXTRACT_SUBREG (v64i32 VecDblRegs128B:$src1), + subreg_hireg)) >, + Requires<[UseHVXDbl]>; +} + +def : Pat <(v512i1 (bitconvert (v16i32 VectorRegs:$src1))), + (v512i1 (V6_vandvrt(v16i32 VectorRegs:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXSgl]>; + +def : Pat <(v512i1 (bitconvert (v32i16 VectorRegs:$src1))), + (v512i1 (V6_vandvrt(v32i16 VectorRegs:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXSgl]>; + +def : Pat <(v512i1 (bitconvert (v64i8 VectorRegs:$src1))), + (v512i1 (V6_vandvrt(v64i8 VectorRegs:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXSgl]>; + +def : Pat <(v512i1 (bitconvert (v8i64 VectorRegs:$src1))), + (v512i1 (V6_vandvrt(v8i64 VectorRegs:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXSgl]>; + +def : Pat <(v16i32 (bitconvert (v512i1 VecPredRegs:$src1))), + (v16i32 (V6_vandqrt(v512i1 VecPredRegs:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXSgl]>; + +def : Pat <(v32i16 (bitconvert (v512i1 VecPredRegs:$src1))), + (v32i16 (V6_vandqrt(v512i1 VecPredRegs:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXSgl]>; + +def : Pat <(v64i8 (bitconvert (v512i1 VecPredRegs:$src1))), + (v64i8 (V6_vandqrt(v512i1 VecPredRegs:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXSgl]>; + +def : Pat <(v8i64 (bitconvert (v512i1 VecPredRegs:$src1))), + (v8i64 (V6_vandqrt(v512i1 VecPredRegs:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXSgl]>; + +def : Pat <(v1024i1 (bitconvert (v32i32 VectorRegs128B:$src1))), + (v1024i1 (V6_vandvrt_128B(v32i32 VectorRegs128B:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXDbl]>; + +def : Pat <(v1024i1 (bitconvert (v64i16 VectorRegs128B:$src1))), + (v1024i1 (V6_vandvrt_128B(v64i16 VectorRegs128B:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXDbl]>; + +def : Pat <(v1024i1 (bitconvert (v128i8 VectorRegs128B:$src1))), + (v1024i1 (V6_vandvrt_128B(v128i8 VectorRegs128B:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXDbl]>; + +def : Pat <(v1024i1 (bitconvert (v16i64 VectorRegs128B:$src1))), + (v1024i1 (V6_vandvrt_128B(v16i64 VectorRegs128B:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXDbl]>; + +def : Pat <(v32i32 (bitconvert (v1024i1 VecPredRegs128B:$src1))), + (v32i32 (V6_vandqrt_128B(v1024i1 VecPredRegs128B:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXDbl]>; + +def : Pat <(v64i16 (bitconvert (v1024i1 VecPredRegs128B:$src1))), + (v64i16 (V6_vandqrt_128B(v1024i1 VecPredRegs128B:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXDbl]>; + +def : Pat <(v128i8 (bitconvert (v1024i1 VecPredRegs128B:$src1))), + (v128i8 (V6_vandqrt_128B(v1024i1 VecPredRegs128B:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXDbl]>; + +def : Pat <(v16i64 (bitconvert (v1024i1 VecPredRegs128B:$src1))), + (v16i64 (V6_vandqrt_128B(v1024i1 VecPredRegs128B:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXDbl]>; + +let AddedComplexity = 140 in { +def : Pat <(store (v512i1 VecPredRegs:$src1), (i32 IntRegs:$addr)), + 
(V6_vS32b_ai IntRegs:$addr, 0, + (v16i32 (V6_vandqrt (v512i1 VecPredRegs:$src1), + (A2_tfrsi 0x01010101))))>, + Requires<[UseHVXSgl]>; + +def : Pat <(v512i1 (load (i32 IntRegs:$addr))), + (v512i1 (V6_vandvrt + (v16i32 (V6_vL32b_ai IntRegs:$addr, 0)), (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXSgl]>; + +def : Pat <(store (v1024i1 VecPredRegs128B:$src1), (i32 IntRegs:$addr)), + (V6_vS32b_ai_128B IntRegs:$addr, 0, + (v32i32 (V6_vandqrt_128B (v1024i1 VecPredRegs128B:$src1), + (A2_tfrsi 0x01010101))))>, + Requires<[UseHVXDbl]>; + +def : Pat <(v1024i1 (load (i32 IntRegs:$addr))), + (v1024i1 (V6_vandvrt_128B + (v32i32 (V6_vL32b_ai_128B IntRegs:$addr, 0)), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXDbl]>; +} + +multiclass T_R_pat <InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID IntRegs:$src1), (MI IntRegs:$src1)>, + Requires<[UseHVXSgl]>; + def: Pat<(!cast<Intrinsic>(IntID#"_128B") IntRegs:$src1), + (!cast<InstHexagon>(MI#"_128B") IntRegs:$src1)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_V_pat <InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID VectorRegs:$src1), + (MI VectorRegs:$src1)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1), + (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_Q_pat <InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID VecPredRegs:$src1), + (MI VecPredRegs:$src1)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecPredRegs128B:$src1), + (!cast<InstHexagon>(MI#"_128B") VecPredRegs128B:$src1)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_WR_pat <InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID VecDblRegs:$src1, IntRegs:$src2), + (MI VecDblRegs:$src1, IntRegs:$src2)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast<Intrinsic>(IntID#"_128B")VecDblRegs128B:$src1, IntRegs:$src2), + (!cast<InstHexagon>(MI#"_128B")VecDblRegs128B:$src1, IntRegs:$src2)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_VR_pat <InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID VectorRegs:$src1, IntRegs:$src2), + (MI VectorRegs:$src1, IntRegs:$src2)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast<Intrinsic>(IntID#"_128B")VectorRegs128B:$src1, IntRegs:$src2), + (!cast<InstHexagon>(MI#"_128B")VectorRegs128B:$src1, IntRegs:$src2)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_WV_pat <InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID VecDblRegs:$src1, VectorRegs:$src2), + (MI VecDblRegs:$src1, VectorRegs:$src2)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1, + VectorRegs128B:$src2), + (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1, + VectorRegs128B:$src2)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_WW_pat <InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID VecDblRegs:$src1, VecDblRegs:$src2), + (MI VecDblRegs:$src1, VecDblRegs:$src2)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1, + VecDblRegs128B:$src2), + (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1, + VecDblRegs128B:$src2)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_VV_pat <InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID VectorRegs:$src1, VectorRegs:$src2), + (MI VectorRegs:$src1, VectorRegs:$src2)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1, + VectorRegs128B:$src2), + (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1, + VectorRegs128B:$src2)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_QR_pat <InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID 
VecPredRegs:$src1, IntRegs:$src2), + (MI VecPredRegs:$src1, IntRegs:$src2)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecPredRegs128B:$src1, + IntRegs:$src2), + (!cast<InstHexagon>(MI#"_128B") VecPredRegs128B:$src1, + IntRegs:$src2)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_QQ_pat <InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID VecPredRegs:$src1, VecPredRegs:$src2), + (MI VecPredRegs:$src1, VecPredRegs:$src2)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecPredRegs128B:$src1, + VecPredRegs128B:$src2), + (!cast<InstHexagon>(MI#"_128B") VecPredRegs128B:$src1, + VecPredRegs128B:$src2)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_WWR_pat <InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID VecDblRegs:$src1, VecDblRegs:$src2, IntRegs:$src3), + (MI VecDblRegs:$src1, VecDblRegs:$src2, IntRegs:$src3)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1, + VecDblRegs128B:$src2, + IntRegs:$src3), + (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1, + VecDblRegs128B:$src2, + IntRegs:$src3)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_VVR_pat <InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID VectorRegs:$src1, VectorRegs:$src2, IntRegs:$src3), + (MI VectorRegs:$src1, VectorRegs:$src2, IntRegs:$src3)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1, + VectorRegs128B:$src2, + IntRegs:$src3), + (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1, + VectorRegs128B:$src2, + IntRegs:$src3)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_WVR_pat <InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID VecDblRegs:$src1, VectorRegs:$src2, IntRegs:$src3), + (MI VecDblRegs:$src1, VectorRegs:$src2, IntRegs:$src3)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1, + VectorRegs128B:$src2, + IntRegs:$src3), + (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1, + VectorRegs128B:$src2, + IntRegs:$src3)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_VWR_pat <InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID VectorRegs:$src1, VecDblRegs:$src2, IntRegs:$src3), + (MI VectorRegs:$src1, VecDblRegs:$src2, IntRegs:$src3)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1, + VecDblRegs128B:$src2, + IntRegs:$src3), + (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1, + VecDblRegs128B:$src2, + IntRegs:$src3)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_VVV_pat <InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID VectorRegs:$src1, VectorRegs:$src2, VectorRegs:$src3), + (MI VectorRegs:$src1, VectorRegs:$src2, VectorRegs:$src3)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1, + VectorRegs128B:$src2, + VectorRegs128B:$src3), + (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1, + VectorRegs128B:$src2, + VectorRegs128B:$src3)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_WVV_pat <InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID VecDblRegs:$src1, VectorRegs:$src2, VectorRegs:$src3), + (MI VecDblRegs:$src1, VectorRegs:$src2, VectorRegs:$src3)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1, + VectorRegs128B:$src2, + VectorRegs128B:$src3), + (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1, + VectorRegs128B:$src2, + VectorRegs128B:$src3)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_QVV_pat <InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID VecPredRegs:$src1, VectorRegs:$src2, 
VectorRegs:$src3), + (MI VecPredRegs:$src1, VectorRegs:$src2, VectorRegs:$src3)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecPredRegs128B:$src1, + VectorRegs128B:$src2, + VectorRegs128B:$src3), + (!cast<InstHexagon>(MI#"_128B") VecPredRegs128B:$src1, + VectorRegs128B:$src2, + VectorRegs128B:$src3)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_VQR_pat <InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID VectorRegs:$src1, VecPredRegs:$src2, IntRegs:$src3), + (MI VectorRegs:$src1, VecPredRegs:$src2, IntRegs:$src3)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1, + VecPredRegs128B:$src2, + IntRegs:$src3), + (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1, + VecPredRegs128B:$src2, + IntRegs:$src3)>, + Requires<[UseHVXDbl]>; +} + + +multiclass T_QVR_pat <InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID VecPredRegs:$src1, VectorRegs:$src2, IntRegs:$src3), + (MI VecPredRegs:$src1, VectorRegs:$src2, IntRegs:$src3)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecPredRegs128B:$src1, + VectorRegs128B:$src2, + IntRegs:$src3), + (!cast<InstHexagon>(MI#"_128B") VecPredRegs128B:$src1, + VectorRegs128B:$src2, + IntRegs:$src3)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_VVI_pat <InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID VectorRegs:$src1, VectorRegs:$src2, imm:$src3), + (MI VectorRegs:$src1, VectorRegs:$src2, imm:$src3)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1, + VectorRegs128B:$src2, imm:$src3), + (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1, + VectorRegs128B:$src2, imm:$src3)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_WRI_pat <InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID VecDblRegs:$src1, IntRegs:$src2, imm:$src3), + (MI VecDblRegs:$src1, IntRegs:$src2, imm:$src3)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1, + IntRegs:$src2, imm:$src3), + (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1, + IntRegs:$src2, imm:$src3)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_WWRI_pat <InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID VecDblRegs:$src1, VecDblRegs:$src2, IntRegs:$src3, imm:$src4), + (MI VecDblRegs:$src1, VecDblRegs:$src2, IntRegs:$src3, imm:$src4)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1, + VecDblRegs128B:$src2, + IntRegs:$src3, imm:$src4), + (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1, + VecDblRegs128B:$src2, + IntRegs:$src3, imm:$src4)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_VVVR_pat <InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID VectorRegs:$src1, VectorRegs:$src2, VectorRegs:$src3, + IntRegs:$src4), + (MI VectorRegs:$src1, VectorRegs:$src2, VectorRegs:$src3, + IntRegs:$src4)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1, + VectorRegs128B:$src2, + VectorRegs128B:$src3, + IntRegs:$src4), + (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1, + VectorRegs128B:$src2, + VectorRegs128B:$src3, + IntRegs:$src4)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_WVVR_pat <InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID VecDblRegs:$src1, VectorRegs:$src2, VectorRegs:$src3, + IntRegs:$src4), + (MI VecDblRegs:$src1, VectorRegs:$src2, VectorRegs:$src3, + IntRegs:$src4)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1, + VectorRegs128B:$src2, + VectorRegs128B:$src3, + IntRegs:$src4), + 
(!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1, + VectorRegs128B:$src2, + VectorRegs128B:$src3, + IntRegs:$src4)>, + Requires<[UseHVXDbl]>; +} + +defm : T_WR_pat<V6_vtmpyb, int_hexagon_V6_vtmpyb>; +defm : T_WR_pat <V6_vtmpybus, int_hexagon_V6_vtmpybus>; +defm : T_VR_pat <V6_vdmpyhb, int_hexagon_V6_vdmpyhb>; +defm : T_VR_pat <V6_vrmpyub, int_hexagon_V6_vrmpyub>; +defm : T_VR_pat <V6_vrmpybus, int_hexagon_V6_vrmpybus>; +defm : T_WR_pat <V6_vdsaduh, int_hexagon_V6_vdsaduh>; +defm : T_VR_pat <V6_vdmpybus, int_hexagon_V6_vdmpybus>; +defm : T_WR_pat <V6_vdmpybus_dv, int_hexagon_V6_vdmpybus_dv>; +defm : T_VR_pat <V6_vdmpyhsusat, int_hexagon_V6_vdmpyhsusat>; +defm : T_WR_pat <V6_vdmpyhsuisat, int_hexagon_V6_vdmpyhsuisat>; +defm : T_VR_pat <V6_vdmpyhsat, int_hexagon_V6_vdmpyhsat>; +defm : T_WR_pat <V6_vdmpyhisat, int_hexagon_V6_vdmpyhisat>; +defm : T_WR_pat <V6_vdmpyhb_dv, int_hexagon_V6_vdmpyhb_dv>; +defm : T_VR_pat <V6_vmpybus, int_hexagon_V6_vmpybus>; +defm : T_WR_pat <V6_vmpabus, int_hexagon_V6_vmpabus>; +defm : T_WR_pat <V6_vmpahb, int_hexagon_V6_vmpahb>; +defm : T_VR_pat <V6_vmpyh, int_hexagon_V6_vmpyh>; +defm : T_VR_pat <V6_vmpyhss, int_hexagon_V6_vmpyhss>; +defm : T_VR_pat <V6_vmpyhsrs, int_hexagon_V6_vmpyhsrs>; +defm : T_VR_pat <V6_vmpyuh, int_hexagon_V6_vmpyuh>; +defm : T_VR_pat <V6_vmpyihb, int_hexagon_V6_vmpyihb>; +defm : T_VR_pat <V6_vror, int_hexagon_V6_vror>; +defm : T_VR_pat <V6_vasrw, int_hexagon_V6_vasrw>; +defm : T_VR_pat <V6_vasrh, int_hexagon_V6_vasrh>; +defm : T_VR_pat <V6_vaslw, int_hexagon_V6_vaslw>; +defm : T_VR_pat <V6_vaslh, int_hexagon_V6_vaslh>; +defm : T_VR_pat <V6_vlsrw, int_hexagon_V6_vlsrw>; +defm : T_VR_pat <V6_vlsrh, int_hexagon_V6_vlsrh>; +defm : T_VR_pat <V6_vmpyiwh, int_hexagon_V6_vmpyiwh>; +defm : T_VR_pat <V6_vmpyiwb, int_hexagon_V6_vmpyiwb>; +defm : T_WR_pat <V6_vtmpyhb, int_hexagon_V6_vtmpyhb>; +defm : T_VR_pat <V6_vmpyub, int_hexagon_V6_vmpyub>; + +defm : T_VV_pat <V6_vrmpyubv, int_hexagon_V6_vrmpyubv>; +defm : T_VV_pat <V6_vrmpybv, int_hexagon_V6_vrmpybv>; +defm : T_VV_pat <V6_vrmpybusv, int_hexagon_V6_vrmpybusv>; +defm : T_VV_pat <V6_vdmpyhvsat, int_hexagon_V6_vdmpyhvsat>; +defm : T_VV_pat <V6_vmpybv, int_hexagon_V6_vmpybv>; +defm : T_VV_pat <V6_vmpyubv, int_hexagon_V6_vmpyubv>; +defm : T_VV_pat <V6_vmpybusv, int_hexagon_V6_vmpybusv>; +defm : T_VV_pat <V6_vmpyhv, int_hexagon_V6_vmpyhv>; +defm : T_VV_pat <V6_vmpyuhv, int_hexagon_V6_vmpyuhv>; +defm : T_VV_pat <V6_vmpyhvsrs, int_hexagon_V6_vmpyhvsrs>; +defm : T_VV_pat <V6_vmpyhus, int_hexagon_V6_vmpyhus>; +defm : T_WW_pat <V6_vmpabusv, int_hexagon_V6_vmpabusv>; +defm : T_VV_pat <V6_vmpyih, int_hexagon_V6_vmpyih>; +defm : T_VV_pat <V6_vand, int_hexagon_V6_vand>; +defm : T_VV_pat <V6_vor, int_hexagon_V6_vor>; +defm : T_VV_pat <V6_vxor, int_hexagon_V6_vxor>; +defm : T_VV_pat <V6_vaddw, int_hexagon_V6_vaddw>; +defm : T_VV_pat <V6_vaddubsat, int_hexagon_V6_vaddubsat>; +defm : T_VV_pat <V6_vadduhsat, int_hexagon_V6_vadduhsat>; +defm : T_VV_pat <V6_vaddhsat, int_hexagon_V6_vaddhsat>; +defm : T_VV_pat <V6_vaddwsat, int_hexagon_V6_vaddwsat>; +defm : T_VV_pat <V6_vsubb, int_hexagon_V6_vsubb>; +defm : T_VV_pat <V6_vsubh, int_hexagon_V6_vsubh>; +defm : T_VV_pat <V6_vsubw, int_hexagon_V6_vsubw>; +defm : T_VV_pat <V6_vsububsat, int_hexagon_V6_vsububsat>; +defm : T_VV_pat <V6_vsubuhsat, int_hexagon_V6_vsubuhsat>; +defm : T_VV_pat <V6_vsubhsat, int_hexagon_V6_vsubhsat>; +defm : T_VV_pat <V6_vsubwsat, int_hexagon_V6_vsubwsat>; +defm : T_WW_pat <V6_vaddb_dv, int_hexagon_V6_vaddb_dv>; +defm : T_WW_pat <V6_vaddh_dv, 
int_hexagon_V6_vaddh_dv>; +defm : T_WW_pat <V6_vaddw_dv, int_hexagon_V6_vaddw_dv>; +defm : T_WW_pat <V6_vaddubsat_dv, int_hexagon_V6_vaddubsat_dv>; +defm : T_WW_pat <V6_vadduhsat_dv, int_hexagon_V6_vadduhsat_dv>; +defm : T_WW_pat <V6_vaddhsat_dv, int_hexagon_V6_vaddhsat_dv>; +defm : T_WW_pat <V6_vaddwsat_dv, int_hexagon_V6_vaddwsat_dv>; +defm : T_WW_pat <V6_vsubb_dv, int_hexagon_V6_vsubb_dv>; +defm : T_WW_pat <V6_vsubh_dv, int_hexagon_V6_vsubh_dv>; +defm : T_WW_pat <V6_vsubw_dv, int_hexagon_V6_vsubw_dv>; +defm : T_WW_pat <V6_vsububsat_dv, int_hexagon_V6_vsububsat_dv>; +defm : T_WW_pat <V6_vsubuhsat_dv, int_hexagon_V6_vsubuhsat_dv>; +defm : T_WW_pat <V6_vsubhsat_dv, int_hexagon_V6_vsubhsat_dv>; +defm : T_WW_pat <V6_vsubwsat_dv, int_hexagon_V6_vsubwsat_dv>; +defm : T_VV_pat <V6_vaddubh, int_hexagon_V6_vaddubh>; +defm : T_VV_pat <V6_vadduhw, int_hexagon_V6_vadduhw>; +defm : T_VV_pat <V6_vaddhw, int_hexagon_V6_vaddhw>; +defm : T_VV_pat <V6_vsububh, int_hexagon_V6_vsububh>; +defm : T_VV_pat <V6_vsubuhw, int_hexagon_V6_vsubuhw>; +defm : T_VV_pat <V6_vsubhw, int_hexagon_V6_vsubhw>; +defm : T_VV_pat <V6_vabsdiffub, int_hexagon_V6_vabsdiffub>; +defm : T_VV_pat <V6_vabsdiffh, int_hexagon_V6_vabsdiffh>; +defm : T_VV_pat <V6_vabsdiffuh, int_hexagon_V6_vabsdiffuh>; +defm : T_VV_pat <V6_vabsdiffw, int_hexagon_V6_vabsdiffw>; +defm : T_VV_pat <V6_vavgub, int_hexagon_V6_vavgub>; +defm : T_VV_pat <V6_vavguh, int_hexagon_V6_vavguh>; +defm : T_VV_pat <V6_vavgh, int_hexagon_V6_vavgh>; +defm : T_VV_pat <V6_vavgw, int_hexagon_V6_vavgw>; +defm : T_VV_pat <V6_vnavgub, int_hexagon_V6_vnavgub>; +defm : T_VV_pat <V6_vnavgh, int_hexagon_V6_vnavgh>; +defm : T_VV_pat <V6_vnavgw, int_hexagon_V6_vnavgw>; +defm : T_VV_pat <V6_vavgubrnd, int_hexagon_V6_vavgubrnd>; +defm : T_VV_pat <V6_vavguhrnd, int_hexagon_V6_vavguhrnd>; +defm : T_VV_pat <V6_vavghrnd, int_hexagon_V6_vavghrnd>; +defm : T_VV_pat <V6_vavgwrnd, int_hexagon_V6_vavgwrnd>; +defm : T_WW_pat <V6_vmpabuuv, int_hexagon_V6_vmpabuuv>; + +defm : T_VVR_pat <V6_vdmpyhb_acc, int_hexagon_V6_vdmpyhb_acc>; +defm : T_VVR_pat <V6_vrmpyub_acc, int_hexagon_V6_vrmpyub_acc>; +defm : T_VVR_pat <V6_vrmpybus_acc, int_hexagon_V6_vrmpybus_acc>; +defm : T_VVR_pat <V6_vdmpybus_acc, int_hexagon_V6_vdmpybus_acc>; +defm : T_VVR_pat <V6_vdmpyhsusat_acc, int_hexagon_V6_vdmpyhsusat_acc>; +defm : T_VVR_pat <V6_vdmpyhsat_acc, int_hexagon_V6_vdmpyhsat_acc>; +defm : T_VVR_pat <V6_vmpyiwb_acc, int_hexagon_V6_vmpyiwb_acc>; +defm : T_VVR_pat <V6_vmpyiwh_acc, int_hexagon_V6_vmpyiwh_acc>; +defm : T_VVR_pat <V6_vmpyihb_acc, int_hexagon_V6_vmpyihb_acc>; +defm : T_VVR_pat <V6_vaslw_acc, int_hexagon_V6_vaslw_acc>; +defm : T_VVR_pat <V6_vasrw_acc, int_hexagon_V6_vasrw_acc>; + +defm : T_VWR_pat <V6_vdmpyhsuisat_acc, int_hexagon_V6_vdmpyhsuisat_acc>; +defm : T_VWR_pat <V6_vdmpyhisat_acc, int_hexagon_V6_vdmpyhisat_acc>; + +defm : T_WVR_pat <V6_vmpybus_acc, int_hexagon_V6_vmpybus_acc>; +defm : T_WVR_pat <V6_vmpyhsat_acc, int_hexagon_V6_vmpyhsat_acc>; +defm : T_WVR_pat <V6_vmpyuh_acc, int_hexagon_V6_vmpyuh_acc>; +defm : T_WVR_pat <V6_vmpyub_acc, int_hexagon_V6_vmpyub_acc>; + +defm : T_WWR_pat <V6_vtmpyb_acc, int_hexagon_V6_vtmpyb_acc>; +defm : T_WWR_pat <V6_vtmpybus_acc, int_hexagon_V6_vtmpybus_acc>; +defm : T_WWR_pat <V6_vtmpyhb_acc, int_hexagon_V6_vtmpyhb_acc>; +defm : T_WWR_pat <V6_vdmpybus_dv_acc, int_hexagon_V6_vdmpybus_dv_acc>; +defm : T_WWR_pat <V6_vdmpyhb_dv_acc, int_hexagon_V6_vdmpyhb_dv_acc>; +defm : T_WWR_pat <V6_vmpabus_acc, int_hexagon_V6_vmpabus_acc>; +defm : T_WWR_pat <V6_vmpahb_acc, 
int_hexagon_V6_vmpahb_acc>; +defm : T_WWR_pat <V6_vdsaduh_acc, int_hexagon_V6_vdsaduh_acc>; + +defm : T_VVV_pat <V6_vdmpyhvsat_acc, int_hexagon_V6_vdmpyhvsat_acc>; +defm : T_WVV_pat <V6_vmpybusv_acc, int_hexagon_V6_vmpybusv_acc>; +defm : T_WVV_pat <V6_vmpybv_acc, int_hexagon_V6_vmpybv_acc>; +defm : T_WVV_pat <V6_vmpyhus_acc, int_hexagon_V6_vmpyhus_acc>; +defm : T_WVV_pat <V6_vmpyhv_acc, int_hexagon_V6_vmpyhv_acc>; +defm : T_VVV_pat <V6_vmpyiewh_acc, int_hexagon_V6_vmpyiewh_acc>; +defm : T_VVV_pat <V6_vmpyiewuh_acc, int_hexagon_V6_vmpyiewuh_acc>; +defm : T_VVV_pat <V6_vmpyih_acc, int_hexagon_V6_vmpyih_acc>; +defm : T_VVV_pat <V6_vmpyowh_rnd_sacc, int_hexagon_V6_vmpyowh_rnd_sacc>; +defm : T_VVV_pat <V6_vmpyowh_sacc, int_hexagon_V6_vmpyowh_sacc>; +defm : T_WVV_pat <V6_vmpyubv_acc, int_hexagon_V6_vmpyubv_acc>; +defm : T_WVV_pat <V6_vmpyuhv_acc, int_hexagon_V6_vmpyuhv_acc>; +defm : T_VVV_pat <V6_vrmpybusv_acc, int_hexagon_V6_vrmpybusv_acc>; +defm : T_VVV_pat <V6_vrmpybv_acc, int_hexagon_V6_vrmpybv_acc>; +defm : T_VVV_pat <V6_vrmpyubv_acc, int_hexagon_V6_vrmpyubv_acc>; + +// Compare instructions +defm : T_QVV_pat <V6_veqb_and, int_hexagon_V6_veqb_and>; +defm : T_QVV_pat <V6_veqh_and, int_hexagon_V6_veqh_and>; +defm : T_QVV_pat <V6_veqw_and, int_hexagon_V6_veqw_and>; +defm : T_QVV_pat <V6_vgtb_and, int_hexagon_V6_vgtb_and>; +defm : T_QVV_pat <V6_vgth_and, int_hexagon_V6_vgth_and>; +defm : T_QVV_pat <V6_vgtw_and, int_hexagon_V6_vgtw_and>; +defm : T_QVV_pat <V6_vgtub_and, int_hexagon_V6_vgtub_and>; +defm : T_QVV_pat <V6_vgtuh_and, int_hexagon_V6_vgtuh_and>; +defm : T_QVV_pat <V6_vgtuw_and, int_hexagon_V6_vgtuw_and>; +defm : T_QVV_pat <V6_veqb_or, int_hexagon_V6_veqb_or>; +defm : T_QVV_pat <V6_veqh_or, int_hexagon_V6_veqh_or>; +defm : T_QVV_pat <V6_veqw_or, int_hexagon_V6_veqw_or>; +defm : T_QVV_pat <V6_vgtb_or, int_hexagon_V6_vgtb_or>; +defm : T_QVV_pat <V6_vgth_or, int_hexagon_V6_vgth_or>; +defm : T_QVV_pat <V6_vgtw_or, int_hexagon_V6_vgtw_or>; +defm : T_QVV_pat <V6_vgtub_or, int_hexagon_V6_vgtub_or>; +defm : T_QVV_pat <V6_vgtuh_or, int_hexagon_V6_vgtuh_or>; +defm : T_QVV_pat <V6_vgtuw_or, int_hexagon_V6_vgtuw_or>; +defm : T_QVV_pat <V6_veqb_xor, int_hexagon_V6_veqb_xor>; +defm : T_QVV_pat <V6_veqh_xor, int_hexagon_V6_veqh_xor>; +defm : T_QVV_pat <V6_veqw_xor, int_hexagon_V6_veqw_xor>; +defm : T_QVV_pat <V6_vgtb_xor, int_hexagon_V6_vgtb_xor>; +defm : T_QVV_pat <V6_vgth_xor, int_hexagon_V6_vgth_xor>; +defm : T_QVV_pat <V6_vgtw_xor, int_hexagon_V6_vgtw_xor>; +defm : T_QVV_pat <V6_vgtub_xor, int_hexagon_V6_vgtub_xor>; +defm : T_QVV_pat <V6_vgtuh_xor, int_hexagon_V6_vgtuh_xor>; +defm : T_QVV_pat <V6_vgtuw_xor, int_hexagon_V6_vgtuw_xor>; + +defm : T_VV_pat <V6_vminub, int_hexagon_V6_vminub>; +defm : T_VV_pat <V6_vminuh, int_hexagon_V6_vminuh>; +defm : T_VV_pat <V6_vminh, int_hexagon_V6_vminh>; +defm : T_VV_pat <V6_vminw, int_hexagon_V6_vminw>; +defm : T_VV_pat <V6_vmaxub, int_hexagon_V6_vmaxub>; +defm : T_VV_pat <V6_vmaxuh, int_hexagon_V6_vmaxuh>; +defm : T_VV_pat <V6_vmaxh, int_hexagon_V6_vmaxh>; +defm : T_VV_pat <V6_vmaxw, int_hexagon_V6_vmaxw>; +defm : T_VV_pat <V6_vdelta, int_hexagon_V6_vdelta>; +defm : T_VV_pat <V6_vrdelta, int_hexagon_V6_vrdelta>; +defm : T_VV_pat <V6_vdealb4w, int_hexagon_V6_vdealb4w>; +defm : T_VV_pat <V6_vmpyowh_rnd, int_hexagon_V6_vmpyowh_rnd>; +defm : T_VV_pat <V6_vshuffeb, int_hexagon_V6_vshuffeb>; +defm : T_VV_pat <V6_vshuffob, int_hexagon_V6_vshuffob>; +defm : T_VV_pat <V6_vshufeh, int_hexagon_V6_vshufeh>; +defm : T_VV_pat <V6_vshufoh, int_hexagon_V6_vshufoh>; +defm : 
T_VV_pat <V6_vshufoeh, int_hexagon_V6_vshufoeh>; +defm : T_VV_pat <V6_vshufoeb, int_hexagon_V6_vshufoeb>; +defm : T_VV_pat <V6_vcombine, int_hexagon_V6_vcombine>; +defm : T_VV_pat <V6_vmpyieoh, int_hexagon_V6_vmpyieoh>; +defm : T_VV_pat <V6_vsathub, int_hexagon_V6_vsathub>; +defm : T_VV_pat <V6_vsatwh, int_hexagon_V6_vsatwh>; +defm : T_VV_pat <V6_vroundwh, int_hexagon_V6_vroundwh>; +defm : T_VV_pat <V6_vroundwuh, int_hexagon_V6_vroundwuh>; +defm : T_VV_pat <V6_vroundhb, int_hexagon_V6_vroundhb>; +defm : T_VV_pat <V6_vroundhub, int_hexagon_V6_vroundhub>; +defm : T_VV_pat <V6_vasrwv, int_hexagon_V6_vasrwv>; +defm : T_VV_pat <V6_vlsrwv, int_hexagon_V6_vlsrwv>; +defm : T_VV_pat <V6_vlsrhv, int_hexagon_V6_vlsrhv>; +defm : T_VV_pat <V6_vasrhv, int_hexagon_V6_vasrhv>; +defm : T_VV_pat <V6_vaslwv, int_hexagon_V6_vaslwv>; +defm : T_VV_pat <V6_vaslhv, int_hexagon_V6_vaslhv>; +defm : T_VV_pat <V6_vaddb, int_hexagon_V6_vaddb>; +defm : T_VV_pat <V6_vaddh, int_hexagon_V6_vaddh>; +defm : T_VV_pat <V6_vmpyiewuh, int_hexagon_V6_vmpyiewuh>; +defm : T_VV_pat <V6_vmpyiowh, int_hexagon_V6_vmpyiowh>; +defm : T_VV_pat <V6_vpackeb, int_hexagon_V6_vpackeb>; +defm : T_VV_pat <V6_vpackeh, int_hexagon_V6_vpackeh>; +defm : T_VV_pat <V6_vpackhub_sat, int_hexagon_V6_vpackhub_sat>; +defm : T_VV_pat <V6_vpackhb_sat, int_hexagon_V6_vpackhb_sat>; +defm : T_VV_pat <V6_vpackwuh_sat, int_hexagon_V6_vpackwuh_sat>; +defm : T_VV_pat <V6_vpackwh_sat, int_hexagon_V6_vpackwh_sat>; +defm : T_VV_pat <V6_vpackob, int_hexagon_V6_vpackob>; +defm : T_VV_pat <V6_vpackoh, int_hexagon_V6_vpackoh>; +defm : T_VV_pat <V6_vmpyewuh, int_hexagon_V6_vmpyewuh>; +defm : T_VV_pat <V6_vmpyowh, int_hexagon_V6_vmpyowh>; + +defm : T_QVV_pat <V6_vaddbq, int_hexagon_V6_vaddbq>; +defm : T_QVV_pat <V6_vaddhq, int_hexagon_V6_vaddhq>; +defm : T_QVV_pat <V6_vaddwq, int_hexagon_V6_vaddwq>; +defm : T_QVV_pat <V6_vaddbnq, int_hexagon_V6_vaddbnq>; +defm : T_QVV_pat <V6_vaddhnq, int_hexagon_V6_vaddhnq>; +defm : T_QVV_pat <V6_vaddwnq, int_hexagon_V6_vaddwnq>; +defm : T_QVV_pat <V6_vsubbq, int_hexagon_V6_vsubbq>; +defm : T_QVV_pat <V6_vsubhq, int_hexagon_V6_vsubhq>; +defm : T_QVV_pat <V6_vsubwq, int_hexagon_V6_vsubwq>; +defm : T_QVV_pat <V6_vsubbnq, int_hexagon_V6_vsubbnq>; +defm : T_QVV_pat <V6_vsubhnq, int_hexagon_V6_vsubhnq>; +defm : T_QVV_pat <V6_vsubwnq, int_hexagon_V6_vsubwnq>; + +defm : T_V_pat <V6_vabsh, int_hexagon_V6_vabsh>; +defm : T_V_pat <V6_vabsw, int_hexagon_V6_vabsw>; +defm : T_V_pat <V6_vabsw_sat, int_hexagon_V6_vabsw_sat>; +defm : T_V_pat <V6_vabsh_sat, int_hexagon_V6_vabsh_sat>; +defm : T_V_pat <V6_vnot, int_hexagon_V6_vnot>; +defm : T_V_pat <V6_vassign, int_hexagon_V6_vassign>; +defm : T_V_pat <V6_vzb, int_hexagon_V6_vzb>; +defm : T_V_pat <V6_vzh, int_hexagon_V6_vzh>; +defm : T_V_pat <V6_vsb, int_hexagon_V6_vsb>; +defm : T_V_pat <V6_vsh, int_hexagon_V6_vsh>; +defm : T_V_pat <V6_vdealh, int_hexagon_V6_vdealh>; +defm : T_V_pat <V6_vdealb, int_hexagon_V6_vdealb>; +defm : T_V_pat <V6_vunpackub, int_hexagon_V6_vunpackub>; +defm : T_V_pat <V6_vunpackuh, int_hexagon_V6_vunpackuh>; +defm : T_V_pat <V6_vunpackb, int_hexagon_V6_vunpackb>; +defm : T_V_pat <V6_vunpackh, int_hexagon_V6_vunpackh>; +defm : T_V_pat <V6_vshuffh, int_hexagon_V6_vshuffh>; +defm : T_V_pat <V6_vshuffb, int_hexagon_V6_vshuffb>; +defm : T_V_pat <V6_vcl0w, int_hexagon_V6_vcl0w>; +defm : T_V_pat <V6_vpopcounth, int_hexagon_V6_vpopcounth>; +defm : T_V_pat <V6_vcl0h, int_hexagon_V6_vcl0h>; +defm : T_V_pat <V6_vnormamtw, int_hexagon_V6_vnormamtw>; +defm : T_V_pat <V6_vnormamth, 
int_hexagon_V6_vnormamth>; + +defm : T_WRI_pat <V6_vrmpybusi, int_hexagon_V6_vrmpybusi>; +defm : T_WRI_pat <V6_vrsadubi, int_hexagon_V6_vrsadubi>; +defm : T_WRI_pat <V6_vrmpyubi, int_hexagon_V6_vrmpyubi>; + +defm : T_WWRI_pat <V6_vrmpybusi_acc, int_hexagon_V6_vrmpybusi_acc>; +defm : T_WWRI_pat <V6_vrsadubi_acc, int_hexagon_V6_vrsadubi_acc>; +defm : T_WWRI_pat <V6_vrmpyubi_acc, int_hexagon_V6_vrmpyubi_acc>; + +// assembler mapped. +//defm : T_V_pat <V6_vtran2x2, int_hexagon_V6_vtran2x2>; +// not present earlier.. need to add intrinsic +defm : T_VVR_pat <V6_valignb, int_hexagon_V6_valignb>; +defm : T_VVR_pat <V6_vlalignb, int_hexagon_V6_vlalignb>; +defm : T_VVR_pat <V6_vasrwh, int_hexagon_V6_vasrwh>; +defm : T_VVR_pat <V6_vasrwhsat, int_hexagon_V6_vasrwhsat>; +defm : T_VVR_pat <V6_vasrwhrndsat, int_hexagon_V6_vasrwhrndsat>; +defm : T_VVR_pat <V6_vasrwuhsat, int_hexagon_V6_vasrwuhsat>; +defm : T_VVR_pat <V6_vasrhubsat, int_hexagon_V6_vasrhubsat>; +defm : T_VVR_pat <V6_vasrhubrndsat, int_hexagon_V6_vasrhubrndsat>; +defm : T_VVR_pat <V6_vasrhbrndsat, int_hexagon_V6_vasrhbrndsat>; + +defm : T_VVR_pat <V6_vshuffvdd, int_hexagon_V6_vshuffvdd>; +defm : T_VVR_pat <V6_vdealvdd, int_hexagon_V6_vdealvdd>; + +defm : T_WV_pat <V6_vunpackob, int_hexagon_V6_vunpackob>; +defm : T_WV_pat <V6_vunpackoh, int_hexagon_V6_vunpackoh>; +defm : T_VVI_pat <V6_valignbi, int_hexagon_V6_valignbi>; +defm : T_VVI_pat <V6_vlalignbi, int_hexagon_V6_vlalignbi>; + +defm : T_QVV_pat <V6_vswap, int_hexagon_V6_vswap>; +defm : T_QVV_pat <V6_vmux, int_hexagon_V6_vmux>; +defm : T_QQ_pat <V6_pred_and, int_hexagon_V6_pred_and>; +defm : T_QQ_pat <V6_pred_or, int_hexagon_V6_pred_or>; +defm : T_Q_pat <V6_pred_not, int_hexagon_V6_pred_not>; +defm : T_QQ_pat <V6_pred_xor, int_hexagon_V6_pred_xor>; +defm : T_QQ_pat <V6_pred_or_n, int_hexagon_V6_pred_or_n>; +defm : T_QQ_pat <V6_pred_and_n, int_hexagon_V6_pred_and_n>; +defm : T_VV_pat <V6_veqb, int_hexagon_V6_veqb>; +defm : T_VV_pat <V6_veqh, int_hexagon_V6_veqh>; +defm : T_VV_pat <V6_veqw, int_hexagon_V6_veqw>; +defm : T_VV_pat <V6_vgtb, int_hexagon_V6_vgtb>; +defm : T_VV_pat <V6_vgth, int_hexagon_V6_vgth>; +defm : T_VV_pat <V6_vgtw, int_hexagon_V6_vgtw>; +defm : T_VV_pat <V6_vgtub, int_hexagon_V6_vgtub>; +defm : T_VV_pat <V6_vgtuh, int_hexagon_V6_vgtuh>; +defm : T_VV_pat <V6_vgtuw, int_hexagon_V6_vgtuw>; + +defm : T_VQR_pat <V6_vandqrt_acc, int_hexagon_V6_vandqrt_acc>; +defm : T_QVR_pat <V6_vandvrt_acc, int_hexagon_V6_vandvrt_acc>; +defm : T_QR_pat <V6_vandqrt, int_hexagon_V6_vandqrt>; +defm : T_R_pat <V6_lvsplatw, int_hexagon_V6_lvsplatw>; +defm : T_R_pat <V6_pred_scalar2, int_hexagon_V6_pred_scalar2>; +defm : T_VR_pat <V6_vandvrt, int_hexagon_V6_vandvrt>; + +defm : T_VVR_pat <V6_vlutvvb, int_hexagon_V6_vlutvvb>; +defm : T_VVR_pat <V6_vlutvwh, int_hexagon_V6_vlutvwh>; +defm : T_VVVR_pat <V6_vlutvvb_oracc, int_hexagon_V6_vlutvvb_oracc>; +defm : T_WVVR_pat <V6_vlutvwh_oracc, int_hexagon_V6_vlutvwh_oracc>; + +defm : T_QVR_pat <V6_vandvrt_acc, int_hexagon_V6_vandvrt_acc>; +def : T_PI_pat <S6_rol_i_p, int_hexagon_S6_rol_i_p>; +def : T_RI_pat <S6_rol_i_r, int_hexagon_S6_rol_i_r>; +def : T_PPI_pat <S6_rol_i_p_nac, int_hexagon_S6_rol_i_p_nac>; +def : T_PPI_pat <S6_rol_i_p_acc, int_hexagon_S6_rol_i_p_acc>; +def : T_PPI_pat <S6_rol_i_p_and, int_hexagon_S6_rol_i_p_and>; +def : T_PPI_pat <S6_rol_i_p_or, int_hexagon_S6_rol_i_p_or>; +def : T_PPI_pat <S6_rol_i_p_xacc, int_hexagon_S6_rol_i_p_xacc>; +def : T_RRI_pat <S6_rol_i_r_nac, int_hexagon_S6_rol_i_r_nac>; +def : T_RRI_pat <S6_rol_i_r_acc, 
int_hexagon_S6_rol_i_r_acc>; +def : T_RRI_pat <S6_rol_i_r_and, int_hexagon_S6_rol_i_r_and>; +def : T_RRI_pat <S6_rol_i_r_or, int_hexagon_S6_rol_i_r_or>; +def : T_RRI_pat <S6_rol_i_r_xacc, int_hexagon_S6_rol_i_r_xacc>; + +defm : T_VR_pat <V6_extractw, int_hexagon_V6_extractw>; +defm : T_VR_pat <V6_vinsertwr, int_hexagon_V6_vinsertwr>; + +def : T_PPQ_pat <S2_cabacencbin, int_hexagon_S2_cabacencbin>; + +def: Pat<(v64i16 (trunc v64i32:$Vdd)), + (v64i16 (V6_vpackwh_sat_128B + (v32i32 (HEXAGON_V6_hi_128B VecDblRegs128B:$Vdd)), + (v32i32 (HEXAGON_V6_lo_128B VecDblRegs128B:$Vdd))))>, + Requires<[UseHVXDbl]>; + + diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIsetDx.td b/contrib/llvm/lib/Target/Hexagon/HexagonIsetDx.td new file mode 100644 index 0000000..0ca95e9 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonIsetDx.td @@ -0,0 +1,728 @@ +//=- HexagonIsetDx.td - Target Desc. for Hexagon Target -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Hexagon duplex instructions. +// +//===----------------------------------------------------------------------===// + +// SA1_combine1i: Combines. +let isCodeGenOnly = 1, hasSideEffects = 0 in +def V4_SA1_combine1i: SUBInst < + (outs DoubleRegs:$Rdd), + (ins u2Imm:$u2), + "$Rdd = combine(#1, #$u2)"> { + bits<3> Rdd; + bits<2> u2; + + let Inst{12-10} = 0b111; + let Inst{8} = 0b0; + let Inst{4-3} = 0b01; + let Inst{2-0} = Rdd; + let Inst{6-5} = u2; + } + +// SL2_jumpr31_f: Indirect conditional jump if false. +// SL2_jumpr31_f -> SL2_jumpr31_fnew +let Defs = [PC], Uses = [P0, R31], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in +def V4_SL2_jumpr31_f: SUBInst < + (outs ), + (ins ), + "if (!p0) jumpr r31"> { + let Inst{12-6} = 0b1111111; + let Inst{2-0} = 0b101; + } + +// SL2_deallocframe: Deallocate stack frame. +let Defs = [R31, R29, R30], Uses = [R30], isCodeGenOnly = 1, mayLoad = 1, accessSize = DoubleWordAccess in +def V4_SL2_deallocframe: SUBInst < + (outs ), + (ins ), + "deallocframe"> { + let Inst{12-6} = 0b1111100; + let Inst{2} = 0b0; + } + +// SL2_return_f: Deallocate stack frame and return. +// SL2_return_f -> SL2_return_fnew +let Defs = [PC, R31, R29, R30], Uses = [R30, P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, mayLoad = 1, accessSize = DoubleWordAccess, isBranch = 1, isIndirectBranch = 1 in +def V4_SL2_return_f: SUBInst < + (outs ), + (ins ), + "if (!p0) dealloc_return"> { + let Inst{12-6} = 0b1111101; + let Inst{2-0} = 0b101; + } + +// SA1_combine3i: Combines. +let isCodeGenOnly = 1, hasSideEffects = 0 in +def V4_SA1_combine3i: SUBInst < + (outs DoubleRegs:$Rdd), + (ins u2Imm:$u2), + "$Rdd = combine(#3, #$u2)"> { + bits<3> Rdd; + bits<2> u2; + + let Inst{12-10} = 0b111; + let Inst{8} = 0b0; + let Inst{4-3} = 0b11; + let Inst{2-0} = Rdd; + let Inst{6-5} = u2; + } + +// SS2_storebi0: Store byte. +let isCodeGenOnly = 1, mayStore = 1, accessSize = ByteAccess in +def V4_SS2_storebi0: SUBInst < + (outs ), + (ins IntRegs:$Rs, u4_0Imm:$u4_0), + "memb($Rs + #$u4_0)=#0"> { + bits<4> Rs; + bits<4> u4_0; + + let Inst{12-8} = 0b10010; + let Inst{7-4} = Rs; + let Inst{3-0} = u4_0; + } + +// SA1_clrtnew: Clear if true. 
+let Uses = [P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedNew = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_clrtnew: SUBInst < + (outs IntRegs:$Rd), + (ins ), + "if (p0.new) $Rd = #0"> { + bits<4> Rd; + + let Inst{12-9} = 0b1101; + let Inst{6-4} = 0b100; + let Inst{3-0} = Rd; + } + +// SL2_loadruh_io: Load half. +let isCodeGenOnly = 1, mayLoad = 1, accessSize = HalfWordAccess, hasNewValue = 1, opNewValue = 0 in +def V4_SL2_loadruh_io: SUBInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs, u3_1Imm:$u3_1), + "$Rd = memuh($Rs + #$u3_1)"> { + bits<4> Rd; + bits<4> Rs; + bits<4> u3_1; + + let Inst{12-11} = 0b01; + let Inst{3-0} = Rd; + let Inst{7-4} = Rs; + let Inst{10-8} = u3_1{3-1}; + } + +// SL2_jumpr31_tnew: Indirect conditional jump if true. +let Defs = [PC], Uses = [P0, R31], isCodeGenOnly = 1, isPredicated = 1, isPredicatedNew = 1, isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in +def V4_SL2_jumpr31_tnew: SUBInst < + (outs ), + (ins ), + "if (p0.new) jumpr:nt r31"> { + let Inst{12-6} = 0b1111111; + let Inst{2-0} = 0b110; + } + +// SA1_addi: Add. +let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0, isExtendable = 1, isExtentSigned = 1, opExtentBits = 7, opExtendable = 2 in +def V4_SA1_addi: SUBInst < + (outs IntRegs:$Rx), + (ins IntRegs:$_src_, s7Ext:$s7), + "$Rx = add($_src_, #$s7)" , + [] , + "$_src_ = $Rx"> { + bits<4> Rx; + bits<7> s7; + + let Inst{12-11} = 0b00; + let Inst{3-0} = Rx; + let Inst{10-4} = s7; + } + +// SL1_loadrub_io: Load byte. +let isCodeGenOnly = 1, mayLoad = 1, accessSize = ByteAccess, hasNewValue = 1, opNewValue = 0 in +def V4_SL1_loadrub_io: SUBInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs, u4_0Imm:$u4_0), + "$Rd = memub($Rs + #$u4_0)"> { + bits<4> Rd; + bits<4> Rs; + bits<4> u4_0; + + let Inst{12} = 0b1; + let Inst{3-0} = Rd; + let Inst{7-4} = Rs; + let Inst{11-8} = u4_0; + } + +// SL1_loadri_io: Load word. +let isCodeGenOnly = 1, mayLoad = 1, accessSize = WordAccess, hasNewValue = 1, opNewValue = 0 in +def V4_SL1_loadri_io: SUBInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs, u4_2Imm:$u4_2), + "$Rd = memw($Rs + #$u4_2)"> { + bits<4> Rd; + bits<4> Rs; + bits<6> u4_2; + + let Inst{12} = 0b0; + let Inst{3-0} = Rd; + let Inst{7-4} = Rs; + let Inst{11-8} = u4_2{5-2}; + } + +// SA1_cmpeqi: Compareimmed. +let Defs = [P0], isCodeGenOnly = 1, hasSideEffects = 0 in +def V4_SA1_cmpeqi: SUBInst < + (outs ), + (ins IntRegs:$Rs, u2Imm:$u2), + "p0 = cmp.eq($Rs, #$u2)"> { + bits<4> Rs; + bits<2> u2; + + let Inst{12-8} = 0b11001; + let Inst{7-4} = Rs; + let Inst{1-0} = u2; + } + +// SA1_combinerz: Combines. +let isCodeGenOnly = 1, hasSideEffects = 0 in +def V4_SA1_combinerz: SUBInst < + (outs DoubleRegs:$Rdd), + (ins IntRegs:$Rs), + "$Rdd = combine($Rs, #0)"> { + bits<3> Rdd; + bits<4> Rs; + + let Inst{12-10} = 0b111; + let Inst{8} = 0b1; + let Inst{3} = 0b1; + let Inst{2-0} = Rdd; + let Inst{7-4} = Rs; + } + +// SL2_return_t: Deallocate stack frame and return. +// SL2_return_t -> SL2_return_tnew +let Defs = [PC, R31, R29, R30], Uses = [R30, P0], isCodeGenOnly = 1, isPredicated = 1, mayLoad = 1, accessSize = DoubleWordAccess, isBranch = 1, isIndirectBranch = 1 in +def V4_SL2_return_t: SUBInst < + (outs ), + (ins ), + "if (p0) dealloc_return"> { + let Inst{12-6} = 0b1111101; + let Inst{2-0} = 0b100; + } + +// SS2_allocframe: Allocate stack frame. 
+let Defs = [R29, R30], Uses = [R30, R31, R29], isCodeGenOnly = 1, mayStore = 1, accessSize = DoubleWordAccess in +def V4_SS2_allocframe: SUBInst < + (outs ), + (ins u5_3Imm:$u5_3), + "allocframe(#$u5_3)"> { + bits<8> u5_3; + + let Inst{12-9} = 0b1110; + let Inst{8-4} = u5_3{7-3}; + } + +// SS2_storeh_io: Store half. +let isCodeGenOnly = 1, mayStore = 1, accessSize = HalfWordAccess in +def V4_SS2_storeh_io: SUBInst < + (outs ), + (ins IntRegs:$Rs, u3_1Imm:$u3_1, IntRegs:$Rt), + "memh($Rs + #$u3_1) = $Rt"> { + bits<4> Rs; + bits<4> u3_1; + bits<4> Rt; + + let Inst{12-11} = 0b00; + let Inst{7-4} = Rs; + let Inst{10-8} = u3_1{3-1}; + let Inst{3-0} = Rt; + } + +// SS2_storewi0: Store word. +let isCodeGenOnly = 1, mayStore = 1, accessSize = WordAccess in +def V4_SS2_storewi0: SUBInst < + (outs ), + (ins IntRegs:$Rs, u4_2Imm:$u4_2), + "memw($Rs + #$u4_2)=#0"> { + bits<4> Rs; + bits<6> u4_2; + + let Inst{12-8} = 0b10000; + let Inst{7-4} = Rs; + let Inst{3-0} = u4_2{5-2}; + } + +// SS2_storewi1: Store word. +let isCodeGenOnly = 1, mayStore = 1, accessSize = WordAccess in +def V4_SS2_storewi1: SUBInst < + (outs ), + (ins IntRegs:$Rs, u4_2Imm:$u4_2), + "memw($Rs + #$u4_2)=#1"> { + bits<4> Rs; + bits<6> u4_2; + + let Inst{12-8} = 0b10001; + let Inst{7-4} = Rs; + let Inst{3-0} = u4_2{5-2}; + } + +// SL2_jumpr31: Indirect conditional jump if true. +let Defs = [PC], Uses = [R31], isCodeGenOnly = 1, isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in +def V4_SL2_jumpr31: SUBInst < + (outs ), + (ins ), + "jumpr r31"> { + let Inst{12-6} = 0b1111111; + let Inst{2} = 0b0; + } + +// SA1_combinezr: Combines. +let isCodeGenOnly = 1, hasSideEffects = 0 in +def V4_SA1_combinezr: SUBInst < + (outs DoubleRegs:$Rdd), + (ins IntRegs:$Rs), + "$Rdd = combine(#0, $Rs)"> { + bits<3> Rdd; + bits<4> Rs; + + let Inst{12-10} = 0b111; + let Inst{8} = 0b1; + let Inst{3} = 0b0; + let Inst{2-0} = Rdd; + let Inst{7-4} = Rs; + } + +// SL2_loadrh_io: Load half. +let isCodeGenOnly = 1, mayLoad = 1, accessSize = HalfWordAccess, hasNewValue = 1, opNewValue = 0 in +def V4_SL2_loadrh_io: SUBInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs, u3_1Imm:$u3_1), + "$Rd = memh($Rs + #$u3_1)"> { + bits<4> Rd; + bits<4> Rs; + bits<4> u3_1; + + let Inst{12-11} = 0b00; + let Inst{3-0} = Rd; + let Inst{7-4} = Rs; + let Inst{10-8} = u3_1{3-1}; + } + +// SA1_addrx: Add. +let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_addrx: SUBInst < + (outs IntRegs:$Rx), + (ins IntRegs:$_src_, IntRegs:$Rs), + "$Rx = add($_src_, $Rs)" , + [] , + "$_src_ = $Rx"> { + bits<4> Rx; + bits<4> Rs; + + let Inst{12-8} = 0b11000; + let Inst{3-0} = Rx; + let Inst{7-4} = Rs; + } + +// SA1_setin1: Set to -1. +let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_setin1: SUBInst < + (outs IntRegs:$Rd), + (ins ), + "$Rd = #-1"> { + bits<4> Rd; + + let Inst{12-9} = 0b1101; + let Inst{6} = 0b0; + let Inst{3-0} = Rd; + } + +// SA1_sxth: Sxth. +let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_sxth: SUBInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs), + "$Rd = sxth($Rs)"> { + bits<4> Rd; + bits<4> Rs; + + let Inst{12-8} = 0b10100; + let Inst{3-0} = Rd; + let Inst{7-4} = Rs; + } + +// SA1_combine0i: Combines. 
+let isCodeGenOnly = 1, hasSideEffects = 0 in +def V4_SA1_combine0i: SUBInst < + (outs DoubleRegs:$Rdd), + (ins u2Imm:$u2), + "$Rdd = combine(#0, #$u2)"> { + bits<3> Rdd; + bits<2> u2; + + let Inst{12-10} = 0b111; + let Inst{8} = 0b0; + let Inst{4-3} = 0b00; + let Inst{2-0} = Rdd; + let Inst{6-5} = u2; + } + +// SA1_combine2i: Combines. +let isCodeGenOnly = 1, hasSideEffects = 0 in +def V4_SA1_combine2i: SUBInst < + (outs DoubleRegs:$Rdd), + (ins u2Imm:$u2), + "$Rdd = combine(#2, #$u2)"> { + bits<3> Rdd; + bits<2> u2; + + let Inst{12-10} = 0b111; + let Inst{8} = 0b0; + let Inst{4-3} = 0b10; + let Inst{2-0} = Rdd; + let Inst{6-5} = u2; + } + +// SA1_sxtb: Sxtb. +let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_sxtb: SUBInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs), + "$Rd = sxtb($Rs)"> { + bits<4> Rd; + bits<4> Rs; + + let Inst{12-8} = 0b10101; + let Inst{3-0} = Rd; + let Inst{7-4} = Rs; + } + +// SA1_clrf: Clear if false. +// SA1_clrf -> SA1_clrfnew +let Uses = [P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_clrf: SUBInst < + (outs IntRegs:$Rd), + (ins ), + "if (!p0) $Rd = #0"> { + bits<4> Rd; + + let Inst{12-9} = 0b1101; + let Inst{6-4} = 0b111; + let Inst{3-0} = Rd; + } + +// SL2_loadrb_io: Load byte. +let isCodeGenOnly = 1, mayLoad = 1, accessSize = ByteAccess, hasNewValue = 1, opNewValue = 0 in +def V4_SL2_loadrb_io: SUBInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs, u3_0Imm:$u3_0), + "$Rd = memb($Rs + #$u3_0)"> { + bits<4> Rd; + bits<4> Rs; + bits<3> u3_0; + + let Inst{12-11} = 0b10; + let Inst{3-0} = Rd; + let Inst{7-4} = Rs; + let Inst{10-8} = u3_0; + } + +// SA1_tfr: Tfr. +let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_tfr: SUBInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs), + "$Rd = $Rs"> { + bits<4> Rd; + bits<4> Rs; + + let Inst{12-8} = 0b10000; + let Inst{3-0} = Rd; + let Inst{7-4} = Rs; + } + +// SL2_loadrd_sp: Load dword. +let Uses = [R29], isCodeGenOnly = 1, mayLoad = 1, accessSize = DoubleWordAccess in +def V4_SL2_loadrd_sp: SUBInst < + (outs DoubleRegs:$Rdd), + (ins u5_3Imm:$u5_3), + "$Rdd = memd(r29 + #$u5_3)"> { + bits<3> Rdd; + bits<8> u5_3; + + let Inst{12-8} = 0b11110; + let Inst{2-0} = Rdd; + let Inst{7-3} = u5_3{7-3}; + } + +// SA1_and1: And #1. +let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_and1: SUBInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs), + "$Rd = and($Rs, #1)"> { + bits<4> Rd; + bits<4> Rs; + + let Inst{12-8} = 0b10010; + let Inst{3-0} = Rd; + let Inst{7-4} = Rs; + } + +// SS2_storebi1: Store byte. +let isCodeGenOnly = 1, mayStore = 1, accessSize = ByteAccess in +def V4_SS2_storebi1: SUBInst < + (outs ), + (ins IntRegs:$Rs, u4_0Imm:$u4_0), + "memb($Rs + #$u4_0)=#1"> { + bits<4> Rs; + bits<4> u4_0; + + let Inst{12-8} = 0b10011; + let Inst{7-4} = Rs; + let Inst{3-0} = u4_0; + } + +// SA1_inc: Inc. +let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_inc: SUBInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs), + "$Rd = add($Rs, #1)"> { + bits<4> Rd; + bits<4> Rs; + + let Inst{12-8} = 0b10001; + let Inst{3-0} = Rd; + let Inst{7-4} = Rs; + } + +// SS2_stored_sp: Store dword. 
+let Uses = [R29], isCodeGenOnly = 1, mayStore = 1, accessSize = DoubleWordAccess in +def V4_SS2_stored_sp: SUBInst < + (outs ), + (ins s6_3Imm:$s6_3, DoubleRegs:$Rtt), + "memd(r29 + #$s6_3) = $Rtt"> { + bits<9> s6_3; + bits<3> Rtt; + + let Inst{12-9} = 0b0101; + let Inst{8-3} = s6_3{8-3}; + let Inst{2-0} = Rtt; + } + +// SS2_storew_sp: Store word. +let Uses = [R29], isCodeGenOnly = 1, mayStore = 1, accessSize = WordAccess in +def V4_SS2_storew_sp: SUBInst < + (outs ), + (ins u5_2Imm:$u5_2, IntRegs:$Rt), + "memw(r29 + #$u5_2) = $Rt"> { + bits<7> u5_2; + bits<4> Rt; + + let Inst{12-9} = 0b0100; + let Inst{8-4} = u5_2{6-2}; + let Inst{3-0} = Rt; + } + +// SL2_jumpr31_fnew: Indirect conditional jump if false. +let Defs = [PC], Uses = [P0, R31], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, isPredicatedNew = 1, isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in +def V4_SL2_jumpr31_fnew: SUBInst < + (outs ), + (ins ), + "if (!p0.new) jumpr:nt r31"> { + let Inst{12-6} = 0b1111111; + let Inst{2-0} = 0b111; + } + +// SA1_clrt: Clear if true. +// SA1_clrt -> SA1_clrtnew +let Uses = [P0], isCodeGenOnly = 1, isPredicated = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_clrt: SUBInst < + (outs IntRegs:$Rd), + (ins ), + "if (p0) $Rd = #0"> { + bits<4> Rd; + + let Inst{12-9} = 0b1101; + let Inst{6-4} = 0b110; + let Inst{3-0} = Rd; + } + +// SL2_return: Deallocate stack frame and return. +let Defs = [PC, R31, R29, R30], Uses = [R30], isCodeGenOnly = 1, mayLoad = 1, accessSize = DoubleWordAccess, isBranch = 1, isIndirectBranch = 1 in +def V4_SL2_return: SUBInst < + (outs ), + (ins ), + "dealloc_return"> { + let Inst{12-6} = 0b1111101; + let Inst{2} = 0b0; + } + +// SA1_dec: Dec. +let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_dec: SUBInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs), + "$Rd = add($Rs,#-1)"> { + bits<4> Rd; + bits<4> Rs; + + let Inst{12-8} = 0b10011; + let Inst{3-0} = Rd; + let Inst{7-4} = Rs; + } + +// SA1_seti: Set immed. +let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0, isExtendable = 1, isExtentSigned = 0, opExtentBits = 6, opExtendable = 1 in +def V4_SA1_seti: SUBInst < + (outs IntRegs:$Rd), + (ins u6Ext:$u6), + "$Rd = #$u6"> { + bits<4> Rd; + bits<6> u6; + + let Inst{12-10} = 0b010; + let Inst{3-0} = Rd; + let Inst{9-4} = u6; + } + +// SL2_jumpr31_t: Indirect conditional jump if true. +// SL2_jumpr31_t -> SL2_jumpr31_tnew +let Defs = [PC], Uses = [P0, R31], isCodeGenOnly = 1, isPredicated = 1, isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in +def V4_SL2_jumpr31_t: SUBInst < + (outs ), + (ins ), + "if (p0) jumpr r31"> { + let Inst{12-6} = 0b1111111; + let Inst{2-0} = 0b100; + } + +// SA1_clrfnew: Clear if false. +let Uses = [P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, isPredicatedNew = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_clrfnew: SUBInst < + (outs IntRegs:$Rd), + (ins ), + "if (!p0.new) $Rd = #0"> { + bits<4> Rd; + + let Inst{12-9} = 0b1101; + let Inst{6-4} = 0b101; + let Inst{3-0} = Rd; + } + +// SS1_storew_io: Store word. +let isCodeGenOnly = 1, mayStore = 1, accessSize = WordAccess in +def V4_SS1_storew_io: SUBInst < + (outs ), + (ins IntRegs:$Rs, u4_2Imm:$u4_2, IntRegs:$Rt), + "memw($Rs + #$u4_2) = $Rt"> { + bits<4> Rs; + bits<6> u4_2; + bits<4> Rt; + + let Inst{12} = 0b0; + let Inst{7-4} = Rs; + let Inst{11-8} = u4_2{5-2}; + let Inst{3-0} = Rt; + } + +// SA1_zxtb: Zxtb. 
+let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_zxtb: SUBInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs), + "$Rd = and($Rs, #255)"> { + bits<4> Rd; + bits<4> Rs; + + let Inst{12-8} = 0b10111; + let Inst{3-0} = Rd; + let Inst{7-4} = Rs; + } + +// SA1_addsp: Add. +let Uses = [R29], isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_addsp: SUBInst < + (outs IntRegs:$Rd), + (ins u6_2Imm:$u6_2), + "$Rd = add(r29, #$u6_2)"> { + bits<4> Rd; + bits<8> u6_2; + + let Inst{12-10} = 0b011; + let Inst{3-0} = Rd; + let Inst{9-4} = u6_2{7-2}; + } + +// SL2_loadri_sp: Load word. +let Uses = [R29], isCodeGenOnly = 1, mayLoad = 1, accessSize = WordAccess, hasNewValue = 1, opNewValue = 0 in +def V4_SL2_loadri_sp: SUBInst < + (outs IntRegs:$Rd), + (ins u5_2Imm:$u5_2), + "$Rd = memw(r29 + #$u5_2)"> { + bits<4> Rd; + bits<7> u5_2; + + let Inst{12-9} = 0b1110; + let Inst{3-0} = Rd; + let Inst{8-4} = u5_2{6-2}; + } + +// SS1_storeb_io: Store byte. +let isCodeGenOnly = 1, mayStore = 1, accessSize = ByteAccess in +def V4_SS1_storeb_io: SUBInst < + (outs ), + (ins IntRegs:$Rs, u4_0Imm:$u4_0, IntRegs:$Rt), + "memb($Rs + #$u4_0) = $Rt"> { + bits<4> Rs; + bits<4> u4_0; + bits<4> Rt; + + let Inst{12} = 0b1; + let Inst{7-4} = Rs; + let Inst{11-8} = u4_0; + let Inst{3-0} = Rt; + } + +// SL2_return_tnew: Deallocate stack frame and return. +let Defs = [PC, R31, R29, R30], Uses = [R30, P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedNew = 1, mayLoad = 1, accessSize = DoubleWordAccess, isBranch = 1, isIndirectBranch = 1 in +def V4_SL2_return_tnew: SUBInst < + (outs ), + (ins ), + "if (p0.new) dealloc_return:nt"> { + let Inst{12-6} = 0b1111101; + let Inst{2-0} = 0b110; + } + +// SL2_return_fnew: Deallocate stack frame and return. +let Defs = [PC, R31, R29, R30], Uses = [R30, P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, isPredicatedNew = 1, mayLoad = 1, accessSize = DoubleWordAccess, isBranch = 1, isIndirectBranch = 1 in +def V4_SL2_return_fnew: SUBInst < + (outs ), + (ins ), + "if (!p0.new) dealloc_return:nt"> { + let Inst{12-6} = 0b1111101; + let Inst{2-0} = 0b111; + } + +// SA1_zxth: Zxth. +let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_zxth: SUBInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs), + "$Rd = zxth($Rs)"> { + bits<4> Rd; + bits<4> Rs; + + let Inst{12-8} = 0b10110; + let Inst{3-0} = Rd; + let Inst{7-4} = Rs; + } + diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp new file mode 100644 index 0000000..624c0f6 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp @@ -0,0 +1,146 @@ +//===- HexagonMCInstLower.cpp - Convert Hexagon MachineInstr to an MCInst -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains code to lower Hexagon MachineInstrs to their corresponding +// MCInst records. 
+// +//===----------------------------------------------------------------------===// + +#include "Hexagon.h" +#include "HexagonAsmPrinter.h" +#include "HexagonMachineFunctionInfo.h" +#include "MCTargetDesc/HexagonMCInstrInfo.h" + +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Mangler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" + +using namespace llvm; + +namespace llvm { + void HexagonLowerToMC(const MCInstrInfo &MCII, const MachineInstr *MI, + MCInst &MCB, HexagonAsmPrinter &AP); +} + +static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, + HexagonAsmPrinter &Printer) { + MCContext &MC = Printer.OutContext; + const MCExpr *ME; + + // Populate the relocation type based on Hexagon target flags + // set on an operand + MCSymbolRefExpr::VariantKind RelocationType; + switch (MO.getTargetFlags()) { + default: + RelocationType = MCSymbolRefExpr::VK_None; + break; + case HexagonII::MO_PCREL: + RelocationType = MCSymbolRefExpr::VK_Hexagon_PCREL; + break; + case HexagonII::MO_GOT: + RelocationType = MCSymbolRefExpr::VK_GOT; + break; + case HexagonII::MO_LO16: + RelocationType = MCSymbolRefExpr::VK_Hexagon_LO16; + break; + case HexagonII::MO_HI16: + RelocationType = MCSymbolRefExpr::VK_Hexagon_HI16; + break; + case HexagonII::MO_GPREL: + RelocationType = MCSymbolRefExpr::VK_Hexagon_GPREL; + break; + } + + ME = MCSymbolRefExpr::create(Symbol, RelocationType, MC); + + if (!MO.isJTI() && MO.getOffset()) + ME = MCBinaryExpr::createAdd(ME, MCConstantExpr::create(MO.getOffset(), MC), + MC); + + return MCOperand::createExpr(ME); +} + +// Create an MCInst from a MachineInstr +void llvm::HexagonLowerToMC(const MCInstrInfo &MCII, const MachineInstr *MI, + MCInst &MCB, HexagonAsmPrinter &AP) { + if (MI->getOpcode() == Hexagon::ENDLOOP0) { + HexagonMCInstrInfo::setInnerLoop(MCB); + return; + } + if (MI->getOpcode() == Hexagon::ENDLOOP1) { + HexagonMCInstrInfo::setOuterLoop(MCB); + return; + } + MCInst *MCI = new (AP.OutContext) MCInst; + MCI->setOpcode(MI->getOpcode()); + assert(MCI->getOpcode() == static_cast<unsigned>(MI->getOpcode()) && + "MCI opcode should have been set on construction"); + bool MustExtend = false; + + for (unsigned i = 0, e = MI->getNumOperands(); i < e; i++) { + const MachineOperand &MO = MI->getOperand(i); + MCOperand MCO; + if (MO.getTargetFlags() & HexagonII::HMOTF_ConstExtended) + MustExtend = true; + + switch (MO.getType()) { + default: + MI->dump(); + llvm_unreachable("unknown operand type"); + case MachineOperand::MO_Register: + // Ignore all implicit register operands. + if (MO.isImplicit()) continue; + MCO = MCOperand::createReg(MO.getReg()); + break; + case MachineOperand::MO_FPImmediate: { + APFloat Val = MO.getFPImm()->getValueAPF(); + // FP immediates are used only when setting GPRs, so they may be dealt + // with like regular immediates from this point on. 
+ MCO = MCOperand::createExpr( + MCConstantExpr::create(*Val.bitcastToAPInt().getRawData(), + AP.OutContext)); + break; + } + case MachineOperand::MO_Immediate: + MCO = MCOperand::createExpr( + MCConstantExpr::create(MO.getImm(), AP.OutContext)); + break; + case MachineOperand::MO_MachineBasicBlock: + MCO = MCOperand::createExpr + (MCSymbolRefExpr::create(MO.getMBB()->getSymbol(), + AP.OutContext)); + break; + case MachineOperand::MO_GlobalAddress: + MCO = GetSymbolRef(MO, AP.getSymbol(MO.getGlobal()), AP); + break; + case MachineOperand::MO_ExternalSymbol: + MCO = GetSymbolRef(MO, AP.GetExternalSymbolSymbol(MO.getSymbolName()), + AP); + break; + case MachineOperand::MO_JumpTableIndex: + MCO = GetSymbolRef(MO, AP.GetJTISymbol(MO.getIndex()), AP); + break; + case MachineOperand::MO_ConstantPoolIndex: + MCO = GetSymbolRef(MO, AP.GetCPISymbol(MO.getIndex()), AP); + break; + case MachineOperand::MO_BlockAddress: + MCO = GetSymbolRef(MO, AP.GetBlockAddressSymbol(MO.getBlockAddress()),AP); + break; + } + + MCI->addOperand(MCO); + } + AP.HexagonProcessInstruction(*MCI, *MI); + HexagonMCInstrInfo::extendIfNeeded(AP.OutContext, MCII, MCB, *MCI, + MustExtend); + MCB.addOperand(MCOperand::createInst(MCI)); +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.cpp new file mode 100644 index 0000000..9579c8b --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.cpp @@ -0,0 +1,16 @@ +//= HexagonMachineFunctionInfo.cpp - Hexagon machine function info *- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "HexagonMachineFunctionInfo.h" + +using namespace llvm; + +// pin vtable to this file +void HexagonMachineFunctionInfo::anchor() {} + diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h new file mode 100644 index 0000000..7672358 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h @@ -0,0 +1,85 @@ +//=- HexagonMachineFunctionInfo.h - Hexagon machine function info -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONMACHINEFUNCTIONINFO_H +#define LLVM_LIB_TARGET_HEXAGON_HEXAGONMACHINEFUNCTIONINFO_H + +#include "llvm/CodeGen/MachineFunction.h" +#include <map> + +namespace llvm { + + namespace Hexagon { + const unsigned int StartPacket = 0x1; + const unsigned int EndPacket = 0x2; + } + + +/// Hexagon target-specific information for each MachineFunction. +class HexagonMachineFunctionInfo : public MachineFunctionInfo { + // SRetReturnReg - Some subtargets require that sret lowering includes + // returning the value of the returned struct in a register. This field + // holds the virtual register into which the sret argument is passed. 
+ unsigned SRetReturnReg; + unsigned StackAlignBaseReg; + std::vector<MachineInstr*> AllocaAdjustInsts; + int VarArgsFrameIndex; + bool HasClobberLR; + bool HasEHReturn; + std::map<const MachineInstr*, unsigned> PacketInfo; + virtual void anchor(); + +public: + HexagonMachineFunctionInfo() : SRetReturnReg(0), StackAlignBaseReg(0), + HasClobberLR(0), HasEHReturn(false) {} + + HexagonMachineFunctionInfo(MachineFunction &MF) : SRetReturnReg(0), + StackAlignBaseReg(0), + HasClobberLR(0), + HasEHReturn(false) {} + + unsigned getSRetReturnReg() const { return SRetReturnReg; } + void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; } + + void addAllocaAdjustInst(MachineInstr* MI) { + AllocaAdjustInsts.push_back(MI); + } + const std::vector<MachineInstr*>& getAllocaAdjustInsts() { + return AllocaAdjustInsts; + } + + void setVarArgsFrameIndex(int v) { VarArgsFrameIndex = v; } + int getVarArgsFrameIndex() { return VarArgsFrameIndex; } + + void setStartPacket(MachineInstr* MI) { + PacketInfo[MI] |= Hexagon::StartPacket; + } + void setEndPacket(MachineInstr* MI) { + PacketInfo[MI] |= Hexagon::EndPacket; + } + bool isStartPacket(const MachineInstr* MI) const { + return (PacketInfo.count(MI) && + (PacketInfo.find(MI)->second & Hexagon::StartPacket)); + } + bool isEndPacket(const MachineInstr* MI) const { + return (PacketInfo.count(MI) && + (PacketInfo.find(MI)->second & Hexagon::EndPacket)); + } + void setHasClobberLR(bool v) { HasClobberLR = v; } + bool hasClobberLR() const { return HasClobberLR; } + + bool hasEHReturn() const { return HasEHReturn; }; + void setHasEHReturn(bool H = true) { HasEHReturn = H; }; + + void setStackAlignBaseVReg(unsigned R) { StackAlignBaseReg = R; } + unsigned getStackAlignBaseVReg() const { return StackAlignBaseReg; } +}; +} // End llvm namespace + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp new file mode 100644 index 0000000..7a52d68 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp @@ -0,0 +1,699 @@ +//===- HexagonMachineScheduler.cpp - MI Scheduler for Hexagon -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// MachineScheduler schedules machine instructions after phi elimination. It +// preserves LiveIntervals so it can be invoked before register allocation. +// +//===----------------------------------------------------------------------===// + +#include "HexagonMachineScheduler.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/IR/Function.h" + +using namespace llvm; + +#define DEBUG_TYPE "misched" + +/// Platform-specific modifications to DAG. +void VLIWMachineScheduler::postprocessDAG() { + SUnit* LastSequentialCall = nullptr; + // Currently we only catch the situation when compare gets scheduled + // before preceding call. + for (unsigned su = 0, e = SUnits.size(); su != e; ++su) { + // Remember the call. + if (SUnits[su].getInstr()->isCall()) + LastSequentialCall = &(SUnits[su]); + // Look for a compare that defines a predicate. + else if (SUnits[su].getInstr()->isCompare() && LastSequentialCall) + SUnits[su].addPred(SDep(LastSequentialCall, SDep::Barrier)); + } +} + +/// Check if scheduling of this SU is possible +/// in the current packet. 
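[editor's note] Hexagon::StartPacket (0x1) and Hexagon::EndPacket (0x2) above are single-bit flags OR-ed into PacketInfo per instruction by the setStartPacket/setEndPacket helpers. A minimal stand-alone model of that scheme, with plain ints standing in for MachineInstr objects and a local map standing in for the member:

// Sketch of the PacketInfo flag scheme: 0x1 marks a packet start, 0x2 an end,
// and a single-instruction packet carries both bits.
#include <cstdio>
#include <map>

int main() {
  const unsigned StartPacket = 0x1, EndPacket = 0x2;  // mirrors Hexagon::StartPacket/EndPacket
  int A, B;                                           // stand-ins for MachineInstr objects
  std::map<const int *, unsigned> PacketInfo;

  PacketInfo[&A] |= StartPacket;                      // first instruction of a packet
  PacketInfo[&B] |= StartPacket | EndPacket;          // packet containing only B

  bool AStarts = PacketInfo.count(&A) && (PacketInfo[&A] & StartPacket);
  bool AEnds   = PacketInfo.count(&A) && (PacketInfo[&A] & EndPacket);
  std::printf("A: start=%d end=%d  B: flags=0x%x\n", AStarts, AEnds, PacketInfo[&B]);
}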
+/// It is _not_ precise (statefull), it is more like +/// another heuristic. Many corner cases are figured +/// empirically. +bool VLIWResourceModel::isResourceAvailable(SUnit *SU) { + if (!SU || !SU->getInstr()) + return false; + + // First see if the pipeline could receive this instruction + // in the current cycle. + switch (SU->getInstr()->getOpcode()) { + default: + if (!ResourcesModel->canReserveResources(SU->getInstr())) + return false; + case TargetOpcode::EXTRACT_SUBREG: + case TargetOpcode::INSERT_SUBREG: + case TargetOpcode::SUBREG_TO_REG: + case TargetOpcode::REG_SEQUENCE: + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::COPY: + case TargetOpcode::INLINEASM: + break; + } + + // Now see if there are no other dependencies to instructions already + // in the packet. + for (unsigned i = 0, e = Packet.size(); i != e; ++i) { + if (Packet[i]->Succs.size() == 0) + continue; + for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(), + E = Packet[i]->Succs.end(); I != E; ++I) { + // Since we do not add pseudos to packets, might as well + // ignore order dependencies. + if (I->isCtrl()) + continue; + + if (I->getSUnit() == SU) + return false; + } + } + return true; +} + +/// Keep track of available resources. +bool VLIWResourceModel::reserveResources(SUnit *SU) { + bool startNewCycle = false; + // Artificially reset state. + if (!SU) { + ResourcesModel->clearResources(); + Packet.clear(); + TotalPackets++; + return false; + } + // If this SU does not fit in the packet + // start a new one. + if (!isResourceAvailable(SU)) { + ResourcesModel->clearResources(); + Packet.clear(); + TotalPackets++; + startNewCycle = true; + } + + switch (SU->getInstr()->getOpcode()) { + default: + ResourcesModel->reserveResources(SU->getInstr()); + break; + case TargetOpcode::EXTRACT_SUBREG: + case TargetOpcode::INSERT_SUBREG: + case TargetOpcode::SUBREG_TO_REG: + case TargetOpcode::REG_SEQUENCE: + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::KILL: + case TargetOpcode::CFI_INSTRUCTION: + case TargetOpcode::EH_LABEL: + case TargetOpcode::COPY: + case TargetOpcode::INLINEASM: + break; + } + Packet.push_back(SU); + +#ifndef NDEBUG + DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n"); + for (unsigned i = 0, e = Packet.size(); i != e; ++i) { + DEBUG(dbgs() << "\t[" << i << "] SU("); + DEBUG(dbgs() << Packet[i]->NodeNum << ")\t"); + DEBUG(Packet[i]->getInstr()->dump()); + } +#endif + + // If packet is now full, reset the state so in the next cycle + // we start fresh. + if (Packet.size() >= SchedModel->getIssueWidth()) { + ResourcesModel->clearResources(); + Packet.clear(); + TotalPackets++; + startNewCycle = true; + } + + return startNewCycle; +} + +/// schedule - Called back from MachineScheduler::runOnMachineFunction +/// after setting up the current scheduling region. [RegionBegin, RegionEnd) +/// only includes instructions that have DAG nodes, not scheduling boundaries. +void VLIWMachineScheduler::schedule() { + DEBUG(dbgs() + << "********** MI Converging Scheduling VLIW BB#" << BB->getNumber() + << " " << BB->getName() + << " in_func " << BB->getParent()->getFunction()->getName() + << " at loop depth " << MLI->getLoopDepth(BB) + << " \n"); + + buildDAGWithRegPressure(); + + // Postprocess the DAG to add platform-specific artificial dependencies. + postprocessDAG(); + + SmallVector<SUnit*, 8> TopRoots, BotRoots; + findRootsAndBiasEdges(TopRoots, BotRoots); + + // Initialize the strategy before modifying the DAG. 
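[editor's note] reserveResources() above starts a new packet either when the DFA rejects the instruction or when the packet reaches the issue width. A simplified, self-contained model of that control flow, keeping only the issue-width check (the DFA path through canReserveResources/reserveResources is omitted, and SUnit pointers are replaced by ints):

// Simplified packet model: reserve() returns true when the current packet is
// flushed, mirroring the clear-packet/bump-TotalPackets pattern above.
#include <cstdio>
#include <vector>

struct SimplePacketModel {
  unsigned IssueWidth;
  unsigned TotalPackets = 0;
  std::vector<int> Packet;                 // ints stand in for SUnit*

  explicit SimplePacketModel(unsigned W) : IssueWidth(W) {}

  bool reserve(int SU) {
    Packet.push_back(SU);
    if (Packet.size() >= IssueWidth) {     // packet full: flush and signal a new cycle
      Packet.clear();
      ++TotalPackets;
      return true;
    }
    return false;
  }
};

int main() {
  SimplePacketModel M(/*IssueWidth=*/4);   // Hexagon issues up to four instructions per packet
  for (int SU = 0; SU < 6; ++SU)
    std::printf("SU%d -> newCycle=%d\n", SU, M.reserve(SU));
  std::printf("packets so far: %u\n", M.TotalPackets);
}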
+ SchedImpl->initialize(this); + + // To view Height/Depth correctly, they should be accessed at least once. + // + // FIXME: SUnit::dumpAll always recompute depth and height now. The max + // depth/height could be computed directly from the roots and leaves. + DEBUG(unsigned maxH = 0; + for (unsigned su = 0, e = SUnits.size(); su != e; ++su) + if (SUnits[su].getHeight() > maxH) + maxH = SUnits[su].getHeight(); + dbgs() << "Max Height " << maxH << "\n";); + DEBUG(unsigned maxD = 0; + for (unsigned su = 0, e = SUnits.size(); su != e; ++su) + if (SUnits[su].getDepth() > maxD) + maxD = SUnits[su].getDepth(); + dbgs() << "Max Depth " << maxD << "\n";); + DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) + SUnits[su].dumpAll(this)); + + initQueues(TopRoots, BotRoots); + + bool IsTopNode = false; + while (true) { + DEBUG(dbgs() << "** VLIWMachineScheduler::schedule picking next node\n"); + SUnit *SU = SchedImpl->pickNode(IsTopNode); + if (!SU) break; + + if (!checkSchedLimit()) + break; + + scheduleMI(SU, IsTopNode); + + updateQueues(SU, IsTopNode); + + // Notify the scheduling strategy after updating the DAG. + SchedImpl->schedNode(SU, IsTopNode); + } + assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); + + placeDebugValues(); +} + +void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) { + DAG = static_cast<VLIWMachineScheduler*>(dag); + SchedModel = DAG->getSchedModel(); + + Top.init(DAG, SchedModel); + Bot.init(DAG, SchedModel); + + // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or + // are disabled, then these HazardRecs will be disabled. + const InstrItineraryData *Itin = DAG->getSchedModel()->getInstrItineraries(); + const TargetSubtargetInfo &STI = DAG->MF.getSubtarget(); + const TargetInstrInfo *TII = STI.getInstrInfo(); + delete Top.HazardRec; + delete Bot.HazardRec; + Top.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG); + Bot.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG); + + delete Top.ResourceModel; + delete Bot.ResourceModel; + Top.ResourceModel = new VLIWResourceModel(STI, DAG->getSchedModel()); + Bot.ResourceModel = new VLIWResourceModel(STI, DAG->getSchedModel()); + + assert((!llvm::ForceTopDown || !llvm::ForceBottomUp) && + "-misched-topdown incompatible with -misched-bottomup"); +} + +void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) { + if (SU->isScheduled) + return; + + for (SUnit::succ_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle; + unsigned MinLatency = I->getLatency(); +#ifndef NDEBUG + Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency); +#endif + if (SU->TopReadyCycle < PredReadyCycle + MinLatency) + SU->TopReadyCycle = PredReadyCycle + MinLatency; + } + Top.releaseNode(SU, SU->TopReadyCycle); +} + +void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) { + if (SU->isScheduled) + return; + + assert(SU->getInstr() && "Scheduled SUnit must have instr"); + + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle; + unsigned MinLatency = I->getLatency(); +#ifndef NDEBUG + Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency); +#endif + if (SU->BotReadyCycle < SuccReadyCycle + MinLatency) + SU->BotReadyCycle = SuccReadyCycle + MinLatency; + } + Bot.releaseNode(SU, SU->BotReadyCycle); +} + +/// Does this SU have a hazard within the current instruction group. 
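[editor's note] releaseTopNode()/releaseBottomNode() above push a node's ready cycle to the maximum over its edges of the neighbor's ready cycle plus the edge latency. A tiny numeric illustration with made-up edge values and no LLVM types:

// Ready-cycle propagation as in releaseTopNode(): a node cannot issue before
// every predecessor's ready cycle plus the latency of the connecting edge.
#include <algorithm>
#include <cstdio>

int main() {
  struct PredEdge { unsigned PredReadyCycle, Latency; };
  const PredEdge Preds[] = {{0, 1}, {2, 3}, {4, 1}};   // example edges

  unsigned TopReadyCycle = 0;
  for (const PredEdge &E : Preds)
    TopReadyCycle = std::max(TopReadyCycle, E.PredReadyCycle + E.Latency);

  std::printf("TopReadyCycle = %u\n", TopReadyCycle);  // 5
}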
+/// +/// The scheduler supports two modes of hazard recognition. The first is the +/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that +/// supports highly complicated in-order reservation tables +/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic. +/// +/// The second is a streamlined mechanism that checks for hazards based on +/// simple counters that the scheduler itself maintains. It explicitly checks +/// for instruction dispatch limitations, including the number of micro-ops that +/// can dispatch per cycle. +/// +/// TODO: Also check whether the SU must start a new group. +bool ConvergingVLIWScheduler::VLIWSchedBoundary::checkHazard(SUnit *SU) { + if (HazardRec->isEnabled()) + return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard; + + unsigned uops = SchedModel->getNumMicroOps(SU->getInstr()); + if (IssueCount + uops > SchedModel->getIssueWidth()) + return true; + + return false; +} + +void ConvergingVLIWScheduler::VLIWSchedBoundary::releaseNode(SUnit *SU, + unsigned ReadyCycle) { + if (ReadyCycle < MinReadyCycle) + MinReadyCycle = ReadyCycle; + + // Check for interlocks first. For the purpose of other heuristics, an + // instruction that cannot issue appears as if it's not in the ReadyQueue. + if (ReadyCycle > CurrCycle || checkHazard(SU)) + + Pending.push(SU); + else + Available.push(SU); +} + +/// Move the boundary of scheduled code by one cycle. +void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpCycle() { + unsigned Width = SchedModel->getIssueWidth(); + IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width; + + assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized"); + unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle); + + if (!HazardRec->isEnabled()) { + // Bypass HazardRec virtual calls. + CurrCycle = NextCycle; + } else { + // Bypass getHazardType calls in case of long latency. + for (; CurrCycle != NextCycle; ++CurrCycle) { + if (isTop()) + HazardRec->AdvanceCycle(); + else + HazardRec->RecedeCycle(); + } + } + CheckPending = true; + + DEBUG(dbgs() << "*** " << Available.getName() << " cycle " + << CurrCycle << '\n'); +} + +/// Move the boundary of scheduled code by one SUnit. +void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpNode(SUnit *SU) { + bool startNewCycle = false; + + // Update the reservation table. + if (HazardRec->isEnabled()) { + if (!isTop() && SU->isCall) { + // Calls are scheduled with their preceding instructions. For bottom-up + // scheduling, clear the pipeline state before emitting. + HazardRec->Reset(); + } + HazardRec->EmitInstruction(SU); + } + + // Update DFA model. + startNewCycle = ResourceModel->reserveResources(SU); + + // Check the instruction group dispatch limit. + // TODO: Check if this SU must end a dispatch group. + IssueCount += SchedModel->getNumMicroOps(SU->getInstr()); + if (startNewCycle) { + DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n'); + bumpCycle(); + } + else + DEBUG(dbgs() << "*** IssueCount " << IssueCount + << " at cycle " << CurrCycle << '\n'); +} + +/// Release pending ready nodes in to the available queue. This makes them +/// visible to heuristics. +void ConvergingVLIWScheduler::VLIWSchedBoundary::releasePending() { + // If the available queue is empty, it is safe to reset MinReadyCycle. + if (Available.empty()) + MinReadyCycle = UINT_MAX; + + // Check to see if any of the pending instructions are ready to issue. If + // so, add them to the available queue. 
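[editor's note] bumpCycle() above carries any micro-ops issued beyond the width over into the next cycle and then advances at least one cycle, jumping directly to MinReadyCycle when everything pending is still further away. The same arithmetic on example numbers:

// The cycle-advance arithmetic used by bumpCycle().
#include <algorithm>
#include <cstdio>

int main() {
  unsigned Width = 4;                      // issue width (example value)
  unsigned IssueCount = 6, CurrCycle = 10, MinReadyCycle = 13;

  IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;
  unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle);

  std::printf("IssueCount=%u NextCycle=%u\n", IssueCount, NextCycle); // 2 and 13
}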
+ for (unsigned i = 0, e = Pending.size(); i != e; ++i) { + SUnit *SU = *(Pending.begin()+i); + unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle; + + if (ReadyCycle < MinReadyCycle) + MinReadyCycle = ReadyCycle; + + if (ReadyCycle > CurrCycle) + continue; + + if (checkHazard(SU)) + continue; + + Available.push(SU); + Pending.remove(Pending.begin()+i); + --i; --e; + } + CheckPending = false; +} + +/// Remove SU from the ready set for this boundary. +void ConvergingVLIWScheduler::VLIWSchedBoundary::removeReady(SUnit *SU) { + if (Available.isInQueue(SU)) + Available.remove(Available.find(SU)); + else { + assert(Pending.isInQueue(SU) && "bad ready count"); + Pending.remove(Pending.find(SU)); + } +} + +/// If this queue only has one ready candidate, return it. As a side effect, +/// advance the cycle until at least one node is ready. If multiple instructions +/// are ready, return NULL. +SUnit *ConvergingVLIWScheduler::VLIWSchedBoundary::pickOnlyChoice() { + if (CheckPending) + releasePending(); + + for (unsigned i = 0; Available.empty(); ++i) { + assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) && + "permanent hazard"); (void)i; + ResourceModel->reserveResources(nullptr); + bumpCycle(); + releasePending(); + } + if (Available.size() == 1) + return *Available.begin(); + return nullptr; +} + +#ifndef NDEBUG +void ConvergingVLIWScheduler::traceCandidate(const char *Label, + const ReadyQueue &Q, + SUnit *SU, PressureChange P) { + dbgs() << Label << " " << Q.getName() << " "; + if (P.isValid()) + dbgs() << DAG->TRI->getRegPressureSetName(P.getPSet()) << ":" + << P.getUnitInc() << " "; + else + dbgs() << " "; + SU->dump(DAG); +} +#endif + +/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor +/// of SU, return it, otherwise return null. +static SUnit *getSingleUnscheduledPred(SUnit *SU) { + SUnit *OnlyAvailablePred = nullptr; + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + SUnit &Pred = *I->getSUnit(); + if (!Pred.isScheduled) { + // We found an available, but not scheduled, predecessor. If it's the + // only one we have found, keep track of it... otherwise give up. + if (OnlyAvailablePred && OnlyAvailablePred != &Pred) + return nullptr; + OnlyAvailablePred = &Pred; + } + } + return OnlyAvailablePred; +} + +/// getSingleUnscheduledSucc - If there is exactly one unscheduled successor +/// of SU, return it, otherwise return null. +static SUnit *getSingleUnscheduledSucc(SUnit *SU) { + SUnit *OnlyAvailableSucc = nullptr; + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + SUnit &Succ = *I->getSUnit(); + if (!Succ.isScheduled) { + // We found an available, but not scheduled, successor. If it's the + // only one we have found, keep track of it... otherwise give up. + if (OnlyAvailableSucc && OnlyAvailableSucc != &Succ) + return nullptr; + OnlyAvailableSucc = &Succ; + } + } + return OnlyAvailableSucc; +} + +// Constants used to denote relative importance of +// heuristic components for cost computation. +static const unsigned PriorityOne = 200; +static const unsigned PriorityTwo = 50; +static const unsigned ScaleTwo = 10; +static const unsigned FactorOne = 2; + +/// Single point to compute overall scheduling cost. +/// TODO: More heuristics will be used soon. +int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU, + SchedCandidate &Candidate, + RegPressureDelta &Delta, + bool verbose) { + // Initial trivial priority. 
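[editor's note] The PriorityOne/PriorityTwo/ScaleTwo/FactorOne constants above weight the cost computed by SchedulingCost(), whose body follows. A stand-alone restatement of that formula for the top-queue direction, evaluated on made-up inputs, shows how the pieces combine; it mirrors the code below rather than replacing it:

// Worked example of the SchedulingCost() formula (top-queue direction).
#include <cstdio>

int main() {
  const unsigned PriorityOne = 200, PriorityTwo = 50, ScaleTwo = 10, FactorOne = 2;

  // Example inputs for one candidate SUnit.
  bool     IsScheduleHigh     = false;
  unsigned Height             = 7;      // critical-path height
  bool     ResourceAvailable  = true;   // fits in the current packet
  unsigned NumNodesBlocking   = 3;      // nodes this SU solely unblocks
  int      ExcessUnitInc      = 1;      // register-pressure deltas
  int      CriticalMaxUnitInc = 0;

  int ResCount = 1;
  if (IsScheduleHigh)    ResCount += PriorityOne;
  ResCount += Height * ScaleTwo;                 // prefer the critical path
  if (ResourceAvailable) ResCount <<= FactorOne;
  ResCount += NumNodesBlocking * ScaleTwo;       // prefer nodes that unblock others
  ResCount -= ExcessUnitInc * PriorityTwo;       // penalize register pressure
  ResCount -= CriticalMaxUnitInc * PriorityTwo;

  std::printf("cost = %d\n", ResCount);          // ((1 + 70) << 2) + 30 - 50 = 264
}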
+ int ResCount = 1; + + // Do not waste time on a node that is already scheduled. + if (!SU || SU->isScheduled) + return ResCount; + + // Forced priority is high. + if (SU->isScheduleHigh) + ResCount += PriorityOne; + + // Critical path first. + if (Q.getID() == TopQID) { + ResCount += (SU->getHeight() * ScaleTwo); + + // If resources are available for it, multiply the + // chance of scheduling. + if (Top.ResourceModel->isResourceAvailable(SU)) + ResCount <<= FactorOne; + } else { + ResCount += (SU->getDepth() * ScaleTwo); + + // If resources are available for it, multiply the + // chance of scheduling. + if (Bot.ResourceModel->isResourceAvailable(SU)) + ResCount <<= FactorOne; + } + + unsigned NumNodesBlocking = 0; + if (Q.getID() == TopQID) { + // How many SUs does it block from scheduling? + // Look at all of the successors of this node. + // Count the number of nodes that + // this node is the sole unscheduled node for. + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) + if (getSingleUnscheduledPred(I->getSUnit()) == SU) + ++NumNodesBlocking; + } else { + // How many unscheduled predecessors block this node? + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) + if (getSingleUnscheduledSucc(I->getSUnit()) == SU) + ++NumNodesBlocking; + } + ResCount += (NumNodesBlocking * ScaleTwo); + + // Factor in reg pressure as a heuristic. + ResCount -= (Delta.Excess.getUnitInc()*PriorityTwo); + ResCount -= (Delta.CriticalMax.getUnitInc()*PriorityTwo); + + DEBUG(if (verbose) dbgs() << " Total(" << ResCount << ")"); + + return ResCount; +} + +/// Pick the best candidate from the top queue. +/// +/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during +/// DAG building. To adjust for the current scheduling location we need to +/// maintain the number of vreg uses remaining to be top-scheduled. +ConvergingVLIWScheduler::CandResult ConvergingVLIWScheduler:: +pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker, + SchedCandidate &Candidate) { + DEBUG(Q.dump()); + + // getMaxPressureDelta temporarily modifies the tracker. + RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker); + + // BestSU remains NULL if no top candidates beat the best existing candidate. + CandResult FoundCandidate = NoCand; + for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) { + RegPressureDelta RPDelta; + TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta, + DAG->getRegionCriticalPSets(), + DAG->getRegPressure().MaxSetPressure); + + int CurrentCost = SchedulingCost(Q, *I, Candidate, RPDelta, false); + + // Initialize the candidate if needed. + if (!Candidate.SU) { + Candidate.SU = *I; + Candidate.RPDelta = RPDelta; + Candidate.SCost = CurrentCost; + FoundCandidate = NodeOrder; + continue; + } + + // Best cost. + if (CurrentCost > Candidate.SCost) { + DEBUG(traceCandidate("CCAND", Q, *I)); + Candidate.SU = *I; + Candidate.RPDelta = RPDelta; + Candidate.SCost = CurrentCost; + FoundCandidate = BestCost; + continue; + } + + // Fall through to original instruction order. + // Only consider node order if Candidate was chosen from this Q. + if (FoundCandidate == NoCand) + continue; + } + return FoundCandidate; +} + +/// Pick the best candidate node from either the top or bottom queue. +SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) { + // Schedule as far as possible in the direction of no choice. 
This is most + // efficient, but also provides the best heuristics for CriticalPSets. + if (SUnit *SU = Bot.pickOnlyChoice()) { + IsTopNode = false; + return SU; + } + if (SUnit *SU = Top.pickOnlyChoice()) { + IsTopNode = true; + return SU; + } + SchedCandidate BotCand; + // Prefer bottom scheduling when heuristics are silent. + CandResult BotResult = pickNodeFromQueue(Bot.Available, + DAG->getBotRPTracker(), BotCand); + assert(BotResult != NoCand && "failed to find the first candidate"); + + // If either Q has a single candidate that provides the least increase in + // Excess pressure, we can immediately schedule from that Q. + // + // RegionCriticalPSets summarizes the pressure within the scheduled region and + // affects picking from either Q. If scheduling in one direction must + // increase pressure for one of the excess PSets, then schedule in that + // direction first to provide more freedom in the other direction. + if (BotResult == SingleExcess || BotResult == SingleCritical) { + IsTopNode = false; + return BotCand.SU; + } + // Check if the top Q has a better candidate. + SchedCandidate TopCand; + CandResult TopResult = pickNodeFromQueue(Top.Available, + DAG->getTopRPTracker(), TopCand); + assert(TopResult != NoCand && "failed to find the first candidate"); + + if (TopResult == SingleExcess || TopResult == SingleCritical) { + IsTopNode = true; + return TopCand.SU; + } + // If either Q has a single candidate that minimizes pressure above the + // original region's pressure pick it. + if (BotResult == SingleMax) { + IsTopNode = false; + return BotCand.SU; + } + if (TopResult == SingleMax) { + IsTopNode = true; + return TopCand.SU; + } + if (TopCand.SCost > BotCand.SCost) { + IsTopNode = true; + return TopCand.SU; + } + // Otherwise prefer the bottom candidate in node order. + IsTopNode = false; + return BotCand.SU; +} + +/// Pick the best node to balance the schedule. Implements MachineSchedStrategy. +SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) { + if (DAG->top() == DAG->bottom()) { + assert(Top.Available.empty() && Top.Pending.empty() && + Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage"); + return nullptr; + } + SUnit *SU; + if (llvm::ForceTopDown) { + SU = Top.pickOnlyChoice(); + if (!SU) { + SchedCandidate TopCand; + CandResult TopResult = + pickNodeFromQueue(Top.Available, DAG->getTopRPTracker(), TopCand); + assert(TopResult != NoCand && "failed to find the first candidate"); + (void)TopResult; + SU = TopCand.SU; + } + IsTopNode = true; + } else if (llvm::ForceBottomUp) { + SU = Bot.pickOnlyChoice(); + if (!SU) { + SchedCandidate BotCand; + CandResult BotResult = + pickNodeFromQueue(Bot.Available, DAG->getBotRPTracker(), BotCand); + assert(BotResult != NoCand && "failed to find the first candidate"); + (void)BotResult; + SU = BotCand.SU; + } + IsTopNode = false; + } else { + SU = pickNodeBidrectional(IsTopNode); + } + if (SU->isTopReady()) + Top.removeReady(SU); + if (SU->isBottomReady()) + Bot.removeReady(SU); + + DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom") + << " Scheduling Instruction in cycle " + << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << '\n'; + SU->dump(DAG)); + return SU; +} + +/// Update the scheduler's state after scheduling a node. This is the same node +/// that was just returned by pickNode(). However, VLIWMachineScheduler needs +/// to update it's state based on the current cycle before MachineSchedStrategy +/// does. 
+void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) { + if (IsTopNode) { + SU->TopReadyCycle = Top.CurrCycle; + Top.bumpNode(SU); + } else { + SU->BotReadyCycle = Bot.CurrCycle; + Bot.bumpNode(SU); + } +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h b/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h new file mode 100644 index 0000000..6034344 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h @@ -0,0 +1,244 @@ +//===-- HexagonMachineScheduler.h - Custom Hexagon MI scheduler. ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Custom Hexagon MI scheduler. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONMACHINESCHEDULER_H +#define LLVM_LIB_TARGET_HEXAGON_HEXAGONMACHINESCHEDULER_H + +#include "llvm/ADT/PriorityQueue.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineScheduler.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/RegisterPressure.h" +#include "llvm/CodeGen/ResourcePriorityQueue.h" +#include "llvm/CodeGen/ScheduleDAGInstrs.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" + +using namespace llvm; + +namespace llvm { +//===----------------------------------------------------------------------===// +// ConvergingVLIWScheduler - Implementation of the standard +// MachineSchedStrategy. +//===----------------------------------------------------------------------===// + +class VLIWResourceModel { + /// ResourcesModel - Represents VLIW state. + /// Not limited to VLIW targets per say, but assumes + /// definition of DFA by a target. + DFAPacketizer *ResourcesModel; + + const TargetSchedModel *SchedModel; + + /// Local packet/bundle model. Purely + /// internal to the MI schedulre at the time. + std::vector<SUnit*> Packet; + + /// Total packets created. + unsigned TotalPackets; + +public: + VLIWResourceModel(const TargetSubtargetInfo &STI, const TargetSchedModel *SM) + : SchedModel(SM), TotalPackets(0) { + ResourcesModel = STI.getInstrInfo()->CreateTargetScheduleState(STI); + + // This hard requirement could be relaxed, + // but for now do not let it proceed. + assert(ResourcesModel && "Unimplemented CreateTargetScheduleState."); + + Packet.resize(SchedModel->getIssueWidth()); + Packet.clear(); + ResourcesModel->clearResources(); + } + + ~VLIWResourceModel() { + delete ResourcesModel; + } + + void resetPacketState() { + Packet.clear(); + } + + void resetDFA() { + ResourcesModel->clearResources(); + } + + void reset() { + Packet.clear(); + ResourcesModel->clearResources(); + } + + bool isResourceAvailable(SUnit *SU); + bool reserveResources(SUnit *SU); + unsigned getTotalPackets() const { return TotalPackets; } +}; + +/// Extend the standard ScheduleDAGMI to provide more context and override the +/// top-level schedule() driver. 
+class VLIWMachineScheduler : public ScheduleDAGMILive { +public: + VLIWMachineScheduler(MachineSchedContext *C, + std::unique_ptr<MachineSchedStrategy> S) + : ScheduleDAGMILive(C, std::move(S)) {} + + /// Schedule - This is called back from ScheduleDAGInstrs::Run() when it's + /// time to do some work. + void schedule() override; + /// Perform platform-specific DAG postprocessing. + void postprocessDAG(); +}; + +/// ConvergingVLIWScheduler shrinks the unscheduled zone using heuristics +/// to balance the schedule. +class ConvergingVLIWScheduler : public MachineSchedStrategy { + + /// Store the state used by ConvergingVLIWScheduler heuristics, required + /// for the lifetime of one invocation of pickNode(). + struct SchedCandidate { + // The best SUnit candidate. + SUnit *SU; + + // Register pressure values for the best candidate. + RegPressureDelta RPDelta; + + // Best scheduling cost. + int SCost; + + SchedCandidate(): SU(nullptr), SCost(0) {} + }; + /// Represent the type of SchedCandidate found within a single queue. + enum CandResult { + NoCand, NodeOrder, SingleExcess, SingleCritical, SingleMax, MultiPressure, + BestCost}; + + /// Each Scheduling boundary is associated with ready queues. It tracks the + /// current cycle in whichever direction at has moved, and maintains the state + /// of "hazards" and other interlocks at the current cycle. + struct VLIWSchedBoundary { + VLIWMachineScheduler *DAG; + const TargetSchedModel *SchedModel; + + ReadyQueue Available; + ReadyQueue Pending; + bool CheckPending; + + ScheduleHazardRecognizer *HazardRec; + VLIWResourceModel *ResourceModel; + + unsigned CurrCycle; + unsigned IssueCount; + + /// MinReadyCycle - Cycle of the soonest available instruction. + unsigned MinReadyCycle; + + // Remember the greatest min operand latency. + unsigned MaxMinLatency; + + /// Pending queues extend the ready queues with the same ID and the + /// PendingFlag set. + VLIWSchedBoundary(unsigned ID, const Twine &Name): + DAG(nullptr), SchedModel(nullptr), Available(ID, Name+".A"), + Pending(ID << ConvergingVLIWScheduler::LogMaxQID, Name+".P"), + CheckPending(false), HazardRec(nullptr), ResourceModel(nullptr), + CurrCycle(0), IssueCount(0), + MinReadyCycle(UINT_MAX), MaxMinLatency(0) {} + + ~VLIWSchedBoundary() { + delete ResourceModel; + delete HazardRec; + } + + void init(VLIWMachineScheduler *dag, const TargetSchedModel *smodel) { + DAG = dag; + SchedModel = smodel; + } + + bool isTop() const { + return Available.getID() == ConvergingVLIWScheduler::TopQID; + } + + bool checkHazard(SUnit *SU); + + void releaseNode(SUnit *SU, unsigned ReadyCycle); + + void bumpCycle(); + + void bumpNode(SUnit *SU); + + void releasePending(); + + void removeReady(SUnit *SU); + + SUnit *pickOnlyChoice(); + }; + + VLIWMachineScheduler *DAG; + const TargetSchedModel *SchedModel; + + // State of the top and bottom scheduled instruction boundaries. 
+ VLIWSchedBoundary Top; + VLIWSchedBoundary Bot; + +public: + /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both) + enum { + TopQID = 1, + BotQID = 2, + LogMaxQID = 2 + }; + + ConvergingVLIWScheduler() + : DAG(nullptr), SchedModel(nullptr), Top(TopQID, "TopQ"), + Bot(BotQID, "BotQ") {} + + void initialize(ScheduleDAGMI *dag) override; + + SUnit *pickNode(bool &IsTopNode) override; + + void schedNode(SUnit *SU, bool IsTopNode) override; + + void releaseTopNode(SUnit *SU) override; + + void releaseBottomNode(SUnit *SU) override; + + unsigned ReportPackets() { + return Top.ResourceModel->getTotalPackets() + + Bot.ResourceModel->getTotalPackets(); + } + +protected: + SUnit *pickNodeBidrectional(bool &IsTopNode); + + int SchedulingCost(ReadyQueue &Q, + SUnit *SU, SchedCandidate &Candidate, + RegPressureDelta &Delta, bool verbose); + + CandResult pickNodeFromQueue(ReadyQueue &Q, + const RegPressureTracker &RPTracker, + SchedCandidate &Candidate); +#ifndef NDEBUG + void traceCandidate(const char *Label, const ReadyQueue &Q, SUnit *SU, + PressureChange P = PressureChange()); +#endif +}; + +} // namespace + + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp new file mode 100644 index 0000000..20c4ab1 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp @@ -0,0 +1,689 @@ +//===----- HexagonNewValueJump.cpp - Hexagon Backend New Value Jump -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements NewValueJump pass in Hexagon. +// Ideally, we should merge this as a Peephole pass prior to register +// allocation, but because we have a spill in between the feeder and new value +// jump instructions, we are forced to write after register allocation. +// Having said that, we should re-attempt to pull this earlier at some point +// in future. + +// The basic approach looks for sequence of predicated jump, compare instruciton +// that genereates the predicate and, the feeder to the predicate. Once it finds +// all, it collapses compare and jump instruction into a new valu jump +// intstructions. 
+// +// +//===----------------------------------------------------------------------===// +#include "llvm/PassSupport.h" +#include "Hexagon.h" +#include "HexagonInstrInfo.h" +#include "HexagonMachineFunctionInfo.h" +#include "HexagonRegisterInfo.h" +#include "HexagonSubtarget.h" +#include "HexagonTargetMachine.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/ScheduleDAGInstrs.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include <map> +using namespace llvm; + +#define DEBUG_TYPE "hexagon-nvj" + +STATISTIC(NumNVJGenerated, "Number of New Value Jump Instructions created"); + +static cl::opt<int> +DbgNVJCount("nvj-count", cl::init(-1), cl::Hidden, cl::desc( + "Maximum number of predicated jumps to be converted to New Value Jump")); + +static cl::opt<bool> DisableNewValueJumps("disable-nvjump", cl::Hidden, + cl::ZeroOrMore, cl::init(false), + cl::desc("Disable New Value Jumps")); + +namespace llvm { + FunctionPass *createHexagonNewValueJump(); + void initializeHexagonNewValueJumpPass(PassRegistry&); +} + + +namespace { + struct HexagonNewValueJump : public MachineFunctionPass { + const HexagonInstrInfo *QII; + const HexagonRegisterInfo *QRI; + + public: + static char ID; + + HexagonNewValueJump() : MachineFunctionPass(ID) { + initializeHexagonNewValueJumpPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<MachineBranchProbabilityInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + const char *getPassName() const override { + return "Hexagon NewValueJump"; + } + + bool runOnMachineFunction(MachineFunction &Fn) override; + + private: + /// \brief A handle to the branch probability pass. + const MachineBranchProbabilityInfo *MBPI; + + bool isNewValueJumpCandidate(const MachineInstr *MI) const; + }; + +} // end of anonymous namespace + +char HexagonNewValueJump::ID = 0; + +INITIALIZE_PASS_BEGIN(HexagonNewValueJump, "hexagon-nvj", + "Hexagon NewValueJump", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) +INITIALIZE_PASS_END(HexagonNewValueJump, "hexagon-nvj", + "Hexagon NewValueJump", false, false) + + +// We have identified this II could be feeder to NVJ, +// verify that it can be. +static bool canBeFeederToNewValueJump(const HexagonInstrInfo *QII, + const TargetRegisterInfo *TRI, + MachineBasicBlock::iterator II, + MachineBasicBlock::iterator end, + MachineBasicBlock::iterator skip, + MachineFunction &MF) { + + // Predicated instruction can not be feeder to NVJ. + if (QII->isPredicated(II)) + return false; + + // Bail out if feederReg is a paired register (double regs in + // our case). One would think that we can check to see if a given + // register cmpReg1 or cmpReg2 is a sub register of feederReg + // using -- if (QRI->isSubRegister(feederReg, cmpReg1) logic + // before the callsite of this function + // But we can not as it comes in the following fashion. 
+ // %D0<def> = Hexagon_S2_lsr_r_p %D0<kill>, %R2<kill> + // %R0<def> = KILL %R0, %D0<imp-use,kill> + // %P0<def> = CMPEQri %R0<kill>, 0 + // Hence, we need to check if it's a KILL instruction. + if (II->getOpcode() == TargetOpcode::KILL) + return false; + + + // Make sure there there is no 'def' or 'use' of any of the uses of + // feeder insn between it's definition, this MI and jump, jmpInst + // skipping compare, cmpInst. + // Here's the example. + // r21=memub(r22+r24<<#0) + // p0 = cmp.eq(r21, #0) + // r4=memub(r3+r21<<#0) + // if (p0.new) jump:t .LBB29_45 + // Without this check, it will be converted into + // r4=memub(r3+r21<<#0) + // r21=memub(r22+r24<<#0) + // p0 = cmp.eq(r21, #0) + // if (p0.new) jump:t .LBB29_45 + // and result WAR hazards if converted to New Value Jump. + + for (unsigned i = 0; i < II->getNumOperands(); ++i) { + if (II->getOperand(i).isReg() && + (II->getOperand(i).isUse() || II->getOperand(i).isDef())) { + MachineBasicBlock::iterator localII = II; + ++localII; + unsigned Reg = II->getOperand(i).getReg(); + for (MachineBasicBlock::iterator localBegin = localII; + localBegin != end; ++localBegin) { + if (localBegin == skip ) continue; + // Check for Subregisters too. + if (localBegin->modifiesRegister(Reg, TRI) || + localBegin->readsRegister(Reg, TRI)) + return false; + } + } + } + return true; +} + +// These are the common checks that need to performed +// to determine if +// 1. compare instruction can be moved before jump. +// 2. feeder to the compare instruction can be moved before jump. +static bool commonChecksToProhibitNewValueJump(bool afterRA, + MachineBasicBlock::iterator MII) { + + // If store in path, bail out. + if (MII->getDesc().mayStore()) + return false; + + // if call in path, bail out. + if (MII->getOpcode() == Hexagon::J2_call) + return false; + + // if NVJ is running prior to RA, do the following checks. + if (!afterRA) { + // The following Target Opcode instructions are spurious + // to new value jump. If they are in the path, bail out. + // KILL sets kill flag on the opcode. It also sets up a + // single register, out of pair. + // %D0<def> = Hexagon_S2_lsr_r_p %D0<kill>, %R2<kill> + // %R0<def> = KILL %R0, %D0<imp-use,kill> + // %P0<def> = CMPEQri %R0<kill>, 0 + // PHI can be anything after RA. + // COPY can remateriaze things in between feeder, compare and nvj. + if (MII->getOpcode() == TargetOpcode::KILL || + MII->getOpcode() == TargetOpcode::PHI || + MII->getOpcode() == TargetOpcode::COPY) + return false; + + // The following pseudo Hexagon instructions sets "use" and "def" + // of registers by individual passes in the backend. At this time, + // we don't know the scope of usage and definitions of these + // instructions. + if (MII->getOpcode() == Hexagon::LDriw_pred || + MII->getOpcode() == Hexagon::STriw_pred) + return false; + } + + return true; +} + +static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII, + const TargetRegisterInfo *TRI, + MachineBasicBlock::iterator II, + unsigned pReg, + bool secondReg, + bool optLocation, + MachineBasicBlock::iterator end, + MachineFunction &MF) { + + MachineInstr *MI = II; + + // If the second operand of the compare is an imm, make sure it's in the + // range specified by the arch. + if (!secondReg) { + int64_t v = MI->getOperand(2).getImm(); + + if (!(isUInt<5>(v) || + ((MI->getOpcode() == Hexagon::C2_cmpeqi || + MI->getOpcode() == Hexagon::C2_cmpgti) && + (v == -1)))) + return false; + } + + unsigned cmpReg1, cmpOp2 = 0; // cmpOp2 assignment silences compiler warning. 
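[editor's note] The immediate range test in canCompareBeNewValueJump() above admits an unsigned 5-bit value, plus -1 for cmp.eq/cmp.gt, which have dedicated "n1" new-value jump encodings. The same predicate in standard C++ (the wrapper name is invented; the u5 test is the plain-C++ equivalent of llvm::isUInt<5>):

// Immediate admissibility for a new-value compare-and-jump.
#include <cstdint>
#include <cstdio>

static bool fitsNewValueCmpImm(int64_t V, bool IsEqOrGt) {
  bool IsU5 = V >= 0 && V <= 31;          // equivalent to isUInt<5>(V)
  return IsU5 || (IsEqOrGt && V == -1);
}

int main() {
  std::printf("%d %d %d\n",
              fitsNewValueCmpImm(31, false),   // 1: fits u5
              fitsNewValueCmpImm(-1, true),    // 1: -1 with cmp.eq/cmp.gt
              fitsNewValueCmpImm(32, true));   // 0: out of range
}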
+ cmpReg1 = MI->getOperand(1).getReg(); + + if (secondReg) { + cmpOp2 = MI->getOperand(2).getReg(); + + // Make sure that that second register is not from COPY + // At machine code level, we don't need this, but if we decide + // to move new value jump prior to RA, we would be needing this. + MachineRegisterInfo &MRI = MF.getRegInfo(); + if (secondReg && !TargetRegisterInfo::isPhysicalRegister(cmpOp2)) { + MachineInstr *def = MRI.getVRegDef(cmpOp2); + if (def->getOpcode() == TargetOpcode::COPY) + return false; + } + } + + // Walk the instructions after the compare (predicate def) to the jump, + // and satisfy the following conditions. + ++II ; + for (MachineBasicBlock::iterator localII = II; localII != end; + ++localII) { + + // Check 1. + // If "common" checks fail, bail out. + if (!commonChecksToProhibitNewValueJump(optLocation, localII)) + return false; + + // Check 2. + // If there is a def or use of predicate (result of compare), bail out. + if (localII->modifiesRegister(pReg, TRI) || + localII->readsRegister(pReg, TRI)) + return false; + + // Check 3. + // If there is a def of any of the use of the compare (operands of compare), + // bail out. + // Eg. + // p0 = cmp.eq(r2, r0) + // r2 = r4 + // if (p0.new) jump:t .LBB28_3 + if (localII->modifiesRegister(cmpReg1, TRI) || + (secondReg && localII->modifiesRegister(cmpOp2, TRI))) + return false; + } + return true; +} + + +// Given a compare operator, return a matching New Value Jump compare operator. +// Make sure that MI here is included in isNewValueJumpCandidate. +static unsigned getNewValueJumpOpcode(MachineInstr *MI, int reg, + bool secondRegNewified, + MachineBasicBlock *jmpTarget, + const MachineBranchProbabilityInfo + *MBPI) { + bool taken = false; + MachineBasicBlock *Src = MI->getParent(); + const BranchProbability Prediction = + MBPI->getEdgeProbability(Src, jmpTarget); + + if (Prediction >= BranchProbability(1,2)) + taken = true; + + switch (MI->getOpcode()) { + case Hexagon::C2_cmpeq: + return taken ? Hexagon::J4_cmpeq_t_jumpnv_t + : Hexagon::J4_cmpeq_t_jumpnv_nt; + + case Hexagon::C2_cmpeqi: { + if (reg >= 0) + return taken ? Hexagon::J4_cmpeqi_t_jumpnv_t + : Hexagon::J4_cmpeqi_t_jumpnv_nt; + else + return taken ? Hexagon::J4_cmpeqn1_t_jumpnv_t + : Hexagon::J4_cmpeqn1_t_jumpnv_nt; + } + + case Hexagon::C2_cmpgt: { + if (secondRegNewified) + return taken ? Hexagon::J4_cmplt_t_jumpnv_t + : Hexagon::J4_cmplt_t_jumpnv_nt; + else + return taken ? Hexagon::J4_cmpgt_t_jumpnv_t + : Hexagon::J4_cmpgt_t_jumpnv_nt; + } + + case Hexagon::C2_cmpgti: { + if (reg >= 0) + return taken ? Hexagon::J4_cmpgti_t_jumpnv_t + : Hexagon::J4_cmpgti_t_jumpnv_nt; + else + return taken ? Hexagon::J4_cmpgtn1_t_jumpnv_t + : Hexagon::J4_cmpgtn1_t_jumpnv_nt; + } + + case Hexagon::C2_cmpgtu: { + if (secondRegNewified) + return taken ? Hexagon::J4_cmpltu_t_jumpnv_t + : Hexagon::J4_cmpltu_t_jumpnv_nt; + else + return taken ? Hexagon::J4_cmpgtu_t_jumpnv_t + : Hexagon::J4_cmpgtu_t_jumpnv_nt; + } + + case Hexagon::C2_cmpgtui: + return taken ? Hexagon::J4_cmpgtui_t_jumpnv_t + : Hexagon::J4_cmpgtui_t_jumpnv_nt; + + case Hexagon::C4_cmpneq: + return taken ? Hexagon::J4_cmpeq_f_jumpnv_t + : Hexagon::J4_cmpeq_f_jumpnv_nt; + + case Hexagon::C4_cmplte: + if (secondRegNewified) + return taken ? Hexagon::J4_cmplt_f_jumpnv_t + : Hexagon::J4_cmplt_f_jumpnv_nt; + return taken ? Hexagon::J4_cmpgt_f_jumpnv_t + : Hexagon::J4_cmpgt_f_jumpnv_nt; + + case Hexagon::C4_cmplteu: + if (secondRegNewified) + return taken ? 
Hexagon::J4_cmpltu_f_jumpnv_t + : Hexagon::J4_cmpltu_f_jumpnv_nt; + return taken ? Hexagon::J4_cmpgtu_f_jumpnv_t + : Hexagon::J4_cmpgtu_f_jumpnv_nt; + + default: + llvm_unreachable("Could not find matching New Value Jump instruction."); + } + // return *some value* to avoid compiler warning + return 0; +} + +bool HexagonNewValueJump::isNewValueJumpCandidate(const MachineInstr *MI) + const { + switch (MI->getOpcode()) { + case Hexagon::C2_cmpeq: + case Hexagon::C2_cmpeqi: + case Hexagon::C2_cmpgt: + case Hexagon::C2_cmpgti: + case Hexagon::C2_cmpgtu: + case Hexagon::C2_cmpgtui: + case Hexagon::C4_cmpneq: + case Hexagon::C4_cmplte: + case Hexagon::C4_cmplteu: + return true; + + default: + return false; + } +} + + +bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { + + DEBUG(dbgs() << "********** Hexagon New Value Jump **********\n" + << "********** Function: " + << MF.getName() << "\n"); + + // If we move NewValueJump before register allocation we'll need live variable + // analysis here too. + + QII = static_cast<const HexagonInstrInfo *>(MF.getSubtarget().getInstrInfo()); + QRI = static_cast<const HexagonRegisterInfo *>( + MF.getSubtarget().getRegisterInfo()); + MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); + + if (DisableNewValueJumps) { + return false; + } + + int nvjCount = DbgNVJCount; + int nvjGenerated = 0; + + // Loop through all the bb's of the function + for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end(); + MBBb != MBBe; ++MBBb) { + MachineBasicBlock *MBB = &*MBBb; + + DEBUG(dbgs() << "** dumping bb ** " + << MBB->getNumber() << "\n"); + DEBUG(MBB->dump()); + DEBUG(dbgs() << "\n" << "********** dumping instr bottom up **********\n"); + bool foundJump = false; + bool foundCompare = false; + bool invertPredicate = false; + unsigned predReg = 0; // predicate reg of the jump. + unsigned cmpReg1 = 0; + int cmpOp2 = 0; + bool MO1IsKill = false; + bool MO2IsKill = false; + MachineBasicBlock::iterator jmpPos; + MachineBasicBlock::iterator cmpPos; + MachineInstr *cmpInstr = nullptr, *jmpInstr = nullptr; + MachineBasicBlock *jmpTarget = nullptr; + bool afterRA = false; + bool isSecondOpReg = false; + bool isSecondOpNewified = false; + // Traverse the basic block - bottom up + for (MachineBasicBlock::iterator MII = MBB->end(), E = MBB->begin(); + MII != E;) { + MachineInstr *MI = --MII; + if (MI->isDebugValue()) { + continue; + } + + if ((nvjCount == 0) || (nvjCount > -1 && nvjCount <= nvjGenerated)) + break; + + DEBUG(dbgs() << "Instr: "; MI->dump(); dbgs() << "\n"); + + if (!foundJump && + (MI->getOpcode() == Hexagon::J2_jumpt || + MI->getOpcode() == Hexagon::J2_jumpf || + MI->getOpcode() == Hexagon::J2_jumptnewpt || + MI->getOpcode() == Hexagon::J2_jumptnew || + MI->getOpcode() == Hexagon::J2_jumpfnewpt || + MI->getOpcode() == Hexagon::J2_jumpfnew)) { + // This is where you would insert your compare and + // instr that feeds compare + jmpPos = MII; + jmpInstr = MI; + predReg = MI->getOperand(0).getReg(); + afterRA = TargetRegisterInfo::isPhysicalRegister(predReg); + + // If ifconverter had not messed up with the kill flags of the + // operands, the following check on the kill flag would suffice. + // if(!jmpInstr->getOperand(0).isKill()) break; + + // This predicate register is live out out of BB + // this would only work if we can actually use Live + // variable analysis on phy regs - but LLVM does not + // provide LV analysis on phys regs. 
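[editor's note] getNewValueJumpOpcode() above decides between the ":t" (taken) and ":nt" (not-taken) jump variants by comparing the edge's branch probability against 1/2. A tiny sketch of that selection with a plain numerator/denominator standing in for BranchProbability:

// Taken/not-taken hint selection as in getNewValueJumpOpcode().
#include <cstdio>

static const char *pickHint(unsigned Num, unsigned Den) {
  bool Taken = 2 * Num >= Den;            // Prediction >= BranchProbability(1, 2)
  return Taken ? "jumpnv_t" : "jumpnv_nt";
}

int main() {
  std::printf("%s %s\n", pickHint(3, 4), pickHint(1, 4)); // jumpnv_t jumpnv_nt
}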
+ //if(LVs.isLiveOut(predReg, *MBB)) break; + + // Get all the successors of this block - which will always + // be 2. Check if the predicate register is live in in those + // successor. If yes, we can not delete the predicate - + // I am doing this only because LLVM does not provide LiveOut + // at the BB level. + bool predLive = false; + for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(), + SIE = MBB->succ_end(); SI != SIE; ++SI) { + MachineBasicBlock* succMBB = *SI; + if (succMBB->isLiveIn(predReg)) { + predLive = true; + } + } + if (predLive) + break; + + jmpTarget = MI->getOperand(1).getMBB(); + foundJump = true; + if (MI->getOpcode() == Hexagon::J2_jumpf || + MI->getOpcode() == Hexagon::J2_jumpfnewpt || + MI->getOpcode() == Hexagon::J2_jumpfnew) { + invertPredicate = true; + } + continue; + } + + // No new value jump if there is a barrier. A barrier has to be in its + // own packet. A barrier has zero operands. We conservatively bail out + // here if we see any instruction with zero operands. + if (foundJump && MI->getNumOperands() == 0) + break; + + if (foundJump && + !foundCompare && + MI->getOperand(0).isReg() && + MI->getOperand(0).getReg() == predReg) { + + // Not all compares can be new value compare. Arch Spec: 7.6.1.1 + if (isNewValueJumpCandidate(MI)) { + + assert((MI->getDesc().isCompare()) && + "Only compare instruction can be collapsed into New Value Jump"); + isSecondOpReg = MI->getOperand(2).isReg(); + + if (!canCompareBeNewValueJump(QII, QRI, MII, predReg, isSecondOpReg, + afterRA, jmpPos, MF)) + break; + + cmpInstr = MI; + cmpPos = MII; + foundCompare = true; + + // We need cmpReg1 and cmpOp2(imm or reg) while building + // new value jump instruction. + cmpReg1 = MI->getOperand(1).getReg(); + if (MI->getOperand(1).isKill()) + MO1IsKill = true; + + if (isSecondOpReg) { + cmpOp2 = MI->getOperand(2).getReg(); + if (MI->getOperand(2).isKill()) + MO2IsKill = true; + } else + cmpOp2 = MI->getOperand(2).getImm(); + continue; + } + } + + if (foundCompare && foundJump) { + + // If "common" checks fail, bail out on this BB. + if (!commonChecksToProhibitNewValueJump(afterRA, MII)) + break; + + bool foundFeeder = false; + MachineBasicBlock::iterator feederPos = MII; + if (MI->getOperand(0).isReg() && + MI->getOperand(0).isDef() && + (MI->getOperand(0).getReg() == cmpReg1 || + (isSecondOpReg && + MI->getOperand(0).getReg() == (unsigned) cmpOp2))) { + + unsigned feederReg = MI->getOperand(0).getReg(); + + // First try to see if we can get the feeder from the first operand + // of the compare. If we can not, and if secondOpReg is true + // (second operand of the compare is also register), try that one. + // TODO: Try to come up with some heuristic to figure out which + // feeder would benefit. + + if (feederReg == cmpReg1) { + if (!canBeFeederToNewValueJump(QII, QRI, MII, jmpPos, cmpPos, MF)) { + if (!isSecondOpReg) + break; + else + continue; + } else + foundFeeder = true; + } + + if (!foundFeeder && + isSecondOpReg && + feederReg == (unsigned) cmpOp2) + if (!canBeFeederToNewValueJump(QII, QRI, MII, jmpPos, cmpPos, MF)) + break; + + if (isSecondOpReg) { + // In case of CMPLT, or CMPLTU, or EQ with the second register + // to newify, swap the operands. 
+ if (cmpInstr->getOpcode() == Hexagon::C2_cmpeq && + feederReg == (unsigned) cmpOp2) { + unsigned tmp = cmpReg1; + bool tmpIsKill = MO1IsKill; + cmpReg1 = cmpOp2; + MO1IsKill = MO2IsKill; + cmpOp2 = tmp; + MO2IsKill = tmpIsKill; + } + + // Now we have swapped the operands, all we need to check is, + // if the second operand (after swap) is the feeder. + // And if it is, make a note. + if (feederReg == (unsigned)cmpOp2) + isSecondOpNewified = true; + } + + // Now that we are moving feeder close the jump, + // make sure we are respecting the kill values of + // the operands of the feeder. + + bool updatedIsKill = false; + for (unsigned i = 0; i < MI->getNumOperands(); i++) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isUse()) { + unsigned feederReg = MO.getReg(); + for (MachineBasicBlock::iterator localII = feederPos, + end = jmpPos; localII != end; localII++) { + MachineInstr *localMI = localII; + for (unsigned j = 0; j < localMI->getNumOperands(); j++) { + MachineOperand &localMO = localMI->getOperand(j); + if (localMO.isReg() && localMO.isUse() && + localMO.isKill() && feederReg == localMO.getReg()) { + // We found that there is kill of a use register + // Set up a kill flag on the register + localMO.setIsKill(false); + MO.setIsKill(); + updatedIsKill = true; + break; + } + } + if (updatedIsKill) break; + } + } + if (updatedIsKill) break; + } + + MBB->splice(jmpPos, MI->getParent(), MI); + MBB->splice(jmpPos, MI->getParent(), cmpInstr); + DebugLoc dl = MI->getDebugLoc(); + MachineInstr *NewMI; + + assert((isNewValueJumpCandidate(cmpInstr)) && + "This compare is not a New Value Jump candidate."); + unsigned opc = getNewValueJumpOpcode(cmpInstr, cmpOp2, + isSecondOpNewified, + jmpTarget, MBPI); + if (invertPredicate) + opc = QII->getInvertedPredicatedOpcode(opc); + + if (isSecondOpReg) + NewMI = BuildMI(*MBB, jmpPos, dl, + QII->get(opc)) + .addReg(cmpReg1, getKillRegState(MO1IsKill)) + .addReg(cmpOp2, getKillRegState(MO2IsKill)) + .addMBB(jmpTarget); + + else if ((cmpInstr->getOpcode() == Hexagon::C2_cmpeqi || + cmpInstr->getOpcode() == Hexagon::C2_cmpgti) && + cmpOp2 == -1 ) + // Corresponding new-value compare jump instructions don't have the + // operand for -1 immediate value. + NewMI = BuildMI(*MBB, jmpPos, dl, + QII->get(opc)) + .addReg(cmpReg1, getKillRegState(MO1IsKill)) + .addMBB(jmpTarget); + + else + NewMI = BuildMI(*MBB, jmpPos, dl, + QII->get(opc)) + .addReg(cmpReg1, getKillRegState(MO1IsKill)) + .addImm(cmpOp2) + .addMBB(jmpTarget); + + assert(NewMI && "New Value Jump Instruction Not created!"); + (void)NewMI; + if (cmpInstr->getOperand(0).isReg() && + cmpInstr->getOperand(0).isKill()) + cmpInstr->getOperand(0).setIsKill(false); + if (cmpInstr->getOperand(1).isReg() && + cmpInstr->getOperand(1).isKill()) + cmpInstr->getOperand(1).setIsKill(false); + cmpInstr->eraseFromParent(); + jmpInstr->eraseFromParent(); + ++nvjGenerated; + ++NumNVJGenerated; + break; + } + } + } + } + + return true; + +} + +FunctionPass *llvm::createHexagonNewValueJump() { + return new HexagonNewValueJump(); +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonOperands.td b/contrib/llvm/lib/Target/Hexagon/HexagonOperands.td new file mode 100644 index 0000000..fbd29cd --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonOperands.td @@ -0,0 +1,603 @@ +//===- HexagonImmediates.td - Hexagon immediate processing -*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illnois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +def s32ImmOperand : AsmOperandClass { let Name = "s32Imm"; } +def s8ImmOperand : AsmOperandClass { let Name = "s8Imm"; } +def s8Imm64Operand : AsmOperandClass { let Name = "s8Imm64"; } +def s6ImmOperand : AsmOperandClass { let Name = "s6Imm"; } +def s4ImmOperand : AsmOperandClass { let Name = "s4Imm"; } +def s4_0ImmOperand : AsmOperandClass { let Name = "s4_0Imm"; } +def s4_1ImmOperand : AsmOperandClass { let Name = "s4_1Imm"; } +def s4_2ImmOperand : AsmOperandClass { let Name = "s4_2Imm"; } +def s4_3ImmOperand : AsmOperandClass { let Name = "s4_3Imm"; } +def s4_6ImmOperand : AsmOperandClass { let Name = "s4_6Imm"; } +def s3_6ImmOperand : AsmOperandClass { let Name = "s3_6Imm"; } +def u64ImmOperand : AsmOperandClass { let Name = "u64Imm"; } +def u32ImmOperand : AsmOperandClass { let Name = "u32Imm"; } +def u26_6ImmOperand : AsmOperandClass { let Name = "u26_6Imm"; } +def u16ImmOperand : AsmOperandClass { let Name = "u16Imm"; } +def u16_0ImmOperand : AsmOperandClass { let Name = "u16_0Imm"; } +def u16_1ImmOperand : AsmOperandClass { let Name = "u16_1Imm"; } +def u16_2ImmOperand : AsmOperandClass { let Name = "u16_2Imm"; } +def u16_3ImmOperand : AsmOperandClass { let Name = "u16_3Imm"; } +def u11_3ImmOperand : AsmOperandClass { let Name = "u11_3Imm"; } +def u10ImmOperand : AsmOperandClass { let Name = "u10Imm"; } +def u9ImmOperand : AsmOperandClass { let Name = "u9Imm"; } +def u8ImmOperand : AsmOperandClass { let Name = "u8Imm"; } +def u7ImmOperand : AsmOperandClass { let Name = "u7Imm"; } +def u6ImmOperand : AsmOperandClass { let Name = "u6Imm"; } +def u6_0ImmOperand : AsmOperandClass { let Name = "u6_0Imm"; } +def u6_1ImmOperand : AsmOperandClass { let Name = "u6_1Imm"; } +def u6_2ImmOperand : AsmOperandClass { let Name = "u6_2Imm"; } +def u6_3ImmOperand : AsmOperandClass { let Name = "u6_3Imm"; } +def u5ImmOperand : AsmOperandClass { let Name = "u5Imm"; } +def u4ImmOperand : AsmOperandClass { let Name = "u4Imm"; } +def u3ImmOperand : AsmOperandClass { let Name = "u3Imm"; } +def u2ImmOperand : AsmOperandClass { let Name = "u2Imm"; } +def u1ImmOperand : AsmOperandClass { let Name = "u1Imm"; } +def n8ImmOperand : AsmOperandClass { let Name = "n8Imm"; } +// Immediate operands. 
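+// Each AsmOperandClass above is referenced by a ParserMatchClass below; that
+// is how the generated assembly matcher ties an instruction's immediate field
+// to its operand class, while DecoderMethod names the routine the
+// disassembler calls to extract the value.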
+ +let OperandType = "OPERAND_IMMEDIATE", + DecoderMethod = "unsignedImmDecoder" in { + def s32Imm : Operand<i32> { let ParserMatchClass = s32ImmOperand; + let DecoderMethod = "s32ImmDecoder"; } + def s8Imm : Operand<i32> { let ParserMatchClass = s8ImmOperand; + let DecoderMethod = "s8ImmDecoder"; } + def s8Imm64 : Operand<i64> { let ParserMatchClass = s8Imm64Operand; + let DecoderMethod = "s8ImmDecoder"; } + def s6Imm : Operand<i32> { let ParserMatchClass = s6ImmOperand; + let DecoderMethod = "s6_0ImmDecoder"; } + def s6_3Imm : Operand<i32>; + def s4Imm : Operand<i32> { let ParserMatchClass = s4ImmOperand; + let DecoderMethod = "s4_0ImmDecoder"; } + def s4_0Imm : Operand<i32> { let ParserMatchClass = s4_0ImmOperand; + let DecoderMethod = "s4_0ImmDecoder"; } + def s4_1Imm : Operand<i32> { let ParserMatchClass = s4_1ImmOperand; + let DecoderMethod = "s4_1ImmDecoder"; } + def s4_2Imm : Operand<i32> { let ParserMatchClass = s4_2ImmOperand; + let DecoderMethod = "s4_2ImmDecoder"; } + def s4_3Imm : Operand<i32> { let ParserMatchClass = s4_3ImmOperand; + let DecoderMethod = "s4_3ImmDecoder"; } + def u64Imm : Operand<i64> { let ParserMatchClass = u64ImmOperand; } + def u32Imm : Operand<i32> { let ParserMatchClass = u32ImmOperand; } + def u26_6Imm : Operand<i32> { let ParserMatchClass = u26_6ImmOperand; } + def u16Imm : Operand<i32> { let ParserMatchClass = u16ImmOperand; } + def u16_0Imm : Operand<i32> { let ParserMatchClass = u16_0ImmOperand; } + def u16_1Imm : Operand<i32> { let ParserMatchClass = u16_1ImmOperand; } + def u16_2Imm : Operand<i32> { let ParserMatchClass = u16_2ImmOperand; } + def u16_3Imm : Operand<i32> { let ParserMatchClass = u16_3ImmOperand; } + def u11_3Imm : Operand<i32> { let ParserMatchClass = u11_3ImmOperand; } + def u10Imm : Operand<i32> { let ParserMatchClass = u10ImmOperand; } + def u9Imm : Operand<i32> { let ParserMatchClass = u9ImmOperand; } + def u8Imm : Operand<i32> { let ParserMatchClass = u8ImmOperand; } + def u7Imm : Operand<i32> { let ParserMatchClass = u7ImmOperand; } + def u6Imm : Operand<i32> { let ParserMatchClass = u6ImmOperand; } + def u6_0Imm : Operand<i32> { let ParserMatchClass = u6_0ImmOperand; } + def u6_1Imm : Operand<i32> { let ParserMatchClass = u6_1ImmOperand; } + def u6_2Imm : Operand<i32> { let ParserMatchClass = u6_2ImmOperand; } + def u6_3Imm : Operand<i32> { let ParserMatchClass = u6_3ImmOperand; } + def u5Imm : Operand<i32> { let ParserMatchClass = u5ImmOperand; } + def u5_0Imm : Operand<i32>; + def u5_1Imm : Operand<i32>; + def u5_2Imm : Operand<i32>; + def u5_3Imm : Operand<i32>; + def u4Imm : Operand<i32> { let ParserMatchClass = u4ImmOperand; } + def u4_0Imm : Operand<i32>; + def u4_1Imm : Operand<i32>; + def u4_2Imm : Operand<i32>; + def u4_3Imm : Operand<i32>; + def u3Imm : Operand<i32> { let ParserMatchClass = u3ImmOperand; } + def u3_0Imm : Operand<i32>; + def u3_1Imm : Operand<i32>; + def u3_2Imm : Operand<i32>; + def u3_3Imm : Operand<i32>; + def u2Imm : Operand<i32> { let ParserMatchClass = u2ImmOperand; } + def u1Imm : Operand<i32> { let ParserMatchClass = u1ImmOperand; } + def n8Imm : Operand<i32> { let ParserMatchClass = n8ImmOperand; } +} + +let OperandType = "OPERAND_IMMEDIATE" in { + def s4_6Imm : Operand<i32> { let ParserMatchClass = s4_6ImmOperand; + let PrintMethod = "prints4_6ImmOperand"; + let DecoderMethod = "s4_6ImmDecoder";} + def s4_7Imm : Operand<i32> { let PrintMethod = "prints4_7ImmOperand"; + let DecoderMethod = "s4_6ImmDecoder";} + def s3_6Imm : Operand<i32> { let ParserMatchClass = s3_6ImmOperand; + let 
PrintMethod = "prints3_6ImmOperand"; + let DecoderMethod = "s3_6ImmDecoder";} + def s3_7Imm : Operand<i32> { let PrintMethod = "prints3_7ImmOperand"; + let DecoderMethod = "s3_6ImmDecoder";} +} + +// +// Immediate predicates +// +def s32ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isInt<32>(v); +}]>; + +def s32_0ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isInt<32>(v); +}]>; + +def s31_1ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedInt<31,1>(v); +}]>; + +def s30_2ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedInt<30,2>(v); +}]>; + +def s29_3ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedInt<29,3>(v); +}]>; + +def s16ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isInt<16>(v); +}]>; + +def s11_0ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isInt<11>(v); +}]>; + +def s11_1ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedInt<11,1>(v); +}]>; + +def s11_2ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedInt<11,2>(v); +}]>; + +def s11_3ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedInt<11,3>(v); +}]>; + +def s10ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isInt<10>(v); +}]>; + +def s8ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isInt<8>(v); +}]>; + +def s8Imm64Pred : PatLeaf<(i64 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isInt<8>(v); +}]>; + +def s6ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isInt<6>(v); +}]>; + +def s4_0ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isInt<4>(v); +}]>; + +def s4_1ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedInt<4,1>(v); +}]>; + +def s4_2ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedInt<4,2>(v); +}]>; + +def s4_3ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedInt<4,3>(v); +}]>; + +def u64ImmPred : PatLeaf<(i64 imm), [{ + // Adding "N ||" to suppress gcc unused warning. 
+ return (N || true); +}]>; + +def u32ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<32>(v); +}]>; + +def u32_0ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<32>(v); +}]>; + +def u31_1ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedUInt<31,1>(v); +}]>; + +def u30_2ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedUInt<30,2>(v); +}]>; + +def u29_3ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedUInt<29,3>(v); +}]>; + +def u26_6ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedUInt<26,6>(v); +}]>; + +def u16_0ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<16>(v); +}]>; + +def u16_1ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedUInt<16,1>(v); +}]>; + +def u16_2ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedUInt<16,2>(v); +}]>; + +def u11_3ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedUInt<11,3>(v); +}]>; + +def u10ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<10>(v); +}]>; + +def u9ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<9>(v); +}]>; + +def u8ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<8>(v); +}]>; + +def u7StrictPosImmPred : ImmLeaf<i32, [{ + // u7StrictPosImmPred predicate - True if the immediate fits in an 7-bit + // unsigned field and is strictly greater than 0. + return isUInt<7>(Imm) && Imm > 0; +}]>; + +def u7ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<7>(v); +}]>; + +def u6ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<6>(v); +}]>; + +def u6_0ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<6>(v); +}]>; + +def u6_1ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedUInt<6,1>(v); +}]>; + +def u6_2ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedUInt<6,2>(v); +}]>; + +def u6_3ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedUInt<6,3>(v); +}]>; + +def u5ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<5>(v); +}]>; + +def u4ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<4>(v); +}]>; + +def u3ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<3>(v); +}]>; + +def u2ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<2>(v); +}]>; + +def u1ImmPred : PatLeaf<(i1 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<1>(v); +}]>; + +def u1ImmPred32 : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<1>(v); +}]>; + +def m5BImmPred : PatLeaf<(i32 imm), [{ + // m5BImmPred predicate - True if the (char) number is in range -1 .. -31 + // and will fit in a 5 bit field when made positive, for use in memops. 
+ // this is specific to the zero extending of a negative by CombineInstr + int8_t v = (int8_t)N->getSExtValue(); + return (-31 <= v && v <= -1); +}]>; + +def m5HImmPred : PatLeaf<(i32 imm), [{ + // m5HImmPred predicate - True if the (short) number is in range -1 .. -31 + // and will fit in a 5 bit field when made positive, for use in memops. + // this is specific to the zero extending of a negative by CombineInstr + int16_t v = (int16_t)N->getSExtValue(); + return (-31 <= v && v <= -1); +}]>; + +def m5ImmPred : PatLeaf<(i32 imm), [{ + // m5ImmPred predicate - True if the number is in range -1 .. -31 + // and will fit in a 5 bit field when made positive, for use in memops. + int64_t v = (int64_t)N->getSExtValue(); + return (-31 <= v && v <= -1); +}]>; + +//InN means negative integers in [-(2^N - 1), 0] +def n8ImmPred : PatLeaf<(i32 imm), [{ + // n8ImmPred predicate - True if the immediate fits in a 8-bit signed + // field. + int64_t v = (int64_t)N->getSExtValue(); + return (-255 <= v && v <= 0); +}]>; + +def nOneImmPred : PatLeaf<(i32 imm), [{ + // nOneImmPred predicate - True if the immediate is -1. + int64_t v = (int64_t)N->getSExtValue(); + return (-1 == v); +}]>; + +def Set5ImmPred : PatLeaf<(i32 imm), [{ + // Set5ImmPred predicate - True if the number is in the series of values. + // [ 2^0, 2^1, ... 2^31 ] + // For use in setbit immediate. + uint32_t v = (int32_t)N->getSExtValue(); + // Constrain to 32 bits, and then check for single bit. + return ImmIsSingleBit(v); +}]>; + +def Clr5ImmPred : PatLeaf<(i32 imm), [{ + // Clr5ImmPred predicate - True if the number is in the series of + // bit negated values. + // [ 2^0, 2^1, ... 2^31 ] + // For use in clrbit immediate. + // Note: we are bit NOTing the value. + uint32_t v = ~ (int32_t)N->getSExtValue(); + // Constrain to 32 bits, and then check for single bit. + return ImmIsSingleBit(v); +}]>; + +def SetClr5ImmPred : PatLeaf<(i32 imm), [{ + // True if the immediate is in range 0..31. + int32_t v = (int32_t)N->getSExtValue(); + return (v >= 0 && v <= 31); +}]>; + +def Set4ImmPred : PatLeaf<(i32 imm), [{ + // Set4ImmPred predicate - True if the number is in the series of values: + // [ 2^0, 2^1, ... 2^15 ]. + // For use in setbit immediate. + uint16_t v = (int16_t)N->getSExtValue(); + // Constrain to 16 bits, and then check for single bit. + return ImmIsSingleBit(v); +}]>; + +def Clr4ImmPred : PatLeaf<(i32 imm), [{ + // Clr4ImmPred predicate - True if the number is in the series of + // bit negated values: + // [ 2^0, 2^1, ... 2^15 ]. + // For use in setbit and clrbit immediate. + uint16_t v = ~ (int16_t)N->getSExtValue(); + // Constrain to 16 bits, and then check for single bit. + return ImmIsSingleBit(v); +}]>; + +def SetClr4ImmPred : PatLeaf<(i32 imm), [{ + // True if the immediate is in the range 0..15. + int16_t v = (int16_t)N->getSExtValue(); + return (v >= 0 && v <= 15); +}]>; + +def Set3ImmPred : PatLeaf<(i32 imm), [{ + // True if the number is in the series of values: [ 2^0, 2^1, ... 2^7 ]. + // For use in setbit immediate. + uint8_t v = (int8_t)N->getSExtValue(); + // Constrain to 8 bits, and then check for single bit. + return ImmIsSingleBit(v); +}]>; + +def Clr3ImmPred : PatLeaf<(i32 imm), [{ + // True if the number is in the series of bit negated values: [ 2^0, 2^1, ... 2^7 ]. + // For use in setbit and clrbit immediate. + uint8_t v = ~ (int8_t)N->getSExtValue(); + // Constrain to 8 bits, and then check for single bit. 
+ return ImmIsSingleBit(v); +}]>; + +def SetClr3ImmPred : PatLeaf<(i32 imm), [{ + // True if the immediate is in the range 0..7. + int8_t v = (int8_t)N->getSExtValue(); + return (v >= 0 && v <= 7); +}]>; + + +// Extendable immediate operands. +def f32ExtOperand : AsmOperandClass { let Name = "f32Ext"; } +def s16ExtOperand : AsmOperandClass { let Name = "s16Ext"; } +def s12ExtOperand : AsmOperandClass { let Name = "s12Ext"; } +def s10ExtOperand : AsmOperandClass { let Name = "s10Ext"; } +def s9ExtOperand : AsmOperandClass { let Name = "s9Ext"; } +def s8ExtOperand : AsmOperandClass { let Name = "s8Ext"; } +def s7ExtOperand : AsmOperandClass { let Name = "s7Ext"; } +def s6ExtOperand : AsmOperandClass { let Name = "s6Ext"; } +def s11_0ExtOperand : AsmOperandClass { let Name = "s11_0Ext"; } +def s11_1ExtOperand : AsmOperandClass { let Name = "s11_1Ext"; } +def s11_2ExtOperand : AsmOperandClass { let Name = "s11_2Ext"; } +def s11_3ExtOperand : AsmOperandClass { let Name = "s11_3Ext"; } +def u6ExtOperand : AsmOperandClass { let Name = "u6Ext"; } +def u7ExtOperand : AsmOperandClass { let Name = "u7Ext"; } +def u8ExtOperand : AsmOperandClass { let Name = "u8Ext"; } +def u9ExtOperand : AsmOperandClass { let Name = "u9Ext"; } +def u10ExtOperand : AsmOperandClass { let Name = "u10Ext"; } +def u6_0ExtOperand : AsmOperandClass { let Name = "u6_0Ext"; } +def u6_1ExtOperand : AsmOperandClass { let Name = "u6_1Ext"; } +def u6_2ExtOperand : AsmOperandClass { let Name = "u6_2Ext"; } +def u6_3ExtOperand : AsmOperandClass { let Name = "u6_3Ext"; } +def u32MustExtOperand : AsmOperandClass { let Name = "u32MustExt"; } + + + +let OperandType = "OPERAND_IMMEDIATE", PrintMethod = "printExtOperand", + DecoderMethod = "unsignedImmDecoder" in { + def f32Ext : Operand<f32> { let ParserMatchClass = f32ExtOperand; } + def s16Ext : Operand<i32> { let ParserMatchClass = s16ExtOperand; + let DecoderMethod = "s16ImmDecoder"; } + def s12Ext : Operand<i32> { let ParserMatchClass = s12ExtOperand; + let DecoderMethod = "s12ImmDecoder"; } + def s11_0Ext : Operand<i32> { let ParserMatchClass = s11_0ExtOperand; + let DecoderMethod = "s11_0ImmDecoder"; } + def s11_1Ext : Operand<i32> { let ParserMatchClass = s11_1ExtOperand; + let DecoderMethod = "s11_1ImmDecoder"; } + def s11_2Ext : Operand<i32> { let ParserMatchClass = s11_2ExtOperand; + let DecoderMethod = "s11_2ImmDecoder"; } + def s11_3Ext : Operand<i32> { let ParserMatchClass = s11_3ExtOperand; + let DecoderMethod = "s11_3ImmDecoder"; } + def s10Ext : Operand<i32> { let ParserMatchClass = s10ExtOperand; + let DecoderMethod = "s10ImmDecoder"; } + def s9Ext : Operand<i32> { let ParserMatchClass = s9ExtOperand; + let DecoderMethod = "s90ImmDecoder"; } + def s8Ext : Operand<i32> { let ParserMatchClass = s8ExtOperand; + let DecoderMethod = "s8ImmDecoder"; } + def s7Ext : Operand<i32> { let ParserMatchClass = s7ExtOperand; } + def s6Ext : Operand<i32> { let ParserMatchClass = s6ExtOperand; + let DecoderMethod = "s6_0ImmDecoder"; } + def u6Ext : Operand<i32> { let ParserMatchClass = u6ExtOperand; } + def u7Ext : Operand<i32> { let ParserMatchClass = u7ExtOperand; } + def u8Ext : Operand<i32> { let ParserMatchClass = u8ExtOperand; } + def u9Ext : Operand<i32> { let ParserMatchClass = u9ExtOperand; } + def u10Ext : Operand<i32> { let ParserMatchClass = u10ExtOperand; } + def u6_0Ext : Operand<i32> { let ParserMatchClass = u6_0ExtOperand; } + def u6_1Ext : Operand<i32> { let ParserMatchClass = u6_1ExtOperand; } + def u6_2Ext : Operand<i32> { let ParserMatchClass = u6_2ExtOperand; } + 
def u6_3Ext : Operand<i32> { let ParserMatchClass = u6_3ExtOperand; } + def u32MustExt : Operand<i32> { let ParserMatchClass = u32MustExtOperand; } +} + + +def s4_7ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + if (HST->hasV60TOps()) + // Return true if the immediate can fit in a 10-bit sign extended field and + // is 128-byte aligned. + return isShiftedInt<4,7>(v); + return false; +}]>; + +def s3_7ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + if (HST->hasV60TOps()) + // Return true if the immediate can fit in a 9-bit sign extended field and + // is 128-byte aligned. + return isShiftedInt<3,7>(v); + return false; +}]>; + +def s4_6ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + if (HST->hasV60TOps()) + // Return true if the immediate can fit in a 10-bit sign extended field and + // is 64-byte aligned. + return isShiftedInt<4,6>(v); + return false; +}]>; + +def s3_6ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + if (HST->hasV60TOps()) + // Return true if the immediate can fit in a 9-bit sign extended field and + // is 64-byte aligned. + return isShiftedInt<3,6>(v); + return false; +}]>; + + +// This complex pattern exists only to create a machine instruction operand +// of type "frame index". There doesn't seem to be a way to do that directly +// in the patterns. +def AddrFI : ComplexPattern<i32, 1, "SelectAddrFI", [frameindex], []>; + +// These complex patterns are not strictly necessary, since global address +// folding will happen during DAG combining. For distinguishing between GA +// and GP, pat frags with HexagonCONST32 and HexagonCONST32_GP can be used. +def AddrGA : ComplexPattern<i32, 1, "SelectAddrGA", [], []>; +def AddrGP : ComplexPattern<i32, 1, "SelectAddrGP", [], []>; + +// Address operands. + +let PrintMethod = "printGlobalOperand" in { + def globaladdress : Operand<i32>; + def globaladdressExt : Operand<i32>; +} + +let PrintMethod = "printJumpTable" in +def jumptablebase : Operand<i32>; + +def brtarget : Operand<OtherVT> { + let DecoderMethod = "brtargetDecoder"; + let PrintMethod = "printBrtarget"; +} +def brtargetExt : Operand<OtherVT> { + let DecoderMethod = "brtargetDecoder"; + let PrintMethod = "printBrtarget"; +} +def calltarget : Operand<i32> { + let DecoderMethod = "brtargetDecoder"; + let PrintMethod = "printBrtarget"; +} + +def bblabel : Operand<i32>; +def bbl : SDNode<"ISD::BasicBlock", SDTPtrLeaf, [], "BasicBlockSDNode">; + +// Return true if for a 32 to 64-bit sign-extended load. +def is_sext_i32 : PatLeaf<(i64 DoubleRegs:$src1), [{ + LoadSDNode *LD = dyn_cast<LoadSDNode>(N); + if (!LD) + return false; + return LD->getExtensionType() == ISD::SEXTLOAD && + LD->getMemoryVT().getScalarType() == MVT::i32; +}]>; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp new file mode 100644 index 0000000..1723771 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp @@ -0,0 +1,150 @@ +//===- HexagonOptimizeSZextends.cpp - Remove unnecessary argument extends -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Pass that removes sign extends for function parameters. 
These parameters +// are already sign extended by the caller per Hexagon's ABI +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/StackProtector.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/Scalar.h" + +#include "Hexagon.h" + +using namespace llvm; + +namespace llvm { + FunctionPass *createHexagonOptimizeSZextends(); + void initializeHexagonOptimizeSZextendsPass(PassRegistry&); +} + +namespace { + struct HexagonOptimizeSZextends : public FunctionPass { + public: + static char ID; + HexagonOptimizeSZextends() : FunctionPass(ID) { + initializeHexagonOptimizeSZextendsPass(*PassRegistry::getPassRegistry()); + } + bool runOnFunction(Function &F) override; + + const char *getPassName() const override { + return "Remove sign extends"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<MachineFunctionAnalysis>(); + AU.addPreserved<MachineFunctionAnalysis>(); + AU.addPreserved<StackProtector>(); + FunctionPass::getAnalysisUsage(AU); + } + + bool intrinsicAlreadySextended(Intrinsic::ID IntID); + }; +} + +char HexagonOptimizeSZextends::ID = 0; + +INITIALIZE_PASS(HexagonOptimizeSZextends, "reargs", + "Remove Sign and Zero Extends for Args", false, false) + +bool HexagonOptimizeSZextends::intrinsicAlreadySextended(Intrinsic::ID IntID) { + switch(IntID) { + case llvm::Intrinsic::hexagon_A2_addh_l16_sat_ll: + return true; + default: + break; + } + return false; +} + +bool HexagonOptimizeSZextends::runOnFunction(Function &F) { + unsigned Idx = 1; + // Try to optimize sign extends in formal parameters. It's relying on + // callee already sign extending the values. I'm not sure if our ABI + // requires callee to sign extend though. + for (auto &Arg : F.args()) { + if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) { + if (!isa<PointerType>(Arg.getType())) { + for (auto UI = Arg.use_begin(); UI != Arg.use_end();) { + if (isa<SExtInst>(*UI)) { + Instruction* Use = cast<Instruction>(*UI); + SExtInst* SI = new SExtInst(&Arg, Use->getType()); + assert (EVT::getEVT(SI->getType()) == + (EVT::getEVT(Use->getType()))); + ++UI; + Use->replaceAllUsesWith(SI); + Instruction* First = &F.getEntryBlock().front(); + SI->insertBefore(First); + Use->eraseFromParent(); + } else { + ++UI; + } + } + } + } + ++Idx; + } + + // Try to remove redundant sext operations on Hexagon. The hardware + // already sign extends many 16 bit intrinsic operations to 32 bits. + // For example: + // %34 = tail call i32 @llvm.hexagon.A2.addh.l16.sat.ll(i32 %x, i32 %y) + // %sext233 = shl i32 %34, 16 + // %conv52 = ashr exact i32 %sext233, 16 + for (auto &B : F) { + for (auto &I : B) { + // Look for arithmetic shift right by 16. + BinaryOperator *Ashr = dyn_cast<BinaryOperator>(&I); + if (!(Ashr && Ashr->getOpcode() == Instruction::AShr)) + continue; + Value *AshrOp1 = Ashr->getOperand(1); + ConstantInt *C = dyn_cast<ConstantInt>(AshrOp1); + // Right shifted by 16. + if (!(C && C->getSExtValue() == 16)) + continue; + + // The first operand of Ashr comes from logical shift left. + Instruction *Shl = dyn_cast<Instruction>(Ashr->getOperand(0)); + if (!(Shl && Shl->getOpcode() == Instruction::Shl)) + continue; + Value *Intr = Shl->getOperand(0); + Value *ShlOp1 = Shl->getOperand(1); + C = dyn_cast<ConstantInt>(ShlOp1); + // Left shifted by 16. 
+ if (!(C && C->getSExtValue() == 16)) + continue; + + // The first operand of Shl comes from an intrinsic. + if (IntrinsicInst *I = dyn_cast<IntrinsicInst>(Intr)) { + if (!intrinsicAlreadySextended(I->getIntrinsicID())) + continue; + // All is well. Replace all uses of AShr with I. + for (auto UI = Ashr->user_begin(), UE = Ashr->user_end(); + UI != UE; ++UI) { + const Use &TheUse = UI.getUse(); + if (Instruction *J = dyn_cast<Instruction>(TheUse.getUser())) { + J->replaceUsesOfWith(Ashr, I); + } + } + } + } + } + + return true; +} + + +FunctionPass *llvm::createHexagonOptimizeSZextends() { + return new HexagonOptimizeSZextends(); +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp new file mode 100644 index 0000000..e68ff85 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp @@ -0,0 +1,338 @@ +//===-- HexagonPeephole.cpp - Hexagon Peephole Optimiztions ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +// This peephole pass optimizes in the following cases. +// 1. Optimizes redundant sign extends for the following case +// Transform the following pattern +// %vreg170<def> = SXTW %vreg166 +// ... +// %vreg176<def> = COPY %vreg170:subreg_loreg +// +// Into +// %vreg176<def> = COPY vreg166 +// +// 2. Optimizes redundant negation of predicates. +// %vreg15<def> = CMPGTrr %vreg6, %vreg2 +// ... +// %vreg16<def> = NOT_p %vreg15<kill> +// ... +// JMP_c %vreg16<kill>, <BB#1>, %PC<imp-def,dead> +// +// Into +// %vreg15<def> = CMPGTrr %vreg6, %vreg2; +// ... +// JMP_cNot %vreg15<kill>, <BB#1>, %PC<imp-def,dead>; +// +// Note: The peephole pass makes the instrucstions like +// %vreg170<def> = SXTW %vreg166 or %vreg16<def> = NOT_p %vreg15<kill> +// redundant and relies on some form of dead removal instructions, like +// DCE or DIE to actually eliminate them. 
+ + +//===----------------------------------------------------------------------===// + +#include "Hexagon.h" +#include "HexagonTargetMachine.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Constants.h" +#include "llvm/PassSupport.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include <algorithm> + +using namespace llvm; + +#define DEBUG_TYPE "hexagon-peephole" + +static cl::opt<bool> DisableHexagonPeephole("disable-hexagon-peephole", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Disable Peephole Optimization")); + +static cl::opt<bool> DisablePNotP("disable-hexagon-pnotp", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Disable Optimization of PNotP")); + +static cl::opt<bool> DisableOptSZExt("disable-hexagon-optszext", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Disable Optimization of Sign/Zero Extends")); + +static cl::opt<bool> DisableOptExtTo64("disable-hexagon-opt-ext-to-64", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Disable Optimization of extensions to i64.")); + +namespace llvm { + FunctionPass *createHexagonPeephole(); + void initializeHexagonPeepholePass(PassRegistry&); +} + +namespace { + struct HexagonPeephole : public MachineFunctionPass { + const HexagonInstrInfo *QII; + const HexagonRegisterInfo *QRI; + const MachineRegisterInfo *MRI; + + public: + static char ID; + HexagonPeephole() : MachineFunctionPass(ID) { + initializeHexagonPeepholePass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + const char *getPassName() const override { + return "Hexagon optimize redundant zero and size extends"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + MachineFunctionPass::getAnalysisUsage(AU); + } + + private: + void ChangeOpInto(MachineOperand &Dst, MachineOperand &Src); + }; +} + +char HexagonPeephole::ID = 0; + +INITIALIZE_PASS(HexagonPeephole, "hexagon-peephole", "Hexagon Peephole", + false, false) + +bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { + QII = static_cast<const HexagonInstrInfo *>(MF.getSubtarget().getInstrInfo()); + QRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); + MRI = &MF.getRegInfo(); + + DenseMap<unsigned, unsigned> PeepholeMap; + DenseMap<unsigned, std::pair<unsigned, unsigned> > PeepholeDoubleRegsMap; + + if (DisableHexagonPeephole) return false; + + // Loop over all of the basic blocks. + for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end(); + MBBb != MBBe; ++MBBb) { + MachineBasicBlock *MBB = &*MBBb; + PeepholeMap.clear(); + PeepholeDoubleRegsMap.clear(); + + // Traverse the basic block. 
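+    // Each case below either records a def-to-source mapping (SXTW,
+    // A4_combineir, S2_lsr_i_p by 32, C2_not) or consumes an earlier mapping
+    // to rewrite the current COPY or predicated instruction in place.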
+ for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end(); + ++MII) { + MachineInstr *MI = MII; + // Look for sign extends: + // %vreg170<def> = SXTW %vreg166 + if (!DisableOptSZExt && MI->getOpcode() == Hexagon::A2_sxtw) { + assert (MI->getNumOperands() == 2); + MachineOperand &Dst = MI->getOperand(0); + MachineOperand &Src = MI->getOperand(1); + unsigned DstReg = Dst.getReg(); + unsigned SrcReg = Src.getReg(); + // Just handle virtual registers. + if (TargetRegisterInfo::isVirtualRegister(DstReg) && + TargetRegisterInfo::isVirtualRegister(SrcReg)) { + // Map the following: + // %vreg170<def> = SXTW %vreg166 + // PeepholeMap[170] = vreg166 + PeepholeMap[DstReg] = SrcReg; + } + } + + // Look for %vreg170<def> = COMBINE_ir_V4 (0, %vreg169) + // %vreg170:DoublRegs, %vreg169:IntRegs + if (!DisableOptExtTo64 && + MI->getOpcode () == Hexagon::A4_combineir) { + assert (MI->getNumOperands() == 3); + MachineOperand &Dst = MI->getOperand(0); + MachineOperand &Src1 = MI->getOperand(1); + MachineOperand &Src2 = MI->getOperand(2); + if (Src1.getImm() != 0) + continue; + unsigned DstReg = Dst.getReg(); + unsigned SrcReg = Src2.getReg(); + PeepholeMap[DstReg] = SrcReg; + } + + // Look for this sequence below + // %vregDoubleReg1 = LSRd_ri %vregDoubleReg0, 32 + // %vregIntReg = COPY %vregDoubleReg1:subreg_loreg. + // and convert into + // %vregIntReg = COPY %vregDoubleReg0:subreg_hireg. + if (MI->getOpcode() == Hexagon::S2_lsr_i_p) { + assert(MI->getNumOperands() == 3); + MachineOperand &Dst = MI->getOperand(0); + MachineOperand &Src1 = MI->getOperand(1); + MachineOperand &Src2 = MI->getOperand(2); + if (Src2.getImm() != 32) + continue; + unsigned DstReg = Dst.getReg(); + unsigned SrcReg = Src1.getReg(); + PeepholeDoubleRegsMap[DstReg] = + std::make_pair(*&SrcReg, Hexagon::subreg_hireg); + } + + // Look for P=NOT(P). + if (!DisablePNotP && + (MI->getOpcode() == Hexagon::C2_not)) { + assert (MI->getNumOperands() == 2); + MachineOperand &Dst = MI->getOperand(0); + MachineOperand &Src = MI->getOperand(1); + unsigned DstReg = Dst.getReg(); + unsigned SrcReg = Src.getReg(); + // Just handle virtual registers. + if (TargetRegisterInfo::isVirtualRegister(DstReg) && + TargetRegisterInfo::isVirtualRegister(SrcReg)) { + // Map the following: + // %vreg170<def> = NOT_xx %vreg166 + // PeepholeMap[170] = vreg166 + PeepholeMap[DstReg] = SrcReg; + } + } + + // Look for copy: + // %vreg176<def> = COPY %vreg170:subreg_loreg + if (!DisableOptSZExt && MI->isCopy()) { + assert (MI->getNumOperands() == 2); + MachineOperand &Dst = MI->getOperand(0); + MachineOperand &Src = MI->getOperand(1); + + // Make sure we are copying the lower 32 bits. + if (Src.getSubReg() != Hexagon::subreg_loreg) + continue; + + unsigned DstReg = Dst.getReg(); + unsigned SrcReg = Src.getReg(); + if (TargetRegisterInfo::isVirtualRegister(DstReg) && + TargetRegisterInfo::isVirtualRegister(SrcReg)) { + // Try to find in the map. + if (unsigned PeepholeSrc = PeepholeMap.lookup(SrcReg)) { + // Change the 1st operand. 
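+            // The low sub-register copy is redirected to the original 32-bit
+            // source recorded in PeepholeMap.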
+ MI->RemoveOperand(1); + MI->addOperand(MachineOperand::CreateReg(PeepholeSrc, false)); + } else { + DenseMap<unsigned, std::pair<unsigned, unsigned> >::iterator DI = + PeepholeDoubleRegsMap.find(SrcReg); + if (DI != PeepholeDoubleRegsMap.end()) { + std::pair<unsigned,unsigned> PeepholeSrc = DI->second; + MI->RemoveOperand(1); + MI->addOperand(MachineOperand::CreateReg(PeepholeSrc.first, + false /*isDef*/, + false /*isImp*/, + false /*isKill*/, + false /*isDead*/, + false /*isUndef*/, + false /*isEarlyClobber*/, + PeepholeSrc.second)); + } + } + } + } + + // Look for Predicated instructions. + if (!DisablePNotP) { + bool Done = false; + if (QII->isPredicated(MI)) { + MachineOperand &Op0 = MI->getOperand(0); + unsigned Reg0 = Op0.getReg(); + const TargetRegisterClass *RC0 = MRI->getRegClass(Reg0); + if (RC0->getID() == Hexagon::PredRegsRegClassID) { + // Handle instructions that have a prediate register in op0 + // (most cases of predicable instructions). + if (TargetRegisterInfo::isVirtualRegister(Reg0)) { + // Try to find in the map. + if (unsigned PeepholeSrc = PeepholeMap.lookup(Reg0)) { + // Change the 1st operand and, flip the opcode. + MI->getOperand(0).setReg(PeepholeSrc); + int NewOp = QII->getInvertedPredicatedOpcode(MI->getOpcode()); + MI->setDesc(QII->get(NewOp)); + Done = true; + } + } + } + } + + if (!Done) { + // Handle special instructions. + unsigned Op = MI->getOpcode(); + unsigned NewOp = 0; + unsigned PR = 1, S1 = 2, S2 = 3; // Operand indices. + + switch (Op) { + case Hexagon::C2_mux: + case Hexagon::C2_muxii: + NewOp = Op; + break; + case Hexagon::C2_muxri: + NewOp = Hexagon::C2_muxir; + break; + case Hexagon::C2_muxir: + NewOp = Hexagon::C2_muxri; + break; + } + if (NewOp) { + unsigned PSrc = MI->getOperand(PR).getReg(); + if (unsigned POrig = PeepholeMap.lookup(PSrc)) { + MI->getOperand(PR).setReg(POrig); + MI->setDesc(QII->get(NewOp)); + // Swap operands S1 and S2. 
+ MachineOperand Op1 = MI->getOperand(S1); + MachineOperand Op2 = MI->getOperand(S2); + ChangeOpInto(MI->getOperand(S1), Op2); + ChangeOpInto(MI->getOperand(S2), Op1); + } + } // if (NewOp) + } // if (!Done) + + } // if (!DisablePNotP) + + } // Instruction + } // Basic Block + return true; +} + +void HexagonPeephole::ChangeOpInto(MachineOperand &Dst, MachineOperand &Src) { + assert (&Dst != &Src && "Cannot duplicate into itself"); + switch (Dst.getType()) { + case MachineOperand::MO_Register: + if (Src.isReg()) { + Dst.setReg(Src.getReg()); + } else if (Src.isImm()) { + Dst.ChangeToImmediate(Src.getImm()); + } else { + llvm_unreachable("Unexpected src operand type"); + } + break; + + case MachineOperand::MO_Immediate: + if (Src.isImm()) { + Dst.setImm(Src.getImm()); + } else if (Src.isReg()) { + Dst.ChangeToRegister(Src.getReg(), Src.isDef(), Src.isImplicit(), + Src.isKill(), Src.isDead(), Src.isUndef(), + Src.isDebug()); + } else { + llvm_unreachable("Unexpected src operand type"); + } + break; + + default: + llvm_unreachable("Unexpected dst operand type"); + break; + } +} + +FunctionPass *llvm::createHexagonPeephole() { + return new HexagonPeephole(); +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp new file mode 100644 index 0000000..61c0589 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp @@ -0,0 +1,203 @@ +//===-- HexagonRegisterInfo.cpp - Hexagon Register Information ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Hexagon implementation of the TargetRegisterInfo +// class. 
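+// Besides the callee-saved/reserved register queries, it implements frame
+// index elimination, rewriting frame-index pseudo operands into a base
+// register plus immediate offset chosen by the frame lowering.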
+// +//===----------------------------------------------------------------------===// + +#include "HexagonRegisterInfo.h" +#include "Hexagon.h" +#include "HexagonMachineFunctionInfo.h" +#include "HexagonSubtarget.h" +#include "HexagonTargetMachine.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Type.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" + +using namespace llvm; + +HexagonRegisterInfo::HexagonRegisterInfo() + : HexagonGenRegisterInfo(Hexagon::R31) {} + + +bool HexagonRegisterInfo::isEHReturnCalleeSaveReg(unsigned R) const { + return R == Hexagon::R0 || R == Hexagon::R1 || R == Hexagon::R2 || + R == Hexagon::R3 || R == Hexagon::D0 || R == Hexagon::D1; +} + +bool HexagonRegisterInfo::isCalleeSaveReg(unsigned Reg) const { + return Hexagon::R16 <= Reg && Reg <= Hexagon::R27; +} + + +const MCPhysReg * +HexagonRegisterInfo::getCallerSavedRegs(const MachineFunction *MF) const { + static const MCPhysReg CallerSavedRegsV4[] = { + Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4, + Hexagon::R5, Hexagon::R6, Hexagon::R7, Hexagon::R8, Hexagon::R9, + Hexagon::R10, Hexagon::R11, Hexagon::R12, Hexagon::R13, Hexagon::R14, + Hexagon::R15, 0 + }; + + auto &HST = static_cast<const HexagonSubtarget&>(MF->getSubtarget()); + switch (HST.getHexagonArchVersion()) { + case HexagonSubtarget::V4: + case HexagonSubtarget::V5: + case HexagonSubtarget::V55: + case HexagonSubtarget::V60: + return CallerSavedRegsV4; + } + llvm_unreachable( + "Callee saved registers requested for unknown archtecture version"); +} + + +const MCPhysReg * +HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { + static const MCPhysReg CalleeSavedRegsV3[] = { + Hexagon::R16, Hexagon::R17, Hexagon::R18, Hexagon::R19, + Hexagon::R20, Hexagon::R21, Hexagon::R22, Hexagon::R23, + Hexagon::R24, Hexagon::R25, Hexagon::R26, Hexagon::R27, 0 + }; + + switch (MF->getSubtarget<HexagonSubtarget>().getHexagonArchVersion()) { + case HexagonSubtarget::V4: + case HexagonSubtarget::V5: + case HexagonSubtarget::V55: + case HexagonSubtarget::V60: + return CalleeSavedRegsV3; + } + llvm_unreachable("Callee saved registers requested for unknown architecture " + "version"); +} + +BitVector HexagonRegisterInfo::getReservedRegs(const MachineFunction &MF) + const { + BitVector Reserved(getNumRegs()); + Reserved.set(HEXAGON_RESERVED_REG_1); + Reserved.set(HEXAGON_RESERVED_REG_2); + Reserved.set(Hexagon::R29); + Reserved.set(Hexagon::R30); + Reserved.set(Hexagon::R31); + Reserved.set(Hexagon::PC); + Reserved.set(Hexagon::D15); + Reserved.set(Hexagon::LC0); + Reserved.set(Hexagon::LC1); + Reserved.set(Hexagon::SA0); + Reserved.set(Hexagon::SA1); + return Reserved; +} + + +void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, unsigned FIOp, + RegScavenger *RS) const { + // + // Hexagon_TODO: Do we need to enforce this for Hexagon? 
+ assert(SPAdj == 0 && "Unexpected"); + + MachineInstr &MI = *II; + MachineBasicBlock &MB = *MI.getParent(); + MachineFunction &MF = *MB.getParent(); + auto &HST = MF.getSubtarget<HexagonSubtarget>(); + auto &HII = *HST.getInstrInfo(); + auto &HFI = *HST.getFrameLowering(); + + unsigned BP = 0; + int FI = MI.getOperand(FIOp).getIndex(); + // Select the base pointer (BP) and calculate the actual offset from BP + // to the beginning of the object at index FI. + int Offset = HFI.getFrameIndexReference(MF, FI, BP); + // Add the offset from the instruction. + int RealOffset = Offset + MI.getOperand(FIOp+1).getImm(); + + unsigned Opc = MI.getOpcode(); + switch (Opc) { + case Hexagon::TFR_FIA: + MI.setDesc(HII.get(Hexagon::A2_addi)); + MI.getOperand(FIOp).ChangeToImmediate(RealOffset); + MI.RemoveOperand(FIOp+1); + return; + case Hexagon::TFR_FI: + // Set up the instruction for updating below. + MI.setDesc(HII.get(Hexagon::A2_addi)); + break; + } + + if (HII.isValidOffset(Opc, RealOffset)) { + MI.getOperand(FIOp).ChangeToRegister(BP, false); + MI.getOperand(FIOp+1).ChangeToImmediate(RealOffset); + return; + } + +#ifndef NDEBUG + const Function *F = MF.getFunction(); + dbgs() << "In function "; + if (F) dbgs() << F->getName(); + else dbgs() << "<?>"; + dbgs() << ", BB#" << MB.getNumber() << "\n" << MI; +#endif + llvm_unreachable("Unhandled instruction"); +} + + +unsigned HexagonRegisterInfo::getRARegister() const { + return Hexagon::R31; +} + + +unsigned HexagonRegisterInfo::getFrameRegister(const MachineFunction + &MF) const { + const HexagonFrameLowering *TFI = getFrameLowering(MF); + if (TFI->hasFP(MF)) + return getFrameRegister(); + return getStackRegister(); +} + + +unsigned HexagonRegisterInfo::getFrameRegister() const { + return Hexagon::R30; +} + + +unsigned HexagonRegisterInfo::getStackRegister() const { + return Hexagon::R29; +} + + +bool HexagonRegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) + const { + return MF.getSubtarget<HexagonSubtarget>().getFrameLowering()->hasFP(MF); +} + + +unsigned HexagonRegisterInfo::getFirstCallerSavedNonParamReg() const { + return Hexagon::R6; +} + + +#define GET_REGINFO_TARGET_DESC +#include "HexagonGenRegisterInfo.inc" diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h new file mode 100644 index 0000000..db7e0f2 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h @@ -0,0 +1,89 @@ +//==- HexagonRegisterInfo.h - Hexagon Register Information Impl --*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Hexagon implementation of the TargetRegisterInfo +// class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONREGISTERINFO_H +#define LLVM_LIB_TARGET_HEXAGON_HEXAGONREGISTERINFO_H + +#include "llvm/MC/MachineLocation.h" +#include "llvm/Target/TargetRegisterInfo.h" + +#define GET_REGINFO_HEADER +#include "HexagonGenRegisterInfo.inc" + +// +// We try not to hard code the reserved registers in our code, +// so the following two macros were defined. However, there +// are still a few places that R11 and R10 are hard wired. +// See below. If, in the future, we decided to change the reserved +// register. 
Don't forget changing the following places. +// +// 1. the "Defs" set of STriw_pred in HexagonInstrInfo.td +// 2. the "Defs" set of LDri_pred in HexagonInstrInfo.td +// 3. the definition of "IntRegs" in HexagonRegisterInfo.td +// 4. the definition of "DoubleRegs" in HexagonRegisterInfo.td +// +#define HEXAGON_RESERVED_REG_1 Hexagon::R10 +#define HEXAGON_RESERVED_REG_2 Hexagon::R11 + +namespace llvm { +class HexagonRegisterInfo : public HexagonGenRegisterInfo { +public: + HexagonRegisterInfo(); + + /// Code Generation virtual methods... + const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) + const override; + + + BitVector getReservedRegs(const MachineFunction &MF) const override; + + void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, + unsigned FIOperandNum, RegScavenger *RS = nullptr) const override; + + /// Returns true since we may need scavenging for a temporary register + /// when generating hardware loop instructions. + bool requiresRegisterScavenging(const MachineFunction &MF) const override { + return true; + } + + /// Returns true. Spill code for predicate registers might need an extra + /// register. + bool requiresFrameIndexScavenging(const MachineFunction &MF) const override { + return true; + } + + /// Returns true if the frame pointer is valid. + bool useFPForScavengingIndex(const MachineFunction &MF) const override; + + bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override { + return true; + } + + // Debug information queries. + unsigned getRARegister() const; + unsigned getFrameRegister(const MachineFunction &MF) const override; + unsigned getFrameRegister() const; + unsigned getStackRegister() const; + + const MCPhysReg *getCallerSavedRegs(const MachineFunction *MF) const; + + unsigned getFirstCallerSavedNonParamReg() const; + + bool isEHReturnCalleeSaveReg(unsigned Reg) const; + bool isCalleeSaveReg(unsigned Reg) const; +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td new file mode 100644 index 0000000..81629dc --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td @@ -0,0 +1,270 @@ +//===-- HexagonRegisterInfo.td - Hexagon Register defs -----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Declarations that describe the Hexagon register file. +//===----------------------------------------------------------------------===// + +let Namespace = "Hexagon" in { + + class HexagonReg<bits<5> num, string n, list<string> alt = [], + list<Register> alias = []> : Register<n> { + field bits<5> Num; + let Aliases = alias; + let HWEncoding{4-0} = num; + } + + class HexagonDoubleReg<bits<5> num, string n, list<Register> subregs, + list<string> alt = []> : + RegisterWithSubRegs<n, subregs> { + field bits<5> Num; + + let AltNames = alt; + let HWEncoding{4-0} = num; + } + + // Registers are identified with 5-bit ID numbers. + // Ri - 32-bit integer registers. + class Ri<bits<5> num, string n, list<string> alt = []> : HexagonReg<num, n, alt> { + let Num = num; + } + + // Rf - 32-bit floating-point registers. 
+ class Rf<bits<5> num, string n> : HexagonReg<num, n> { + let Num = num; + } + + + // Rd - 64-bit registers. + class Rd<bits<5> num, string n, list<Register> subregs> : + HexagonDoubleReg<num, n, subregs> { + let Num = num; + let SubRegs = subregs; + } + + // Rp - predicate registers + class Rp<bits<5> num, string n> : HexagonReg<num, n> { + let Num = num; + } + + + // Rq - vector predicate registers + class Rq<bits<3> num, string n> : Register<n, []> { + let HWEncoding{2-0} = num; + } + + // Rc - control registers + class Rc<bits<5> num, string n, + list<string> alt = [], list<Register> alias = []> : + HexagonReg<num, n, alt, alias> { + let Num = num; + } + + // Rcc - 64-bit control registers. + class Rcc<bits<5> num, string n, list<Register> subregs, + list<string> alt = []> : + HexagonDoubleReg<num, n, subregs, alt> { + let Num = num; + let SubRegs = subregs; + } + + // Mx - address modifier registers + class Mx<bits<1> num, string n> : HexagonReg<{0b0000, num}, n> { + let Num = !cast<bits<5>>(num); + } + + def subreg_loreg : SubRegIndex<32>; + def subreg_hireg : SubRegIndex<32, 32>; + def subreg_overflow : SubRegIndex<1, 0>; + + // Integer registers. + foreach i = 0-28 in { + def R#i : Ri<i, "r"#i>, DwarfRegNum<[i]>; + } + + def R29 : Ri<29, "r29", ["sp"]>, DwarfRegNum<[29]>; + def R30 : Ri<30, "r30", ["fp"]>, DwarfRegNum<[30]>; + def R31 : Ri<31, "r31", ["lr"]>, DwarfRegNum<[31]>; + + // Aliases of the R* registers used to hold 64-bit int values (doubles). + let SubRegIndices = [subreg_loreg, subreg_hireg], CoveredBySubRegs = 1 in { + def D0 : Rd< 0, "r1:0", [R0, R1]>, DwarfRegNum<[32]>; + def D1 : Rd< 2, "r3:2", [R2, R3]>, DwarfRegNum<[34]>; + def D2 : Rd< 4, "r5:4", [R4, R5]>, DwarfRegNum<[36]>; + def D3 : Rd< 6, "r7:6", [R6, R7]>, DwarfRegNum<[38]>; + def D4 : Rd< 8, "r9:8", [R8, R9]>, DwarfRegNum<[40]>; + def D5 : Rd<10, "r11:10", [R10, R11]>, DwarfRegNum<[42]>; + def D6 : Rd<12, "r13:12", [R12, R13]>, DwarfRegNum<[44]>; + def D7 : Rd<14, "r15:14", [R14, R15]>, DwarfRegNum<[46]>; + def D8 : Rd<16, "r17:16", [R16, R17]>, DwarfRegNum<[48]>; + def D9 : Rd<18, "r19:18", [R18, R19]>, DwarfRegNum<[50]>; + def D10 : Rd<20, "r21:20", [R20, R21]>, DwarfRegNum<[52]>; + def D11 : Rd<22, "r23:22", [R22, R23]>, DwarfRegNum<[54]>; + def D12 : Rd<24, "r25:24", [R24, R25]>, DwarfRegNum<[56]>; + def D13 : Rd<26, "r27:26", [R26, R27]>, DwarfRegNum<[58]>; + def D14 : Rd<28, "r29:28", [R28, R29]>, DwarfRegNum<[60]>; + def D15 : Rd<30, "r31:30", [R30, R31]>, DwarfRegNum<[62]>; + } + + // Predicate registers. + def P0 : Rp<0, "p0">, DwarfRegNum<[63]>; + def P1 : Rp<1, "p1">, DwarfRegNum<[64]>; + def P2 : Rp<2, "p2">, DwarfRegNum<[65]>; + def P3 : Rp<3, "p3">, DwarfRegNum<[66]>; + + // Modifier registers. + // C6 and C7 can also be M0 and M1, but register names must be unique, even + // if belonging to different register classes. + def M0 : Mx<0, "m0">, DwarfRegNum<[72]>; + def M1 : Mx<1, "m1">, DwarfRegNum<[73]>; + + // Fake register to represent USR.OVF bit. Artihmetic/saturating instruc- + // tions modify this bit, and multiple such instructions are allowed in the + // same packet. We need to ignore output dependencies on this bit, but not + // on the entire USR. + def USR_OVF : Rc<?, "usr.ovf">; + + // Control registers. 
+ def SA0 : Rc<0, "sa0", ["c0"]>, DwarfRegNum<[67]>; + def LC0 : Rc<1, "lc0", ["c1"]>, DwarfRegNum<[68]>; + def SA1 : Rc<2, "sa1", ["c2"]>, DwarfRegNum<[69]>; + def LC1 : Rc<3, "lc1", ["c3"]>, DwarfRegNum<[70]>; + def P3_0 : Rc<4, "p3:0", ["c4"], [P0, P1, P2, P3]>, + DwarfRegNum<[71]>; + def C5 : Rc<5, "c5", ["c5"]>, DwarfRegNum<[72]>; // future use + def C6 : Rc<6, "c6", [], [M0]>, DwarfRegNum<[73]>; + def C7 : Rc<7, "c7", [], [M1]>, DwarfRegNum<[74]>; + + def USR : Rc<8, "usr", ["c8"]>, DwarfRegNum<[75]> { + let SubRegIndices = [subreg_overflow]; + let SubRegs = [USR_OVF]; + } + def PC : Rc<9, "pc">, DwarfRegNum<[76]>; + def UGP : Rc<10, "ugp", ["c10"]>, DwarfRegNum<[77]>; + def GP : Rc<11, "gp">, DwarfRegNum<[78]>; + def CS0 : Rc<12, "cs0", ["c12"]>, DwarfRegNum<[79]>; + def CS1 : Rc<13, "cs1", ["c13"]>, DwarfRegNum<[80]>; + def UPCL : Rc<14, "upcyclelo", ["c14"]>, DwarfRegNum<[81]>; + def UPCH : Rc<15, "upcyclehi", ["c15"]>, DwarfRegNum<[82]>; +} + + // Control registers pairs. + let SubRegIndices = [subreg_loreg, subreg_hireg], CoveredBySubRegs = 1 in { + def C1_0 : Rcc<0, "c1:0", [SA0, LC0], ["lc0:sa0"]>, DwarfRegNum<[67]>; + def C3_2 : Rcc<2, "c3:2", [SA1, LC1], ["lc1:sa1"]>, DwarfRegNum<[69]>; + def C7_6 : Rcc<6, "c7:6", [C6, C7], ["m1:0"]>, DwarfRegNum<[72]>; + def C9_8 : Rcc<8, "c9:8", [USR, PC]>, DwarfRegNum<[74]>; + def C11_10 : Rcc<10, "c11:10", [UGP, GP]>, DwarfRegNum<[76]>; + def CS : Rcc<12, "c13:12", [CS0, CS1], ["cs1:0"]>, DwarfRegNum<[78]>; + def UPC : Rcc<14, "c15:14", [UPCL, UPCH]>, DwarfRegNum<[80]>; + } + + foreach i = 0-31 in { + def V#i : Ri<i, "v"#i>, DwarfRegNum<[!add(i, 99)]>; + } + + // Aliases of the V* registers used to hold double vec values. + let SubRegIndices = [subreg_loreg, subreg_hireg], CoveredBySubRegs = 1 in { + def W0 : Rd< 0, "v1:0", [V0, V1]>, DwarfRegNum<[99]>; + def W1 : Rd< 2, "v3:2", [V2, V3]>, DwarfRegNum<[101]>; + def W2 : Rd< 4, "v5:4", [V4, V5]>, DwarfRegNum<[103]>; + def W3 : Rd< 6, "v7:6", [V6, V7]>, DwarfRegNum<[105]>; + def W4 : Rd< 8, "v9:8", [V8, V9]>, DwarfRegNum<[107]>; + def W5 : Rd<10, "v11:10", [V10, V11]>, DwarfRegNum<[109]>; + def W6 : Rd<12, "v13:12", [V12, V13]>, DwarfRegNum<[111]>; + def W7 : Rd<14, "v15:14", [V14, V15]>, DwarfRegNum<[113]>; + def W8 : Rd<16, "v17:16", [V16, V17]>, DwarfRegNum<[115]>; + def W9 : Rd<18, "v19:18", [V18, V19]>, DwarfRegNum<[117]>; + def W10 : Rd<20, "v21:20", [V20, V21]>, DwarfRegNum<[119]>; + def W11 : Rd<22, "v23:22", [V22, V23]>, DwarfRegNum<[121]>; + def W12 : Rd<24, "v25:24", [V24, V25]>, DwarfRegNum<[123]>; + def W13 : Rd<26, "v27:26", [V26, V27]>, DwarfRegNum<[125]>; + def W14 : Rd<28, "v29:28", [V28, V29]>, DwarfRegNum<[127]>; + def W15 : Rd<30, "v31:30", [V30, V31]>, DwarfRegNum<[129]>; + } + + // Vector Predicate registers. + def Q0 : Rq<0, "q0">, DwarfRegNum<[131]>; + def Q1 : Rq<1, "q1">, DwarfRegNum<[132]>; + def Q2 : Rq<2, "q2">, DwarfRegNum<[133]>; + def Q3 : Rq<3, "q3">, DwarfRegNum<[134]>; + +// Register classes. +// +// FIXME: the register order should be defined in terms of the preferred +// allocation order... +// +def IntRegs : RegisterClass<"Hexagon", [i32, f32, v4i8, v2i16], 32, + (add (sequence "R%u", 0, 9), + (sequence "R%u", 12, 28), + R10, R11, R29, R30, R31)> { +} + +// Registers are listed in reverse order for allocation preference reasons. 
+def IntRegsLow8 : RegisterClass<"Hexagon", [i32], 32, + (add R7, R6, R5, R4, R3, R2, R1, R0)> ; + +def DoubleRegs : RegisterClass<"Hexagon", [i64, f64, v8i8, v4i16, v2i32], 64, + (add (sequence "D%u", 0, 4), + (sequence "D%u", 6, 13), D5, D14, D15)>; + +def VectorRegs : RegisterClass<"Hexagon", [v64i8, v32i16, v16i32, v8i64], 512, + (add (sequence "V%u", 0, 31))>; + +def VecDblRegs : RegisterClass<"Hexagon", + [v128i8, v64i16, v32i32, v16i64], 1024, + (add (sequence "W%u", 0, 15))>; + +def VectorRegs128B : RegisterClass<"Hexagon", + [v128i8, v64i16, v32i32, v16i64], 1024, + (add (sequence "V%u", 0, 31))>; + +def VecDblRegs128B : RegisterClass<"Hexagon", + [v256i8,v128i16,v64i32,v32i64], 2048, + (add (sequence "W%u", 0, 15))>; + +def VecPredRegs : RegisterClass<"Hexagon", [v512i1], 512, + (add (sequence "Q%u", 0, 3))>; + +def VecPredRegs128B : RegisterClass<"Hexagon", [v1024i1], 1024, + (add (sequence "Q%u", 0, 3))>; + +def PredRegs : RegisterClass<"Hexagon", + [i1, v2i1, v4i1, v8i1, v4i8, v2i16, i32], 32, + (add (sequence "P%u", 0, 3))> +{ + let Size = 32; +} + +let Size = 32 in +def ModRegs : RegisterClass<"Hexagon", [i32], 32, (add M0, M1)>; + +let Size = 32, isAllocatable = 0 in +def CtrRegs : RegisterClass<"Hexagon", [i32], 32, + (add LC0, SA0, LC1, SA1, + P3_0, + M0, M1, C6, C7, CS0, CS1, UPCL, UPCH, + USR, USR_OVF, UGP, GP, PC)>; + +let Size = 64, isAllocatable = 0 in +def CtrRegs64 : RegisterClass<"Hexagon", [i64], 64, + (add C1_0, C3_2, C7_6, C9_8, C11_10, CS, UPC)>; + +def VolatileV3 { + list<Register> Regs = [D0, D1, D2, D3, D4, D5, D6, D7, + R28, R31, + P0, P1, P2, P3, + M0, M1, + LC0, LC1, SA0, SA1, USR, USR_OVF]; +} + +def PositiveHalfWord : PatLeaf<(i32 IntRegs:$a), +[{ + return isPositiveHalfWord(N); +}]>; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSchedule.td b/contrib/llvm/lib/Target/Hexagon/HexagonSchedule.td new file mode 100644 index 0000000..6e4987b --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonSchedule.td @@ -0,0 +1,24 @@ +//===- HexagonSchedule.td - Hexagon Scheduling Definitions -*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// V4 Machine Info + +//===----------------------------------------------------------------------===// + +include "HexagonScheduleV4.td" + +// V55 Machine Info + +include "HexagonScheduleV55.td" + +//===----------------------------------------------------------------------===// +// V60 Machine Info - +//===----------------------------------------------------------------------===// + +include "HexagonScheduleV60.td" + diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV4.td new file mode 100644 index 0000000..67af147 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV4.td @@ -0,0 +1,206 @@ +//=-HexagonScheduleV4.td - HexagonV4 Scheduling Definitions --*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// There are four SLOTS (four parallel pipelines) in Hexagon V4 machine. +// This file describes that machine information. 
+
+//
+// |===========|==================================================|
+// | PIPELINE | Instruction Classes |
+// |===========|==================================================|
+// | SLOT0 | LD ST ALU32 MEMOP NV SYSTEM |
+// |-----------|--------------------------------------------------|
+// | SLOT1 | LD ST ALU32 |
+// |-----------|--------------------------------------------------|
+// | SLOT2 | XTYPE ALU32 J JR |
+// |-----------|--------------------------------------------------|
+// | SLOT3 | XTYPE ALU32 J CR |
+// |===========|==================================================|
+
+// Functional Units.
+def SLOT0 : FuncUnit;
+def SLOT1 : FuncUnit;
+def SLOT2 : FuncUnit;
+def SLOT3 : FuncUnit;
+// Endloop is a pseudo instruction that is encoded with 2 bits in a packet
+// rather than taking an execution slot. This special unit is needed
+// to schedule an ENDLOOP with 4 other instructions.
+def SLOT_ENDLOOP: FuncUnit;
+
+// Itinerary classes.
+def PSEUDO : InstrItinClass;
+def PSEUDOM : InstrItinClass;
+// ALU64/M/S Instruction classes of V2 are collectively known as XTYPE in V4.
+def DUPLEX : InstrItinClass;
+def PREFIX : InstrItinClass;
+def COMPOUND_CJ_ARCHDEPSLOT : InstrItinClass;
+def COMPOUND : InstrItinClass;
+
+def ALU32_2op_tc_1_SLOT0123 : InstrItinClass;
+def ALU32_2op_tc_2early_SLOT0123 : InstrItinClass;
+def ALU32_3op_tc_2early_SLOT0123 : InstrItinClass;
+def ALU32_3op_tc_1_SLOT0123 : InstrItinClass;
+def ALU32_3op_tc_2_SLOT0123 : InstrItinClass;
+def ALU32_ADDI_tc_1_SLOT0123 : InstrItinClass;
+def ALU64_tc_1_SLOT23 : InstrItinClass;
+def ALU64_tc_1or2_SLOT23 : InstrItinClass;
+def ALU64_tc_2_SLOT23 : InstrItinClass;
+def ALU64_tc_2early_SLOT23 : InstrItinClass;
+def ALU64_tc_3x_SLOT23 : InstrItinClass;
+def CR_tc_2_SLOT3 : InstrItinClass;
+def CR_tc_2early_SLOT23 : InstrItinClass;
+def CR_tc_2early_SLOT3 : InstrItinClass;
+def CR_tc_3x_SLOT23 : InstrItinClass;
+def CR_tc_3x_SLOT3 : InstrItinClass;
+def J_tc_2early_SLOT23 : InstrItinClass;
+def J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT : InstrItinClass;
+def J_tc_2early_SLOT2 : InstrItinClass;
+def LD_tc_ld_SLOT01 : InstrItinClass;
+def LD_tc_ld_SLOT0 : InstrItinClass;
+def LD_tc_3or4stall_SLOT0 : InstrItinClass;
+def M_tc_1_SLOT23 : InstrItinClass;
+def M_tc_1or2_SLOT23 : InstrItinClass;
+def M_tc_2_SLOT23 : InstrItinClass;
+def M_tc_3_SLOT23 : InstrItinClass;
+def M_tc_3x_SLOT23 : InstrItinClass;
+def M_tc_3or4x_SLOT23 : InstrItinClass;
+def ST_tc_st_SLOT01 : InstrItinClass;
+def ST_tc_st_SLOT0 : InstrItinClass;
+def ST_tc_ld_SLOT0 : InstrItinClass;
+def ST_tc_3stall_SLOT0 : InstrItinClass;
+def S_2op_tc_1_SLOT23 : InstrItinClass;
+def S_2op_tc_2_SLOT23 : InstrItinClass;
+def S_2op_tc_2early_SLOT23 : InstrItinClass;
+def S_2op_tc_3or4x_SLOT23 : InstrItinClass;
+def S_3op_tc_1_SLOT23 : InstrItinClass;
+def S_3op_tc_1or2_SLOT23 : InstrItinClass;
+def S_3op_tc_2_SLOT23 : InstrItinClass;
+def S_3op_tc_2early_SLOT23 : InstrItinClass;
+def S_3op_tc_3_SLOT23 : InstrItinClass;
+def S_3op_tc_3x_SLOT23 : InstrItinClass;
+def NCJ_tc_3or4stall_SLOT0 : InstrItinClass;
+def V2LDST_tc_ld_SLOT01 : InstrItinClass;
+def V2LDST_tc_st_SLOT0 : InstrItinClass;
+def V2LDST_tc_st_SLOT01 : InstrItinClass;
+def V4LDST_tc_ld_SLOT01 : InstrItinClass;
+def V4LDST_tc_st_SLOT0 : InstrItinClass;
+def V4LDST_tc_st_SLOT01 : InstrItinClass;
+def J_tc_2early_SLOT0123 : InstrItinClass;
+def EXTENDER_tc_1_SLOT0123 : InstrItinClass;
+def S_3op_tc_3stall_SLOT23 : InstrItinClass;
+
+
+def HexagonItinerariesV4 :
+ ProcessorItineraries<[SLOT0, SLOT1, SLOT2,
SLOT3, SLOT_ENDLOOP], [], [ + // ALU32 + InstrItinData<ALU32_2op_tc_1_SLOT0123 , + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + InstrItinData<ALU32_2op_tc_2early_SLOT0123, + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + InstrItinData<ALU32_3op_tc_1_SLOT0123 , + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + InstrItinData<ALU32_3op_tc_2early_SLOT0123, + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + InstrItinData<ALU32_3op_tc_2_SLOT0123 , + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + InstrItinData<ALU32_ADDI_tc_1_SLOT0123 , + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + + // ALU64 + InstrItinData<ALU64_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<ALU64_tc_1or2_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<ALU64_tc_2_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<ALU64_tc_2early_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<ALU64_tc_3x_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + + // CR -> System + InstrItinData<CR_tc_2_SLOT3 , [InstrStage<1, [SLOT3]>]>, + InstrItinData<CR_tc_2early_SLOT3 , [InstrStage<1, [SLOT3]>]>, + InstrItinData<CR_tc_3x_SLOT3 , [InstrStage<1, [SLOT3]>]>, + + // Jump (conditional/unconditional/return etc) + // CR + InstrItinData<CR_tc_2early_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<CR_tc_3x_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + // J + InstrItinData<J_tc_2early_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + // JR + InstrItinData<J_tc_2early_SLOT2 , [InstrStage<1, [SLOT2]>]>, + + //Load + InstrItinData<LD_tc_ld_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>, + InstrItinData<LD_tc_ld_SLOT0 , [InstrStage<1, [SLOT0]>]>, + InstrItinData<LD_tc_3or4stall_SLOT0 , [InstrStage<1, [SLOT0]>]>, + + // M + InstrItinData<M_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<M_tc_1or2_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<M_tc_2_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<M_tc_3_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<M_tc_3x_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<M_tc_3or4x_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + + // Store + // ST + InstrItinData<ST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>, + // ST0 + InstrItinData<ST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>]>, + InstrItinData<ST_tc_ld_SLOT0 , [InstrStage<1, [SLOT0]>]>, + + // S + InstrItinData<S_2op_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<S_2op_tc_2_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<S_2op_tc_2early_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<S_2op_tc_3or4x_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<S_3op_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<S_3op_tc_1or2_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<S_3op_tc_2early_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<S_3op_tc_2_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<S_3op_tc_3_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<S_3op_tc_3x_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + + // SYS + InstrItinData<ST_tc_3stall_SLOT0 , [InstrStage<1, [SLOT0]>]>, + + // New Value Compare Jump + InstrItinData<NCJ_tc_3or4stall_SLOT0 , [InstrStage<1, [SLOT0]>]>, + + // Mem ops - MEM_V4 + InstrItinData<V2LDST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>]>, + InstrItinData<V2LDST_tc_ld_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>, + InstrItinData<V2LDST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>, + InstrItinData<V4LDST_tc_st_SLOT0 , [InstrStage<1, 
[SLOT0]>]>,
+ InstrItinData<V4LDST_tc_ld_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<V4LDST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
+
+ InstrItinData<DUPLEX , [InstrStage<1, [SLOT0]>]>,
+
+ // ENDLOOP
+ InstrItinData<J_tc_2early_SLOT0123 , [InstrStage<1, [SLOT_ENDLOOP]>]>,
+
+ // Extender/PREFIX
+ InstrItinData<EXTENDER_tc_1_SLOT0123,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+
+ InstrItinData<COMPOUND , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<PSEUDO , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<PSEUDOM, [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [SLOT2, SLOT3]>]>
+ ]>;
+
+def HexagonModelV4 : SchedMachineModel {
+ // Max issue per cycle == bundle width.
+ let IssueWidth = 4;
+ let Itineraries = HexagonItinerariesV4;
+ let LoadLatency = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Hexagon V4 Resource Definitions -
+//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV55.td b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV55.td
new file mode 100644
index 0000000..d9ad25d
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV55.td
@@ -0,0 +1,170 @@
+//=-HexagonScheduleV55.td - HexagonV55 Scheduling Definitions --*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// There are four SLOTS (four parallel pipelines) in Hexagon V55 machine.
+// This file describes that machine information.
+
+//
+// |===========|==================================================|
+// | PIPELINE | Instruction Classes |
+// |===========|==================================================|
+// | SLOT0 | LD ST ALU32 MEMOP NV SYSTEM |
+// |-----------|--------------------------------------------------|
+// | SLOT1 | LD ST ALU32 |
+// |-----------|--------------------------------------------------|
+// | SLOT2 | XTYPE ALU32 J JR |
+// |-----------|--------------------------------------------------|
+// | SLOT3 | XTYPE ALU32 J CR |
+// |===========|==================================================|
+
+def CJ_tc_1_SLOT23 : InstrItinClass;
+def CJ_tc_2early_SLOT23 : InstrItinClass;
+def COPROC_VMEM_vtc_long_SLOT01 : InstrItinClass;
+def COPROC_VX_vtc_long_SLOT23 : InstrItinClass;
+def COPROC_VX_vtc_SLOT23 : InstrItinClass;
+def J_tc_3stall_SLOT2 : InstrItinClass;
+def MAPPING_tc_1_SLOT0123 : InstrItinClass;
+def M_tc_3stall_SLOT23 : InstrItinClass;
+def SUBINSN_tc_1_SLOT01 : InstrItinClass;
+def SUBINSN_tc_2early_SLOT0 : InstrItinClass;
+def SUBINSN_tc_2early_SLOT01 : InstrItinClass;
+def SUBINSN_tc_3stall_SLOT0 : InstrItinClass;
+def SUBINSN_tc_ld_SLOT0 : InstrItinClass;
+def SUBINSN_tc_ld_SLOT01 : InstrItinClass;
+def SUBINSN_tc_st_SLOT01 : InstrItinClass;
+
+def HexagonItinerariesV55 :
+ ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP], [], [
+ // ALU32
+ InstrItinData<ALU32_2op_tc_1_SLOT0123 ,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU32_2op_tc_2early_SLOT0123,
+ [InstrStage<2, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU32_3op_tc_1_SLOT0123 ,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU32_3op_tc_2_SLOT0123 ,
+ [InstrStage<2, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU32_3op_tc_2early_SLOT0123,
[InstrStage<2, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + InstrItinData<ALU32_ADDI_tc_1_SLOT0123 , + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + + // ALU64 + InstrItinData<ALU64_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<ALU64_tc_2_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<ALU64_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<ALU64_tc_3x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>, + + // CR -> System + InstrItinData<CR_tc_2_SLOT3 , [InstrStage<2, [SLOT3]>]>, + InstrItinData<CR_tc_2early_SLOT3 , [InstrStage<2, [SLOT3]>]>, + InstrItinData<CR_tc_3x_SLOT3 , [InstrStage<3, [SLOT3]>]>, + + // Jump (conditional/unconditional/return etc) + InstrItinData<CR_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<CR_tc_3x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>, + InstrItinData<CJ_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<CJ_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<J_tc_2early_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT , [InstrStage<1, [SLOT2, SLOT3]>]>, + + // JR + InstrItinData<J_tc_2early_SLOT2 , [InstrStage<2, [SLOT2]>]>, + InstrItinData<J_tc_3stall_SLOT2 , [InstrStage<3, [SLOT2]>]>, + + // Extender + InstrItinData<EXTENDER_tc_1_SLOT0123, + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + + // Load + InstrItinData<LD_tc_ld_SLOT01 , [InstrStage<3, [SLOT0, SLOT1]>]>, + InstrItinData<LD_tc_3or4stall_SLOT0, [InstrStage<3, [SLOT0]>]>, + InstrItinData<LD_tc_ld_SLOT0 , [InstrStage<3, [SLOT0]>]>, + + // M + InstrItinData<M_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<M_tc_2_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<M_tc_3_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>, + InstrItinData<M_tc_3x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>, + InstrItinData<M_tc_3or4x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>, + InstrItinData<M_tc_3stall_SLOT23, [InstrStage<3, [SLOT2, SLOT3]>]>, + + // Store + InstrItinData<ST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>, + InstrItinData<ST_tc_3stall_SLOT0, [InstrStage<3, [SLOT0]>]>, + InstrItinData<ST_tc_ld_SLOT0 , [InstrStage<3, [SLOT0]>]>, + InstrItinData<ST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>]>, + + // Subinsn + InstrItinData<SUBINSN_tc_2early_SLOT0, [InstrStage<2, [SLOT0]>]>, + InstrItinData<SUBINSN_tc_3stall_SLOT0, [InstrStage<3, [SLOT0]>]>, + InstrItinData<SUBINSN_tc_ld_SLOT0 , [InstrStage<3, [SLOT0]>]>, + InstrItinData<SUBINSN_tc_1_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>, + InstrItinData<SUBINSN_tc_2early_SLOT01, + [InstrStage<2, [SLOT0, SLOT1]>]>, + InstrItinData<SUBINSN_tc_ld_SLOT01 , [InstrStage<3, [SLOT0, SLOT1]>]>, + InstrItinData<SUBINSN_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>, + + // S + InstrItinData<S_2op_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<S_2op_tc_2_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<S_2op_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<S_2op_tc_3or4x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>, + InstrItinData<S_3op_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<S_3op_tc_2_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<S_3op_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<S_3op_tc_3_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>, + InstrItinData<S_3op_tc_3stall_SLOT23, [InstrStage<3, [SLOT2, SLOT3]>]>, + InstrItinData<S_3op_tc_3x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>, + + // New Value Compare Jump + 
InstrItinData<NCJ_tc_3or4stall_SLOT0, [InstrStage<3, [SLOT0]>]>, + + // Mem ops + InstrItinData<V2LDST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>]>, + InstrItinData<V2LDST_tc_ld_SLOT01 , [InstrStage<2, [SLOT0, SLOT1]>]>, + InstrItinData<V2LDST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>, + InstrItinData<V4LDST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>]>, + InstrItinData<V4LDST_tc_ld_SLOT01 , [InstrStage<3, [SLOT0, SLOT1]>]>, + InstrItinData<V4LDST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>, + + // Endloop + InstrItinData<J_tc_2early_SLOT0123, [InstrStage<2, [SLOT_ENDLOOP]>]>, + + // Vector + InstrItinData<COPROC_VMEM_vtc_long_SLOT01, + [InstrStage<3, [SLOT0, SLOT1]>]>, + InstrItinData<COPROC_VX_vtc_long_SLOT23 , + [InstrStage<3, [SLOT2, SLOT3]>]>, + InstrItinData<COPROC_VX_vtc_SLOT23 , + [InstrStage<3, [SLOT2, SLOT3]>]>, + InstrItinData<MAPPING_tc_1_SLOT0123 , + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + + // Misc + InstrItinData<COMPOUND_CJ_ARCHDEPSLOT , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<COMPOUND , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<DUPLEX , [InstrStage<1, [SLOT0]>]>, + InstrItinData<PREFIX , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + InstrItinData<PSEUDO , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + InstrItinData<PSEUDOM, [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [SLOT2, SLOT3]>]> + + ]>; + +def HexagonModelV55 : SchedMachineModel { + // Max issue per cycle == bundle width. + let IssueWidth = 4; + let Itineraries = HexagonItinerariesV55; + let LoadLatency = 1; +} + +//===----------------------------------------------------------------------===// +// Hexagon V4 Resource Definitions - +//===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV60.td b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV60.td new file mode 100644 index 0000000..2ccff82 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV60.td @@ -0,0 +1,310 @@ +//=-HexagonScheduleV60.td - HexagonV60 Scheduling Definitions *- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// CVI pipes from the "Hexagon Multimedia Co-Processor Extensions Arch Spec". +def CVI_ST : FuncUnit; +def CVI_XLANE : FuncUnit; +def CVI_SHIFT : FuncUnit; +def CVI_MPY0 : FuncUnit; +def CVI_MPY1 : FuncUnit; +def CVI_LD : FuncUnit; + +// Combined functional units. +def CVI_XLSHF : FuncUnit; +def CVI_MPY01 : FuncUnit; +def CVI_ALL : FuncUnit; + +// Combined functional unit data. +def HexagonComboFuncsV60 : + ComboFuncUnits<[ + ComboFuncData<CVI_XLSHF , [CVI_XLANE, CVI_SHIFT]>, + ComboFuncData<CVI_MPY01 , [CVI_MPY0, CVI_MPY1]>, + ComboFuncData<CVI_ALL , [CVI_ST, CVI_XLANE, CVI_SHIFT, + CVI_MPY0, CVI_MPY1, CVI_LD]> + ]>; + +// Note: When adding additional vector scheduling classes, add the +// corresponding methods to the class HexagonInstrInfo. 
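+// In the itinerary entries further below, the CVI_VX classes are tied to the
+// multiply units, CVI_VP to CVI_XLANE, CVI_VS to CVI_SHIFT, and CVI_VA may
+// use any one of the four single vector units.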
+def CVI_VA : InstrItinClass; +def CVI_VA_DV : InstrItinClass; +def CVI_VX_LONG : InstrItinClass; +def CVI_VX_LATE : InstrItinClass; +def CVI_VX : InstrItinClass; +def CVI_VX_DV_LONG : InstrItinClass; +def CVI_VX_DV : InstrItinClass; +def CVI_VX_DV_SLOT2 : InstrItinClass; +def CVI_VP : InstrItinClass; +def CVI_VP_LONG : InstrItinClass; +def CVI_VP_VS_EARLY : InstrItinClass; +def CVI_VP_VS_LONG_EARLY : InstrItinClass; +def CVI_VP_VS_LONG : InstrItinClass; +def CVI_VP_VS : InstrItinClass; +def CVI_VP_DV : InstrItinClass; +def CVI_VS : InstrItinClass; +def CVI_VINLANESAT : InstrItinClass; +def CVI_VM_LD : InstrItinClass; +def CVI_VM_TMP_LD : InstrItinClass; +def CVI_VM_CUR_LD : InstrItinClass; +def CVI_VM_VP_LDU : InstrItinClass; +def CVI_VM_ST : InstrItinClass; +def CVI_VM_NEW_ST : InstrItinClass; +def CVI_VM_STU : InstrItinClass; +def CVI_HIST : InstrItinClass; +def CVI_VA_EXT : InstrItinClass; + +// There are four SLOTS (four parallel pipelines) in Hexagon V60 machine. +// This file describes that machine information. +// +// |===========|==================================================| +// | PIPELINE | Instruction Classes | +// |===========|==================================================| +// | SLOT0 | LD ST ALU32 MEMOP NV SYSTEM | +// |-----------|--------------------------------------------------| +// | SLOT1 | LD ST ALU32 | +// |-----------|--------------------------------------------------| +// | SLOT2 | XTYPE ALU32 J JR | +// |-----------|--------------------------------------------------| +// | SLOT3 | XTYPE ALU32 J CR | +// |===========|==================================================| +// +// +// In addition to using the above SLOTS, there are also six vector pipelines +// in the CVI co-processor in the Hexagon V60 machine. +// +// |=========| |=========| |=========| |=========| |=========| |=========| +// SLOT | CVI_LD | |CVI_MPY3 | |CVI_MPY2 | |CVI_SHIFT| |CVI_XLANE| | CVI_ST | +// ==== |=========| |=========| |=========| |=========| |=========| |=========| +// S0-3 | | | CVI_VA | | CVI_VA | | CVI_VA | | CVI_VA | | | +// S2-3 | | | CVI_VX | | CVI_VX | | | | | | | +// S0-3 | | | | | | | | | CVI_VP | | | +// S0-3 | | | | | | | CVI_VS | | | | | +// S0-1 |(CVI_LD) | | CVI_LD | | CVI_LD | | CVI_LD | | CVI_LD | | | +// S0-1 |(C*TMP_LD) | | | | | | | | | | +// S01 |(C*_LDU) | | | | | | | | C*_LDU | | | +// S0 | | | CVI_ST | | CVI_ST | | CVI_ST | | CVI_ST | |(CVI_ST) | +// S0 | | | | | | | | | | |(C*TMP_ST) +// S01 | | | | | | | | | VSTU | |(C*_STU) | +// |=========| |=========| |=========| |=========| |=========| |=========| +// |=====================| |=====================| +// | CVI_MPY2 & CVI_MPY3 | |CVI_XLANE & CVI_SHIFT| +// |=====================| |=====================| +// S0-3 | CVI_VA_DV | | CVI_VA_DV | +// S0-3 | | | CVI_VP_DV | +// S2-3 | CVI_VX_DV | | | +// |=====================| |=====================| +// |=====================================================================| +// S0-3 | CVI_HIST Histogram | +// S0123| CVI_VA_EXT Extract | +// |=====================================================================| + +def HexagonItinerariesV60 : + ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP, + CVI_ST, CVI_XLANE, CVI_SHIFT, CVI_MPY0, CVI_MPY1, + CVI_LD, CVI_XLSHF, CVI_MPY01, CVI_ALL], [], [ + // ALU32 + InstrItinData<ALU32_2op_tc_1_SLOT0123 , + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + InstrItinData<ALU32_2op_tc_2early_SLOT0123, + [InstrStage<2, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + InstrItinData<ALU32_3op_tc_1_SLOT0123 , + [InstrStage<1, 
[SLOT0, SLOT1, SLOT2, SLOT3]>]>, + InstrItinData<ALU32_3op_tc_2_SLOT0123 , + [InstrStage<2, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + InstrItinData<ALU32_3op_tc_2early_SLOT0123, + [InstrStage<2, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + InstrItinData<ALU32_ADDI_tc_1_SLOT0123 , + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + + // ALU64 + InstrItinData<ALU64_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<ALU64_tc_2_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<ALU64_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<ALU64_tc_3x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>, + + // CR -> System + InstrItinData<CR_tc_2_SLOT3 , [InstrStage<2, [SLOT3]>]>, + InstrItinData<CR_tc_2early_SLOT3 , [InstrStage<2, [SLOT3]>]>, + InstrItinData<CR_tc_3x_SLOT3 , [InstrStage<3, [SLOT3]>]>, + + // Jump (conditional/unconditional/return etc) + InstrItinData<CR_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<CR_tc_3x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>, + InstrItinData<CJ_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<CJ_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<J_tc_2early_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT , [InstrStage<1, [SLOT2, SLOT3]>]>, + + // JR + InstrItinData<J_tc_2early_SLOT2 , [InstrStage<2, [SLOT2]>]>, + InstrItinData<J_tc_3stall_SLOT2 , [InstrStage<3, [SLOT2]>]>, + + // Extender + InstrItinData<EXTENDER_tc_1_SLOT0123, [InstrStage<1, + [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + + // Load + InstrItinData<LD_tc_ld_SLOT01 , [InstrStage<3, [SLOT0, SLOT1]>]>, + InstrItinData<LD_tc_3or4stall_SLOT0, [InstrStage<4, [SLOT0]>]>, + InstrItinData<LD_tc_ld_SLOT0 , [InstrStage<3, [SLOT0]>]>, + + // M + InstrItinData<M_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<M_tc_2_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<M_tc_3_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>, + InstrItinData<M_tc_3x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>, + InstrItinData<M_tc_3or4x_SLOT23 , [InstrStage<4, [SLOT2, SLOT3]>]>, + InstrItinData<M_tc_3stall_SLOT23, [InstrStage<3, [SLOT2, SLOT3]>]>, + + // Store + InstrItinData<ST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>, + InstrItinData<ST_tc_3stall_SLOT0, [InstrStage<3, [SLOT0]>]>, + InstrItinData<ST_tc_ld_SLOT0 , [InstrStage<3, [SLOT0]>]>, + InstrItinData<ST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>]>, + + // Subinsn + InstrItinData<SUBINSN_tc_2early_SLOT0, [InstrStage<2, [SLOT0]>]>, + InstrItinData<SUBINSN_tc_3stall_SLOT0, [InstrStage<3, [SLOT0]>]>, + InstrItinData<SUBINSN_tc_ld_SLOT0 , [InstrStage<3, [SLOT0]>]>, + InstrItinData<SUBINSN_tc_1_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>, + InstrItinData<SUBINSN_tc_2early_SLOT01, + [InstrStage<2, [SLOT0, SLOT1]>]>, + InstrItinData<SUBINSN_tc_ld_SLOT01 , [InstrStage<3, [SLOT0, SLOT1]>]>, + InstrItinData<SUBINSN_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>, + + // S + InstrItinData<S_2op_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<S_2op_tc_2_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<S_2op_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>, + // The S_2op_tc_3x_SLOT23 slots are 4 cycles on v60. 
+ InstrItinData<S_2op_tc_3or4x_SLOT23 , [InstrStage<4, [SLOT2, SLOT3]>]>, + InstrItinData<S_3op_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<S_3op_tc_2_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<S_3op_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<S_3op_tc_3_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>, + InstrItinData<S_3op_tc_3stall_SLOT23, [InstrStage<3, [SLOT2, SLOT3]>]>, + InstrItinData<S_3op_tc_3x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>, + + // New Value Compare Jump + InstrItinData<NCJ_tc_3or4stall_SLOT0, [InstrStage<4, [SLOT0]>]>, + + // Mem ops + InstrItinData<V2LDST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>]>, + InstrItinData<V2LDST_tc_ld_SLOT01 , [InstrStage<2, [SLOT0, SLOT1]>]>, + InstrItinData<V2LDST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>, + InstrItinData<V4LDST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>]>, + InstrItinData<V4LDST_tc_ld_SLOT01 , [InstrStage<3, [SLOT0, SLOT1]>]>, + InstrItinData<V4LDST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>, + + // Endloop + InstrItinData<J_tc_2early_SLOT0123, [InstrStage<2, [SLOT_ENDLOOP]>]>, + + // Vector + InstrItinData<COPROC_VMEM_vtc_long_SLOT01, + [InstrStage<3, [SLOT0, SLOT1]>]>, + InstrItinData<COPROC_VX_vtc_long_SLOT23 , + [InstrStage<3, [SLOT2, SLOT3]>]>, + InstrItinData<COPROC_VX_vtc_SLOT23 , + [InstrStage<3, [SLOT2, SLOT3]>]>, + InstrItinData<MAPPING_tc_1_SLOT0123 , + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + + // Duplex and Compound + InstrItinData<DUPLEX , [InstrStage<1, [SLOT0]>]>, + InstrItinData<COMPOUND_CJ_ARCHDEPSLOT , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<COMPOUND , [InstrStage<1, [SLOT2, SLOT3]>]>, + // Misc + InstrItinData<PREFIX , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + InstrItinData<PSEUDO , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + InstrItinData<PSEUDOM , [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [SLOT2, SLOT3]>]>, + + // Latest CVI spec definitions. 
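+    // Each CVI entry below reserves a packet slot and, in the same cycle
+    // (the trailing 0 time increment), one or more CVI functional units;
+    // CVI_VA, for example, pairs any of SLOT0-3 with a single vector unit.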
+ InstrItinData<CVI_VA,[InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>, + InstrStage<1, [CVI_XLANE,CVI_SHIFT, + CVI_MPY0, CVI_MPY1]>]>, + InstrItinData<CVI_VA_DV, + [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>, + InstrStage<1, [CVI_XLSHF, CVI_MPY01]>]>, + InstrItinData<CVI_VX_LONG, [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>]>, + InstrItinData<CVI_VX_LATE, [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>]>, + InstrItinData<CVI_VX,[InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>]>, + InstrItinData<CVI_VX_DV_LONG, + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY01]>]>, + InstrItinData<CVI_VX_DV, + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY01]>]>, + InstrItinData<CVI_VX_DV_SLOT2, + [InstrStage<1, [SLOT2], 0>, + InstrStage<1, [CVI_MPY01]>]>, + InstrItinData<CVI_VP, [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>, + InstrStage<1, [CVI_XLANE]>]>, + InstrItinData<CVI_VP_LONG, [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>, + InstrStage<1, [CVI_XLANE]>]>, + InstrItinData<CVI_VP_VS_EARLY, + [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>, + InstrStage<1, [CVI_XLSHF]>]>, + InstrItinData<CVI_VP_VS_LONG, + [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>, + InstrStage<1, [CVI_XLSHF]>]>, + InstrItinData<CVI_VP_VS, + [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>, + InstrStage<1, [CVI_XLSHF]>]>, + InstrItinData<CVI_VP_VS_LONG_EARLY, + [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>, + InstrStage<1, [CVI_XLSHF]>]>, + InstrItinData<CVI_VP_DV , [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>, + InstrStage<1, [CVI_XLSHF]>]>, + InstrItinData<CVI_VS, + [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>, + InstrStage<1, [CVI_SHIFT]>]>, + InstrItinData<CVI_VINLANESAT, + [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>, + InstrStage<1, [CVI_SHIFT]>]>, + InstrItinData<CVI_VM_LD , [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_XLANE, CVI_SHIFT, + CVI_MPY0, CVI_MPY1]>]>, + InstrItinData<CVI_VM_TMP_LD,[InstrStage<1,[SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_LD]>]>, + InstrItinData<CVI_VM_CUR_LD,[InstrStage<1,[SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_XLANE, CVI_SHIFT, + CVI_MPY0, CVI_MPY1]>]>, + InstrItinData<CVI_VM_VP_LDU,[InstrStage<1,[SLOT0], 0>, + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_XLANE]>]>, + InstrItinData<CVI_VM_ST , [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE, CVI_SHIFT, + CVI_MPY0, CVI_MPY1]>]>, + InstrItinData<CVI_VM_NEW_ST,[InstrStage<1,[SLOT0], 0>, + InstrStage<1, [CVI_ST]>]>, + InstrItinData<CVI_VM_STU , [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>]>, + InstrItinData<CVI_HIST , [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>, + InstrStage<1, [CVI_ALL]>]> + ]>; + +def HexagonModelV60 : SchedMachineModel { + // Max issue per cycle == bundle width. 
+ let IssueWidth = 4; + let Itineraries = HexagonItinerariesV60; + let LoadLatency = 1; +} + +//===----------------------------------------------------------------------===// +// Hexagon V60 Resource Definitions - +//===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSelectCCInfo.td b/contrib/llvm/lib/Target/Hexagon/HexagonSelectCCInfo.td new file mode 100644 index 0000000..d8feb89 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonSelectCCInfo.td @@ -0,0 +1,121 @@ +//===-- HexagoSelectCCInfo.td - Selectcc mappings ----------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + + +// +// selectcc mappings. +// +def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval, + IntRegs:$fval, SETEQ)), + (i32 (MUX_rr (i1 (CMPEQrr IntRegs:$lhs, IntRegs:$rhs)), + IntRegs:$tval, IntRegs:$fval))>; + +def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval, + IntRegs:$fval, SETNE)), + (i32 (MUX_rr (i1 (NOT_p (CMPEQrr IntRegs:$lhs, IntRegs:$rhs))), + IntRegs:$tval, IntRegs:$fval))>; + +def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval, + IntRegs:$fval, SETGT)), + (i32 (MUX_rr (i1 (CMPGTrr IntRegs:$lhs, IntRegs:$rhs)), + IntRegs:$tval, IntRegs:$fval))>; + +def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval, + IntRegs:$fval, SETUGT)), + (i32 (MUX_rr (i1 (CMPGTUrr IntRegs:$lhs, IntRegs:$rhs)), + IntRegs:$tval, IntRegs:$fval))>; + + + +def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval, + IntRegs:$fval, SETULT)), + (i32 (MUX_rr (i1 (NOT_p (CMPGTUrr IntRegs:$lhs, + (ADD_ri IntRegs:$rhs, -1)))), + IntRegs:$tval, IntRegs:$fval))>; + +def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval, + IntRegs:$fval, SETLT)), + (i32 (MUX_rr (i1 (NOT_p (CMPGTrr IntRegs:$lhs, + (ADD_ri IntRegs:$rhs, -1)))), + IntRegs:$tval, IntRegs:$fval))>; + +def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval, + IntRegs:$fval, SETLE)), + (i32 (MUX_rr (i1 (NOT_p (CMPGTrr IntRegs:$lhs, IntRegs:$rhs))), + IntRegs:$tval, IntRegs:$fval))>; + +def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval, + IntRegs:$fval, SETULE)), + (i32 (MUX_rr (i1 (NOT_p (CMPGTUrr IntRegs:$lhs, IntRegs:$rhs))), + IntRegs:$tval, IntRegs:$fval))>; + + +// +// selectcc mappings for greater-equal-to Rs => greater-than Rs-1. +// +def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval, + IntRegs:$fval, SETGE)), + (i32 (MUX_rr (i1 (CMPGTrr IntRegs:$lhs, (ADD_ri IntRegs:$rhs, -1))), + IntRegs:$tval, IntRegs:$fval))>; + +def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval, + IntRegs:$fval, SETUGE)), + (i32 (MUX_rr (i1 (CMPGTUrr IntRegs:$lhs, (ADD_ri IntRegs:$rhs, -1))), + IntRegs:$tval, IntRegs:$fval))>; + + + +// +// selectcc mappings for predicate comparisons. 
+// +// Convert Rd = selectcc(p0, p1, true_val, false_val, SETEQ) into: +// pt = not(p1 xor p2) +// Rd = mux(pt, true_val, false_val) +// and similarly for SETNE +// +def : Pat <(i32 (selectcc PredRegs:$lhs, PredRegs:$rhs, IntRegs:$tval, + IntRegs:$fval, SETNE)), + (i32 (MUX_rr (i1 (XOR_pp PredRegs:$lhs, PredRegs:$rhs)), IntRegs:$tval, + IntRegs:$fval))>; + +def : Pat <(i32 (selectcc PredRegs:$lhs, PredRegs:$rhs, IntRegs:$tval, + IntRegs:$fval, SETEQ)), + (i32 (MUX_rr (i1 (NOT_p (XOR_pp PredRegs:$lhs, PredRegs:$rhs))), + IntRegs:$tval, IntRegs:$fval))>; + + +// +// selectcc mappings for 64-bit operands are messy. Hexagon does not have a +// MUX64 o, use this: +// selectcc(Rss, Rdd, tval, fval, cond) -> +// combine(mux(cmp_cond(Rss, Rdd), tval.hi, fval.hi), +// mux(cmp_cond(Rss, Rdd), tval.lo, fval.lo)) + +// setgt-64. +def : Pat<(i64 (selectcc DoubleRegs:$lhs, DoubleRegs:$rhs, DoubleRegs:$tval, + DoubleRegs:$fval, SETGT)), + (COMBINE_rr (MUX_rr (CMPGT64rr DoubleRegs:$lhs, DoubleRegs:$rhs), + (EXTRACT_SUBREG DoubleRegs:$tval, subreg_hireg), + (EXTRACT_SUBREG DoubleRegs:$fval, subreg_hireg)), + (MUX_rr (CMPGT64rr DoubleRegs:$lhs, DoubleRegs:$rhs), + (EXTRACT_SUBREG DoubleRegs:$tval, subreg_loreg), + (EXTRACT_SUBREG DoubleRegs:$fval, subreg_loreg)))>; + + +// setlt-64 -> setgt-64. +def : Pat<(i64 (selectcc DoubleRegs:$lhs, DoubleRegs:$rhs, DoubleRegs:$tval, + DoubleRegs:$fval, SETLT)), + (COMBINE_rr (MUX_rr (CMPGT64rr DoubleRegs:$lhs, + (ADD64_rr DoubleRegs:$rhs, (TFRI64 -1))), + (EXTRACT_SUBREG DoubleRegs:$tval, subreg_hireg), + (EXTRACT_SUBREG DoubleRegs:$fval, subreg_hireg)), + (MUX_rr (CMPGT64rr DoubleRegs:$lhs, + (ADD64_rr DoubleRegs:$rhs, (TFRI64 -1))), + (EXTRACT_SUBREG DoubleRegs:$tval, subreg_loreg), + (EXTRACT_SUBREG DoubleRegs:$fval, subreg_loreg)))>; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp new file mode 100644 index 0000000..239dbda --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp @@ -0,0 +1,63 @@ +//===-- HexagonSelectionDAGInfo.cpp - Hexagon SelectionDAG Info -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the HexagonSelectionDAGInfo class. +// +//===----------------------------------------------------------------------===// + +#include "HexagonTargetMachine.h" +#include "llvm/CodeGen/SelectionDAG.h" +using namespace llvm; + +#define DEBUG_TYPE "hexagon-selectiondag-info" + +SDValue +HexagonSelectionDAGInfo:: +EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain, + SDValue Dst, SDValue Src, SDValue Size, unsigned Align, + bool isVolatile, bool AlwaysInline, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const { + ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + if (AlwaysInline || (Align & 0x3) != 0 || !ConstantSize) + return SDValue(); + + uint64_t SizeVal = ConstantSize->getZExtValue(); + if (SizeVal < 32 || (SizeVal % 8) != 0) + return SDValue(); + + // Special case aligned memcpys with size >= 32 bytes and a multiple of 8. 
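+  // Rather than expanding the copy inline, the code below emits a call to the
+  // runtime helper named in SpecialMemcpyName, passing (Dst, Src, Size) and
+  // discarding the result, conceptually:
+  //   __hexagon_memcpy_likely_aligned_min32bytes_mult8bytes(dst, src, size);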
+ // + const TargetLowering &TLI = *DAG.getSubtarget().getTargetLowering(); + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); + Entry.Node = Dst; + Args.push_back(Entry); + Entry.Node = Src; + Args.push_back(Entry); + Entry.Node = Size; + Args.push_back(Entry); + + const char *SpecialMemcpyName = + "__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes"; + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl) + .setChain(Chain) + .setCallee(TLI.getLibcallCallingConv(RTLIB::MEMCPY), + Type::getVoidTy(*DAG.getContext()), + DAG.getTargetExternalSymbol( + SpecialMemcpyName, TLI.getPointerTy(DAG.getDataLayout())), + std::move(Args), 0) + .setDiscardResult(); + + std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); + return CallResult.second; +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.h new file mode 100644 index 0000000..80ac5d7 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.h @@ -0,0 +1,35 @@ +//===-- HexagonSelectionDAGInfo.h - Hexagon SelectionDAG Info ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the Hexagon subclass for TargetSelectionDAGInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONSELECTIONDAGINFO_H +#define LLVM_LIB_TARGET_HEXAGON_HEXAGONSELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class HexagonSelectionDAGInfo : public TargetSelectionDAGInfo { +public: + + SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, + SDValue Chain, + SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, + bool isVolatile, bool AlwaysInline, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const override; +}; + +} + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp new file mode 100644 index 0000000..10fe606 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp @@ -0,0 +1,168 @@ +//=== HexagonSplitConst32AndConst64.cpp - split CONST32/Const64 into HI/LO ===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// When the compiler is invoked with no small data, for instance, with the -G0 +// command line option, then all CONST32_* opcodes should be broken down into +// appropriate LO and HI instructions. This splitting is done by this pass. +// The only reason this is not done in the DAG lowering itself is that there +// is no simple way of getting the register allocator to allot the same hard +// register to the result of LO and HI instructions. This pass is always +// scheduled after register allocation. 
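+// For illustration (hypothetical operands): a CONST32 of a block address is
+// rewritten below into an LO/HI pair, roughly
+//   r2.l = #lo(@addr)
+//   r2.h = #hi(@addr)
+// while CONST32/CONST64 immediates are materialized with A2_tfrsi transfers
+// into the destination register or its low/high sub-registers.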
+// +//===----------------------------------------------------------------------===// + +#include "HexagonMachineFunctionInfo.h" +#include "HexagonSubtarget.h" +#include "HexagonTargetMachine.h" +#include "HexagonTargetObjectFile.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LatencyPriorityQueue.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/ScheduleDAGInstrs.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include <map> + +using namespace llvm; + +#define DEBUG_TYPE "xfer" + +namespace llvm { + FunctionPass *createHexagonSplitConst32AndConst64(); + void initializeHexagonSplitConst32AndConst64Pass(PassRegistry&); +} + +namespace { + +class HexagonSplitConst32AndConst64 : public MachineFunctionPass { + public: + static char ID; + HexagonSplitConst32AndConst64() : MachineFunctionPass(ID) {} + + const char *getPassName() const override { + return "Hexagon Split Const32s and Const64s"; + } + bool runOnMachineFunction(MachineFunction &Fn) override; +}; + + +char HexagonSplitConst32AndConst64::ID = 0; + + +bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) { + + const HexagonTargetObjectFile &TLOF = + *static_cast<const HexagonTargetObjectFile *>( + Fn.getTarget().getObjFileLowering()); + if (TLOF.IsSmallDataEnabled()) + return true; + + const TargetInstrInfo *TII = Fn.getSubtarget().getInstrInfo(); + const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo(); + + // Loop over all of the basic blocks + for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end(); + MBBb != MBBe; ++MBBb) { + MachineBasicBlock *MBB = &*MBBb; + // Traverse the basic block + MachineBasicBlock::iterator MII = MBB->begin(); + MachineBasicBlock::iterator MIE = MBB->end (); + while (MII != MIE) { + MachineInstr *MI = MII; + int Opc = MI->getOpcode(); + if (Opc == Hexagon::CONST32_Int_Real && + MI->getOperand(1).isBlockAddress()) { + int DestReg = MI->getOperand(0).getReg(); + MachineOperand &Symbol = MI->getOperand (1); + + BuildMI (*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::LO), DestReg).addOperand(Symbol); + BuildMI (*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::HI), DestReg).addOperand(Symbol); + // MBB->erase returns the iterator to the next instruction, which is the + // one we want to process next + MII = MBB->erase (MI); + continue; + } + + else if (Opc == Hexagon::CONST32_Int_Real || + Opc == Hexagon::CONST32_Float_Real) { + int DestReg = MI->getOperand(0).getReg(); + + // We have to convert an FP immediate into its corresponding integer + // representation + int64_t ImmValue; + if (Opc == Hexagon::CONST32_Float_Real) { + APFloat Val = MI->getOperand(1).getFPImm()->getValueAPF(); + ImmValue = *Val.bitcastToAPInt().getRawData(); + } + else + ImmValue = MI->getOperand(1).getImm(); + + BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::A2_tfrsi), DestReg).addImm(ImmValue); + MII = MBB->erase (MI); + continue; + } + else if (Opc == Hexagon::CONST64_Int_Real || + Opc == 
Hexagon::CONST64_Float_Real) { + int DestReg = MI->getOperand(0).getReg(); + + // We have to convert an FP immediate into its corresponding integer + // representation + int64_t ImmValue; + if (Opc == Hexagon::CONST64_Float_Real) { + APFloat Val = MI->getOperand(1).getFPImm()->getValueAPF(); + ImmValue = *Val.bitcastToAPInt().getRawData(); + } + else + ImmValue = MI->getOperand(1).getImm(); + + unsigned DestLo = TRI->getSubReg(DestReg, Hexagon::subreg_loreg); + unsigned DestHi = TRI->getSubReg(DestReg, Hexagon::subreg_hireg); + + int32_t LowWord = (ImmValue & 0xFFFFFFFF); + int32_t HighWord = (ImmValue >> 32) & 0xFFFFFFFF; + + BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::A2_tfrsi), DestLo).addImm(LowWord); + BuildMI (*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::A2_tfrsi), DestHi).addImm(HighWord); + MII = MBB->erase (MI); + continue; + } + ++MII; + } + } + + return true; +} + +} + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +FunctionPass * +llvm::createHexagonSplitConst32AndConst64() { + return new HexagonSplitConst32AndConst64(); +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp new file mode 100644 index 0000000..d4e95b0d --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp @@ -0,0 +1,1209 @@ +//===--- HexagonSplitDouble.cpp -------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "hsdr" + +#include "HexagonRegisterInfo.h" +#include "HexagonTargetMachine.h" + +#include "llvm/Pass.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegisterInfo.h" + +#include <map> +#include <set> +#include <vector> + +using namespace llvm; + +namespace llvm { + FunctionPass *createHexagonSplitDoubleRegs(); + void initializeHexagonSplitDoubleRegsPass(PassRegistry&); +} + +namespace { + static cl::opt<int> MaxHSDR("max-hsdr", cl::Hidden, cl::init(-1), + cl::desc("Maximum number of split partitions")); + static cl::opt<bool> MemRefsFixed("hsdr-no-mem", cl::Hidden, cl::init(true), + cl::desc("Do not split loads or stores")); + + class HexagonSplitDoubleRegs : public MachineFunctionPass { + public: + static char ID; + HexagonSplitDoubleRegs() : MachineFunctionPass(ID), TRI(nullptr), + TII(nullptr) { + initializeHexagonSplitDoubleRegsPass(*PassRegistry::getPassRegistry()); + } + const char *getPassName() const override { + return "Hexagon Split Double Registers"; + } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<MachineLoopInfo>(); + AU.addPreserved<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + bool runOnMachineFunction(MachineFunction &MF) override; + + private: + static const TargetRegisterClass *const DoubleRC; + + const HexagonRegisterInfo *TRI; + const HexagonInstrInfo *TII; + const 
MachineLoopInfo *MLI; + MachineRegisterInfo *MRI; + + typedef std::set<unsigned> USet; + typedef std::map<unsigned,USet> UUSetMap; + typedef std::pair<unsigned,unsigned> UUPair; + typedef std::map<unsigned,UUPair> UUPairMap; + typedef std::map<const MachineLoop*,USet> LoopRegMap; + + bool isInduction(unsigned Reg, LoopRegMap &IRM) const; + bool isVolatileInstr(const MachineInstr *MI) const; + bool isFixedInstr(const MachineInstr *MI) const; + void partitionRegisters(UUSetMap &P2Rs); + int32_t profit(const MachineInstr *MI) const; + bool isProfitable(const USet &Part, LoopRegMap &IRM) const; + + void collectIndRegsForLoop(const MachineLoop *L, USet &Rs); + void collectIndRegs(LoopRegMap &IRM); + + void createHalfInstr(unsigned Opc, MachineInstr *MI, + const UUPairMap &PairMap, unsigned SubR); + void splitMemRef(MachineInstr *MI, const UUPairMap &PairMap); + void splitImmediate(MachineInstr *MI, const UUPairMap &PairMap); + void splitCombine(MachineInstr *MI, const UUPairMap &PairMap); + void splitExt(MachineInstr *MI, const UUPairMap &PairMap); + void splitShift(MachineInstr *MI, const UUPairMap &PairMap); + void splitAslOr(MachineInstr *MI, const UUPairMap &PairMap); + bool splitInstr(MachineInstr *MI, const UUPairMap &PairMap); + void replaceSubregUses(MachineInstr *MI, const UUPairMap &PairMap); + void collapseRegPairs(MachineInstr *MI, const UUPairMap &PairMap); + bool splitPartition(const USet &Part); + + static int Counter; + static void dump_partition(raw_ostream&, const USet&, + const TargetRegisterInfo&); + }; + char HexagonSplitDoubleRegs::ID; + int HexagonSplitDoubleRegs::Counter = 0; + const TargetRegisterClass *const HexagonSplitDoubleRegs::DoubleRC + = &Hexagon::DoubleRegsRegClass; +} + +INITIALIZE_PASS(HexagonSplitDoubleRegs, "hexagon-split-double", + "Hexagon Split Double Registers", false, false) + + +static inline uint32_t getRegState(const MachineOperand &R) { + assert(R.isReg()); + return getDefRegState(R.isDef()) | + getImplRegState(R.isImplicit()) | + getKillRegState(R.isKill()) | + getDeadRegState(R.isDead()) | + getUndefRegState(R.isUndef()) | + getInternalReadRegState(R.isInternalRead()) | + (R.isDebug() ? RegState::Debug : 0); +} + + +void HexagonSplitDoubleRegs::dump_partition(raw_ostream &os, + const USet &Part, const TargetRegisterInfo &TRI) { + dbgs() << '{'; + for (auto I : Part) + dbgs() << ' ' << PrintReg(I, &TRI); + dbgs() << " }"; +} + + +bool HexagonSplitDoubleRegs::isInduction(unsigned Reg, LoopRegMap &IRM) const { + for (auto I : IRM) { + const USet &Rs = I.second; + if (Rs.find(Reg) != Rs.end()) + return true; + } + return false; +} + + +bool HexagonSplitDoubleRegs::isVolatileInstr(const MachineInstr *MI) const { + for (auto &I : MI->memoperands()) + if (I->isVolatile()) + return true; + return false; +} + + +bool HexagonSplitDoubleRegs::isFixedInstr(const MachineInstr *MI) const { + if (MI->mayLoad() || MI->mayStore()) + if (MemRefsFixed || isVolatileInstr(MI)) + return true; + if (MI->isDebugValue()) + return false; + + unsigned Opc = MI->getOpcode(); + switch (Opc) { + default: + return true; + + case TargetOpcode::PHI: + case TargetOpcode::COPY: + break; + + case Hexagon::L2_loadrd_io: + // Not handling stack stores (only reg-based addresses). + if (MI->getOperand(1).isReg()) + break; + return true; + case Hexagon::S2_storerd_io: + // Not handling stack stores (only reg-based addresses). 
+ if (MI->getOperand(0).isReg()) + break; + return true; + case Hexagon::L2_loadrd_pi: + case Hexagon::S2_storerd_pi: + + case Hexagon::A2_tfrpi: + case Hexagon::A2_combineii: + case Hexagon::A4_combineir: + case Hexagon::A4_combineii: + case Hexagon::A4_combineri: + case Hexagon::A2_combinew: + case Hexagon::CONST64_Int_Real: + + case Hexagon::A2_sxtw: + + case Hexagon::A2_andp: + case Hexagon::A2_orp: + case Hexagon::A2_xorp: + case Hexagon::S2_asl_i_p_or: + case Hexagon::S2_asl_i_p: + case Hexagon::S2_asr_i_p: + case Hexagon::S2_lsr_i_p: + break; + } + + for (auto &Op : MI->operands()) { + if (!Op.isReg()) + continue; + unsigned R = Op.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(R)) + return true; + } + return false; +} + + +void HexagonSplitDoubleRegs::partitionRegisters(UUSetMap &P2Rs) { + typedef std::map<unsigned,unsigned> UUMap; + typedef std::vector<unsigned> UVect; + + unsigned NumRegs = MRI->getNumVirtRegs(); + BitVector DoubleRegs(NumRegs); + for (unsigned i = 0; i < NumRegs; ++i) { + unsigned R = TargetRegisterInfo::index2VirtReg(i); + if (MRI->getRegClass(R) == DoubleRC) + DoubleRegs.set(i); + } + + BitVector FixedRegs(NumRegs); + for (int x = DoubleRegs.find_first(); x >= 0; x = DoubleRegs.find_next(x)) { + unsigned R = TargetRegisterInfo::index2VirtReg(x); + MachineInstr *DefI = MRI->getVRegDef(R); + // In some cases a register may exist, but never be defined or used. + // It should never appear anywhere, but mark it as "fixed", just to be + // safe. + if (!DefI || isFixedInstr(DefI)) + FixedRegs.set(x); + } + + UUSetMap AssocMap; + for (int x = DoubleRegs.find_first(); x >= 0; x = DoubleRegs.find_next(x)) { + if (FixedRegs[x]) + continue; + unsigned R = TargetRegisterInfo::index2VirtReg(x); + DEBUG(dbgs() << PrintReg(R, TRI) << " ~~"); + USet &Asc = AssocMap[R]; + for (auto U = MRI->use_nodbg_begin(R), Z = MRI->use_nodbg_end(); + U != Z; ++U) { + MachineOperand &Op = *U; + MachineInstr *UseI = Op.getParent(); + if (isFixedInstr(UseI)) + continue; + for (unsigned i = 0, n = UseI->getNumOperands(); i < n; ++i) { + MachineOperand &MO = UseI->getOperand(i); + // Skip non-registers or registers with subregisters. + if (&MO == &Op || !MO.isReg() || MO.getSubReg()) + continue; + unsigned T = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(T)) { + FixedRegs.set(x); + continue; + } + if (MRI->getRegClass(T) != DoubleRC) + continue; + unsigned u = TargetRegisterInfo::virtReg2Index(T); + if (FixedRegs[u]) + continue; + DEBUG(dbgs() << ' ' << PrintReg(T, TRI)); + Asc.insert(T); + // Make it symmetric. + AssocMap[T].insert(R); + } + } + DEBUG(dbgs() << '\n'); + } + + UUMap R2P; + unsigned NextP = 1; + USet Visited; + for (int x = DoubleRegs.find_first(); x >= 0; x = DoubleRegs.find_next(x)) { + unsigned R = TargetRegisterInfo::index2VirtReg(x); + if (Visited.count(R)) + continue; + // Create a new partition for R. + unsigned ThisP = FixedRegs[x] ? 0 : NextP++; + UVect WorkQ; + WorkQ.push_back(R); + for (unsigned i = 0; i < WorkQ.size(); ++i) { + unsigned T = WorkQ[i]; + if (Visited.count(T)) + continue; + R2P[T] = ThisP; + Visited.insert(T); + // Add all registers associated with T. 
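+      // This is a breadth-first walk over AssocMap, so every double register
+      // reachable from R lands in the same partition; partition number 0 is
+      // reserved for registers marked as fixed.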
+ USet &Asc = AssocMap[T]; + for (USet::iterator J = Asc.begin(), F = Asc.end(); J != F; ++J) + WorkQ.push_back(*J); + } + } + + for (auto I : R2P) + P2Rs[I.second].insert(I.first); +} + + +static inline int32_t profitImm(unsigned Lo, unsigned Hi) { + int32_t P = 0; + bool LoZ1 = false, HiZ1 = false; + if (Lo == 0 || Lo == 0xFFFFFFFF) + P += 10, LoZ1 = true; + if (Hi == 0 || Hi == 0xFFFFFFFF) + P += 10, HiZ1 = true; + if (!LoZ1 && !HiZ1 && Lo == Hi) + P += 3; + return P; +} + + +int32_t HexagonSplitDoubleRegs::profit(const MachineInstr *MI) const { + unsigned ImmX = 0; + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case TargetOpcode::PHI: + for (const auto &Op : MI->operands()) + if (!Op.getSubReg()) + return 0; + return 10; + case TargetOpcode::COPY: + if (MI->getOperand(1).getSubReg() != 0) + return 10; + return 0; + + case Hexagon::L2_loadrd_io: + case Hexagon::S2_storerd_io: + return -1; + case Hexagon::L2_loadrd_pi: + case Hexagon::S2_storerd_pi: + return 2; + + case Hexagon::A2_tfrpi: + case Hexagon::CONST64_Int_Real: { + uint64_t D = MI->getOperand(1).getImm(); + unsigned Lo = D & 0xFFFFFFFFULL; + unsigned Hi = D >> 32; + return profitImm(Lo, Hi); + } + case Hexagon::A2_combineii: + case Hexagon::A4_combineii: + return profitImm(MI->getOperand(1).getImm(), + MI->getOperand(2).getImm()); + case Hexagon::A4_combineri: + ImmX++; + case Hexagon::A4_combineir: { + ImmX++; + int64_t V = MI->getOperand(ImmX).getImm(); + if (V == 0 || V == -1) + return 10; + // Fall through into A2_combinew. + } + case Hexagon::A2_combinew: + return 2; + + case Hexagon::A2_sxtw: + return 3; + + case Hexagon::A2_andp: + case Hexagon::A2_orp: + case Hexagon::A2_xorp: + return 1; + + case Hexagon::S2_asl_i_p_or: { + unsigned S = MI->getOperand(3).getImm(); + if (S == 0 || S == 32) + return 10; + return -1; + } + case Hexagon::S2_asl_i_p: + case Hexagon::S2_asr_i_p: + case Hexagon::S2_lsr_i_p: + unsigned S = MI->getOperand(2).getImm(); + if (S == 0 || S == 32) + return 10; + if (S == 16) + return 5; + if (S == 48) + return 7; + return -10; + } + + return 0; +} + + +bool HexagonSplitDoubleRegs::isProfitable(const USet &Part, LoopRegMap &IRM) + const { + unsigned FixedNum = 0, SplitNum = 0, LoopPhiNum = 0; + int32_t TotalP = 0; + + for (unsigned DR : Part) { + MachineInstr *DefI = MRI->getVRegDef(DR); + int32_t P = profit(DefI); + if (P == INT_MIN) + return false; + TotalP += P; + // Reduce the profitability of splitting induction registers. + if (isInduction(DR, IRM)) + TotalP -= 30; + + for (auto U = MRI->use_nodbg_begin(DR), W = MRI->use_nodbg_end(); + U != W; ++U) { + MachineInstr *UseI = U->getParent(); + if (isFixedInstr(UseI)) { + FixedNum++; + // Calculate the cost of generating REG_SEQUENCE instructions. + for (auto &Op : UseI->operands()) { + if (Op.isReg() && Part.count(Op.getReg())) + if (Op.getSubReg()) + TotalP -= 2; + } + continue; + } + // If a register from this partition is used in a fixed instruction, + // and there is also a register in this partition that is used in + // a loop phi node, then decrease the splitting profit as this can + // confuse the modulo scheduler. + if (UseI->isPHI()) { + const MachineBasicBlock *PB = UseI->getParent(); + const MachineLoop *L = MLI->getLoopFor(PB); + if (L && L->getHeader() == PB) + LoopPhiNum++; + } + // Splittable instruction. 
+ SplitNum++; + int32_t P = profit(UseI); + if (P == INT_MIN) + return false; + TotalP += P; + } + } + + if (FixedNum > 0 && LoopPhiNum > 0) + TotalP -= 20*LoopPhiNum; + + DEBUG(dbgs() << "Partition profit: " << TotalP << '\n'); + return TotalP > 0; +} + + +void HexagonSplitDoubleRegs::collectIndRegsForLoop(const MachineLoop *L, + USet &Rs) { + const MachineBasicBlock *HB = L->getHeader(); + const MachineBasicBlock *LB = L->getLoopLatch(); + if (!HB || !LB) + return; + + // Examine the latch branch. Expect it to be a conditional branch to + // the header (either "br-cond header" or "br-cond exit; br header"). + MachineBasicBlock *TB = 0, *FB = 0; + MachineBasicBlock *TmpLB = const_cast<MachineBasicBlock*>(LB); + SmallVector<MachineOperand,2> Cond; + bool BadLB = TII->AnalyzeBranch(*TmpLB, TB, FB, Cond, false); + // Only analyzable conditional branches. HII::AnalyzeBranch will put + // the branch opcode as the first element of Cond, and the predicate + // operand as the second. + if (BadLB || Cond.size() != 2) + return; + // Only simple jump-conditional (with or without negation). + if (!TII->PredOpcodeHasJMP_c(Cond[0].getImm())) + return; + // Must go to the header. + if (TB != HB && FB != HB) + return; + assert(Cond[1].isReg() && "Unexpected Cond vector from AnalyzeBranch"); + // Expect a predicate register. + unsigned PR = Cond[1].getReg(); + assert(MRI->getRegClass(PR) == &Hexagon::PredRegsRegClass); + + // Get the registers on which the loop controlling compare instruction + // depends. + unsigned CmpR1 = 0, CmpR2 = 0; + const MachineInstr *CmpI = MRI->getVRegDef(PR); + while (CmpI->getOpcode() == Hexagon::C2_not) + CmpI = MRI->getVRegDef(CmpI->getOperand(1).getReg()); + + int Mask = 0, Val = 0; + bool OkCI = TII->analyzeCompare(CmpI, CmpR1, CmpR2, Mask, Val); + if (!OkCI) + return; + // Eliminate non-double input registers. + if (CmpR1 && MRI->getRegClass(CmpR1) != DoubleRC) + CmpR1 = 0; + if (CmpR2 && MRI->getRegClass(CmpR2) != DoubleRC) + CmpR2 = 0; + if (!CmpR1 && !CmpR2) + return; + + // Now examine the top of the loop: the phi nodes that could poten- + // tially define loop induction registers. The registers defined by + // such a phi node would be used in a 64-bit add, which then would + // be used in the loop compare instruction. + + // Get the set of all double registers defined by phi nodes in the + // loop header. + typedef std::vector<unsigned> UVect; + UVect DP; + for (auto &MI : *HB) { + if (!MI.isPHI()) + break; + const MachineOperand &MD = MI.getOperand(0); + unsigned R = MD.getReg(); + if (MRI->getRegClass(R) == DoubleRC) + DP.push_back(R); + } + if (DP.empty()) + return; + + auto NoIndOp = [this, CmpR1, CmpR2] (unsigned R) -> bool { + for (auto I = MRI->use_nodbg_begin(R), E = MRI->use_nodbg_end(); + I != E; ++I) { + const MachineInstr *UseI = I->getParent(); + if (UseI->getOpcode() != Hexagon::A2_addp) + continue; + // Get the output from the add. If it is one of the inputs to the + // loop-controlling compare instruction, then R is likely an induc- + // tion register. 
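+      // (NoIndOp returns false when R feeds the loop-controlling compare
+      // through an A2_addp, so the remove_if below keeps those likely
+      // induction registers at the front of DP.)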
+ unsigned T = UseI->getOperand(0).getReg(); + if (T == CmpR1 || T == CmpR2) + return false; + } + return true; + }; + UVect::iterator End = std::remove_if(DP.begin(), DP.end(), NoIndOp); + Rs.insert(DP.begin(), End); + Rs.insert(CmpR1); + Rs.insert(CmpR2); + + DEBUG({ + dbgs() << "For loop at BB#" << HB->getNumber() << " ind regs: "; + dump_partition(dbgs(), Rs, *TRI); + dbgs() << '\n'; + }); +} + + +void HexagonSplitDoubleRegs::collectIndRegs(LoopRegMap &IRM) { + typedef std::vector<MachineLoop*> LoopVector; + LoopVector WorkQ; + + for (auto I : *MLI) + WorkQ.push_back(I); + for (unsigned i = 0; i < WorkQ.size(); ++i) { + for (auto I : *WorkQ[i]) + WorkQ.push_back(I); + } + + USet Rs; + for (unsigned i = 0, n = WorkQ.size(); i < n; ++i) { + MachineLoop *L = WorkQ[i]; + Rs.clear(); + collectIndRegsForLoop(L, Rs); + if (!Rs.empty()) + IRM.insert(std::make_pair(L, Rs)); + } +} + + +void HexagonSplitDoubleRegs::createHalfInstr(unsigned Opc, MachineInstr *MI, + const UUPairMap &PairMap, unsigned SubR) { + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + MachineInstr *NewI = BuildMI(B, MI, DL, TII->get(Opc)); + + for (auto &Op : MI->operands()) { + if (!Op.isReg()) { + NewI->addOperand(Op); + continue; + } + // For register operands, set the subregister. + unsigned R = Op.getReg(); + unsigned SR = Op.getSubReg(); + bool isVirtReg = TargetRegisterInfo::isVirtualRegister(R); + bool isKill = Op.isKill(); + if (isVirtReg && MRI->getRegClass(R) == DoubleRC) { + isKill = false; + UUPairMap::const_iterator F = PairMap.find(R); + if (F == PairMap.end()) { + SR = SubR; + } else { + const UUPair &P = F->second; + R = (SubR == Hexagon::subreg_loreg) ? P.first : P.second; + SR = 0; + } + } + auto CO = MachineOperand::CreateReg(R, Op.isDef(), Op.isImplicit(), isKill, + Op.isDead(), Op.isUndef(), Op.isEarlyClobber(), SR, Op.isDebug(), + Op.isInternalRead()); + NewI->addOperand(CO); + } +} + + +void HexagonSplitDoubleRegs::splitMemRef(MachineInstr *MI, + const UUPairMap &PairMap) { + bool Load = MI->mayLoad(); + unsigned OrigOpc = MI->getOpcode(); + bool PostInc = (OrigOpc == Hexagon::L2_loadrd_pi || + OrigOpc == Hexagon::S2_storerd_pi); + MachineInstr *LowI, *HighI; + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + + // Index of the base-address-register operand. + unsigned AdrX = PostInc ? (Load ? 2 : 1) + : (Load ? 1 : 0); + MachineOperand &AdrOp = MI->getOperand(AdrX); + unsigned RSA = getRegState(AdrOp); + MachineOperand &ValOp = Load ? MI->getOperand(0) + : (PostInc ? MI->getOperand(3) + : MI->getOperand(2)); + UUPairMap::const_iterator F = PairMap.find(ValOp.getReg()); + assert(F != PairMap.end()); + + if (Load) { + const UUPair &P = F->second; + int64_t Off = PostInc ? 0 : MI->getOperand(2).getImm(); + LowI = BuildMI(B, MI, DL, TII->get(Hexagon::L2_loadri_io), P.first) + .addReg(AdrOp.getReg(), RSA & ~RegState::Kill, AdrOp.getSubReg()) + .addImm(Off); + HighI = BuildMI(B, MI, DL, TII->get(Hexagon::L2_loadri_io), P.second) + .addReg(AdrOp.getReg(), RSA & ~RegState::Kill, AdrOp.getSubReg()) + .addImm(Off+4); + } else { + const UUPair &P = F->second; + int64_t Off = PostInc ? 
0 : MI->getOperand(1).getImm(); + LowI = BuildMI(B, MI, DL, TII->get(Hexagon::S2_storeri_io)) + .addReg(AdrOp.getReg(), RSA & ~RegState::Kill, AdrOp.getSubReg()) + .addImm(Off) + .addReg(P.first); + HighI = BuildMI(B, MI, DL, TII->get(Hexagon::S2_storeri_io)) + .addReg(AdrOp.getReg(), RSA & ~RegState::Kill, AdrOp.getSubReg()) + .addImm(Off+4) + .addReg(P.second); + } + + if (PostInc) { + // Create the increment of the address register. + int64_t Inc = Load ? MI->getOperand(3).getImm() + : MI->getOperand(2).getImm(); + MachineOperand &UpdOp = Load ? MI->getOperand(1) : MI->getOperand(0); + const TargetRegisterClass *RC = MRI->getRegClass(UpdOp.getReg()); + unsigned NewR = MRI->createVirtualRegister(RC); + assert(!UpdOp.getSubReg() && "Def operand with subreg"); + BuildMI(B, MI, DL, TII->get(Hexagon::A2_addi), NewR) + .addReg(AdrOp.getReg(), RSA) + .addImm(Inc); + MRI->replaceRegWith(UpdOp.getReg(), NewR); + // The original instruction will be deleted later. + } + + // Generate a new pair of memory-operands. + MachineFunction &MF = *B.getParent(); + for (auto &MO : MI->memoperands()) { + const MachinePointerInfo &Ptr = MO->getPointerInfo(); + unsigned F = MO->getFlags(); + int A = MO->getAlignment(); + + auto *Tmp1 = MF.getMachineMemOperand(Ptr, F, 4/*size*/, A); + LowI->addMemOperand(MF, Tmp1); + auto *Tmp2 = MF.getMachineMemOperand(Ptr, F, 4/*size*/, std::min(A, 4)); + HighI->addMemOperand(MF, Tmp2); + } +} + + +void HexagonSplitDoubleRegs::splitImmediate(MachineInstr *MI, + const UUPairMap &PairMap) { + MachineOperand &Op0 = MI->getOperand(0); + MachineOperand &Op1 = MI->getOperand(1); + assert(Op0.isReg() && Op1.isImm()); + uint64_t V = Op1.getImm(); + + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + UUPairMap::const_iterator F = PairMap.find(Op0.getReg()); + assert(F != PairMap.end()); + const UUPair &P = F->second; + + // The operand to A2_tfrsi can only have 32 significant bits. Immediate + // values in MachineOperand are stored as 64-bit integers, and so the + // value -1 may be represented either as 64-bit -1, or 4294967295. Both + // will have the 32 higher bits truncated in the end, but -1 will remain + // as -1, while the latter may appear to be a large unsigned value + // requiring a constant extender. The casting to int32_t will select the + // former representation. (The same reasoning applies to all 32-bit + // values.) 
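+  // For illustration (an example derived from the code below): the 64-bit
+  // immediate 0xFFFFFFFF00000005 becomes "A2_tfrsi P.first, #5" and
+  // "A2_tfrsi P.second, #-1"; the int32_t cast keeps the high word as -1
+  // rather than as the constant-extended value 4294967295.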
+ BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), P.first) + .addImm(int32_t(V & 0xFFFFFFFFULL)); + BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), P.second) + .addImm(int32_t(V >> 32)); +} + + +void HexagonSplitDoubleRegs::splitCombine(MachineInstr *MI, + const UUPairMap &PairMap) { + MachineOperand &Op0 = MI->getOperand(0); + MachineOperand &Op1 = MI->getOperand(1); + MachineOperand &Op2 = MI->getOperand(2); + assert(Op0.isReg()); + + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + UUPairMap::const_iterator F = PairMap.find(Op0.getReg()); + assert(F != PairMap.end()); + const UUPair &P = F->second; + + if (Op1.isImm()) { + BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), P.second) + .addImm(Op1.getImm()); + } else if (Op1.isReg()) { + BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), P.second) + .addReg(Op1.getReg(), getRegState(Op1), Op1.getSubReg()); + } else + llvm_unreachable("Unexpected operand"); + + if (Op2.isImm()) { + BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), P.first) + .addImm(Op2.getImm()); + } else if (Op2.isReg()) { + BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), P.first) + .addReg(Op2.getReg(), getRegState(Op2), Op2.getSubReg()); + } else + llvm_unreachable("Unexpected operand"); +} + + +void HexagonSplitDoubleRegs::splitExt(MachineInstr *MI, + const UUPairMap &PairMap) { + MachineOperand &Op0 = MI->getOperand(0); + MachineOperand &Op1 = MI->getOperand(1); + assert(Op0.isReg() && Op1.isReg()); + + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + UUPairMap::const_iterator F = PairMap.find(Op0.getReg()); + assert(F != PairMap.end()); + const UUPair &P = F->second; + unsigned RS = getRegState(Op1); + + BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), P.first) + .addReg(Op1.getReg(), RS & ~RegState::Kill, Op1.getSubReg()); + BuildMI(B, MI, DL, TII->get(Hexagon::S2_asr_i_r), P.second) + .addReg(Op1.getReg(), RS, Op1.getSubReg()) + .addImm(31); +} + + +void HexagonSplitDoubleRegs::splitShift(MachineInstr *MI, + const UUPairMap &PairMap) { + MachineOperand &Op0 = MI->getOperand(0); + MachineOperand &Op1 = MI->getOperand(1); + MachineOperand &Op2 = MI->getOperand(2); + assert(Op0.isReg() && Op1.isReg() && Op2.isImm()); + int64_t Sh64 = Op2.getImm(); + assert(Sh64 >= 0 && Sh64 < 64); + unsigned S = Sh64; + + UUPairMap::const_iterator F = PairMap.find(Op0.getReg()); + assert(F != PairMap.end()); + const UUPair &P = F->second; + unsigned LoR = P.first; + unsigned HiR = P.second; + using namespace Hexagon; + + unsigned Opc = MI->getOpcode(); + bool Right = (Opc == S2_lsr_i_p || Opc == S2_asr_i_p); + bool Left = !Right; + bool Signed = (Opc == S2_asr_i_p); + + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + unsigned RS = getRegState(Op1); + unsigned ShiftOpc = Left ? S2_asl_i_r + : (Signed ? S2_asr_i_r : S2_lsr_i_r); + unsigned LoSR = subreg_loreg; + unsigned HiSR = subreg_hireg; + + if (S == 0) { + // No shift, subregister copy. 
+ BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), LoR) + .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR); + BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), HiR) + .addReg(Op1.getReg(), RS, HiSR); + } else if (S < 32) { + const TargetRegisterClass *IntRC = &IntRegsRegClass; + unsigned TmpR = MRI->createVirtualRegister(IntRC); + // Expansion: + // Shift left: DR = shl R, #s + // LoR = shl R.lo, #s + // TmpR = extractu R.lo, #s, #32-s + // HiR = or (TmpR, asl(R.hi, #s)) + // Shift right: DR = shr R, #s + // HiR = shr R.hi, #s + // TmpR = shr R.lo, #s + // LoR = insert TmpR, R.hi, #s, #32-s + + // Shift left: + // LoR = shl R.lo, #s + // Shift right: + // TmpR = shr R.lo, #s + + // Make a special case for A2_aslh and A2_asrh (they are predicable as + // opposed to S2_asl_i_r/S2_asr_i_r). + if (S == 16 && Left) + BuildMI(B, MI, DL, TII->get(A2_aslh), LoR) + .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR); + else if (S == 16 && Signed) + BuildMI(B, MI, DL, TII->get(A2_asrh), TmpR) + .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR); + else + BuildMI(B, MI, DL, TII->get(ShiftOpc), (Left ? LoR : TmpR)) + .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR) + .addImm(S); + + if (Left) { + // TmpR = extractu R.lo, #s, #32-s + BuildMI(B, MI, DL, TII->get(S2_extractu), TmpR) + .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR) + .addImm(S) + .addImm(32-S); + // HiR = or (TmpR, asl(R.hi, #s)) + BuildMI(B, MI, DL, TII->get(S2_asl_i_r_or), HiR) + .addReg(TmpR) + .addReg(Op1.getReg(), RS, HiSR) + .addImm(S); + } else { + // HiR = shr R.hi, #s + BuildMI(B, MI, DL, TII->get(ShiftOpc), HiR) + .addReg(Op1.getReg(), RS & ~RegState::Kill, HiSR) + .addImm(S); + // LoR = insert TmpR, R.hi, #s, #32-s + BuildMI(B, MI, DL, TII->get(S2_insert), LoR) + .addReg(TmpR) + .addReg(Op1.getReg(), RS, HiSR) + .addImm(S) + .addImm(32-S); + } + } else if (S == 32) { + BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), (Left ? HiR : LoR)) + .addReg(Op1.getReg(), RS & ~RegState::Kill, (Left ? LoSR : HiSR)); + if (!Signed) + BuildMI(B, MI, DL, TII->get(A2_tfrsi), (Left ? LoR : HiR)) + .addImm(0); + else // Must be right shift. + BuildMI(B, MI, DL, TII->get(S2_asr_i_r), HiR) + .addReg(Op1.getReg(), RS, HiSR) + .addImm(31); + } else if (S < 64) { + S -= 32; + if (S == 16 && Left) + BuildMI(B, MI, DL, TII->get(A2_aslh), HiR) + .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR); + else if (S == 16 && Signed) + BuildMI(B, MI, DL, TII->get(A2_asrh), LoR) + .addReg(Op1.getReg(), RS & ~RegState::Kill, HiSR); + else + BuildMI(B, MI, DL, TII->get(ShiftOpc), (Left ? HiR : LoR)) + .addReg(Op1.getReg(), RS & ~RegState::Kill, (Left ? LoSR : HiSR)) + .addImm(S); + + if (Signed) + BuildMI(B, MI, DL, TII->get(S2_asr_i_r), HiR) + .addReg(Op1.getReg(), RS, HiSR) + .addImm(31); + else + BuildMI(B, MI, DL, TII->get(A2_tfrsi), (Left ? 
LoR : HiR)) + .addImm(0); + } +} + + +void HexagonSplitDoubleRegs::splitAslOr(MachineInstr *MI, + const UUPairMap &PairMap) { + MachineOperand &Op0 = MI->getOperand(0); + MachineOperand &Op1 = MI->getOperand(1); + MachineOperand &Op2 = MI->getOperand(2); + MachineOperand &Op3 = MI->getOperand(3); + assert(Op0.isReg() && Op1.isReg() && Op2.isReg() && Op3.isImm()); + int64_t Sh64 = Op3.getImm(); + assert(Sh64 >= 0 && Sh64 < 64); + unsigned S = Sh64; + + UUPairMap::const_iterator F = PairMap.find(Op0.getReg()); + assert(F != PairMap.end()); + const UUPair &P = F->second; + unsigned LoR = P.first; + unsigned HiR = P.second; + using namespace Hexagon; + + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + unsigned RS1 = getRegState(Op1); + unsigned RS2 = getRegState(Op2); + const TargetRegisterClass *IntRC = &IntRegsRegClass; + + unsigned LoSR = subreg_loreg; + unsigned HiSR = subreg_hireg; + + // Op0 = S2_asl_i_p_or Op1, Op2, Op3 + // means: Op0 = or (Op1, asl(Op2, Op3)) + + // Expansion of + // DR = or (R1, asl(R2, #s)) + // + // LoR = or (R1.lo, asl(R2.lo, #s)) + // Tmp1 = extractu R2.lo, #s, #32-s + // Tmp2 = or R1.hi, Tmp1 + // HiR = or (Tmp2, asl(R2.hi, #s)) + + if (S == 0) { + // DR = or (R1, asl(R2, #0)) + // -> or (R1, R2) + // i.e. LoR = or R1.lo, R2.lo + // HiR = or R1.hi, R2.hi + BuildMI(B, MI, DL, TII->get(A2_or), LoR) + .addReg(Op1.getReg(), RS1 & ~RegState::Kill, LoSR) + .addReg(Op2.getReg(), RS2 & ~RegState::Kill, LoSR); + BuildMI(B, MI, DL, TII->get(A2_or), HiR) + .addReg(Op1.getReg(), RS1, HiSR) + .addReg(Op2.getReg(), RS2, HiSR); + } else if (S < 32) { + BuildMI(B, MI, DL, TII->get(S2_asl_i_r_or), LoR) + .addReg(Op1.getReg(), RS1 & ~RegState::Kill, LoSR) + .addReg(Op2.getReg(), RS2 & ~RegState::Kill, LoSR) + .addImm(S); + unsigned TmpR1 = MRI->createVirtualRegister(IntRC); + BuildMI(B, MI, DL, TII->get(S2_extractu), TmpR1) + .addReg(Op2.getReg(), RS2 & ~RegState::Kill, LoSR) + .addImm(S) + .addImm(32-S); + unsigned TmpR2 = MRI->createVirtualRegister(IntRC); + BuildMI(B, MI, DL, TII->get(A2_or), TmpR2) + .addReg(Op1.getReg(), RS1, HiSR) + .addReg(TmpR1); + BuildMI(B, MI, DL, TII->get(S2_asl_i_r_or), HiR) + .addReg(TmpR2) + .addReg(Op2.getReg(), RS2, HiSR) + .addImm(S); + } else if (S == 32) { + // DR = or (R1, asl(R2, #32)) + // -> or R1, R2.lo + // LoR = R1.lo + // HiR = or R1.hi, R2.lo + BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), LoR) + .addReg(Op1.getReg(), RS1 & ~RegState::Kill, LoSR); + BuildMI(B, MI, DL, TII->get(A2_or), HiR) + .addReg(Op1.getReg(), RS1, HiSR) + .addReg(Op2.getReg(), RS2, LoSR); + } else if (S < 64) { + // DR = or (R1, asl(R2, #s)) + // + // LoR = R1:lo + // HiR = or (R1:hi, asl(R2:lo, #s-32)) + S -= 32; + BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), LoR) + .addReg(Op1.getReg(), RS1 & ~RegState::Kill, LoSR); + BuildMI(B, MI, DL, TII->get(S2_asl_i_r_or), HiR) + .addReg(Op1.getReg(), RS1, HiSR) + .addReg(Op2.getReg(), RS2, LoSR) + .addImm(S); + } +} + + +bool HexagonSplitDoubleRegs::splitInstr(MachineInstr *MI, + const UUPairMap &PairMap) { + DEBUG(dbgs() << "Splitting: " << *MI); + bool Split = false; + unsigned Opc = MI->getOpcode(); + using namespace Hexagon; + + switch (Opc) { + case TargetOpcode::PHI: + case TargetOpcode::COPY: { + unsigned DstR = MI->getOperand(0).getReg(); + if (MRI->getRegClass(DstR) == DoubleRC) { + createHalfInstr(Opc, MI, PairMap, subreg_loreg); + createHalfInstr(Opc, MI, PairMap, subreg_hireg); + Split = true; + } + break; + } + case A2_andp: + createHalfInstr(A2_and, MI, PairMap, 
subreg_loreg); + createHalfInstr(A2_and, MI, PairMap, subreg_hireg); + Split = true; + break; + case A2_orp: + createHalfInstr(A2_or, MI, PairMap, subreg_loreg); + createHalfInstr(A2_or, MI, PairMap, subreg_hireg); + Split = true; + break; + case A2_xorp: + createHalfInstr(A2_xor, MI, PairMap, subreg_loreg); + createHalfInstr(A2_xor, MI, PairMap, subreg_hireg); + Split = true; + break; + + case L2_loadrd_io: + case L2_loadrd_pi: + case S2_storerd_io: + case S2_storerd_pi: + splitMemRef(MI, PairMap); + Split = true; + break; + + case A2_tfrpi: + case CONST64_Int_Real: + splitImmediate(MI, PairMap); + Split = true; + break; + + case A2_combineii: + case A4_combineir: + case A4_combineii: + case A4_combineri: + case A2_combinew: + splitCombine(MI, PairMap); + Split = true; + break; + + case A2_sxtw: + splitExt(MI, PairMap); + Split = true; + break; + + case S2_asl_i_p: + case S2_asr_i_p: + case S2_lsr_i_p: + splitShift(MI, PairMap); + Split = true; + break; + + case S2_asl_i_p_or: + splitAslOr(MI, PairMap); + Split = true; + break; + + default: + llvm_unreachable("Instruction not splitable"); + return false; + } + + return Split; +} + + +void HexagonSplitDoubleRegs::replaceSubregUses(MachineInstr *MI, + const UUPairMap &PairMap) { + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isUse() || !Op.getSubReg()) + continue; + unsigned R = Op.getReg(); + UUPairMap::const_iterator F = PairMap.find(R); + if (F == PairMap.end()) + continue; + const UUPair &P = F->second; + switch (Op.getSubReg()) { + case Hexagon::subreg_loreg: + Op.setReg(P.first); + break; + case Hexagon::subreg_hireg: + Op.setReg(P.second); + break; + } + Op.setSubReg(0); + } +} + + +void HexagonSplitDoubleRegs::collapseRegPairs(MachineInstr *MI, + const UUPairMap &PairMap) { + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isUse()) + continue; + unsigned R = Op.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(R)) + continue; + if (MRI->getRegClass(R) != DoubleRC || Op.getSubReg()) + continue; + UUPairMap::const_iterator F = PairMap.find(R); + if (F == PairMap.end()) + continue; + const UUPair &Pr = F->second; + unsigned NewDR = MRI->createVirtualRegister(DoubleRC); + BuildMI(B, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), NewDR) + .addReg(Pr.first) + .addImm(Hexagon::subreg_loreg) + .addReg(Pr.second) + .addImm(Hexagon::subreg_hireg); + Op.setReg(NewDR); + } +} + + +bool HexagonSplitDoubleRegs::splitPartition(const USet &Part) { + const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass; + typedef std::set<MachineInstr*> MISet; + bool Changed = false; + + DEBUG(dbgs() << "Splitting partition: "; dump_partition(dbgs(), Part, *TRI); + dbgs() << '\n'); + + UUPairMap PairMap; + + MISet SplitIns; + for (unsigned DR : Part) { + MachineInstr *DefI = MRI->getVRegDef(DR); + SplitIns.insert(DefI); + + // Collect all instructions, including fixed ones. We won't split them, + // but we need to visit them again to insert the REG_SEQUENCE instructions. 
+ for (auto U = MRI->use_nodbg_begin(DR), W = MRI->use_nodbg_end(); + U != W; ++U) + SplitIns.insert(U->getParent()); + + unsigned LoR = MRI->createVirtualRegister(IntRC); + unsigned HiR = MRI->createVirtualRegister(IntRC); + DEBUG(dbgs() << "Created mapping: " << PrintReg(DR, TRI) << " -> " + << PrintReg(HiR, TRI) << ':' << PrintReg(LoR, TRI) << '\n'); + PairMap.insert(std::make_pair(DR, UUPair(LoR, HiR))); + } + + MISet Erase; + for (auto MI : SplitIns) { + if (isFixedInstr(MI)) { + collapseRegPairs(MI, PairMap); + } else { + bool Done = splitInstr(MI, PairMap); + if (Done) + Erase.insert(MI); + Changed |= Done; + } + } + + for (unsigned DR : Part) { + // Before erasing "double" instructions, revisit all uses of the double + // registers in this partition, and replace all uses of them with subre- + // gisters, with the corresponding single registers. + MISet Uses; + for (auto U = MRI->use_nodbg_begin(DR), W = MRI->use_nodbg_end(); + U != W; ++U) + Uses.insert(U->getParent()); + for (auto M : Uses) + replaceSubregUses(M, PairMap); + } + + for (auto MI : Erase) { + MachineBasicBlock *B = MI->getParent(); + B->erase(MI); + } + + return Changed; +} + + +bool HexagonSplitDoubleRegs::runOnMachineFunction(MachineFunction &MF) { + DEBUG(dbgs() << "Splitting double registers in function: " + << MF.getName() << '\n'); + + auto &ST = MF.getSubtarget<HexagonSubtarget>(); + TRI = ST.getRegisterInfo(); + TII = ST.getInstrInfo(); + MRI = &MF.getRegInfo(); + MLI = &getAnalysis<MachineLoopInfo>(); + + UUSetMap P2Rs; + LoopRegMap IRM; + + collectIndRegs(IRM); + partitionRegisters(P2Rs); + + DEBUG({ + dbgs() << "Register partitioning: (partition #0 is fixed)\n"; + for (UUSetMap::iterator I = P2Rs.begin(), E = P2Rs.end(); I != E; ++I) { + dbgs() << '#' << I->first << " -> "; + dump_partition(dbgs(), I->second, *TRI); + dbgs() << '\n'; + } + }); + + bool Changed = false; + int Limit = MaxHSDR; + + for (UUSetMap::iterator I = P2Rs.begin(), E = P2Rs.end(); I != E; ++I) { + if (I->first == 0) + continue; + if (Limit >= 0 && Counter >= Limit) + break; + USet &Part = I->second; + DEBUG(dbgs() << "Calculating profit for partition #" << I->first << '\n'); + if (!isProfitable(Part, IRM)) + continue; + Counter++; + Changed |= splitPartition(Part); + } + + return Changed; +} + +FunctionPass *llvm::createHexagonSplitDoubleRegs() { + return new HexagonSplitDoubleRegs(); +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp new file mode 100644 index 0000000..b5339ff --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp @@ -0,0 +1,616 @@ +//===--- HexagonStoreWidening.cpp------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// Replace sequences of "narrow" stores to adjacent memory locations with +// a fewer "wide" stores that have the same effect. +// For example, replace: +// S4_storeirb_io %vreg100, 0, 0 ; store-immediate-byte +// S4_storeirb_io %vreg100, 1, 0 ; store-immediate-byte +// with +// S4_storeirh_io %vreg100, 0, 0 ; store-immediate-halfword +// The above is the general idea. The actual cases handled by the code +// may be a bit more complex. +// The purpose of this pass is to reduce the number of outstanding stores, +// or as one could say, "reduce store queue pressure". 
Also, wide stores +// mean fewer stores, and since there are only two memory instructions allowed +// per packet, it also means fewer packets, and ultimately fewer cycles. +//===---------------------------------------------------------------------===// + +#define DEBUG_TYPE "hexagon-widen-stores" + +#include "HexagonTargetMachine.h" + +#include "llvm/PassSupport.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" + +#include <algorithm> + + +using namespace llvm; + +namespace llvm { + FunctionPass *createHexagonStoreWidening(); + void initializeHexagonStoreWideningPass(PassRegistry&); +} + +namespace { + struct HexagonStoreWidening : public MachineFunctionPass { + const HexagonInstrInfo *TII; + const HexagonRegisterInfo *TRI; + const MachineRegisterInfo *MRI; + AliasAnalysis *AA; + MachineFunction *MF; + + public: + static char ID; + HexagonStoreWidening() : MachineFunctionPass(ID) { + initializeHexagonStoreWideningPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + const char *getPassName() const override { + return "Hexagon Store Widening"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<AAResultsWrapperPass>(); + AU.addPreserved<AAResultsWrapperPass>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + static bool handledStoreType(const MachineInstr *MI); + + private: + static const int MaxWideSize = 4; + + typedef std::vector<MachineInstr*> InstrGroup; + typedef std::vector<InstrGroup> InstrGroupList; + + bool instrAliased(InstrGroup &Stores, const MachineMemOperand &MMO); + bool instrAliased(InstrGroup &Stores, const MachineInstr *MI); + void createStoreGroup(MachineInstr *BaseStore, InstrGroup::iterator Begin, + InstrGroup::iterator End, InstrGroup &Group); + void createStoreGroups(MachineBasicBlock &MBB, + InstrGroupList &StoreGroups); + bool processBasicBlock(MachineBasicBlock &MBB); + bool processStoreGroup(InstrGroup &Group); + bool selectStores(InstrGroup::iterator Begin, InstrGroup::iterator End, + InstrGroup &OG, unsigned &TotalSize, unsigned MaxSize); + bool createWideStores(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize); + bool replaceStores(InstrGroup &OG, InstrGroup &NG); + bool storesAreAdjacent(const MachineInstr *S1, const MachineInstr *S2); + }; + +} // namespace + + +namespace { + +// Some local helper functions... 
+unsigned getBaseAddressRegister(const MachineInstr *MI) {
+  const MachineOperand &MO = MI->getOperand(0);
+  assert(MO.isReg() && "Expecting register operand");
+  return MO.getReg();
+}
+
+int64_t getStoreOffset(const MachineInstr *MI) {
+  unsigned OpC = MI->getOpcode();
+  assert(HexagonStoreWidening::handledStoreType(MI) && "Unhandled opcode");
+
+  switch (OpC) {
+    case Hexagon::S4_storeirb_io:
+    case Hexagon::S4_storeirh_io:
+    case Hexagon::S4_storeiri_io: {
+      const MachineOperand &MO = MI->getOperand(1);
+      assert(MO.isImm() && "Expecting immediate offset");
+      return MO.getImm();
+    }
+  }
+  dbgs() << *MI;
+  llvm_unreachable("Store offset calculation missing for a handled opcode");
+  return 0;
+}
+
+const MachineMemOperand &getStoreTarget(const MachineInstr *MI) {
+  assert(!MI->memoperands_empty() && "Expecting memory operands");
+  return **MI->memoperands_begin();
+}
+
+} // namespace
+
+
+char HexagonStoreWidening::ID = 0;
+
+INITIALIZE_PASS_BEGIN(HexagonStoreWidening, "hexagon-widen-stores",
+                "Hexagon Store Widening", false, false)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_END(HexagonStoreWidening, "hexagon-widen-stores",
+                "Hexagon Store Widening", false, false)
+
+
+// Filtering function: any stores whose opcodes are not "approved" of by
+// this function will not be subjected to widening.
+inline bool HexagonStoreWidening::handledStoreType(const MachineInstr *MI) {
+  // For now, only handle stores of immediate values.
+  // Also, reject stores to stack slots.
+  unsigned Opc = MI->getOpcode();
+  switch (Opc) {
+    case Hexagon::S4_storeirb_io:
+    case Hexagon::S4_storeirh_io:
+    case Hexagon::S4_storeiri_io:
+      // Base address must be a register. (Implement FI later.)
+      return MI->getOperand(0).isReg();
+    default:
+      return false;
+  }
+}
+
+
+// Check if the machine memory operand MMO is aliased with any of the
+// stores in the store group Stores.
+bool HexagonStoreWidening::instrAliased(InstrGroup &Stores,
+      const MachineMemOperand &MMO) {
+  if (!MMO.getValue())
+    return true;
+
+  MemoryLocation L(MMO.getValue(), MMO.getSize(), MMO.getAAInfo());
+
+  for (auto SI : Stores) {
+    const MachineMemOperand &SMO = getStoreTarget(SI);
+    if (!SMO.getValue())
+      return true;
+
+    MemoryLocation SL(SMO.getValue(), SMO.getSize(), SMO.getAAInfo());
+    if (AA->alias(L, SL))
+      return true;
+  }
+
+  return false;
+}
+
+
+// Check if the machine instruction MI accesses any storage aliased with
+// any store in the group Stores.
+bool HexagonStoreWidening::instrAliased(InstrGroup &Stores,
+      const MachineInstr *MI) {
+  for (auto &I : MI->memoperands())
+    if (instrAliased(Stores, *I))
+      return true;
+  return false;
+}
+
+
+// Inspect a machine basic block, and generate store groups out of stores
+// encountered in the block.
+//
+// A store group is a group of stores that use the same base register,
+// and which can be reordered within that group without altering the
+// semantics of the program. A single store group could be widened as
+// a whole, if there existed a single store instruction with the same
+// semantics as the entire group. In many cases, a single store group
+// may need more than one wide store.
+void HexagonStoreWidening::createStoreGroups(MachineBasicBlock &MBB,
+      InstrGroupList &StoreGroups) {
+  InstrGroup AllInsns;
+
+  // Copy all instruction pointers from the basic block to a temporary
+  // list. This will allow operating on the list, and modifying its
+  // elements without affecting the basic block.
+ for (auto &I : MBB) + AllInsns.push_back(&I); + + // Traverse all instructions in the AllInsns list, and if we encounter + // a store, then try to create a store group starting at that instruction + // i.e. a sequence of independent stores that can be widened. + for (auto I = AllInsns.begin(), E = AllInsns.end(); I != E; ++I) { + MachineInstr *MI = *I; + // Skip null pointers (processed instructions). + if (!MI || !handledStoreType(MI)) + continue; + + // Found a store. Try to create a store group. + InstrGroup G; + createStoreGroup(MI, I+1, E, G); + if (G.size() > 1) + StoreGroups.push_back(G); + } +} + + +// Create a single store group. The stores need to be independent between +// themselves, and also there cannot be other instructions between them +// that could read or modify storage being stored into. +void HexagonStoreWidening::createStoreGroup(MachineInstr *BaseStore, + InstrGroup::iterator Begin, InstrGroup::iterator End, InstrGroup &Group) { + assert(handledStoreType(BaseStore) && "Unexpected instruction"); + unsigned BaseReg = getBaseAddressRegister(BaseStore); + InstrGroup Other; + + Group.push_back(BaseStore); + + for (auto I = Begin; I != End; ++I) { + MachineInstr *MI = *I; + if (!MI) + continue; + + if (handledStoreType(MI)) { + // If this store instruction is aliased with anything already in the + // group, terminate the group now. + if (instrAliased(Group, getStoreTarget(MI))) + return; + // If this store is aliased to any of the memory instructions we have + // seen so far (that are not a part of this group), terminate the group. + if (instrAliased(Other, getStoreTarget(MI))) + return; + + unsigned BR = getBaseAddressRegister(MI); + if (BR == BaseReg) { + Group.push_back(MI); + *I = 0; + continue; + } + } + + // Assume calls are aliased to everything. + if (MI->isCall() || MI->hasUnmodeledSideEffects()) + return; + + if (MI->mayLoad() || MI->mayStore()) { + if (MI->hasOrderedMemoryRef() || instrAliased(Group, MI)) + return; + Other.push_back(MI); + } + } // for +} + + +// Check if store instructions S1 and S2 are adjacent. More precisely, +// S2 has to access memory immediately following that accessed by S1. +bool HexagonStoreWidening::storesAreAdjacent(const MachineInstr *S1, + const MachineInstr *S2) { + if (!handledStoreType(S1) || !handledStoreType(S2)) + return false; + + const MachineMemOperand &S1MO = getStoreTarget(S1); + + // Currently only handling immediate stores. + int Off1 = S1->getOperand(1).getImm(); + int Off2 = S2->getOperand(1).getImm(); + + return (Off1 >= 0) ? Off1+S1MO.getSize() == unsigned(Off2) + : int(Off1+S1MO.getSize()) == Off2; +} + + +/// Given a sequence of adjacent stores, and a maximum size of a single wide +/// store, pick a group of stores that can be replaced by a single store +/// of size not exceeding MaxSize. The selected sequence will be recorded +/// in OG ("old group" of instructions). +/// OG should be empty on entry, and should be left empty if the function +/// fails. 
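+/// For illustration (derived from the selection logic below): three adjacent
+/// byte-sized store-immediates at offsets 0, 1 and 2 add up to three bytes,
+/// but only the leading power-of-2 prefix is kept, so the first two stores
+/// are selected and later widened into one halfword store, provided the
+/// first store's memory operand is at least halfword-aligned.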
+bool HexagonStoreWidening::selectStores(InstrGroup::iterator Begin,
+      InstrGroup::iterator End, InstrGroup &OG, unsigned &TotalSize,
+      unsigned MaxSize) {
+  assert(Begin != End && "No instructions to analyze");
+  assert(OG.empty() && "Old group not empty on entry");
+
+  if (std::distance(Begin, End) <= 1)
+    return false;
+
+  MachineInstr *FirstMI = *Begin;
+  assert(!FirstMI->memoperands_empty() && "Expecting some memory operands");
+  const MachineMemOperand &FirstMMO = getStoreTarget(FirstMI);
+  unsigned Alignment = FirstMMO.getAlignment();
+  unsigned SizeAccum = FirstMMO.getSize();
+  unsigned FirstOffset = getStoreOffset(FirstMI);
+
+  // The initial value of SizeAccum should always be a power of 2.
+  assert(isPowerOf2_32(SizeAccum) && "First store size not a power of 2");
+
+  // If the size of the first store equals to or exceeds the limit, do nothing.
+  if (SizeAccum >= MaxSize)
+    return false;
+
+  // If the size of the first store is greater than or equal to the alignment
+  // of the stored-to address, then the store cannot be made any wider.
+  if (SizeAccum >= Alignment)
+    return false;
+
+  // The offset of a store will put restrictions on how wide the store can be.
+  // Offsets in stores of size 2^n bytes need to have the n lowest bits be 0.
+  // If the first store already exhausts the offset limits, quit. Test this
+  // by checking if the next wider size would exceed the limit.
+  if ((2*SizeAccum-1) & FirstOffset)
+    return false;
+
+  OG.push_back(FirstMI);
+  MachineInstr *S1 = FirstMI, *S2 = *(Begin+1);
+  InstrGroup::iterator I = Begin+1;
+
+  // Pow2Num will be the largest number of elements in OG such that the sum
+  // of sizes of stores 0...Pow2Num-1 will be a power of 2.
+  unsigned Pow2Num = 1;
+  unsigned Pow2Size = SizeAccum;
+
+  // Be greedy: keep accumulating stores as long as they are to adjacent
+  // memory locations, and as long as the total number of bytes stored
+  // does not exceed the limit (MaxSize).
+  // Keep track of when the total size covered is a power of 2, since
+  // this is a size a single store can cover.
+  while (I != End) {
+    S2 = *I;
+    // Stores are sorted, so if S1 and S2 are not adjacent, there won't be
+    // any other store to fill the "hole".
+    if (!storesAreAdjacent(S1, S2))
+      break;
+
+    unsigned S2Size = getStoreTarget(S2).getSize();
+    if (SizeAccum + S2Size > std::min(MaxSize, Alignment))
+      break;
+
+    OG.push_back(S2);
+    SizeAccum += S2Size;
+    if (isPowerOf2_32(SizeAccum)) {
+      Pow2Num = OG.size();
+      Pow2Size = SizeAccum;
+    }
+    if ((2*Pow2Size-1) & FirstOffset)
+      break;
+
+    S1 = S2;
+    ++I;
+  }
+
+  // The stores don't add up to anything that can be widened. Clean up.
+  if (Pow2Num <= 1) {
+    OG.clear();
+    return false;
+  }
+
+  // Only leave the stores being widened.
+  OG.resize(Pow2Num);
+  TotalSize = Pow2Size;
+  return true;
+}
+
+
+/// Given an "old group" OG of stores, create a "new group" NG of instructions
+/// to replace them. Ideally, NG would only have a single instruction in it,
+/// but that may only be possible for store-immediate.
+bool HexagonStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG,
+      unsigned TotalSize) {
+  // XXX Current limitations:
+  // - only expect stores of immediate values in OG,
+  // - only handle a TotalSize of up to 4.
+
+  if (TotalSize > 4)
+    return false;
+
+  unsigned Acc = 0; // Value accumulator.
+  unsigned Shift = 0;
+
+  for (InstrGroup::iterator I = OG.begin(), E = OG.end(); I != E; ++I) {
+    MachineInstr *MI = *I;
+    const MachineMemOperand &MMO = getStoreTarget(MI);
+    MachineOperand &SO = MI->getOperand(2); // Source.
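+    // For example (derived from this loop): widening byte stores of #0x12 at
+    // offset 0 and #0x34 at offset 1 accumulates Acc = 0x3412, which is then
+    // emitted below as a single S4_storeirh_io with immediate 0x3412.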
+ assert(SO.isImm() && "Expecting an immediate operand"); + + unsigned NBits = MMO.getSize()*8; + unsigned Mask = (0xFFFFFFFFU >> (32-NBits)); + unsigned Val = (SO.getImm() & Mask) << Shift; + Acc |= Val; + Shift += NBits; + } + + + MachineInstr *FirstSt = OG.front(); + DebugLoc DL = OG.back()->getDebugLoc(); + const MachineMemOperand &OldM = getStoreTarget(FirstSt); + MachineMemOperand *NewM = + MF->getMachineMemOperand(OldM.getPointerInfo(), OldM.getFlags(), + TotalSize, OldM.getAlignment(), + OldM.getAAInfo()); + + if (Acc < 0x10000) { + // Create mem[hw] = #Acc + unsigned WOpc = (TotalSize == 2) ? Hexagon::S4_storeirh_io : + (TotalSize == 4) ? Hexagon::S4_storeiri_io : 0; + assert(WOpc && "Unexpected size"); + + int Val = (TotalSize == 2) ? int16_t(Acc) : int(Acc); + const MCInstrDesc &StD = TII->get(WOpc); + MachineOperand &MR = FirstSt->getOperand(0); + int64_t Off = FirstSt->getOperand(1).getImm(); + MachineInstr *StI = BuildMI(*MF, DL, StD) + .addReg(MR.getReg(), getKillRegState(MR.isKill())) + .addImm(Off) + .addImm(Val); + StI->addMemOperand(*MF, NewM); + NG.push_back(StI); + } else { + // Create vreg = A2_tfrsi #Acc; mem[hw] = vreg + const MCInstrDesc &TfrD = TII->get(Hexagon::A2_tfrsi); + const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0, TRI, *MF); + unsigned VReg = MF->getRegInfo().createVirtualRegister(RC); + MachineInstr *TfrI = BuildMI(*MF, DL, TfrD, VReg) + .addImm(int(Acc)); + NG.push_back(TfrI); + + unsigned WOpc = (TotalSize == 2) ? Hexagon::S2_storerh_io : + (TotalSize == 4) ? Hexagon::S2_storeri_io : 0; + assert(WOpc && "Unexpected size"); + + const MCInstrDesc &StD = TII->get(WOpc); + MachineOperand &MR = FirstSt->getOperand(0); + int64_t Off = FirstSt->getOperand(1).getImm(); + MachineInstr *StI = BuildMI(*MF, DL, StD) + .addReg(MR.getReg(), getKillRegState(MR.isKill())) + .addImm(Off) + .addReg(VReg, RegState::Kill); + StI->addMemOperand(*MF, NewM); + NG.push_back(StI); + } + + return true; +} + + +// Replace instructions from the old group OG with instructions from the +// new group NG. Conceptually, remove all instructions in OG, and then +// insert all instructions in NG, starting at where the first instruction +// from OG was (in the order in which they appeared in the basic block). +// (The ordering in OG does not have to match the order in the basic block.) +bool HexagonStoreWidening::replaceStores(InstrGroup &OG, InstrGroup &NG) { + DEBUG({ + dbgs() << "Replacing:\n"; + for (auto I : OG) + dbgs() << " " << *I; + dbgs() << "with\n"; + for (auto I : NG) + dbgs() << " " << *I; + }); + + MachineBasicBlock *MBB = OG.back()->getParent(); + MachineBasicBlock::iterator InsertAt = MBB->end(); + + // Need to establish the insertion point. The best one is right before + // the first store in the OG, but in the order in which the stores occur + // in the program list. Since the ordering in OG does not correspond + // to the order in the program list, we need to do some work to find + // the insertion point. + + // Create a set of all instructions in OG (for quick lookup). + SmallPtrSet<MachineInstr*, 4> InstrSet; + for (auto I : OG) + InstrSet.insert(I); + + // Traverse the block, until we hit an instruction from OG. + for (auto &I : *MBB) { + if (InstrSet.count(&I)) { + InsertAt = I; + break; + } + } + + assert((InsertAt != MBB->end()) && "Cannot locate any store from the group"); + + bool AtBBStart = false; + + // InsertAt points at the first instruction that will be removed. 
We need + // to move it out of the way, so it remains valid after removing all the + // old stores, and so we are able to recover it back to the proper insertion + // position. + if (InsertAt != MBB->begin()) + --InsertAt; + else + AtBBStart = true; + + for (auto I : OG) + I->eraseFromParent(); + + if (!AtBBStart) + ++InsertAt; + else + InsertAt = MBB->begin(); + + for (auto I : NG) + MBB->insert(InsertAt, I); + + return true; +} + + +// Break up the group into smaller groups, each of which can be replaced by +// a single wide store. Widen each such smaller group and replace the old +// instructions with the widened ones. +bool HexagonStoreWidening::processStoreGroup(InstrGroup &Group) { + bool Changed = false; + InstrGroup::iterator I = Group.begin(), E = Group.end(); + InstrGroup OG, NG; // Old and new groups. + unsigned CollectedSize; + + while (I != E) { + OG.clear(); + NG.clear(); + + bool Succ = selectStores(I++, E, OG, CollectedSize, MaxWideSize) && + createWideStores(OG, NG, CollectedSize) && + replaceStores(OG, NG); + if (!Succ) + continue; + + assert(OG.size() > 1 && "Created invalid group"); + assert(distance(I, E)+1 >= int(OG.size()) && "Too many elements"); + I += OG.size()-1; + + Changed = true; + } + + return Changed; +} + + +// Process a single basic block: create the store groups, and replace them +// with the widened stores, if possible. Processing of each basic block +// is independent from processing of any other basic block. This transfor- +// mation could be stopped after having processed any basic block without +// any ill effects (other than not having performed widening in the unpro- +// cessed blocks). Also, the basic blocks can be processed in any order. +bool HexagonStoreWidening::processBasicBlock(MachineBasicBlock &MBB) { + InstrGroupList SGs; + bool Changed = false; + + createStoreGroups(MBB, SGs); + + auto Less = [] (const MachineInstr *A, const MachineInstr *B) -> bool { + return getStoreOffset(A) < getStoreOffset(B); + }; + for (auto &G : SGs) { + assert(G.size() > 1 && "Store group with fewer than 2 elements"); + std::sort(G.begin(), G.end(), Less); + + Changed |= processStoreGroup(G); + } + + return Changed; +} + + +bool HexagonStoreWidening::runOnMachineFunction(MachineFunction &MFn) { + MF = &MFn; + auto &ST = MFn.getSubtarget<HexagonSubtarget>(); + TII = ST.getInstrInfo(); + TRI = ST.getRegisterInfo(); + MRI = &MFn.getRegInfo(); + AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); + + bool Changed = false; + + for (auto &B : MFn) + Changed |= processBasicBlock(B); + + return Changed; +} + + +FunctionPass *llvm::createHexagonStoreWidening() { + return new HexagonStoreWidening(); +} + diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp new file mode 100644 index 0000000..aa0efd4 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -0,0 +1,125 @@ +//===-- HexagonSubtarget.cpp - Hexagon Subtarget Information --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Hexagon specific subclass of TargetSubtarget. 
+// +//===----------------------------------------------------------------------===// + +#include "HexagonSubtarget.h" +#include "Hexagon.h" +#include "HexagonRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include <map> + +using namespace llvm; + +#define DEBUG_TYPE "hexagon-subtarget" + +#define GET_SUBTARGETINFO_CTOR +#define GET_SUBTARGETINFO_TARGET_DESC +#include "HexagonGenSubtargetInfo.inc" + +static cl::opt<bool> EnableMemOps("enable-hexagon-memops", + cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed, cl::init(true), + cl::desc("Generate V4 MEMOP in code generation for Hexagon target")); + +static cl::opt<bool> DisableMemOps("disable-hexagon-memops", + cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed, cl::init(false), + cl::desc("Do not generate V4 MEMOP in code generation for Hexagon target")); + +static cl::opt<bool> EnableIEEERndNear("enable-hexagon-ieee-rnd-near", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Generate non-chopped conversion from fp to int.")); + +static cl::opt<bool> EnableBSBSched("enable-bsb-sched", + cl::Hidden, cl::ZeroOrMore, cl::init(true)); + +static cl::opt<bool> EnableHexagonHVXDouble("enable-hexagon-hvx-double", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Enable Hexagon Double Vector eXtensions")); + +static cl::opt<bool> EnableHexagonHVX("enable-hexagon-hvx", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Enable Hexagon Vector eXtensions")); + +static cl::opt<bool> DisableHexagonMISched("disable-hexagon-misched", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Disable Hexagon MI Scheduling")); + +void HexagonSubtarget::initializeEnvironment() { + UseMemOps = false; + ModeIEEERndNear = false; + UseBSBScheduling = false; +} + +HexagonSubtarget & +HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { + CPUString = HEXAGON_MC::selectHexagonCPU(getTargetTriple(), CPU); + + static std::map<StringRef, HexagonArchEnum> CpuTable { + { "hexagonv4", V4 }, + { "hexagonv5", V5 }, + { "hexagonv55", V55 }, + { "hexagonv60", V60 }, + }; + + auto foundIt = CpuTable.find(CPUString); + if (foundIt != CpuTable.end()) + HexagonArchVersion = foundIt->second; + else + llvm_unreachable("Unrecognized Hexagon processor version"); + + UseHVXOps = false; + UseHVXDblOps = false; + ParseSubtargetFeatures(CPUString, FS); + + if (EnableHexagonHVX.getPosition()) + UseHVXOps = EnableHexagonHVX; + if (EnableHexagonHVXDouble.getPosition()) + UseHVXDblOps = EnableHexagonHVXDouble; + + return *this; +} + +HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU, + StringRef FS, const TargetMachine &TM) + : HexagonGenSubtargetInfo(TT, CPU, FS), CPUString(CPU), + InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this), + FrameLowering() { + + initializeEnvironment(); + + // Initialize scheduling itinerary for the specified CPU. + InstrItins = getInstrItineraryForCPU(CPUString); + + // UseMemOps on by default unless disabled explicitly + if (DisableMemOps) + UseMemOps = false; + else if (EnableMemOps) + UseMemOps = true; + else + UseMemOps = false; + + if (EnableIEEERndNear) + ModeIEEERndNear = true; + else + ModeIEEERndNear = false; + + UseBSBScheduling = hasV60TOps() && EnableBSBSched; +} + +// Pin the vtable to this file. 
+void HexagonSubtarget::anchor() {} + +bool HexagonSubtarget::enableMachineScheduler() const { + if (DisableHexagonMISched.getNumOccurrences()) + return !DisableHexagonMISched; + return true; +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h new file mode 100644 index 0000000..c7ae139 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h @@ -0,0 +1,121 @@ +//===-- HexagonSubtarget.h - Define Subtarget for the Hexagon ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the Hexagon specific subclass of TargetSubtarget. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONSUBTARGET_H +#define LLVM_LIB_TARGET_HEXAGON_HEXAGONSUBTARGET_H + +#include "HexagonFrameLowering.h" +#include "HexagonISelLowering.h" +#include "HexagonInstrInfo.h" +#include "HexagonSelectionDAGInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <string> + +#define GET_SUBTARGETINFO_HEADER +#include "HexagonGenSubtargetInfo.inc" + +#define Hexagon_SMALL_DATA_THRESHOLD 8 +#define Hexagon_SLOTS 4 + +namespace llvm { + +class HexagonSubtarget : public HexagonGenSubtargetInfo { + virtual void anchor(); + + bool UseMemOps, UseHVXOps, UseHVXDblOps; + bool ModeIEEERndNear; + +public: + enum HexagonArchEnum { + V4, V5, V55, V60 + }; + + HexagonArchEnum HexagonArchVersion; + /// True if the target should use Back-Skip-Back scheduling. This is the + /// default for V60. + bool UseBSBScheduling; + +private: + std::string CPUString; + HexagonInstrInfo InstrInfo; + HexagonTargetLowering TLInfo; + HexagonSelectionDAGInfo TSInfo; + HexagonFrameLowering FrameLowering; + InstrItineraryData InstrItins; + void initializeEnvironment(); + +public: + HexagonSubtarget(const Triple &TT, StringRef CPU, StringRef FS, + const TargetMachine &TM); + + /// getInstrItins - Return the instruction itineraries based on subtarget + /// selection. + const InstrItineraryData *getInstrItineraryData() const override { + return &InstrItins; + } + const HexagonInstrInfo *getInstrInfo() const override { return &InstrInfo; } + const HexagonRegisterInfo *getRegisterInfo() const override { + return &InstrInfo.getRegisterInfo(); + } + const HexagonTargetLowering *getTargetLowering() const override { + return &TLInfo; + } + const HexagonFrameLowering *getFrameLowering() const override { + return &FrameLowering; + } + const HexagonSelectionDAGInfo *getSelectionDAGInfo() const override { + return &TSInfo; + } + + HexagonSubtarget &initializeSubtargetDependencies(StringRef CPU, + StringRef FS); + + /// ParseSubtargetFeatures - Parses features string setting specified + /// subtarget options. Definition of function is auto generated by tblgen. 
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS); + + bool useMemOps() const { return UseMemOps; } + bool hasV5TOps() const { return getHexagonArchVersion() >= V5; } + bool hasV5TOpsOnly() const { return getHexagonArchVersion() == V5; } + bool hasV55TOps() const { return getHexagonArchVersion() >= V55; } + bool hasV55TOpsOnly() const { return getHexagonArchVersion() == V55; } + bool hasV60TOps() const { return getHexagonArchVersion() >= V60; } + bool hasV60TOpsOnly() const { return getHexagonArchVersion() == V60; } + bool modeIEEERndNear() const { return ModeIEEERndNear; } + bool useHVXOps() const { return UseHVXOps; } + bool useHVXDblOps() const { return UseHVXOps && UseHVXDblOps; } + bool useHVXSglOps() const { return UseHVXOps && !UseHVXDblOps; } + + bool useBSBScheduling() const { return UseBSBScheduling; } + bool enableMachineScheduler() const override; + // Always use the TargetLowering default scheduler. + // FIXME: This will use the vliw scheduler which is probably just hurting + // compiler time and will be removed eventually anyway. + bool enableMachineSchedDefaultSched() const override { return false; } + + const std::string &getCPUString () const { return CPUString; } + + // Threshold for small data section + unsigned getSmallDataThreshold() const { + return Hexagon_SMALL_DATA_THRESHOLD; + } + const HexagonArchEnum &getHexagonArchVersion() const { + return HexagonArchVersion; + } +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp new file mode 100644 index 0000000..9dccd69 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -0,0 +1,299 @@ +//===-- HexagonTargetMachine.cpp - Define TargetMachine for Hexagon -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implements the info about Hexagon target spec. 
+// +//===----------------------------------------------------------------------===// + +#include "HexagonTargetMachine.h" +#include "Hexagon.h" +#include "HexagonISelLowering.h" +#include "HexagonMachineScheduler.h" +#include "HexagonTargetObjectFile.h" +#include "HexagonTargetTransformInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Transforms/Scalar.h" + +using namespace llvm; + +static cl:: opt<bool> DisableHardwareLoops("disable-hexagon-hwloops", + cl::Hidden, cl::desc("Disable Hardware Loops for Hexagon target")); + +static cl::opt<bool> DisableHexagonCFGOpt("disable-hexagon-cfgopt", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Disable Hexagon CFG Optimization")); + +static cl::opt<bool> DisableStoreWidening("disable-store-widen", + cl::Hidden, cl::init(false), cl::desc("Disable store widening")); + +static cl::opt<bool> EnableExpandCondsets("hexagon-expand-condsets", + cl::init(true), cl::Hidden, cl::ZeroOrMore, + cl::desc("Early expansion of MUX")); + +static cl::opt<bool> EnableEarlyIf("hexagon-eif", cl::init(true), cl::Hidden, + cl::ZeroOrMore, cl::desc("Enable early if-conversion")); + +static cl::opt<bool> EnableGenInsert("hexagon-insert", cl::init(true), + cl::Hidden, cl::desc("Generate \"insert\" instructions")); + +static cl::opt<bool> EnableCommGEP("hexagon-commgep", cl::init(true), + cl::Hidden, cl::ZeroOrMore, cl::desc("Enable commoning of GEP instructions")); + +static cl::opt<bool> EnableGenExtract("hexagon-extract", cl::init(true), + cl::Hidden, cl::desc("Generate \"extract\" instructions")); + +static cl::opt<bool> EnableGenMux("hexagon-mux", cl::init(true), cl::Hidden, + cl::desc("Enable converting conditional transfers into MUX instructions")); + +static cl::opt<bool> EnableGenPred("hexagon-gen-pred", cl::init(true), + cl::Hidden, cl::desc("Enable conversion of arithmetic operations to " + "predicate instructions")); + +static cl::opt<bool> DisableHSDR("disable-hsdr", cl::init(false), cl::Hidden, + cl::desc("Disable splitting double registers")); + +static cl::opt<bool> EnableBitSimplify("hexagon-bit", cl::init(true), + cl::Hidden, cl::desc("Bit simplification")); + +static cl::opt<bool> EnableLoopResched("hexagon-loop-resched", cl::init(true), + cl::Hidden, cl::desc("Loop rescheduling")); + +/// HexagonTargetMachineModule - Note that this is used on hosts that +/// cannot link in a library unless there are references into the +/// library. In particular, it seems that it is not possible to get +/// things to work on Win32 without this. Though it is unused, do not +/// remove it. +extern "C" int HexagonTargetMachineModule; +int HexagonTargetMachineModule = 0; + +extern "C" void LLVMInitializeHexagonTarget() { + // Register the target. 
+ RegisterTargetMachine<HexagonTargetMachine> X(TheHexagonTarget); +} + +static ScheduleDAGInstrs *createVLIWMachineSched(MachineSchedContext *C) { + return new VLIWMachineScheduler(C, make_unique<ConvergingVLIWScheduler>()); +} + +static MachineSchedRegistry +SchedCustomRegistry("hexagon", "Run Hexagon's custom scheduler", + createVLIWMachineSched); + +namespace llvm { + FunctionPass *createHexagonBitSimplify(); + FunctionPass *createHexagonCallFrameInformation(); + FunctionPass *createHexagonCFGOptimizer(); + FunctionPass *createHexagonCommonGEP(); + FunctionPass *createHexagonCopyToCombine(); + FunctionPass *createHexagonEarlyIfConversion(); + FunctionPass *createHexagonExpandCondsets(); + FunctionPass *createHexagonExpandPredSpillCode(); + FunctionPass *createHexagonFixupHwLoops(); + FunctionPass *createHexagonGenExtract(); + FunctionPass *createHexagonGenInsert(); + FunctionPass *createHexagonGenMux(); + FunctionPass *createHexagonGenPredicate(); + FunctionPass *createHexagonHardwareLoops(); + FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM, + CodeGenOpt::Level OptLevel); + FunctionPass *createHexagonLoopRescheduling(); + FunctionPass *createHexagonNewValueJump(); + FunctionPass *createHexagonOptimizeSZextends(); + FunctionPass *createHexagonPacketizer(); + FunctionPass *createHexagonPeephole(); + FunctionPass *createHexagonSplitConst32AndConst64(); + FunctionPass *createHexagonSplitDoubleRegs(); + FunctionPass *createHexagonStoreWidening(); +} // end namespace llvm; + +/// HexagonTargetMachine ctor - Create an ILP32 architecture model. +/// + +/// Hexagon_TODO: Do I need an aggregate alignment? +/// +HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT, + StringRef CPU, StringRef FS, + const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : LLVMTargetMachine(T, "e-m:e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-" + "i1:8:8-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a:0-" + "n16:32", TT, CPU, FS, Options, RM, CM, OL), + TLOF(make_unique<HexagonTargetObjectFile>()) { + initAsmInfo(); +} + +const HexagonSubtarget * +HexagonTargetMachine::getSubtargetImpl(const Function &F) const { + AttributeSet FnAttrs = F.getAttributes(); + Attribute CPUAttr = + FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-cpu"); + Attribute FSAttr = + FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-features"); + + std::string CPU = !CPUAttr.hasAttribute(Attribute::None) + ? CPUAttr.getValueAsString().str() + : TargetCPU; + std::string FS = !FSAttr.hasAttribute(Attribute::None) + ? FSAttr.getValueAsString().str() + : TargetFS; + + auto &I = SubtargetMap[CPU + FS]; + if (!I) { + // This needs to be done before we create a new subtarget since any + // creation will depend on the TM and the code generation flags on the + // function that reside in TargetOptions. + resetTargetOptions(F); + I = llvm::make_unique<HexagonSubtarget>(TargetTriple, CPU, FS, *this); + } + return I.get(); +} + +TargetIRAnalysis HexagonTargetMachine::getTargetIRAnalysis() { + return TargetIRAnalysis([this](const Function &F) { + return TargetTransformInfo(HexagonTTIImpl(this, F)); + }); +} + + +HexagonTargetMachine::~HexagonTargetMachine() {} + +namespace { +/// Hexagon Code Generator Pass Configuration Options. 
+class HexagonPassConfig : public TargetPassConfig { +public: + HexagonPassConfig(HexagonTargetMachine *TM, PassManagerBase &PM) + : TargetPassConfig(TM, PM) { + bool NoOpt = (TM->getOptLevel() == CodeGenOpt::None); + if (!NoOpt) { + if (EnableExpandCondsets) { + Pass *Exp = createHexagonExpandCondsets(); + insertPass(&RegisterCoalescerID, IdentifyingPassPtr(Exp)); + } + } + } + + HexagonTargetMachine &getHexagonTargetMachine() const { + return getTM<HexagonTargetMachine>(); + } + + ScheduleDAGInstrs * + createMachineScheduler(MachineSchedContext *C) const override { + return createVLIWMachineSched(C); + } + + void addIRPasses() override; + bool addInstSelector() override; + void addPreRegAlloc() override; + void addPostRegAlloc() override; + void addPreSched2() override; + void addPreEmitPass() override; +}; +} // namespace + +TargetPassConfig *HexagonTargetMachine::createPassConfig(PassManagerBase &PM) { + return new HexagonPassConfig(this, PM); +} + +void HexagonPassConfig::addIRPasses() { + TargetPassConfig::addIRPasses(); + bool NoOpt = (getOptLevel() == CodeGenOpt::None); + + addPass(createAtomicExpandPass(TM)); + if (!NoOpt) { + if (EnableCommGEP) + addPass(createHexagonCommonGEP()); + // Replace certain combinations of shifts and ands with extracts. + if (EnableGenExtract) + addPass(createHexagonGenExtract()); + } +} + +bool HexagonPassConfig::addInstSelector() { + HexagonTargetMachine &TM = getHexagonTargetMachine(); + bool NoOpt = (getOptLevel() == CodeGenOpt::None); + + if (!NoOpt) + addPass(createHexagonOptimizeSZextends()); + + addPass(createHexagonISelDag(TM, getOptLevel())); + + if (!NoOpt) { + // Create logical operations on predicate registers. + if (EnableGenPred) + addPass(createHexagonGenPredicate(), false); + // Rotate loops to expose bit-simplification opportunities. + if (EnableLoopResched) + addPass(createHexagonLoopRescheduling(), false); + // Split double registers. + if (!DisableHSDR) + addPass(createHexagonSplitDoubleRegs()); + // Bit simplification. + if (EnableBitSimplify) + addPass(createHexagonBitSimplify(), false); + addPass(createHexagonPeephole()); + printAndVerify("After hexagon peephole pass"); + if (EnableGenInsert) + addPass(createHexagonGenInsert(), false); + if (EnableEarlyIf) + addPass(createHexagonEarlyIfConversion(), false); + } + + return false; +} + +void HexagonPassConfig::addPreRegAlloc() { + if (getOptLevel() != CodeGenOpt::None) { + if (!DisableStoreWidening) + addPass(createHexagonStoreWidening(), false); + if (!DisableHardwareLoops) + addPass(createHexagonHardwareLoops(), false); + } +} + +void HexagonPassConfig::addPostRegAlloc() { + if (getOptLevel() != CodeGenOpt::None) + if (!DisableHexagonCFGOpt) + addPass(createHexagonCFGOptimizer(), false); +} + +void HexagonPassConfig::addPreSched2() { + addPass(createHexagonCopyToCombine(), false); + if (getOptLevel() != CodeGenOpt::None) + addPass(&IfConverterID, false); + addPass(createHexagonSplitConst32AndConst64()); +} + +void HexagonPassConfig::addPreEmitPass() { + bool NoOpt = (getOptLevel() == CodeGenOpt::None); + + if (!NoOpt) + addPass(createHexagonNewValueJump(), false); + + // Expand Spill code for predicate registers. + addPass(createHexagonExpandPredSpillCode(), false); + + // Create Packets. + if (!NoOpt) { + if (!DisableHardwareLoops) + addPass(createHexagonFixupHwLoops(), false); + // Generate MUX from pairs of conditional transfers. 
+ if (EnableGenMux) + addPass(createHexagonGenMux(), false); + + addPass(createHexagonPacketizer(), false); + } + + // Add CFI instructions if necessary. + addPass(createHexagonCallFrameInformation(), false); +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.h b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.h new file mode 100644 index 0000000..968814b --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.h @@ -0,0 +1,50 @@ +//=-- HexagonTargetMachine.h - Define TargetMachine for Hexagon ---*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the Hexagon specific subclass of TargetMachine. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONTARGETMACHINE_H +#define LLVM_LIB_TARGET_HEXAGON_HEXAGONTARGETMACHINE_H + +#include "HexagonInstrInfo.h" +#include "HexagonSubtarget.h" +#include "HexagonTargetObjectFile.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + +class Module; + +class HexagonTargetMachine : public LLVMTargetMachine { + std::unique_ptr<TargetLoweringObjectFile> TLOF; + mutable StringMap<std::unique_ptr<HexagonSubtarget>> SubtargetMap; + +public: + HexagonTargetMachine(const Target &T, const Triple &TT, StringRef CPU, + StringRef FS, const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); + ~HexagonTargetMachine() override; + const HexagonSubtarget *getSubtargetImpl(const Function &F) const override; + + static unsigned getModuleMatchQuality(const Module &M); + + TargetPassConfig *createPassConfig(PassManagerBase &PM) override; + TargetIRAnalysis getTargetIRAnalysis() override; + + HexagonTargetObjectFile *getObjFileLowering() const override { + return static_cast<HexagonTargetObjectFile*>(TLOF.get()); + } +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp new file mode 100644 index 0000000..ccca620 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp @@ -0,0 +1,98 @@ +//===-- HexagonTargetObjectFile.cpp - Hexagon asm properties --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declarations of the HexagonTargetAsmInfo properties. 
+// +//===----------------------------------------------------------------------===// + +#include "HexagonTargetObjectFile.h" +#include "HexagonSubtarget.h" +#include "HexagonTargetMachine.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/MC/MCContext.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ELF.h" + +using namespace llvm; + +static cl::opt<int> SmallDataThreshold("hexagon-small-data-threshold", + cl::init(8), cl::Hidden, + cl::desc("The maximum size of an object in the sdata section")); + +void HexagonTargetObjectFile::Initialize(MCContext &Ctx, + const TargetMachine &TM) { + TargetLoweringObjectFileELF::Initialize(Ctx, TM); + InitializeELF(TM.Options.UseInitArray); + + SmallDataSection = getContext().getELFSection( + ".sdata", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC); + SmallBSSSection = getContext().getELFSection(".sbss", ELF::SHT_NOBITS, + ELF::SHF_WRITE | ELF::SHF_ALLOC); +} + +// sdata/sbss support taken largely from the MIPS Backend. +static bool IsInSmallSection(uint64_t Size) { + return Size > 0 && Size <= (uint64_t)SmallDataThreshold; +} + +bool HexagonTargetObjectFile::IsSmallDataEnabled () const { + return SmallDataThreshold > 0; +} + +/// IsGlobalInSmallSection - Return true if this global value should be +/// placed into small data/bss section. +bool HexagonTargetObjectFile::IsGlobalInSmallSection(const GlobalValue *GV, + const TargetMachine &TM) const { + // If the primary definition of this global value is outside the current + // translation unit or the global value is available for inspection but not + // emission, then do nothing. + if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage()) + return false; + + // Otherwise, Check if GV should be in sdata/sbss, when normally it would end + // up in getKindForGlobal(GV, TM). + return IsGlobalInSmallSection(GV, TM, getKindForGlobal(GV, TM)); +} + +/// IsGlobalInSmallSection - Return true if this global value should be +/// placed into small data/bss section. +bool HexagonTargetObjectFile:: +IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM, + SectionKind Kind) const { + // Only global variables, not functions. + const GlobalVariable *GVA = dyn_cast<GlobalVariable>(GV); + if (!GVA) + return false; + + if (Kind.isBSS() || Kind.isData() || Kind.isCommon()) { + Type *Ty = GV->getType()->getElementType(); + return IsInSmallSection( + GV->getParent()->getDataLayout().getTypeAllocSize(Ty)); + } + + return false; +} + +MCSection * +HexagonTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV, + SectionKind Kind, Mangler &Mang, + const TargetMachine &TM) const { + + // Handle Small Section classification here. + if (Kind.isBSS() && IsGlobalInSmallSection(GV, TM, Kind)) + return SmallBSSSection; + if (Kind.isData() && IsGlobalInSmallSection(GV, TM, Kind)) + return SmallDataSection; + + // Otherwise, we work the same as ELF. 
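+  // Illustrative sizing example, assuming the default threshold of 8 from
+  // -hexagon-small-data-threshold above: a 4-byte "int" global lands in
+  // .sbss (or .sdata if initialized), while a 16-byte array exceeds the
+  // threshold and falls through to the ordinary ELF selection below.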
+ return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang,TM); +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h new file mode 100644 index 0000000..da0eeeb --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h @@ -0,0 +1,41 @@ +//===-- HexagonTargetAsmInfo.h - Hexagon asm properties --------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONTARGETOBJECTFILE_H +#define LLVM_LIB_TARGET_HEXAGON_HEXAGONTARGETOBJECTFILE_H + +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/MC/MCSectionELF.h" + +namespace llvm { + + class HexagonTargetObjectFile : public TargetLoweringObjectFileELF { + MCSectionELF *SmallDataSection; + MCSectionELF *SmallBSSSection; + + public: + void Initialize(MCContext &Ctx, const TargetMachine &TM) override; + + /// IsGlobalInSmallSection - Return true if this global address should be + /// placed into small data/bss section. + bool IsGlobalInSmallSection(const GlobalValue *GV, + const TargetMachine &TM, + SectionKind Kind) const; + bool IsGlobalInSmallSection(const GlobalValue *GV, + const TargetMachine &TM) const; + + bool IsSmallDataEnabled () const; + MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler &Mang, + const TargetMachine &TM) const override; + }; + +} // namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetStreamer.h b/contrib/llvm/lib/Target/Hexagon/HexagonTargetStreamer.h new file mode 100644 index 0000000..e19c404 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetStreamer.h @@ -0,0 +1,31 @@ +//===-- HexagonTargetStreamer.h - Hexagon Target Streamer ------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef HEXAGONTARGETSTREAMER_H +#define HEXAGONTARGETSTREAMER_H + +#include "llvm/MC/MCStreamer.h" + +namespace llvm { +class HexagonTargetStreamer : public MCTargetStreamer { +public: + HexagonTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {} + virtual void EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit = 0){}; + virtual void emitFAlign(unsigned Size, unsigned MaxBytesToEmit){}; + virtual void EmitCommonSymbolSorted(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment, + unsigned AccessGranularity){}; + virtual void EmitLocalCommonSymbolSorted(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlign, + unsigned AccessGranularity){}; +}; +} + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp new file mode 100644 index 0000000..a05443e --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp @@ -0,0 +1,38 @@ +//===-- HexagonTargetTransformInfo.cpp - Hexagon specific TTI pass --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +/// \file +/// This file implements a TargetTransformInfo analysis pass specific to the +/// Hexagon target machine. It uses the target's detailed information to provide +/// more precise answers to certain TTI queries, while letting the target +/// independent and default TTI implementations handle the rest. +/// +//===----------------------------------------------------------------------===// + +#include "HexagonTargetTransformInfo.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "hexagontti" + +TargetTransformInfo::PopcntSupportKind +HexagonTTIImpl::getPopcntSupport(unsigned IntTyWidthInBit) const { + // Return Fast Hardware support as every input < 64 bits will be promoted + // to 64 bits. + return TargetTransformInfo::PSK_FastHardware; +} + +// The Hexagon target can unroll loops with run-time trip counts. +void HexagonTTIImpl::getUnrollingPreferences(Loop *L, + TTI::UnrollingPreferences &UP) { + UP.Runtime = UP.Partial = true; +} + +unsigned HexagonTTIImpl::getNumberOfRegisters(bool vector) const { + return vector ? 0 : 32; +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h new file mode 100644 index 0000000..71ae17a --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h @@ -0,0 +1,70 @@ +//===-- HexagonTargetTransformInfo.cpp - Hexagon specific TTI pass --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +/// \file +/// This file implements a TargetTransformInfo analysis pass specific to the +/// Hexagon target machine. It uses the target's detailed information to provide +/// more precise answers to certain TTI queries, while letting the target +/// independent and default TTI implementations handle the rest. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONTARGETTRANSFORMINFO_H +#define LLVM_LIB_TARGET_HEXAGON_HEXAGONTARGETTRANSFORMINFO_H + +#include "Hexagon.h" +#include "HexagonTargetMachine.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/BasicTTIImpl.h" +#include "llvm/Target/TargetLowering.h" + +namespace llvm { + +class HexagonTTIImpl : public BasicTTIImplBase<HexagonTTIImpl> { + typedef BasicTTIImplBase<HexagonTTIImpl> BaseT; + typedef TargetTransformInfo TTI; + friend BaseT; + + const HexagonSubtarget *ST; + const HexagonTargetLowering *TLI; + + const HexagonSubtarget *getST() const { return ST; } + const HexagonTargetLowering *getTLI() const { return TLI; } + +public: + explicit HexagonTTIImpl(const HexagonTargetMachine *TM, const Function &F) + : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), + TLI(ST->getTargetLowering()) {} + + // Provide value semantics. MSVC requires that we spell all of these out. + HexagonTTIImpl(const HexagonTTIImpl &Arg) + : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {} + HexagonTTIImpl(HexagonTTIImpl &&Arg) + : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)), + TLI(std::move(Arg.TLI)) {} + + /// \name Scalar TTI Implementations + /// @{ + + TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const; + + // The Hexagon target can unroll loops with run-time trip counts. 
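+  // As implemented in HexagonTargetTransformInfo.cpp above, this enables
+  // both runtime and partial unrolling (UP.Runtime = UP.Partial = true).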
+ void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP); + + /// @} + + /// \name Vector TTI Implementations + /// @{ + + unsigned getNumberOfRegisters(bool vector) const; + + /// @} +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp new file mode 100644 index 0000000..8185054 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -0,0 +1,1601 @@ +//===----- HexagonPacketizer.cpp - vliw packetizer ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements a simple VLIW packetizer using DFA. The packetizer works on +// machine basic blocks. For each instruction I in BB, the packetizer consults +// the DFA to see if machine resources are available to execute I. If so, the +// packetizer checks if I depends on any instruction J in the current packet. +// If no dependency is found, I is added to current packet and machine resource +// is marked as taken. If any dependency is found, a target API call is made to +// prune the dependence. +// +//===----------------------------------------------------------------------===// +#include "HexagonRegisterInfo.h" +#include "HexagonSubtarget.h" +#include "HexagonTargetMachine.h" +#include "HexagonVLIWPacketizer.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include <map> +#include <vector> + +using namespace llvm; + +#define DEBUG_TYPE "packets" + +static cl::opt<bool> DisablePacketizer("disable-packetizer", cl::Hidden, + cl::ZeroOrMore, cl::init(false), + cl::desc("Disable Hexagon packetizer pass")); + +static cl::opt<bool> PacketizeVolatiles("hexagon-packetize-volatiles", + cl::ZeroOrMore, cl::Hidden, cl::init(true), + cl::desc("Allow non-solo packetization of volatile memory references")); + +static cl::opt<bool> EnableGenAllInsnClass("enable-gen-insn", cl::init(false), + cl::Hidden, cl::ZeroOrMore, cl::desc("Generate all instruction with TC")); + +static cl::opt<bool> DisableVecDblNVStores("disable-vecdbl-nv-stores", + cl::init(false), cl::Hidden, cl::ZeroOrMore, + cl::desc("Disable vector double new-value-stores")); + +extern cl::opt<bool> ScheduleInlineAsm; + +namespace llvm { + FunctionPass *createHexagonPacketizer(); + void initializeHexagonPacketizerPass(PassRegistry&); +} + + +namespace { + class HexagonPacketizer : public MachineFunctionPass { + public: + static char ID; + HexagonPacketizer() : MachineFunctionPass(ID) { + initializeHexagonPacketizerPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired<AAResultsWrapperPass>(); + AU.addRequired<MachineBranchProbabilityInfo>(); + AU.addRequired<MachineDominatorTree>(); + AU.addRequired<MachineLoopInfo>(); + AU.addPreserved<MachineDominatorTree>(); + AU.addPreserved<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + const char *getPassName() const override { + 
return "Hexagon Packetizer"; + } + bool runOnMachineFunction(MachineFunction &Fn) override; + + private: + const HexagonInstrInfo *HII; + const HexagonRegisterInfo *HRI; + }; + + char HexagonPacketizer::ID = 0; +} + +INITIALIZE_PASS_BEGIN(HexagonPacketizer, "packets", "Hexagon Packetizer", + false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_END(HexagonPacketizer, "packets", "Hexagon Packetizer", + false, false) + + +HexagonPacketizerList::HexagonPacketizerList(MachineFunction &MF, + MachineLoopInfo &MLI, AliasAnalysis *AA, + const MachineBranchProbabilityInfo *MBPI) + : VLIWPacketizerList(MF, MLI, AA), MBPI(MBPI), MLI(&MLI) { + HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo(); + HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); +} + +// Check if FirstI modifies a register that SecondI reads. +static bool hasWriteToReadDep(const MachineInstr *FirstI, + const MachineInstr *SecondI, const TargetRegisterInfo *TRI) { + for (auto &MO : FirstI->operands()) { + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned R = MO.getReg(); + if (SecondI->readsRegister(R, TRI)) + return true; + } + return false; +} + + +static MachineBasicBlock::iterator moveInstrOut(MachineInstr *MI, + MachineBasicBlock::iterator BundleIt, bool Before) { + MachineBasicBlock::instr_iterator InsertPt; + if (Before) + InsertPt = BundleIt.getInstrIterator(); + else + InsertPt = std::next(BundleIt).getInstrIterator(); + + MachineBasicBlock &B = *MI->getParent(); + // The instruction should at least be bundled with the preceding instruction + // (there will always be one, i.e. BUNDLE, if nothing else). + assert(MI->isBundledWithPred()); + if (MI->isBundledWithSucc()) { + MI->clearFlag(MachineInstr::BundledSucc); + MI->clearFlag(MachineInstr::BundledPred); + } else { + // If it's not bundled with the successor (i.e. it is the last one + // in the bundle), then we can simply unbundle it from the predecessor, + // which will take care of updating the predecessor's flag. + MI->unbundleFromPred(); + } + B.splice(InsertPt, &B, MI); + + // Get the size of the bundle without asserting. + MachineBasicBlock::const_instr_iterator I(BundleIt); + MachineBasicBlock::const_instr_iterator E = B.instr_end(); + unsigned Size = 0; + for (++I; I != E && I->isBundledWithPred(); ++I) + ++Size; + + // If there are still two or more instructions, then there is nothing + // else to be done. + if (Size > 1) + return BundleIt; + + // Otherwise, extract the single instruction out and delete the bundle. + MachineBasicBlock::iterator NextIt = std::next(BundleIt); + MachineInstr *SingleI = BundleIt->getNextNode(); + SingleI->unbundleFromPred(); + assert(!SingleI->isBundledWithSucc()); + BundleIt->eraseFromParent(); + return NextIt; +} + + +bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) { + if (DisablePacketizer) + return false; + + HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo(); + HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); + auto &MLI = getAnalysis<MachineLoopInfo>(); + auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); + auto *MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); + + if (EnableGenAllInsnClass) + HII->genAllInsnTimingClasses(MF); + + // Instantiate the packetizer. + HexagonPacketizerList Packetizer(MF, MLI, AA, MBPI); + + // DFA state table should not be empty. 
+ assert(Packetizer.getResourceTracker() && "Empty DFA table!"); + + // + // Loop over all basic blocks and remove KILL pseudo-instructions + // These instructions confuse the dependence analysis. Consider: + // D0 = ... (Insn 0) + // R0 = KILL R0, D0 (Insn 1) + // R0 = ... (Insn 2) + // Here, Insn 1 will result in the dependence graph not emitting an output + // dependence between Insn 0 and Insn 2. This can lead to incorrect + // packetization + // + for (auto &MB : MF) { + auto End = MB.end(); + auto MI = MB.begin(); + while (MI != End) { + auto NextI = std::next(MI); + if (MI->isKill()) { + MB.erase(MI); + End = MB.end(); + } + MI = NextI; + } + } + + // Loop over all of the basic blocks. + for (auto &MB : MF) { + auto Begin = MB.begin(), End = MB.end(); + while (Begin != End) { + // First the first non-boundary starting from the end of the last + // scheduling region. + MachineBasicBlock::iterator RB = Begin; + while (RB != End && HII->isSchedulingBoundary(RB, &MB, MF)) + ++RB; + // First the first boundary starting from the beginning of the new + // region. + MachineBasicBlock::iterator RE = RB; + while (RE != End && !HII->isSchedulingBoundary(RE, &MB, MF)) + ++RE; + // Add the scheduling boundary if it's not block end. + if (RE != End) + ++RE; + // If RB == End, then RE == End. + if (RB != End) + Packetizer.PacketizeMIs(&MB, RB, RE); + + Begin = RE; + } + } + + Packetizer.unpacketizeSoloInstrs(MF); + return true; +} + + +// Reserve resources for a constant extender. Trigger an assertion if the +// reservation fails. +void HexagonPacketizerList::reserveResourcesForConstExt() { + if (!tryAllocateResourcesForConstExt(true)) + llvm_unreachable("Resources not available"); +} + +bool HexagonPacketizerList::canReserveResourcesForConstExt() { + return tryAllocateResourcesForConstExt(false); +} + +// Allocate resources (i.e. 4 bytes) for constant extender. If succeeded, +// return true, otherwise, return false. +bool HexagonPacketizerList::tryAllocateResourcesForConstExt(bool Reserve) { + auto *ExtMI = MF.CreateMachineInstr(HII->get(Hexagon::A4_ext), DebugLoc()); + bool Avail = ResourceTracker->canReserveResources(ExtMI); + if (Reserve && Avail) + ResourceTracker->reserveResources(ExtMI); + MF.DeleteMachineInstr(ExtMI); + return Avail; +} + + +bool HexagonPacketizerList::isCallDependent(const MachineInstr* MI, + SDep::Kind DepType, unsigned DepReg) { + // Check for LR dependence. + if (DepReg == HRI->getRARegister()) + return true; + + if (HII->isDeallocRet(MI)) + if (DepReg == HRI->getFrameRegister() || DepReg == HRI->getStackRegister()) + return true; + + // Check if this is a predicate dependence. + const TargetRegisterClass* RC = HRI->getMinimalPhysRegClass(DepReg); + if (RC == &Hexagon::PredRegsRegClass) + return true; + + // Assumes that the first operand of the CALLr is the function address. 
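+  // Illustrative sketch: for an indirect call such as "callr r2", a packet
+  // member that defines r2 creates a true data dependence on the call target,
+  // so that dependence is not ignored for packetization purposes.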
+ if (HII->isIndirectCall(MI) && (DepType == SDep::Data)) { + MachineOperand MO = MI->getOperand(0); + if (MO.isReg() && MO.isUse() && (MO.getReg() == DepReg)) + return true; + } + + return false; +} + +static bool isRegDependence(const SDep::Kind DepType) { + return DepType == SDep::Data || DepType == SDep::Anti || + DepType == SDep::Output; +} + +static bool isDirectJump(const MachineInstr* MI) { + return MI->getOpcode() == Hexagon::J2_jump; +} + +static bool isSchedBarrier(const MachineInstr* MI) { + switch (MI->getOpcode()) { + case Hexagon::Y2_barrier: + return true; + } + return false; +} + +static bool isControlFlow(const MachineInstr* MI) { + return (MI->getDesc().isTerminator() || MI->getDesc().isCall()); +} + + +/// Returns true if the instruction modifies a callee-saved register. +static bool doesModifyCalleeSavedReg(const MachineInstr *MI, + const TargetRegisterInfo *TRI) { + const MachineFunction &MF = *MI->getParent()->getParent(); + for (auto *CSR = TRI->getCalleeSavedRegs(&MF); CSR && *CSR; ++CSR) + if (MI->modifiesRegister(*CSR, TRI)) + return true; + return false; +} + +// TODO: MI->isIndirectBranch() and IsRegisterJump(MI) +// Returns true if an instruction can be promoted to .new predicate or +// new-value store. +bool HexagonPacketizerList::isNewifiable(const MachineInstr* MI) { + return HII->isCondInst(MI) || MI->isReturn() || HII->mayBeNewStore(MI); +} + +// Promote an instructiont to its .cur form. +// At this time, we have already made a call to canPromoteToDotCur and made +// sure that it can *indeed* be promoted. +bool HexagonPacketizerList::promoteToDotCur(MachineInstr* MI, + SDep::Kind DepType, MachineBasicBlock::iterator &MII, + const TargetRegisterClass* RC) { + assert(DepType == SDep::Data); + int CurOpcode = HII->getDotCurOp(MI); + MI->setDesc(HII->get(CurOpcode)); + return true; +} + +void HexagonPacketizerList::cleanUpDotCur() { + MachineInstr *MI = NULL; + for (auto BI : CurrentPacketMIs) { + DEBUG(dbgs() << "Cleanup packet has "; BI->dump();); + if (BI->getOpcode() == Hexagon::V6_vL32b_cur_ai) { + MI = BI; + continue; + } + if (MI) { + for (auto &MO : BI->operands()) + if (MO.isReg() && MO.getReg() == MI->getOperand(0).getReg()) + return; + } + } + if (!MI) + return; + // We did not find a use of the CUR, so de-cur it. + MI->setDesc(HII->get(Hexagon::V6_vL32b_ai)); + DEBUG(dbgs() << "Demoted CUR "; MI->dump();); +} + +// Check to see if an instruction can be dot cur. +bool HexagonPacketizerList::canPromoteToDotCur(const MachineInstr *MI, + const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII, + const TargetRegisterClass *RC) { + if (!HII->isV60VectorInstruction(MI)) + return false; + if (!HII->isV60VectorInstruction(MII)) + return false; + + // Already a dot new instruction. + if (HII->isDotCurInst(MI) && !HII->mayBeCurLoad(MI)) + return false; + + if (!HII->mayBeCurLoad(MI)) + return false; + + // The "cur value" cannot come from inline asm. + if (PacketSU->getInstr()->isInlineAsm()) + return false; + + // Make sure candidate instruction uses cur. 
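+  // Illustrative sketch of the intended pairing (HVX syntax simplified):
+  //   { v0.cur = vmem(r0++#1)
+  //     v1 = vand(v0, v2) }
+  // The .cur load only helps if its destination (v0) is read by another
+  // instruction in the same packet, which is what is checked below.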
+ DEBUG(dbgs() << "Can we DOT Cur Vector MI\n"; + MI->dump(); + dbgs() << "in packet\n";); + MachineInstr *MJ = MII; + DEBUG(dbgs() << "Checking CUR against "; MJ->dump();); + unsigned DestReg = MI->getOperand(0).getReg(); + bool FoundMatch = false; + for (auto &MO : MJ->operands()) + if (MO.isReg() && MO.getReg() == DestReg) + FoundMatch = true; + if (!FoundMatch) + return false; + + // Check for existing uses of a vector register within the packet which + // would be affected by converting a vector load into .cur formt. + for (auto BI : CurrentPacketMIs) { + DEBUG(dbgs() << "packet has "; BI->dump();); + if (BI->readsRegister(DepReg, MF.getSubtarget().getRegisterInfo())) + return false; + } + + DEBUG(dbgs() << "Can Dot CUR MI\n"; MI->dump();); + // We can convert the opcode into a .cur. + return true; +} + +// Promote an instruction to its .new form. At this time, we have already +// made a call to canPromoteToDotNew and made sure that it can *indeed* be +// promoted. +bool HexagonPacketizerList::promoteToDotNew(MachineInstr* MI, + SDep::Kind DepType, MachineBasicBlock::iterator &MII, + const TargetRegisterClass* RC) { + assert (DepType == SDep::Data); + int NewOpcode; + if (RC == &Hexagon::PredRegsRegClass) + NewOpcode = HII->getDotNewPredOp(MI, MBPI); + else + NewOpcode = HII->getDotNewOp(MI); + MI->setDesc(HII->get(NewOpcode)); + return true; +} + +bool HexagonPacketizerList::demoteToDotOld(MachineInstr* MI) { + int NewOpcode = HII->getDotOldOp(MI->getOpcode()); + MI->setDesc(HII->get(NewOpcode)); + return true; +} + +enum PredicateKind { + PK_False, + PK_True, + PK_Unknown +}; + +/// Returns true if an instruction is predicated on p0 and false if it's +/// predicated on !p0. +static PredicateKind getPredicateSense(const MachineInstr *MI, + const HexagonInstrInfo *HII) { + if (!HII->isPredicated(MI)) + return PK_Unknown; + if (HII->isPredicatedTrue(MI)) + return PK_True; + return PK_False; +} + +static const MachineOperand &getPostIncrementOperand(const MachineInstr *MI, + const HexagonInstrInfo *HII) { + assert(HII->isPostIncrement(MI) && "Not a post increment operation."); +#ifndef NDEBUG + // Post Increment means duplicates. Use dense map to find duplicates in the + // list. Caution: Densemap initializes with the minimum of 64 buckets, + // whereas there are at most 5 operands in the post increment. + DenseSet<unsigned> DefRegsSet; + for (auto &MO : MI->operands()) + if (MO.isReg() && MO.isDef()) + DefRegsSet.insert(MO.getReg()); + + for (auto &MO : MI->operands()) + if (MO.isReg() && MO.isUse() && DefRegsSet.count(MO.getReg())) + return MO; +#else + if (MI->mayLoad()) { + const MachineOperand &Op1 = MI->getOperand(1); + // The 2nd operand is always the post increment operand in load. + assert(Op1.isReg() && "Post increment operand has be to a register."); + return Op1; + } + if (MI->getDesc().mayStore()) { + const MachineOperand &Op0 = MI->getOperand(0); + // The 1st operand is always the post increment operand in store. + assert(Op0.isReg() && "Post increment operand has be to a register."); + return Op0; + } +#endif + // we should never come here. + llvm_unreachable("mayLoad or mayStore not set for Post Increment operation"); +} + +// Get the value being stored. +static const MachineOperand& getStoreValueOperand(const MachineInstr *MI) { + // value being stored is always the last operand. 
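+  // E.g. in "memw(r1+#0) = r2" the stored value r2 is the final operand; the
+  // same holds for post-increment forms such as "memw(r1++#4) = r2".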
+ return MI->getOperand(MI->getNumOperands()-1); +} + +static bool isLoadAbsSet(const MachineInstr *MI) { + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case Hexagon::L4_loadrd_ap: + case Hexagon::L4_loadrb_ap: + case Hexagon::L4_loadrh_ap: + case Hexagon::L4_loadrub_ap: + case Hexagon::L4_loadruh_ap: + case Hexagon::L4_loadri_ap: + return true; + } + return false; +} + +static const MachineOperand &getAbsSetOperand(const MachineInstr *MI) { + assert(isLoadAbsSet(MI)); + return MI->getOperand(1); +} + + +// Can be new value store? +// Following restrictions are to be respected in convert a store into +// a new value store. +// 1. If an instruction uses auto-increment, its address register cannot +// be a new-value register. Arch Spec 5.4.2.1 +// 2. If an instruction uses absolute-set addressing mode, its address +// register cannot be a new-value register. Arch Spec 5.4.2.1. +// 3. If an instruction produces a 64-bit result, its registers cannot be used +// as new-value registers. Arch Spec 5.4.2.2. +// 4. If the instruction that sets the new-value register is conditional, then +// the instruction that uses the new-value register must also be conditional, +// and both must always have their predicates evaluate identically. +// Arch Spec 5.4.2.3. +// 5. There is an implied restriction that a packet cannot have another store, +// if there is a new value store in the packet. Corollary: if there is +// already a store in a packet, there can not be a new value store. +// Arch Spec: 3.4.4.2 +bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr *MI, + const MachineInstr *PacketMI, unsigned DepReg) { + // Make sure we are looking at the store, that can be promoted. + if (!HII->mayBeNewStore(MI)) + return false; + + // Make sure there is dependency and can be new value'd. + const MachineOperand &Val = getStoreValueOperand(MI); + if (Val.isReg() && Val.getReg() != DepReg) + return false; + + const MCInstrDesc& MCID = PacketMI->getDesc(); + + // First operand is always the result. + const TargetRegisterClass *PacketRC = HII->getRegClass(MCID, 0, HRI, MF); + // Double regs can not feed into new value store: PRM section: 5.4.2.2. + if (PacketRC == &Hexagon::DoubleRegsRegClass) + return false; + + // New-value stores are of class NV (slot 0), dual stores require class ST + // in slot 0 (PRM 5.5). + for (auto I : CurrentPacketMIs) { + SUnit *PacketSU = MIToSUnit.find(I)->second; + if (PacketSU->getInstr()->mayStore()) + return false; + } + + // Make sure it's NOT the post increment register that we are going to + // new value. + if (HII->isPostIncrement(MI) && + getPostIncrementOperand(MI, HII).getReg() == DepReg) { + return false; + } + + if (HII->isPostIncrement(PacketMI) && PacketMI->mayLoad() && + getPostIncrementOperand(PacketMI, HII).getReg() == DepReg) { + // If source is post_inc, or absolute-set addressing, it can not feed + // into new value store + // r3 = memw(r2++#4) + // memw(r30 + #-1404) = r2.new -> can not be new value store + // arch spec section: 5.4.2.1. + return false; + } + + if (isLoadAbsSet(PacketMI) && getAbsSetOperand(PacketMI).getReg() == DepReg) + return false; + + // If the source that feeds the store is predicated, new value store must + // also be predicated. + if (HII->isPredicated(PacketMI)) { + if (!HII->isPredicated(MI)) + return false; + + // Check to make sure that they both will have their predicates + // evaluate identically. 
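+  // Illustrative sketch: a producer/store pair such as
+  //   if (p0) r2 = add(r3, #4)
+  //   if (p0) memw(r1+#0) = r2.new
+  // satisfies the constraints below, whereas pairing it with "if (!p0) ..."
+  // or with a p1-predicated store would not.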
+ unsigned predRegNumSrc = 0; + unsigned predRegNumDst = 0; + const TargetRegisterClass* predRegClass = nullptr; + + // Get predicate register used in the source instruction. + for (auto &MO : PacketMI->operands()) { + if (!MO.isReg()) + continue; + predRegNumSrc = MO.getReg(); + predRegClass = HRI->getMinimalPhysRegClass(predRegNumSrc); + if (predRegClass == &Hexagon::PredRegsRegClass) + break; + } + assert((predRegClass == &Hexagon::PredRegsRegClass) && + "predicate register not found in a predicated PacketMI instruction"); + + // Get predicate register used in new-value store instruction. + for (auto &MO : MI->operands()) { + if (!MO.isReg()) + continue; + predRegNumDst = MO.getReg(); + predRegClass = HRI->getMinimalPhysRegClass(predRegNumDst); + if (predRegClass == &Hexagon::PredRegsRegClass) + break; + } + assert((predRegClass == &Hexagon::PredRegsRegClass) && + "predicate register not found in a predicated MI instruction"); + + // New-value register producer and user (store) need to satisfy these + // constraints: + // 1) Both instructions should be predicated on the same register. + // 2) If producer of the new-value register is .new predicated then store + // should also be .new predicated and if producer is not .new predicated + // then store should not be .new predicated. + // 3) Both new-value register producer and user should have same predicate + // sense, i.e, either both should be negated or both should be non-negated. + if (predRegNumDst != predRegNumSrc || + HII->isDotNewInst(PacketMI) != HII->isDotNewInst(MI) || + getPredicateSense(MI, HII) != getPredicateSense(PacketMI, HII)) + return false; + } + + // Make sure that other than the new-value register no other store instruction + // register has been modified in the same packet. Predicate registers can be + // modified by they should not be modified between the producer and the store + // instruction as it will make them both conditional on different values. + // We already know this to be true for all the instructions before and + // including PacketMI. Howerver, we need to perform the check for the + // remaining instructions in the packet. + + unsigned StartCheck = 0; + + for (auto I : CurrentPacketMIs) { + SUnit *TempSU = MIToSUnit.find(I)->second; + MachineInstr* TempMI = TempSU->getInstr(); + + // Following condition is true for all the instructions until PacketMI is + // reached (StartCheck is set to 0 before the for loop). + // StartCheck flag is 1 for all the instructions after PacketMI. + if (TempMI != PacketMI && !StartCheck) // Start processing only after + continue; // encountering PacketMI. + + StartCheck = 1; + if (TempMI == PacketMI) // We don't want to check PacketMI for dependence. + continue; + + for (auto &MO : MI->operands()) + if (MO.isReg() && TempSU->getInstr()->modifiesRegister(MO.getReg(), HRI)) + return false; + } + + // Make sure that for non-POST_INC stores: + // 1. The only use of reg is DepReg and no other registers. + // This handles V4 base+index registers. + // The following store can not be dot new. + // Eg. r0 = add(r0, #3) + // memw(r1+r0<<#2) = r0 + if (!HII->isPostIncrement(MI)) { + for (unsigned opNum = 0; opNum < MI->getNumOperands()-1; opNum++) { + const MachineOperand &MO = MI->getOperand(opNum); + if (MO.isReg() && MO.getReg() == DepReg) + return false; + } + } + + // If data definition is because of implicit definition of the register, + // do not newify the store. Eg. 
+ // %R9<def> = ZXTH %R12, %D6<imp-use>, %R12<imp-def> + // S2_storerh_io %R8, 2, %R12<kill>; mem:ST2[%scevgep343] + for (auto &MO : PacketMI->operands()) { + if (!MO.isReg() || !MO.isDef() || !MO.isImplicit()) + continue; + unsigned R = MO.getReg(); + if (R == DepReg || HRI->isSuperRegister(DepReg, R)) + return false; + } + + // Handle imp-use of super reg case. There is a target independent side + // change that should prevent this situation but I am handling it for + // just-in-case. For example, we cannot newify R2 in the following case: + // %R3<def> = A2_tfrsi 0; + // S2_storeri_io %R0<kill>, 0, %R2<kill>, %D1<imp-use,kill>; + for (auto &MO : MI->operands()) { + if (MO.isReg() && MO.isUse() && MO.isImplicit() && MO.getReg() == DepReg) + return false; + } + + // Can be dot new store. + return true; +} + +// Can this MI to promoted to either new value store or new value jump. +bool HexagonPacketizerList::canPromoteToNewValue(const MachineInstr *MI, + const SUnit *PacketSU, unsigned DepReg, + MachineBasicBlock::iterator &MII) { + if (!HII->mayBeNewStore(MI)) + return false; + + // Check to see the store can be new value'ed. + MachineInstr *PacketMI = PacketSU->getInstr(); + if (canPromoteToNewValueStore(MI, PacketMI, DepReg)) + return true; + + // Check to see the compare/jump can be new value'ed. + // This is done as a pass on its own. Don't need to check it here. + return false; +} + +static bool isImplicitDependency(const MachineInstr *I, unsigned DepReg) { + for (auto &MO : I->operands()) + if (MO.isReg() && MO.isDef() && (MO.getReg() == DepReg) && MO.isImplicit()) + return true; + return false; +} + +// Check to see if an instruction can be dot new +// There are three kinds. +// 1. dot new on predicate - V2/V3/V4 +// 2. dot new on stores NV/ST - V4 +// 3. dot new on jump NV/J - V4 -- This is generated in a pass. +bool HexagonPacketizerList::canPromoteToDotNew(const MachineInstr *MI, + const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII, + const TargetRegisterClass* RC) { + // Already a dot new instruction. + if (HII->isDotNewInst(MI) && !HII->mayBeNewStore(MI)) + return false; + + if (!isNewifiable(MI)) + return false; + + const MachineInstr *PI = PacketSU->getInstr(); + + // The "new value" cannot come from inline asm. + if (PI->isInlineAsm()) + return false; + + // IMPLICIT_DEFs won't materialize as real instructions, so .new makes no + // sense. + if (PI->isImplicitDef()) + return false; + + // If dependency is trough an implicitly defined register, we should not + // newify the use. + if (isImplicitDependency(PI, DepReg)) + return false; + + const MCInstrDesc& MCID = PI->getDesc(); + const TargetRegisterClass *VecRC = HII->getRegClass(MCID, 0, HRI, MF); + if (DisableVecDblNVStores && VecRC == &Hexagon::VecDblRegsRegClass) + return false; + + // predicate .new + // bug 5670: until that is fixed + // TODO: MI->isIndirectBranch() and IsRegisterJump(MI) + if (RC == &Hexagon::PredRegsRegClass) + if (HII->isCondInst(MI) || MI->isReturn()) + return HII->predCanBeUsedAsDotNew(PI, DepReg); + + if (RC != &Hexagon::PredRegsRegClass && !HII->mayBeNewStore(MI)) + return false; + + // Create a dot new machine instruction to see if resources can be + // allocated. If not, bail out now. 
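+  // Illustrative sketch: e.g. promoting S2_storeri_io to its new-value form
+  // (S2_storerinew_io) only succeeds if an NV store resource (slot 0) is
+  // still free in the packet being built; the dummy instruction created
+  // below probes exactly that.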
+ int NewOpcode = HII->getDotNewOp(MI); + const MCInstrDesc &D = HII->get(NewOpcode); + MachineInstr *NewMI = MF.CreateMachineInstr(D, DebugLoc()); + bool ResourcesAvailable = ResourceTracker->canReserveResources(NewMI); + MF.DeleteMachineInstr(NewMI); + if (!ResourcesAvailable) + return false; + + // New Value Store only. New Value Jump generated as a separate pass. + if (!canPromoteToNewValue(MI, PacketSU, DepReg, MII)) + return false; + + return true; +} + +// Go through the packet instructions and search for an anti dependency between +// them and DepReg from MI. Consider this case: +// Trying to add +// a) %R1<def> = TFRI_cdNotPt %P3, 2 +// to this packet: +// { +// b) %P0<def> = C2_or %P3<kill>, %P0<kill> +// c) %P3<def> = C2_tfrrp %R23 +// d) %R1<def> = C2_cmovenewit %P3, 4 +// } +// The P3 from a) and d) will be complements after +// a)'s P3 is converted to .new form +// Anti-dep between c) and b) is irrelevant for this case +bool HexagonPacketizerList::restrictingDepExistInPacket(MachineInstr* MI, + unsigned DepReg) { + SUnit *PacketSUDep = MIToSUnit.find(MI)->second; + + for (auto I : CurrentPacketMIs) { + // We only care for dependencies to predicated instructions + if (!HII->isPredicated(I)) + continue; + + // Scheduling Unit for current insn in the packet + SUnit *PacketSU = MIToSUnit.find(I)->second; + + // Look at dependencies between current members of the packet and + // predicate defining instruction MI. Make sure that dependency is + // on the exact register we care about. + if (PacketSU->isSucc(PacketSUDep)) { + for (unsigned i = 0; i < PacketSU->Succs.size(); ++i) { + auto &Dep = PacketSU->Succs[i]; + if (Dep.getSUnit() == PacketSUDep && Dep.getKind() == SDep::Anti && + Dep.getReg() == DepReg) + return true; + } + } + } + + return false; +} + + +/// Gets the predicate register of a predicated instruction. +static unsigned getPredicatedRegister(MachineInstr *MI, + const HexagonInstrInfo *QII) { + /// We use the following rule: The first predicate register that is a use is + /// the predicate register of a predicated instruction. + assert(QII->isPredicated(MI) && "Must be predicated instruction"); + + for (auto &Op : MI->operands()) { + if (Op.isReg() && Op.getReg() && Op.isUse() && + Hexagon::PredRegsRegClass.contains(Op.getReg())) + return Op.getReg(); + } + + llvm_unreachable("Unknown instruction operand layout"); + return 0; +} + +// Given two predicated instructions, this function detects whether +// the predicates are complements. +bool HexagonPacketizerList::arePredicatesComplements(MachineInstr *MI1, + MachineInstr *MI2) { + // If we don't know the predicate sense of the instructions bail out early, we + // need it later. + if (getPredicateSense(MI1, HII) == PK_Unknown || + getPredicateSense(MI2, HII) == PK_Unknown) + return false; + + // Scheduling unit for candidate. + SUnit *SU = MIToSUnit[MI1]; + + // One corner case deals with the following scenario: + // Trying to add + // a) %R24<def> = A2_tfrt %P0, %R25 + // to this packet: + // { + // b) %R25<def> = A2_tfrf %P0, %R24 + // c) %P0<def> = C2_cmpeqi %R26, 1 + // } + // + // On general check a) and b) are complements, but presence of c) will + // convert a) to .new form, and then it is not a complement. + // We attempt to detect it by analyzing existing dependencies in the packet. + + // Analyze relationships between all existing members of the packet. + // Look for Anti dependecy on the same predicate reg as used in the + // candidate. 
+ for (auto I : CurrentPacketMIs) { + // Scheduling Unit for current insn in the packet. + SUnit *PacketSU = MIToSUnit.find(I)->second; + + // If this instruction in the packet is succeeded by the candidate... + if (PacketSU->isSucc(SU)) { + for (unsigned i = 0; i < PacketSU->Succs.size(); ++i) { + auto Dep = PacketSU->Succs[i]; + // The corner case exist when there is true data dependency between + // candidate and one of current packet members, this dep is on + // predicate reg, and there already exist anti dep on the same pred in + // the packet. + if (Dep.getSUnit() == SU && Dep.getKind() == SDep::Data && + Hexagon::PredRegsRegClass.contains(Dep.getReg())) { + // Here I know that I is predicate setting instruction with true + // data dep to candidate on the register we care about - c) in the + // above example. Now I need to see if there is an anti dependency + // from c) to any other instruction in the same packet on the pred + // reg of interest. + if (restrictingDepExistInPacket(I, Dep.getReg())) + return false; + } + } + } + } + + // If the above case does not apply, check regular complement condition. + // Check that the predicate register is the same and that the predicate + // sense is different We also need to differentiate .old vs. .new: !p0 + // is not complementary to p0.new. + unsigned PReg1 = getPredicatedRegister(MI1, HII); + unsigned PReg2 = getPredicatedRegister(MI2, HII); + return PReg1 == PReg2 && + Hexagon::PredRegsRegClass.contains(PReg1) && + Hexagon::PredRegsRegClass.contains(PReg2) && + getPredicateSense(MI1, HII) != getPredicateSense(MI2, HII) && + HII->isDotNewInst(MI1) == HII->isDotNewInst(MI2); +} + +// Initialize packetizer flags. +void HexagonPacketizerList::initPacketizerState() { + Dependence = false; + PromotedToDotNew = false; + GlueToNewValueJump = false; + GlueAllocframeStore = false; + FoundSequentialDependence = false; +} + +// Ignore bundling of pseudo instructions. +bool HexagonPacketizerList::ignorePseudoInstruction(const MachineInstr *MI, + const MachineBasicBlock*) { + if (MI->isDebugValue()) + return true; + + if (MI->isCFIInstruction()) + return false; + + // We must print out inline assembly. + if (MI->isInlineAsm()) + return false; + + if (MI->isImplicitDef()) + return false; + + // We check if MI has any functional units mapped to it. If it doesn't, + // we ignore the instruction. + const MCInstrDesc& TID = MI->getDesc(); + auto *IS = ResourceTracker->getInstrItins()->beginStage(TID.getSchedClass()); + unsigned FuncUnits = IS->getUnits(); + return !FuncUnits; +} + +bool HexagonPacketizerList::isSoloInstruction(const MachineInstr *MI) { + if (MI->isEHLabel() || MI->isCFIInstruction()) + return true; + + // Consider inline asm to not be a solo instruction by default. + // Inline asm will be put in a packet temporarily, but then it will be + // removed, and placed outside of the packet (before or after, depending + // on dependencies). This is to reduce the impact of inline asm as a + // "packet splitting" instruction. + if (MI->isInlineAsm() && !ScheduleInlineAsm) + return true; + + // From Hexagon V4 Programmer's Reference Manual 3.4.4 Grouping constraints: + // trap, pause, barrier, icinva, isync, and syncht are solo instructions. + // They must not be grouped with other instructions in a packet. + if (isSchedBarrier(MI)) + return true; + + if (HII->isSolo(MI)) + return true; + + if (MI->getOpcode() == Hexagon::A2_nop) + return true; + + return false; +} + + +// Quick check if instructions MI and MJ cannot coexist in the same packet. 
+// Limit the tests to be "one-way", e.g. "if MI->isBranch and MJ->isInlineAsm", +// but not the symmetric case: "if MJ->isBranch and MI->isInlineAsm". +// For full test call this function twice: +// cannotCoexistAsymm(MI, MJ) || cannotCoexistAsymm(MJ, MI) +// Doing the test only one way saves the amount of code in this function, +// since every test would need to be repeated with the MI and MJ reversed. +static bool cannotCoexistAsymm(const MachineInstr *MI, const MachineInstr *MJ, + const HexagonInstrInfo &HII) { + const MachineFunction *MF = MI->getParent()->getParent(); + if (MF->getSubtarget<HexagonSubtarget>().hasV60TOpsOnly() && + HII.isHVXMemWithAIndirect(MI, MJ)) + return true; + + // An inline asm cannot be together with a branch, because we may not be + // able to remove the asm out after packetizing (i.e. if the asm must be + // moved past the bundle). Similarly, two asms cannot be together to avoid + // complications when determining their relative order outside of a bundle. + if (MI->isInlineAsm()) + return MJ->isInlineAsm() || MJ->isBranch() || MJ->isBarrier() || + MJ->isCall() || MJ->isTerminator(); + + // "False" really means that the quick check failed to determine if + // I and J cannot coexist. + return false; +} + + +// Full, symmetric check. +bool HexagonPacketizerList::cannotCoexist(const MachineInstr *MI, + const MachineInstr *MJ) { + return cannotCoexistAsymm(MI, MJ, *HII) || cannotCoexistAsymm(MJ, MI, *HII); +} + +void HexagonPacketizerList::unpacketizeSoloInstrs(MachineFunction &MF) { + for (auto &B : MF) { + MachineBasicBlock::iterator BundleIt; + MachineBasicBlock::instr_iterator NextI; + for (auto I = B.instr_begin(), E = B.instr_end(); I != E; I = NextI) { + NextI = std::next(I); + MachineInstr *MI = &*I; + if (MI->isBundle()) + BundleIt = I; + if (!MI->isInsideBundle()) + continue; + + // Decide on where to insert the instruction that we are pulling out. + // Debug instructions always go before the bundle, but the placement of + // INLINE_ASM depends on potential dependencies. By default, try to + // put it before the bundle, but if the asm writes to a register that + // other instructions in the bundle read, then we need to place it + // after the bundle (to preserve the bundle semantics). + bool InsertBeforeBundle; + if (MI->isInlineAsm()) + InsertBeforeBundle = !hasWriteToReadDep(MI, BundleIt, HRI); + else if (MI->isDebugValue()) + InsertBeforeBundle = true; + else + continue; + + BundleIt = moveInstrOut(MI, BundleIt, InsertBeforeBundle); + } + } +} + +// Check if a given instruction is of class "system". +static bool isSystemInstr(const MachineInstr *MI) { + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case Hexagon::Y2_barrier: + case Hexagon::Y2_dcfetchbo: + return true; + } + return false; +} + +bool HexagonPacketizerList::hasDeadDependence(const MachineInstr *I, + const MachineInstr *J) { + // The dependence graph may not include edges between dead definitions, + // so without extra checks, we could end up packetizing two instruction + // defining the same (dead) register. 
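+  // Illustrative sketch: two instructions that both leave a dead definition
+  // of the same register, e.g.
+  //   r0 = add(r1, #1)    // dead def of r0
+  //   r0 = add(r2, #1)    // dead def of r0
+  // must still not share a packet; the only exception below is USR.OVF, the
+  // sticky overflow bit, which several instructions may define in one packet.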
+ if (I->isCall() || J->isCall()) + return false; + if (HII->isPredicated(I) || HII->isPredicated(J)) + return false; + + BitVector DeadDefs(Hexagon::NUM_TARGET_REGS); + for (auto &MO : I->operands()) { + if (!MO.isReg() || !MO.isDef() || !MO.isDead()) + continue; + DeadDefs[MO.getReg()] = true; + } + + for (auto &MO : J->operands()) { + if (!MO.isReg() || !MO.isDef() || !MO.isDead()) + continue; + unsigned R = MO.getReg(); + if (R != Hexagon::USR_OVF && DeadDefs[R]) + return true; + } + return false; +} + +bool HexagonPacketizerList::hasControlDependence(const MachineInstr *I, + const MachineInstr *J) { + // A save callee-save register function call can only be in a packet + // with instructions that don't write to the callee-save registers. + if ((HII->isSaveCalleeSavedRegsCall(I) && + doesModifyCalleeSavedReg(J, HRI)) || + (HII->isSaveCalleeSavedRegsCall(J) && + doesModifyCalleeSavedReg(I, HRI))) + return true; + + // Two control flow instructions cannot go in the same packet. + if (isControlFlow(I) && isControlFlow(J)) + return true; + + // \ref-manual (7.3.4) A loop setup packet in loopN or spNloop0 cannot + // contain a speculative indirect jump, + // a new-value compare jump or a dealloc_return. + auto isBadForLoopN = [this] (const MachineInstr *MI) -> bool { + if (MI->isCall() || HII->isDeallocRet(MI) || HII->isNewValueJump(MI)) + return true; + if (HII->isPredicated(MI) && HII->isPredicatedNew(MI) && HII->isJumpR(MI)) + return true; + return false; + }; + + if (HII->isLoopN(I) && isBadForLoopN(J)) + return true; + if (HII->isLoopN(J) && isBadForLoopN(I)) + return true; + + // dealloc_return cannot appear in the same packet as a conditional or + // unconditional jump. + return HII->isDeallocRet(I) && + (J->isBranch() || J->isCall() || J->isBarrier()); +} + +bool HexagonPacketizerList::hasV4SpecificDependence(const MachineInstr *I, + const MachineInstr *J) { + bool SysI = isSystemInstr(I), SysJ = isSystemInstr(J); + bool StoreI = I->mayStore(), StoreJ = J->mayStore(); + if ((SysI && StoreJ) || (SysJ && StoreI)) + return true; + + if (StoreI && StoreJ) { + if (HII->isNewValueInst(J) || HII->isMemOp(J) || HII->isMemOp(I)) + return true; + } else { + // A memop cannot be in the same packet with another memop or a store. + // Two stores can be together, but here I and J cannot both be stores. + bool MopStI = HII->isMemOp(I) || StoreI; + bool MopStJ = HII->isMemOp(J) || StoreJ; + if (MopStI && MopStJ) + return true; + } + + return (StoreJ && HII->isDeallocRet(I)) || (StoreI && HII->isDeallocRet(J)); +} + +// SUI is the current instruction that is out side of the current packet. +// SUJ is the current instruction inside the current packet against which that +// SUI will be packetized. +bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { + MachineInstr *I = SUI->getInstr(); + MachineInstr *J = SUJ->getInstr(); + assert(I && J && "Unable to packetize null instruction!"); + + // Clear IgnoreDepMIs when Packet starts. + if (CurrentPacketMIs.size() == 1) + IgnoreDepMIs.clear(); + + MachineBasicBlock::iterator II = I; + const unsigned FrameSize = MF.getFrameInfo()->getStackSize(); + + // Solo instructions cannot go in the packet. + assert(!isSoloInstruction(I) && "Unexpected solo instr!"); + + if (cannotCoexist(I, J)) + return false; + + Dependence = hasDeadDependence(I, J) || hasControlDependence(I, J); + if (Dependence) + return false; + + // V4 allows dual stores. It does not allow second store, if the first + // store is not in SLOT0. 
New value store, new value jump, dealloc_return + // and memop always take SLOT0. Arch spec 3.4.4.2. + Dependence = hasV4SpecificDependence(I, J); + if (Dependence) + return false; + + // If an instruction feeds new value jump, glue it. + MachineBasicBlock::iterator NextMII = I; + ++NextMII; + if (NextMII != I->getParent()->end() && HII->isNewValueJump(NextMII)) { + MachineInstr *NextMI = NextMII; + + bool secondRegMatch = false; + const MachineOperand &NOp0 = NextMI->getOperand(0); + const MachineOperand &NOp1 = NextMI->getOperand(1); + + if (NOp1.isReg() && I->getOperand(0).getReg() == NOp1.getReg()) + secondRegMatch = true; + + for (auto I : CurrentPacketMIs) { + SUnit *PacketSU = MIToSUnit.find(I)->second; + MachineInstr *PI = PacketSU->getInstr(); + // NVJ can not be part of the dual jump - Arch Spec: section 7.8. + if (PI->isCall()) { + Dependence = true; + break; + } + // Validate: + // 1. Packet does not have a store in it. + // 2. If the first operand of the nvj is newified, and the second + // operand is also a reg, it (second reg) is not defined in + // the same packet. + // 3. If the second operand of the nvj is newified, (which means + // first operand is also a reg), first reg is not defined in + // the same packet. + if (PI->getOpcode() == Hexagon::S2_allocframe || PI->mayStore() || + HII->isLoopN(PI)) { + Dependence = true; + break; + } + // Check #2/#3. + const MachineOperand &OpR = secondRegMatch ? NOp0 : NOp1; + if (OpR.isReg() && PI->modifiesRegister(OpR.getReg(), HRI)) { + Dependence = true; + break; + } + } + + if (Dependence) + return false; + GlueToNewValueJump = true; + } + + // There no dependency between a prolog instruction and its successor. + if (!SUJ->isSucc(SUI)) + return true; + + for (unsigned i = 0; i < SUJ->Succs.size(); ++i) { + if (FoundSequentialDependence) + break; + + if (SUJ->Succs[i].getSUnit() != SUI) + continue; + + SDep::Kind DepType = SUJ->Succs[i].getKind(); + // For direct calls: + // Ignore register dependences for call instructions for packetization + // purposes except for those due to r31 and predicate registers. + // + // For indirect calls: + // Same as direct calls + check for true dependences to the register + // used in the indirect call. + // + // We completely ignore Order dependences for call instructions. + // + // For returns: + // Ignore register dependences for return instructions like jumpr, + // dealloc return unless we have dependencies on the explicit uses + // of the registers used by jumpr (like r31) or dealloc return + // (like r29 or r30). + // + // TODO: Currently, jumpr is handling only return of r31. So, the + // following logic (specificaly isCallDependent) is working fine. + // We need to enable jumpr for register other than r31 and then, + // we need to rework the last part, where it handles indirect call + // of that (isCallDependent) function. Bug 6216 is opened for this. + unsigned DepReg = 0; + const TargetRegisterClass *RC = nullptr; + if (DepType == SDep::Data) { + DepReg = SUJ->Succs[i].getReg(); + RC = HRI->getMinimalPhysRegClass(DepReg); + } + + if (I->isCall() || I->isReturn()) { + if (!isRegDependence(DepType)) + continue; + if (!isCallDependent(I, DepType, SUJ->Succs[i].getReg())) + continue; + } + + if (DepType == SDep::Data) { + if (canPromoteToDotCur(J, SUJ, DepReg, II, RC)) + if (promoteToDotCur(J, DepType, II, RC)) + continue; + } + + // Data dpendence ok if we have load.cur. 
+ if (DepType == SDep::Data && HII->isDotCurInst(J)) { + if (HII->isV60VectorInstruction(I)) + continue; + } + + // For instructions that can be promoted to dot-new, try to promote. + if (DepType == SDep::Data) { + if (canPromoteToDotNew(I, SUJ, DepReg, II, RC)) { + if (promoteToDotNew(I, DepType, II, RC)) { + PromotedToDotNew = true; + continue; + } + } + if (HII->isNewValueJump(I)) + continue; + } + + // For predicated instructions, if the predicates are complements then + // there can be no dependence. + if (HII->isPredicated(I) && HII->isPredicated(J) && + arePredicatesComplements(I, J)) { + // Not always safe to do this translation. + // DAG Builder attempts to reduce dependence edges using transitive + // nature of dependencies. Here is an example: + // + // r0 = tfr_pt ... (1) + // r0 = tfr_pf ... (2) + // r0 = tfr_pt ... (3) + // + // There will be an output dependence between (1)->(2) and (2)->(3). + // However, there is no dependence edge between (1)->(3). This results + // in all 3 instructions going in the same packet. We ignore dependce + // only once to avoid this situation. + auto Itr = std::find(IgnoreDepMIs.begin(), IgnoreDepMIs.end(), J); + if (Itr != IgnoreDepMIs.end()) { + Dependence = true; + return false; + } + IgnoreDepMIs.push_back(I); + continue; + } + + // Ignore Order dependences between unconditional direct branches + // and non-control-flow instructions. + if (isDirectJump(I) && !J->isBranch() && !J->isCall() && + DepType == SDep::Order) + continue; + + // Ignore all dependences for jumps except for true and output + // dependences. + if (I->isConditionalBranch() && DepType != SDep::Data && + DepType != SDep::Output) + continue; + + // Ignore output dependences due to superregs. We can write to two + // different subregisters of R1:0 for instance in the same cycle. + + // If neither I nor J defines DepReg, then this is a superfluous output + // dependence. The dependence must be of the form: + // R0 = ... + // R1 = ... + // and there is an output dependence between the two instructions with + // DepReg = D0. + // We want to ignore these dependences. Ideally, the dependence + // constructor should annotate such dependences. We can then avoid this + // relatively expensive check. + // + if (DepType == SDep::Output) { + // DepReg is the register that's responsible for the dependence. + unsigned DepReg = SUJ->Succs[i].getReg(); + + // Check if I and J really defines DepReg. + if (!I->definesRegister(DepReg) && !J->definesRegister(DepReg)) + continue; + FoundSequentialDependence = true; + break; + } + + // For Order dependences: + // 1. On V4 or later, volatile loads/stores can be packetized together, + // unless other rules prevent is. + // 2. Store followed by a load is not allowed. + // 3. Store followed by a store is only valid on V4 or later. + // 4. Load followed by any memory operation is allowed. + if (DepType == SDep::Order) { + if (!PacketizeVolatiles) { + bool OrdRefs = I->hasOrderedMemoryRef() || J->hasOrderedMemoryRef(); + if (OrdRefs) { + FoundSequentialDependence = true; + break; + } + } + // J is first, I is second. + bool LoadJ = J->mayLoad(), StoreJ = J->mayStore(); + bool LoadI = I->mayLoad(), StoreI = I->mayStore(); + if (StoreJ) { + // Two stores are only allowed on V4+. Load following store is never + // allowed. + if (LoadI) { + FoundSequentialDependence = true; + break; + } + } else if (!LoadJ || (!LoadI && !StoreI)) { + // If J is neither load nor store, assume a dependency. + // If J is a load, but I is neither, also assume a dependency. 
+ FoundSequentialDependence = true; + break; + } + // Store followed by store: not OK on V2. + // Store followed by load: not OK on all. + // Load followed by store: OK on all. + // Load followed by load: OK on all. + continue; + } + + // For V4, special case ALLOCFRAME. Even though there is dependency + // between ALLOCFRAME and subsequent store, allow it to be packetized + // in a same packet. This implies that the store is using the caller's + // SP. Hence, offset needs to be updated accordingly. + if (DepType == SDep::Data && J->getOpcode() == Hexagon::S2_allocframe) { + unsigned Opc = I->getOpcode(); + switch (Opc) { + case Hexagon::S2_storerd_io: + case Hexagon::S2_storeri_io: + case Hexagon::S2_storerh_io: + case Hexagon::S2_storerb_io: + if (I->getOperand(0).getReg() == HRI->getStackRegister()) { + int64_t Imm = I->getOperand(1).getImm(); + int64_t NewOff = Imm - (FrameSize + HEXAGON_LRFP_SIZE); + if (HII->isValidOffset(Opc, NewOff)) { + GlueAllocframeStore = true; + // Since this store is to be glued with allocframe in the same + // packet, it will use SP of the previous stack frame, i.e. + // caller's SP. Therefore, we need to recalculate offset + // according to this change. + I->getOperand(1).setImm(NewOff); + continue; + } + } + default: + break; + } + } + + // Skip over anti-dependences. Two instructions that are anti-dependent + // can share a packet. + if (DepType != SDep::Anti) { + FoundSequentialDependence = true; + break; + } + } + + if (FoundSequentialDependence) { + Dependence = true; + return false; + } + + return true; +} + +bool HexagonPacketizerList::isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) { + MachineInstr *I = SUI->getInstr(); + MachineInstr *J = SUJ->getInstr(); + assert(I && J && "Unable to packetize null instruction!"); + + if (cannotCoexist(I, J)) + return false; + + if (!Dependence) + return true; + + // Check if the instruction was promoted to a dot-new. If so, demote it + // back into a dot-old. + if (PromotedToDotNew) + demoteToDotOld(I); + + cleanUpDotCur(); + // Check if the instruction (must be a store) was glued with an allocframe + // instruction. If so, restore its offset to its original value, i.e. use + // current SP instead of caller's SP. + if (GlueAllocframeStore) { + unsigned FrameSize = MF.getFrameInfo()->getStackSize(); + MachineOperand &MOff = I->getOperand(1); + MOff.setImm(MOff.getImm() + FrameSize + HEXAGON_LRFP_SIZE); + } + return false; +} + + +MachineBasicBlock::iterator +HexagonPacketizerList::addToPacket(MachineInstr *MI) { + MachineBasicBlock::iterator MII = MI; + MachineBasicBlock *MBB = MI->getParent(); + if (MI->isImplicitDef()) { + unsigned R = MI->getOperand(0).getReg(); + if (Hexagon::IntRegsRegClass.contains(R)) { + MCSuperRegIterator S(R, HRI, false); + MI->addOperand(MachineOperand::CreateReg(*S, true, true)); + } + return MII; + } + assert(ResourceTracker->canReserveResources(MI)); + + bool ExtMI = HII->isExtended(MI) || HII->isConstExtended(MI); + bool Good = true; + + if (GlueToNewValueJump) { + MachineInstr *NvjMI = ++MII; + // We need to put both instructions in the same packet: MI and NvjMI. + // Either of them can require a constant extender. Try to add both to + // the current packet, and if that fails, end the packet and start a + // new one. 
+ ResourceTracker->reserveResources(MI); + if (ExtMI) + Good = tryAllocateResourcesForConstExt(true); + + bool ExtNvjMI = HII->isExtended(NvjMI) || HII->isConstExtended(NvjMI); + if (Good) { + if (ResourceTracker->canReserveResources(NvjMI)) + ResourceTracker->reserveResources(NvjMI); + else + Good = false; + } + if (Good && ExtNvjMI) + Good = tryAllocateResourcesForConstExt(true); + + if (!Good) { + endPacket(MBB, MI); + assert(ResourceTracker->canReserveResources(MI)); + ResourceTracker->reserveResources(MI); + if (ExtMI) { + assert(canReserveResourcesForConstExt()); + tryAllocateResourcesForConstExt(true); + } + assert(ResourceTracker->canReserveResources(NvjMI)); + ResourceTracker->reserveResources(NvjMI); + if (ExtNvjMI) { + assert(canReserveResourcesForConstExt()); + reserveResourcesForConstExt(); + } + } + CurrentPacketMIs.push_back(MI); + CurrentPacketMIs.push_back(NvjMI); + return MII; + } + + ResourceTracker->reserveResources(MI); + if (ExtMI && !tryAllocateResourcesForConstExt(true)) { + endPacket(MBB, MI); + if (PromotedToDotNew) + demoteToDotOld(MI); + ResourceTracker->reserveResources(MI); + reserveResourcesForConstExt(); + } + + CurrentPacketMIs.push_back(MI); + return MII; +} + +void HexagonPacketizerList::endPacket(MachineBasicBlock *MBB, + MachineInstr *MI) { + OldPacketMIs = CurrentPacketMIs; + VLIWPacketizerList::endPacket(MBB, MI); +} + +bool HexagonPacketizerList::shouldAddToPacket(const MachineInstr *MI) { + return !producesStall(MI); +} + + +// Return true when ConsMI uses a register defined by ProdMI. +static bool isDependent(const MachineInstr *ProdMI, + const MachineInstr *ConsMI) { + if (!ProdMI->getOperand(0).isReg()) + return false; + unsigned DstReg = ProdMI->getOperand(0).getReg(); + + for (auto &Op : ConsMI->operands()) + if (Op.isReg() && Op.isUse() && Op.getReg() == DstReg) + // The MIs depend on each other. + return true; + + return false; +} + +// V60 forward scheduling. +bool HexagonPacketizerList::producesStall(const MachineInstr *I) { + // Check whether the previous packet is in a different loop. If this is the + // case, there is little point in trying to avoid a stall because that would + // favor the rare case (loop entry) over the common case (loop iteration). + // + // TODO: We should really be able to check all the incoming edges if this is + // the first packet in a basic block, so we can avoid stalls from the loop + // backedge. + if (!OldPacketMIs.empty()) { + auto *OldBB = OldPacketMIs.front()->getParent(); + auto *ThisBB = I->getParent(); + if (MLI->getLoopFor(OldBB) != MLI->getLoopFor(ThisBB)) + return false; + } + + // Check for stall between two vector instructions. + if (HII->isV60VectorInstruction(I)) { + for (auto J : OldPacketMIs) { + if (!HII->isV60VectorInstruction(J)) + continue; + if (isDependent(J, I) && !HII->isVecUsableNextPacket(J, I)) + return true; + } + return false; + } + + // Check for stall between two scalar instructions. First, check that + // there is no definition of a use in the current packet, because it + // may be a candidate for .new. + for (auto J : CurrentPacketMIs) + if (!HII->isV60VectorInstruction(J) && isDependent(J, I)) + return false; + + // Check for stall between I and instructions in the previous packet. 
+ if (MF.getSubtarget<HexagonSubtarget>().useBSBScheduling()) { + for (auto J : OldPacketMIs) { + if (HII->isV60VectorInstruction(J)) + continue; + if (!HII->isLateInstrFeedsEarlyInstr(J, I)) + continue; + if (isDependent(J, I) && !HII->canExecuteInBundle(J, I)) + return true; + } + } + + return false; +} + + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +FunctionPass *llvm::createHexagonPacketizer() { + return new HexagonPacketizer(); +} + diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h new file mode 100644 index 0000000..960cf6c --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h @@ -0,0 +1,114 @@ +#ifndef HEXAGONVLIWPACKETIZER_H +#define HEXAGONVLIWPACKETIZER_H + +#include "llvm/CodeGen/DFAPacketizer.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/ScheduleDAGInstrs.h" + +namespace llvm { +class HexagonPacketizerList : public VLIWPacketizerList { + // Vector of instructions assigned to the packet that has just been created. + std::vector<MachineInstr*> OldPacketMIs; + + // Has the instruction been promoted to a dot-new instruction. + bool PromotedToDotNew; + + // Has the instruction been glued to allocframe. + bool GlueAllocframeStore; + + // Has the feeder instruction been glued to new value jump. + bool GlueToNewValueJump; + + // Check if there is a dependence between some instruction already in this + // packet and this instruction. + bool Dependence; + + // Only check for dependence if there are resources available to + // schedule this instruction. + bool FoundSequentialDependence; + + // Track MIs with ignored dependence. + std::vector<MachineInstr*> IgnoreDepMIs; + +protected: + /// \brief A handle to the branch probability pass. + const MachineBranchProbabilityInfo *MBPI; + const MachineLoopInfo *MLI; + +private: + const HexagonInstrInfo *HII; + const HexagonRegisterInfo *HRI; + +public: + // Ctor. + HexagonPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI, + AliasAnalysis *AA, + const MachineBranchProbabilityInfo *MBPI); + + // initPacketizerState - initialize some internal flags. + void initPacketizerState() override; + + // ignorePseudoInstruction - Ignore bundling of pseudo instructions. + bool ignorePseudoInstruction(const MachineInstr *MI, + const MachineBasicBlock *MBB) override; + + // isSoloInstruction - return true if instruction MI can not be packetized + // with any other instruction, which means that MI itself is a packet. + bool isSoloInstruction(const MachineInstr *MI) override; + + // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ + // together. + bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) override; + + // isLegalToPruneDependencies - Is it legal to prune dependece between SUI + // and SUJ. 
+ bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) override; + + MachineBasicBlock::iterator addToPacket(MachineInstr *MI) override; + void endPacket(MachineBasicBlock *MBB, MachineInstr *MI) override; + bool shouldAddToPacket(const MachineInstr *MI) override; + + void unpacketizeSoloInstrs(MachineFunction &MF); + +protected: + bool isCallDependent(const MachineInstr* MI, SDep::Kind DepType, + unsigned DepReg); + bool promoteToDotCur(MachineInstr* MI, SDep::Kind DepType, + MachineBasicBlock::iterator &MII, + const TargetRegisterClass* RC); + bool canPromoteToDotCur(const MachineInstr* MI, const SUnit* PacketSU, + unsigned DepReg, MachineBasicBlock::iterator &MII, + const TargetRegisterClass* RC); + void cleanUpDotCur(); + + bool promoteToDotNew(MachineInstr* MI, SDep::Kind DepType, + MachineBasicBlock::iterator &MII, + const TargetRegisterClass* RC); + bool canPromoteToDotNew(const MachineInstr* MI, const SUnit* PacketSU, + unsigned DepReg, MachineBasicBlock::iterator &MII, + const TargetRegisterClass* RC); + bool canPromoteToNewValue(const MachineInstr* MI, const SUnit* PacketSU, + unsigned DepReg, MachineBasicBlock::iterator &MII); + bool canPromoteToNewValueStore(const MachineInstr* MI, + const MachineInstr* PacketMI, unsigned DepReg); + bool demoteToDotOld(MachineInstr* MI); + bool arePredicatesComplements(MachineInstr* MI1, MachineInstr* MI2); + bool restrictingDepExistInPacket(MachineInstr*, unsigned); + bool isNewifiable(const MachineInstr *MI); + bool isCurifiable(MachineInstr* MI); + bool cannotCoexist(const MachineInstr *MI, const MachineInstr *MJ); + inline bool isPromotedToDotNew() const { + return PromotedToDotNew; + } + bool tryAllocateResourcesForConstExt(bool Reserve); + bool canReserveResourcesForConstExt(); + void reserveResourcesForConstExt(); + bool hasDeadDependence(const MachineInstr *I, const MachineInstr *J); + bool hasControlDependence(const MachineInstr *I, const MachineInstr *J); + bool hasV4SpecificDependence(const MachineInstr *I, const MachineInstr *J); + bool producesStall(const MachineInstr *MI); +}; +} // namespace llvm +#endif // HEXAGONVLIWPACKETIZER_H + diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp new file mode 100644 index 0000000..b73af82 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp @@ -0,0 +1,360 @@ +//===-- HexagonAsmBackend.cpp - Hexagon Assembler Backend -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "Hexagon.h" +#include "HexagonFixupKinds.h" +#include "HexagonMCTargetDesc.h" +#include "MCTargetDesc/HexagonBaseInfo.h" +#include "MCTargetDesc/HexagonMCInstrInfo.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAsmLayout.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCFixupKindInfo.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/TargetRegistry.h" + +using namespace llvm; +using namespace Hexagon; + +#define DEBUG_TYPE "hexagon-asm-backend" + +namespace { + +class HexagonAsmBackend : public MCAsmBackend { + uint8_t OSABI; + StringRef CPU; + mutable uint64_t relaxedCnt; + std::unique_ptr <MCInstrInfo> MCII; + std::unique_ptr <MCInst *> RelaxTarget; + MCInst * Extender; +public: + HexagonAsmBackend(Target const &T, uint8_t OSABI, StringRef CPU) : + OSABI(OSABI), MCII (T.createMCInstrInfo()), RelaxTarget(new MCInst *), + Extender(nullptr) {} + + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { + return createHexagonELFObjectWriter(OS, OSABI, CPU); + } + + void setExtender(MCContext &Context) const { + if (Extender == nullptr) + const_cast<HexagonAsmBackend *>(this)->Extender = new (Context) MCInst; + } + + MCInst *takeExtender() const { + assert(Extender != nullptr); + MCInst * Result = Extender; + const_cast<HexagonAsmBackend *>(this)->Extender = nullptr; + return Result; + } + + unsigned getNumFixupKinds() const override { + return Hexagon::NumTargetFixupKinds; + } + + const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override { + const static MCFixupKindInfo Infos[Hexagon::NumTargetFixupKinds] = { + // This table *must* be in same the order of fixup_* kinds in + // HexagonFixupKinds.h. 
+ // + // namei offset bits flags + {"fixup_Hexagon_B22_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_B15_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_B7_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_LO16", 0, 32, 0}, + {"fixup_Hexagon_HI16", 0, 32, 0}, + {"fixup_Hexagon_32", 0, 32, 0}, + {"fixup_Hexagon_16", 0, 32, 0}, + {"fixup_Hexagon_8", 0, 32, 0}, + {"fixup_Hexagon_GPREL16_0", 0, 32, 0}, + {"fixup_Hexagon_GPREL16_1", 0, 32, 0}, + {"fixup_Hexagon_GPREL16_2", 0, 32, 0}, + {"fixup_Hexagon_GPREL16_3", 0, 32, 0}, + {"fixup_Hexagon_HL16", 0, 32, 0}, + {"fixup_Hexagon_B13_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_B9_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_B32_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_32_6_X", 0, 32, 0}, + {"fixup_Hexagon_B22_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_B15_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_B13_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_B9_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_B7_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_16_X", 0, 32, 0}, + {"fixup_Hexagon_12_X", 0, 32, 0}, + {"fixup_Hexagon_11_X", 0, 32, 0}, + {"fixup_Hexagon_10_X", 0, 32, 0}, + {"fixup_Hexagon_9_X", 0, 32, 0}, + {"fixup_Hexagon_8_X", 0, 32, 0}, + {"fixup_Hexagon_7_X", 0, 32, 0}, + {"fixup_Hexagon_6_X", 0, 32, 0}, + {"fixup_Hexagon_32_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_COPY", 0, 32, 0}, + {"fixup_Hexagon_GLOB_DAT", 0, 32, 0}, + {"fixup_Hexagon_JMP_SLOT", 0, 32, 0}, + {"fixup_Hexagon_RELATIVE", 0, 32, 0}, + {"fixup_Hexagon_PLT_B22_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_GOTREL_LO16", 0, 32, 0}, + {"fixup_Hexagon_GOTREL_HI16", 0, 32, 0}, + {"fixup_Hexagon_GOTREL_32", 0, 32, 0}, + {"fixup_Hexagon_GOT_LO16", 0, 32, 0}, + {"fixup_Hexagon_GOT_HI16", 0, 32, 0}, + {"fixup_Hexagon_GOT_32", 0, 32, 0}, + {"fixup_Hexagon_GOT_16", 0, 32, 0}, + {"fixup_Hexagon_DTPMOD_32", 0, 32, 0}, + {"fixup_Hexagon_DTPREL_LO16", 0, 32, 0}, + {"fixup_Hexagon_DTPREL_HI16", 0, 32, 0}, + {"fixup_Hexagon_DTPREL_32", 0, 32, 0}, + {"fixup_Hexagon_DTPREL_16", 0, 32, 0}, + {"fixup_Hexagon_GD_PLT_B22_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_LD_PLT_B22_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_GD_GOT_LO16", 0, 32, 0}, + {"fixup_Hexagon_GD_GOT_HI16", 0, 32, 0}, + {"fixup_Hexagon_GD_GOT_32", 0, 32, 0}, + {"fixup_Hexagon_GD_GOT_16", 0, 32, 0}, + {"fixup_Hexagon_LD_GOT_LO16", 0, 32, 0}, + {"fixup_Hexagon_LD_GOT_HI16", 0, 32, 0}, + {"fixup_Hexagon_LD_GOT_32", 0, 32, 0}, + {"fixup_Hexagon_LD_GOT_16", 0, 32, 0}, + {"fixup_Hexagon_IE_LO16", 0, 32, 0}, + {"fixup_Hexagon_IE_HI16", 0, 32, 0}, + {"fixup_Hexagon_IE_32", 0, 32, 0}, + {"fixup_Hexagon_IE_16", 0, 32, 0}, + {"fixup_Hexagon_IE_GOT_LO16", 0, 32, 0}, + {"fixup_Hexagon_IE_GOT_HI16", 0, 32, 0}, + {"fixup_Hexagon_IE_GOT_32", 0, 32, 0}, + {"fixup_Hexagon_IE_GOT_16", 0, 32, 0}, + {"fixup_Hexagon_TPREL_LO16", 0, 32, 0}, + {"fixup_Hexagon_TPREL_HI16", 0, 32, 0}, + {"fixup_Hexagon_TPREL_32", 0, 32, 0}, + {"fixup_Hexagon_TPREL_16", 0, 32, 0}, + {"fixup_Hexagon_6_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_GOTREL_32_6_X", 0, 32, 0}, + {"fixup_Hexagon_GOTREL_16_X", 0, 32, 0}, + {"fixup_Hexagon_GOTREL_11_X", 0, 32, 0}, + {"fixup_Hexagon_GOT_32_6_X", 0, 32, 0}, + {"fixup_Hexagon_GOT_16_X", 0, 32, 0}, + {"fixup_Hexagon_GOT_11_X", 0, 
32, 0}, + {"fixup_Hexagon_DTPREL_32_6_X", 0, 32, 0}, + {"fixup_Hexagon_DTPREL_16_X", 0, 32, 0}, + {"fixup_Hexagon_DTPREL_11_X", 0, 32, 0}, + {"fixup_Hexagon_GD_GOT_32_6_X", 0, 32, 0}, + {"fixup_Hexagon_GD_GOT_16_X", 0, 32, 0}, + {"fixup_Hexagon_GD_GOT_11_X", 0, 32, 0}, + {"fixup_Hexagon_LD_GOT_32_6_X", 0, 32, 0}, + {"fixup_Hexagon_LD_GOT_16_X", 0, 32, 0}, + {"fixup_Hexagon_LD_GOT_11_X", 0, 32, 0}, + {"fixup_Hexagon_IE_32_6_X", 0, 32, 0}, + {"fixup_Hexagon_IE_16_X", 0, 32, 0}, + {"fixup_Hexagon_IE_GOT_32_6_X", 0, 32, 0}, + {"fixup_Hexagon_IE_GOT_16_X", 0, 32, 0}, + {"fixup_Hexagon_IE_GOT_11_X", 0, 32, 0}, + {"fixup_Hexagon_TPREL_32_6_X", 0, 32, 0}, + {"fixup_Hexagon_TPREL_16_X", 0, 32, 0}, + {"fixup_Hexagon_TPREL_11_X", 0, 32, 0}}; + + if (Kind < FirstTargetFixupKind) { + return MCAsmBackend::getFixupKindInfo(Kind); + } + + assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && + "Invalid kind!"); + return Infos[Kind - FirstTargetFixupKind]; + } + + void applyFixup(MCFixup const & /*Fixup*/, char * /*Data*/, + unsigned /*DataSize*/, uint64_t /*Value*/, + bool /*IsPCRel*/) const override { + return; + } + + bool isInstRelaxable(MCInst const &HMI) const { + const MCInstrDesc &MCID = HexagonMCInstrInfo::getDesc(*MCII, HMI); + bool Relaxable = false; + // Branches and loop-setup insns are handled as necessary by relaxation. + if (llvm::HexagonMCInstrInfo::getType(*MCII, HMI) == HexagonII::TypeJ || + (llvm::HexagonMCInstrInfo::getType(*MCII, HMI) == HexagonII::TypeNV && + MCID.isBranch()) || + (llvm::HexagonMCInstrInfo::getType(*MCII, HMI) == HexagonII::TypeCR && + HMI.getOpcode() != Hexagon::C4_addipc)) + if (HexagonMCInstrInfo::isExtendable(*MCII, HMI)) + Relaxable = true; + + return Relaxable; + } + + /// MayNeedRelaxation - Check whether the given instruction may need + /// relaxation. + /// + /// \param Inst - The instruction to test. + bool mayNeedRelaxation(MCInst const &Inst) const override { + assert(HexagonMCInstrInfo::isBundle(Inst)); + bool PreviousIsExtender = false; + for (auto const &I : HexagonMCInstrInfo::bundleInstructions(Inst)) { + auto const &Inst = *I.getInst(); + if (!PreviousIsExtender) { + if (isInstRelaxable(Inst)) + return true; + } + PreviousIsExtender = HexagonMCInstrInfo::isImmext(Inst); + } + return false; + } + + /// fixupNeedsRelaxation - Target specific predicate for whether a given + /// fixup requires the associated instruction to be relaxed. + bool fixupNeedsRelaxationAdvanced(const MCFixup &Fixup, bool Resolved, + uint64_t Value, + const MCRelaxableFragment *DF, + const MCAsmLayout &Layout) const override { + MCInst const &MCB = DF->getInst(); + assert(HexagonMCInstrInfo::isBundle(MCB)); + + *RelaxTarget = nullptr; + MCInst &MCI = const_cast<MCInst &>(HexagonMCInstrInfo::instruction( + MCB, Fixup.getOffset() / HEXAGON_INSTR_SIZE)); + // If we cannot resolve the fixup value, it requires relaxation. 
+ if (!Resolved) { + switch ((unsigned)Fixup.getKind()) { + case fixup_Hexagon_B22_PCREL: + // GetFixupCount assumes B22 won't relax + // Fallthrough + default: + return false; + break; + case fixup_Hexagon_B13_PCREL: + case fixup_Hexagon_B15_PCREL: + case fixup_Hexagon_B9_PCREL: + case fixup_Hexagon_B7_PCREL: { + if (HexagonMCInstrInfo::bundleSize(MCB) < HEXAGON_PACKET_SIZE) { + ++relaxedCnt; + *RelaxTarget = &MCI; + setExtender(Layout.getAssembler().getContext()); + return true; + } else { + return false; + } + break; + } + } + } + bool Relaxable = isInstRelaxable(MCI); + if (Relaxable == false) + return false; + + MCFixupKind Kind = Fixup.getKind(); + int64_t sValue = Value; + int64_t maxValue; + + switch ((unsigned)Kind) { + case fixup_Hexagon_B7_PCREL: + maxValue = 1 << 8; + break; + case fixup_Hexagon_B9_PCREL: + maxValue = 1 << 10; + break; + case fixup_Hexagon_B15_PCREL: + maxValue = 1 << 16; + break; + case fixup_Hexagon_B22_PCREL: + maxValue = 1 << 23; + break; + default: + maxValue = INT64_MAX; + break; + } + + bool isFarAway = -maxValue > sValue || sValue > maxValue - 1; + + if (isFarAway) { + if (HexagonMCInstrInfo::bundleSize(MCB) < HEXAGON_PACKET_SIZE) { + ++relaxedCnt; + *RelaxTarget = &MCI; + setExtender(Layout.getAssembler().getContext()); + return true; + } + } + + return false; + } + + /// Simple predicate for targets where !Resolved implies requiring relaxation + bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, + const MCRelaxableFragment *DF, + const MCAsmLayout &Layout) const override { + llvm_unreachable("Handled by fixupNeedsRelaxationAdvanced"); + } + + void relaxInstruction(MCInst const & Inst, + MCInst & Res) const override { + assert(HexagonMCInstrInfo::isBundle(Inst) && + "Hexagon relaxInstruction only works on bundles"); + + Res = HexagonMCInstrInfo::createBundle(); + // Copy the results into the bundle. + bool Update = false; + for (auto &I : HexagonMCInstrInfo::bundleInstructions(Inst)) { + MCInst &CrntHMI = const_cast<MCInst &>(*I.getInst()); + + // if immediate extender needed, add it in + if (*RelaxTarget == &CrntHMI) { + Update = true; + assert((HexagonMCInstrInfo::bundleSize(Res) < HEXAGON_PACKET_SIZE) && + "No room to insert extender for relaxation"); + + MCInst *HMIx = takeExtender(); + *HMIx = HexagonMCInstrInfo::deriveExtender( + *MCII, CrntHMI, + HexagonMCInstrInfo::getExtendableOperand(*MCII, CrntHMI)); + Res.addOperand(MCOperand::createInst(HMIx)); + *RelaxTarget = nullptr; + } + // now copy over the original instruction(the one we may have extended) + Res.addOperand(MCOperand::createInst(I.getInst())); + } + (void)Update; + assert(Update && "Didn't find relaxation target"); + } + + bool writeNopData(uint64_t Count, + MCObjectWriter * OW) const override { + static const uint32_t Nopcode = 0x7f000000, // Hard-coded NOP. + ParseIn = 0x00004000, // In packet parse-bits. + ParseEnd = 0x0000c000; // End of packet parse-bits. + + while(Count % HEXAGON_INSTR_SIZE) { + DEBUG(dbgs() << "Alignment not a multiple of the instruction size:" << + Count % HEXAGON_INSTR_SIZE << "/" << HEXAGON_INSTR_SIZE << "\n"); + --Count; + OW->write8(0); + } + + while(Count) { + Count -= HEXAGON_INSTR_SIZE; + // Close the packet whenever a multiple of the maximum packet size remains + uint32_t ParseBits = (Count % (HEXAGON_PACKET_SIZE * HEXAGON_INSTR_SIZE))? 
+ ParseIn: ParseEnd; + OW->write32(Nopcode | ParseBits); + } + return true; + } +}; +} // end anonymous namespace + +namespace llvm { +MCAsmBackend *createHexagonAsmBackend(Target const &T, + MCRegisterInfo const & /*MRI*/, + const Triple &TT, StringRef CPU) { + uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS()); + return new HexagonAsmBackend(T, OSABI, CPU); +} +} diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h new file mode 100644 index 0000000..47a6f86 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h @@ -0,0 +1,285 @@ +//===-- HexagonBaseInfo.h - Top level definitions for Hexagon --*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains small standalone helper functions and enum definitions for +// the Hexagon target useful for the compiler back-end and the MC libraries. +// As such, it deliberately does not include references to LLVM core +// code gen types, passes, etc.. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONBASEINFO_H +#define LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONBASEINFO_H + +#include "HexagonMCTargetDesc.h" +#include "llvm/Support/ErrorHandling.h" +#include <stdint.h> + +namespace llvm { + +/// HexagonII - This namespace holds all of the target specific flags that +/// instruction info tracks. +/// +namespace HexagonII { + // *** The code below must match HexagonInstrFormat*.td *** // + + // Insn types. + // *** Must match HexagonInstrFormat*.td *** + enum Type { + TypePSEUDO = 0, + TypeALU32 = 1, + TypeCR = 2, + TypeJR = 3, + TypeJ = 4, + TypeLD = 5, + TypeST = 6, + TypeSYSTEM = 7, + TypeXTYPE = 8, + TypeMEMOP = 9, + TypeNV = 10, + TypeDUPLEX = 11, + TypeCOMPOUND = 12, + TypeCVI_FIRST = 13, + TypeCVI_VA = TypeCVI_FIRST, + TypeCVI_VA_DV = 14, + TypeCVI_VX = 15, + TypeCVI_VX_DV = 16, + TypeCVI_VP = 17, + TypeCVI_VP_VS = 18, + TypeCVI_VS = 19, + TypeCVI_VINLANESAT= 20, + TypeCVI_VM_LD = 21, + TypeCVI_VM_TMP_LD = 22, + TypeCVI_VM_CUR_LD = 23, + TypeCVI_VM_VP_LDU = 24, + TypeCVI_VM_ST = 25, + TypeCVI_VM_NEW_ST = 26, + TypeCVI_VM_STU = 27, + TypeCVI_HIST = 28, + TypeCVI_LAST = TypeCVI_HIST, + TypePREFIX = 30, // Such as extenders. + TypeENDLOOP = 31 // Such as end of a HW loop. + }; + + enum SubTarget { + HasV2SubT = 0xf, + HasV2SubTOnly = 0x1, + NoV2SubT = 0x0, + HasV3SubT = 0xe, + HasV3SubTOnly = 0x2, + NoV3SubT = 0x1, + HasV4SubT = 0xc, + NoV4SubT = 0x3, + HasV5SubT = 0x8, + NoV5SubT = 0x7 + }; + + enum AddrMode { + NoAddrMode = 0, // No addressing mode + Absolute = 1, // Absolute addressing mode + AbsoluteSet = 2, // Absolute set addressing mode + BaseImmOffset = 3, // Indirect with offset + BaseLongOffset = 4, // Indirect with long offset + BaseRegOffset = 5, // Indirect with register offset + PostInc = 6 // Post increment addressing mode + }; + + // MemAccessSize is represented as 1+log2(N) where N is size in bits. + enum class MemAccessSize { + NoMemAccess = 0, // Not a memory acces instruction. + ByteAccess = 1, // Byte access instruction (memb). + HalfWordAccess = 2, // Half word access instruction (memh). + WordAccess = 3, // Word access instruction (memw). 
+ DoubleWordAccess = 4, // Double word access instruction (memd) + // 5, // We do not have a 16 byte vector access. + Vector64Access = 7, // 64 Byte vector access instruction (vmem). + Vector128Access = 8 // 128 Byte vector access instruction (vmem). + }; + + // MCInstrDesc TSFlags + // *** Must match HexagonInstrFormat*.td *** + enum { + // This 5-bit field describes the insn type. + TypePos = 0, + TypeMask = 0x1f, + + // Solo instructions. + SoloPos = 5, + SoloMask = 0x1, + // Packed only with A or X-type instructions. + SoloAXPos = 6, + SoloAXMask = 0x1, + // Only A-type instruction in first slot or nothing. + SoloAin1Pos = 7, + SoloAin1Mask = 0x1, + + // Predicated instructions. + PredicatedPos = 8, + PredicatedMask = 0x1, + PredicatedFalsePos = 9, + PredicatedFalseMask = 0x1, + PredicatedNewPos = 10, + PredicatedNewMask = 0x1, + PredicateLatePos = 11, + PredicateLateMask = 0x1, + + // New-Value consumer instructions. + NewValuePos = 12, + NewValueMask = 0x1, + // New-Value producer instructions. + hasNewValuePos = 13, + hasNewValueMask = 0x1, + // Which operand consumes or produces a new value. + NewValueOpPos = 14, + NewValueOpMask = 0x7, + // Stores that can become new-value stores. + mayNVStorePos = 17, + mayNVStoreMask = 0x1, + // New-value store instructions. + NVStorePos = 18, + NVStoreMask = 0x1, + // Loads that can become current-value loads. + mayCVLoadPos = 19, + mayCVLoadMask = 0x1, + // Current-value load instructions. + CVLoadPos = 20, + CVLoadMask = 0x1, + + // Extendable insns. + ExtendablePos = 21, + ExtendableMask = 0x1, + // Insns must be extended. + ExtendedPos = 22, + ExtendedMask = 0x1, + // Which operand may be extended. + ExtendableOpPos = 23, + ExtendableOpMask = 0x7, + // Signed or unsigned range. + ExtentSignedPos = 26, + ExtentSignedMask = 0x1, + // Number of bits of range before extending operand. + ExtentBitsPos = 27, + ExtentBitsMask = 0x1f, + // Alignment power-of-two before extending operand. + ExtentAlignPos = 32, + ExtentAlignMask = 0x3, + + // Valid subtargets + validSubTargetPos = 34, + validSubTargetMask = 0xf, + + // Addressing mode for load/store instructions. + AddrModePos = 40, + AddrModeMask = 0x7, + // Access size for load/store instructions. + MemAccessSizePos = 43, + MemAccesSizeMask = 0xf, + + // Branch predicted taken. + TakenPos = 47, + TakenMask = 0x1, + + // Floating-point instructions. + FPPos = 48, + FPMask = 0x1, + + // New-Value producer-2 instructions. + hasNewValuePos2 = 50, + hasNewValueMask2 = 0x1, + + // Which operand consumes or produces a new value. + NewValueOpPos2 = 51, + NewValueOpMask2 = 0x7, + + // Accumulator instructions. + AccumulatorPos = 54, + AccumulatorMask = 0x1, + + // Complex XU, prevent xu competition by prefering slot3 + PrefersSlot3Pos = 55, + PrefersSlot3Mask = 0x1, + }; + + // *** The code above must match HexagonInstrFormat*.td *** // + + // Hexagon specific MO operand flag mask. + enum HexagonMOTargetFlagVal { + //===------------------------------------------------------------------===// + // Hexagon Specific MachineOperand flags. + MO_NO_FLAG, + + HMOTF_ConstExtended = 1, + + /// MO_PCREL - On a symbol operand, indicates a PC-relative relocation + /// Used for computing a global address for PIC compilations + MO_PCREL, + + /// MO_GOT - Indicates a GOT-relative relocation + MO_GOT, + + // Low or high part of a symbol. + MO_LO16, MO_HI16, + + // Offset from the base of the SDA. + MO_GPREL + }; + + // Hexagon Sub-instruction classes. 
+ enum SubInstructionGroup { + HSIG_None = 0, + HSIG_L1, + HSIG_L2, + HSIG_S1, + HSIG_S2, + HSIG_A, + HSIG_Compound + }; + + // Hexagon Compound classes. + enum CompoundGroup { + HCG_None = 0, + HCG_A, + HCG_B, + HCG_C + }; + + enum InstParseBits { + INST_PARSE_MASK = 0x0000c000, + INST_PARSE_PACKET_END = 0x0000c000, + INST_PARSE_LOOP_END = 0x00008000, + INST_PARSE_NOT_END = 0x00004000, + INST_PARSE_DUPLEX = 0x00000000, + INST_PARSE_EXTENDER = 0x00000000 + }; + + enum InstIClassBits : unsigned { + INST_ICLASS_MASK = 0xf0000000, + INST_ICLASS_EXTENDER = 0x00000000, + INST_ICLASS_J_1 = 0x10000000, + INST_ICLASS_J_2 = 0x20000000, + INST_ICLASS_LD_ST_1 = 0x30000000, + INST_ICLASS_LD_ST_2 = 0x40000000, + INST_ICLASS_J_3 = 0x50000000, + INST_ICLASS_CR = 0x60000000, + INST_ICLASS_ALU32_1 = 0x70000000, + INST_ICLASS_XTYPE_1 = 0x80000000, + INST_ICLASS_LD = 0x90000000, + INST_ICLASS_ST = 0xa0000000, + INST_ICLASS_ALU32_2 = 0xb0000000, + INST_ICLASS_XTYPE_2 = 0xc0000000, + INST_ICLASS_XTYPE_3 = 0xd0000000, + INST_ICLASS_XTYPE_4 = 0xe0000000, + INST_ICLASS_ALU32_3 = 0xf0000000 + }; + +} // End namespace HexagonII. + +} // End namespace llvm. + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp new file mode 100644 index 0000000..da5d4d1 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp @@ -0,0 +1,251 @@ +//===-- HexagonELFObjectWriter.cpp - Hexagon Target Descriptions ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Hexagon.h" +#include "MCTargetDesc/HexagonFixupKinds.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#define DEBUG_TYPE "hexagon-elf-writer" + +using namespace llvm; +using namespace Hexagon; + +namespace { + +class HexagonELFObjectWriter : public MCELFObjectTargetWriter { +private: + StringRef CPU; + +public: + HexagonELFObjectWriter(uint8_t OSABI, StringRef C); + + unsigned GetRelocType(MCValue const &Target, MCFixup const &Fixup, + bool IsPCRel) const override; +}; +} + +HexagonELFObjectWriter::HexagonELFObjectWriter(uint8_t OSABI, StringRef C) + : MCELFObjectTargetWriter(/*Is64bit*/ false, OSABI, ELF::EM_HEXAGON, + /*HasRelocationAddend*/ true), + CPU(C) {} + +unsigned HexagonELFObjectWriter::GetRelocType(MCValue const & /*Target*/, + MCFixup const &Fixup, + bool IsPCRel) const { + switch ((unsigned)Fixup.getKind()) { + default: + DEBUG(dbgs() << "unrecognized relocation " << Fixup.getKind() << "\n"); + llvm_unreachable("Unimplemented Fixup kind!"); + return ELF::R_HEX_NONE; + case FK_Data_4: + return (IsPCRel) ? 
ELF::R_HEX_32_PCREL : ELF::R_HEX_32; + case FK_PCRel_4: + return ELF::R_HEX_32_PCREL; + case FK_Data_2: + return ELF::R_HEX_16; + case FK_Data_1: + return ELF::R_HEX_8; + case fixup_Hexagon_B22_PCREL: + return ELF::R_HEX_B22_PCREL; + case fixup_Hexagon_B15_PCREL: + return ELF::R_HEX_B15_PCREL; + case fixup_Hexagon_B7_PCREL: + return ELF::R_HEX_B7_PCREL; + case fixup_Hexagon_LO16: + return ELF::R_HEX_LO16; + case fixup_Hexagon_HI16: + return ELF::R_HEX_HI16; + case fixup_Hexagon_32: + return ELF::R_HEX_32; + case fixup_Hexagon_16: + return ELF::R_HEX_16; + case fixup_Hexagon_8: + return ELF::R_HEX_8; + case fixup_Hexagon_GPREL16_0: + return ELF::R_HEX_GPREL16_0; + case fixup_Hexagon_GPREL16_1: + return ELF::R_HEX_GPREL16_1; + case fixup_Hexagon_GPREL16_2: + return ELF::R_HEX_GPREL16_2; + case fixup_Hexagon_GPREL16_3: + return ELF::R_HEX_GPREL16_3; + case fixup_Hexagon_HL16: + return ELF::R_HEX_HL16; + case fixup_Hexagon_B13_PCREL: + return ELF::R_HEX_B13_PCREL; + case fixup_Hexagon_B9_PCREL: + return ELF::R_HEX_B9_PCREL; + case fixup_Hexagon_B32_PCREL_X: + return ELF::R_HEX_B32_PCREL_X; + case fixup_Hexagon_32_6_X: + return ELF::R_HEX_32_6_X; + case fixup_Hexagon_B22_PCREL_X: + return ELF::R_HEX_B22_PCREL_X; + case fixup_Hexagon_B15_PCREL_X: + return ELF::R_HEX_B15_PCREL_X; + case fixup_Hexagon_B13_PCREL_X: + return ELF::R_HEX_B13_PCREL_X; + case fixup_Hexagon_B9_PCREL_X: + return ELF::R_HEX_B9_PCREL_X; + case fixup_Hexagon_B7_PCREL_X: + return ELF::R_HEX_B7_PCREL_X; + case fixup_Hexagon_16_X: + return ELF::R_HEX_16_X; + case fixup_Hexagon_12_X: + return ELF::R_HEX_12_X; + case fixup_Hexagon_11_X: + return ELF::R_HEX_11_X; + case fixup_Hexagon_10_X: + return ELF::R_HEX_10_X; + case fixup_Hexagon_9_X: + return ELF::R_HEX_9_X; + case fixup_Hexagon_8_X: + return ELF::R_HEX_8_X; + case fixup_Hexagon_7_X: + return ELF::R_HEX_7_X; + case fixup_Hexagon_6_X: + return ELF::R_HEX_6_X; + case fixup_Hexagon_32_PCREL: + return ELF::R_HEX_32_PCREL; + case fixup_Hexagon_COPY: + return ELF::R_HEX_COPY; + case fixup_Hexagon_GLOB_DAT: + return ELF::R_HEX_GLOB_DAT; + case fixup_Hexagon_JMP_SLOT: + return ELF::R_HEX_JMP_SLOT; + case fixup_Hexagon_RELATIVE: + return ELF::R_HEX_RELATIVE; + case fixup_Hexagon_PLT_B22_PCREL: + return ELF::R_HEX_PLT_B22_PCREL; + case fixup_Hexagon_GOTREL_LO16: + return ELF::R_HEX_GOTREL_LO16; + case fixup_Hexagon_GOTREL_HI16: + return ELF::R_HEX_GOTREL_HI16; + case fixup_Hexagon_GOTREL_32: + return ELF::R_HEX_GOTREL_32; + case fixup_Hexagon_GOT_LO16: + return ELF::R_HEX_GOT_LO16; + case fixup_Hexagon_GOT_HI16: + return ELF::R_HEX_GOT_HI16; + case fixup_Hexagon_GOT_32: + return ELF::R_HEX_GOT_32; + case fixup_Hexagon_GOT_16: + return ELF::R_HEX_GOT_16; + case fixup_Hexagon_DTPMOD_32: + return ELF::R_HEX_DTPMOD_32; + case fixup_Hexagon_DTPREL_LO16: + return ELF::R_HEX_DTPREL_LO16; + case fixup_Hexagon_DTPREL_HI16: + return ELF::R_HEX_DTPREL_HI16; + case fixup_Hexagon_DTPREL_32: + return ELF::R_HEX_DTPREL_32; + case fixup_Hexagon_DTPREL_16: + return ELF::R_HEX_DTPREL_16; + case fixup_Hexagon_GD_PLT_B22_PCREL: + return ELF::R_HEX_GD_PLT_B22_PCREL; + case fixup_Hexagon_LD_PLT_B22_PCREL: + return ELF::R_HEX_LD_PLT_B22_PCREL; + case fixup_Hexagon_GD_GOT_LO16: + return ELF::R_HEX_GD_GOT_LO16; + case fixup_Hexagon_GD_GOT_HI16: + return ELF::R_HEX_GD_GOT_HI16; + case fixup_Hexagon_GD_GOT_32: + return ELF::R_HEX_GD_GOT_32; + case fixup_Hexagon_GD_GOT_16: + return ELF::R_HEX_GD_GOT_16; + case fixup_Hexagon_LD_GOT_LO16: + return ELF::R_HEX_LD_GOT_LO16; + case fixup_Hexagon_LD_GOT_HI16: + return 
ELF::R_HEX_LD_GOT_HI16; + case fixup_Hexagon_LD_GOT_32: + return ELF::R_HEX_LD_GOT_32; + case fixup_Hexagon_LD_GOT_16: + return ELF::R_HEX_LD_GOT_16; + case fixup_Hexagon_IE_LO16: + return ELF::R_HEX_IE_LO16; + case fixup_Hexagon_IE_HI16: + return ELF::R_HEX_IE_HI16; + case fixup_Hexagon_IE_32: + return ELF::R_HEX_IE_32; + case fixup_Hexagon_IE_GOT_LO16: + return ELF::R_HEX_IE_GOT_LO16; + case fixup_Hexagon_IE_GOT_HI16: + return ELF::R_HEX_IE_GOT_HI16; + case fixup_Hexagon_IE_GOT_32: + return ELF::R_HEX_IE_GOT_32; + case fixup_Hexagon_IE_GOT_16: + return ELF::R_HEX_IE_GOT_16; + case fixup_Hexagon_TPREL_LO16: + return ELF::R_HEX_TPREL_LO16; + case fixup_Hexagon_TPREL_HI16: + return ELF::R_HEX_TPREL_HI16; + case fixup_Hexagon_TPREL_32: + return ELF::R_HEX_TPREL_32; + case fixup_Hexagon_TPREL_16: + return ELF::R_HEX_TPREL_16; + case fixup_Hexagon_6_PCREL_X: + return ELF::R_HEX_6_PCREL_X; + case fixup_Hexagon_GOTREL_32_6_X: + return ELF::R_HEX_GOTREL_32_6_X; + case fixup_Hexagon_GOTREL_16_X: + return ELF::R_HEX_GOTREL_16_X; + case fixup_Hexagon_GOTREL_11_X: + return ELF::R_HEX_GOTREL_11_X; + case fixup_Hexagon_GOT_32_6_X: + return ELF::R_HEX_GOT_32_6_X; + case fixup_Hexagon_GOT_16_X: + return ELF::R_HEX_GOT_16_X; + case fixup_Hexagon_GOT_11_X: + return ELF::R_HEX_GOT_11_X; + case fixup_Hexagon_DTPREL_32_6_X: + return ELF::R_HEX_DTPREL_32_6_X; + case fixup_Hexagon_DTPREL_16_X: + return ELF::R_HEX_DTPREL_16_X; + case fixup_Hexagon_DTPREL_11_X: + return ELF::R_HEX_DTPREL_11_X; + case fixup_Hexagon_GD_GOT_32_6_X: + return ELF::R_HEX_GD_GOT_32_6_X; + case fixup_Hexagon_GD_GOT_16_X: + return ELF::R_HEX_GD_GOT_16_X; + case fixup_Hexagon_GD_GOT_11_X: + return ELF::R_HEX_GD_GOT_11_X; + case fixup_Hexagon_LD_GOT_32_6_X: + return ELF::R_HEX_LD_GOT_32_6_X; + case fixup_Hexagon_LD_GOT_16_X: + return ELF::R_HEX_LD_GOT_16_X; + case fixup_Hexagon_LD_GOT_11_X: + return ELF::R_HEX_LD_GOT_11_X; + case fixup_Hexagon_IE_32_6_X: + return ELF::R_HEX_IE_32_6_X; + case fixup_Hexagon_IE_16_X: + return ELF::R_HEX_IE_16_X; + case fixup_Hexagon_IE_GOT_32_6_X: + return ELF::R_HEX_IE_GOT_32_6_X; + case fixup_Hexagon_IE_GOT_16_X: + return ELF::R_HEX_IE_GOT_16_X; + case fixup_Hexagon_IE_GOT_11_X: + return ELF::R_HEX_IE_GOT_11_X; + case fixup_Hexagon_TPREL_32_6_X: + return ELF::R_HEX_TPREL_32_6_X; + case fixup_Hexagon_TPREL_16_X: + return ELF::R_HEX_TPREL_16_X; + case fixup_Hexagon_TPREL_11_X: + return ELF::R_HEX_TPREL_11_X; + } +} + +MCObjectWriter *llvm::createHexagonELFObjectWriter(raw_pwrite_stream &OS, + uint8_t OSABI, + StringRef CPU) { + MCELFObjectTargetWriter *MOTW = new HexagonELFObjectWriter(OSABI, CPU); + return createELFObjectWriter(MOTW, OS, /*IsLittleEndian*/ true); +} diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonFixupKinds.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonFixupKinds.h new file mode 100644 index 0000000..4bbfbec --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonFixupKinds.h @@ -0,0 +1,137 @@ +//===-- HexagonFixupKinds.h - Hexagon Specific Fixup Entries --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_HEXAGON_HEXAGONFIXUPKINDS_H +#define LLVM_HEXAGON_HEXAGONFIXUPKINDS_H + +#include "llvm/MC/MCFixup.h" + +namespace llvm { +namespace Hexagon { +enum Fixups { + // Branch fixups for R_HEX_B{22,15,7}_PCREL. 
+ fixup_Hexagon_B22_PCREL = FirstTargetFixupKind, + fixup_Hexagon_B15_PCREL, + fixup_Hexagon_B7_PCREL, + fixup_Hexagon_LO16, + fixup_Hexagon_HI16, + fixup_Hexagon_32, + fixup_Hexagon_16, + fixup_Hexagon_8, + fixup_Hexagon_GPREL16_0, + fixup_Hexagon_GPREL16_1, + fixup_Hexagon_GPREL16_2, + fixup_Hexagon_GPREL16_3, + fixup_Hexagon_HL16, + fixup_Hexagon_B13_PCREL, + fixup_Hexagon_B9_PCREL, + fixup_Hexagon_B32_PCREL_X, + fixup_Hexagon_32_6_X, + fixup_Hexagon_B22_PCREL_X, + fixup_Hexagon_B15_PCREL_X, + fixup_Hexagon_B13_PCREL_X, + fixup_Hexagon_B9_PCREL_X, + fixup_Hexagon_B7_PCREL_X, + fixup_Hexagon_16_X, + fixup_Hexagon_12_X, + fixup_Hexagon_11_X, + fixup_Hexagon_10_X, + fixup_Hexagon_9_X, + fixup_Hexagon_8_X, + fixup_Hexagon_7_X, + fixup_Hexagon_6_X, + fixup_Hexagon_32_PCREL, + fixup_Hexagon_COPY, + fixup_Hexagon_GLOB_DAT, + fixup_Hexagon_JMP_SLOT, + fixup_Hexagon_RELATIVE, + fixup_Hexagon_PLT_B22_PCREL, + fixup_Hexagon_GOTREL_LO16, + fixup_Hexagon_GOTREL_HI16, + fixup_Hexagon_GOTREL_32, + fixup_Hexagon_GOT_LO16, + fixup_Hexagon_GOT_HI16, + fixup_Hexagon_GOT_32, + fixup_Hexagon_GOT_16, + fixup_Hexagon_DTPMOD_32, + fixup_Hexagon_DTPREL_LO16, + fixup_Hexagon_DTPREL_HI16, + fixup_Hexagon_DTPREL_32, + fixup_Hexagon_DTPREL_16, + fixup_Hexagon_GD_PLT_B22_PCREL, + fixup_Hexagon_LD_PLT_B22_PCREL, + fixup_Hexagon_GD_GOT_LO16, + fixup_Hexagon_GD_GOT_HI16, + fixup_Hexagon_GD_GOT_32, + fixup_Hexagon_GD_GOT_16, + fixup_Hexagon_LD_GOT_LO16, + fixup_Hexagon_LD_GOT_HI16, + fixup_Hexagon_LD_GOT_32, + fixup_Hexagon_LD_GOT_16, + fixup_Hexagon_IE_LO16, + fixup_Hexagon_IE_HI16, + fixup_Hexagon_IE_32, + fixup_Hexagon_IE_16, + fixup_Hexagon_IE_GOT_LO16, + fixup_Hexagon_IE_GOT_HI16, + fixup_Hexagon_IE_GOT_32, + fixup_Hexagon_IE_GOT_16, + fixup_Hexagon_TPREL_LO16, + fixup_Hexagon_TPREL_HI16, + fixup_Hexagon_TPREL_32, + fixup_Hexagon_TPREL_16, + fixup_Hexagon_6_PCREL_X, + fixup_Hexagon_GOTREL_32_6_X, + fixup_Hexagon_GOTREL_16_X, + fixup_Hexagon_GOTREL_11_X, + fixup_Hexagon_GOT_32_6_X, + fixup_Hexagon_GOT_16_X, + fixup_Hexagon_GOT_11_X, + fixup_Hexagon_DTPREL_32_6_X, + fixup_Hexagon_DTPREL_16_X, + fixup_Hexagon_DTPREL_11_X, + fixup_Hexagon_GD_GOT_32_6_X, + fixup_Hexagon_GD_GOT_16_X, + fixup_Hexagon_GD_GOT_11_X, + fixup_Hexagon_LD_GOT_32_6_X, + fixup_Hexagon_LD_GOT_16_X, + fixup_Hexagon_LD_GOT_11_X, + fixup_Hexagon_IE_32_6_X, + fixup_Hexagon_IE_16_X, + fixup_Hexagon_IE_GOT_32_6_X, + fixup_Hexagon_IE_GOT_16_X, + fixup_Hexagon_IE_GOT_11_X, + fixup_Hexagon_TPREL_32_6_X, + fixup_Hexagon_TPREL_16_X, + fixup_Hexagon_TPREL_11_X, + + LastTargetFixupKind, + NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind +}; +enum FixupBitmaps : unsigned { + Word8 = 0xff, + Word16 = 0xffff, + Word32 = 0xffffffff, + Word32_LO = 0x00c03fff, + Word32_HL = 0x0, // Not Implemented + Word32_GP = 0x0, // Not Implemented + Word32_B7 = 0x00001f18, + Word32_B9 = 0x003000fe, + Word32_B13 = 0x00202ffe, + Word32_B15 = 0x00df20fe, + Word32_B22 = 0x01ff3ffe, + Word32_R6 = 0x000007e0, + Word32_U6 = 0x0, // Not Implemented + Word32_U16 = 0x0, // Not Implemented + Word32_X26 = 0x0fff3fff +}; +} // namespace Hexagon +} // namespace llvm + +#endif // LLVM_HEXAGON_HEXAGONFIXUPKINDS_H diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp new file mode 100644 index 0000000..06ccec5 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp @@ -0,0 +1,233 @@ +//===- HexagonInstPrinter.cpp - Convert Hexagon MCInst to assembly 
syntax -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class prints an Hexagon MCInst to a .s file. +// +//===----------------------------------------------------------------------===// + +#include "HexagonAsmPrinter.h" +#include "HexagonInstPrinter.h" +#include "MCTargetDesc/HexagonBaseInfo.h" +#include "MCTargetDesc/HexagonMCInstrInfo.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define DEBUG_TYPE "asm-printer" + +#define GET_INSTRUCTION_NAME +#include "HexagonGenAsmWriter.inc" + +HexagonInstPrinter::HexagonInstPrinter(MCAsmInfo const &MAI, + MCInstrInfo const &MII, + MCRegisterInfo const &MRI) + : MCInstPrinter(MAI, MII, MRI), MII(MII), HasExtender(false) { +} + +StringRef HexagonInstPrinter::getOpcodeName(unsigned Opcode) const { + return MII.getName(Opcode); +} + +void HexagonInstPrinter::printRegName(raw_ostream &O, unsigned RegNo) const { + O << getRegName(RegNo); +} + +StringRef HexagonInstPrinter::getRegName(unsigned RegNo) const { + return getRegisterName(RegNo); +} + +void HexagonInstPrinter::setExtender(MCInst const &MCI) { + HasExtender = HexagonMCInstrInfo::isImmext(MCI); +} + +void HexagonInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, + StringRef Annot, const MCSubtargetInfo &STI) { + assert(HexagonMCInstrInfo::isBundle(*MI)); + assert(HexagonMCInstrInfo::bundleSize(*MI) <= HEXAGON_PACKET_SIZE); + assert(HexagonMCInstrInfo::bundleSize(*MI) > 0); + HasExtender = false; + for (auto const &I : HexagonMCInstrInfo::bundleInstructions(*MI)) { + MCInst const &MCI = *I.getInst(); + if (HexagonMCInstrInfo::isDuplex(MII, MCI)) { + printInstruction(MCI.getOperand(1).getInst(), OS); + OS << '\v'; + HasExtender = false; + printInstruction(MCI.getOperand(0).getInst(), OS); + } else + printInstruction(&MCI, OS); + setExtender(MCI); + OS << "\n"; + } + + auto Separator = ""; + if (HexagonMCInstrInfo::isInnerLoop(*MI)) { + OS << Separator; + Separator = " "; + MCInst ME; + ME.setOpcode(Hexagon::ENDLOOP0); + printInstruction(&ME, OS); + } + if (HexagonMCInstrInfo::isOuterLoop(*MI)) { + OS << Separator; + Separator = " "; + MCInst ME; + ME.setOpcode(Hexagon::ENDLOOP1); + printInstruction(&ME, OS); + } +} + +void HexagonInstPrinter::printOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const { + if (HexagonMCInstrInfo::getExtendableOp(MII, *MI) == OpNo && + (HasExtender || HexagonMCInstrInfo::isConstExtended(MII, *MI))) + O << "#"; + MCOperand const &MO = MI->getOperand(OpNo); + if (MO.isReg()) { + O << getRegisterName(MO.getReg()); + } else if (MO.isExpr()) { + int64_t Value; + if (MO.getExpr()->evaluateAsAbsolute(Value)) + O << formatImm(Value); + else + O << *MO.getExpr(); + } else { + llvm_unreachable("Unknown operand"); + } +} + +void HexagonInstPrinter::printExtOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const { + printOperand(MI, OpNo, O); +} + +void HexagonInstPrinter::printUnsignedImmOperand(MCInst const *MI, + unsigned OpNo, + raw_ostream &O) const { + O << MI->getOperand(OpNo).getImm(); +} + +void HexagonInstPrinter::printNegImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const { + O << -MI->getOperand(OpNo).getImm(); +} + +void HexagonInstPrinter::printNOneImmOperand(MCInst 
const *MI, unsigned OpNo, + raw_ostream &O) const { + O << -1; +} + +void HexagonInstPrinter::prints3_6ImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const { + int64_t Imm; + bool Success = MI->getOperand(OpNo).getExpr()->evaluateAsAbsolute(Imm); + Imm = SignExtend64<9>(Imm); + assert(Success); (void)Success; + assert(((Imm & 0x3f) == 0) && "Lower 6 bits must be ZERO."); + O << formatImm(Imm/64); +} + +void HexagonInstPrinter::prints3_7ImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const { + int64_t Imm; + bool Success = MI->getOperand(OpNo).getExpr()->evaluateAsAbsolute(Imm); + Imm = SignExtend64<10>(Imm); + assert(Success); (void)Success; + assert(((Imm & 0x7f) == 0) && "Lower 7 bits must be ZERO."); + O << formatImm(Imm/128); +} + +void HexagonInstPrinter::prints4_6ImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const { + int64_t Imm; + bool Success = MI->getOperand(OpNo).getExpr()->evaluateAsAbsolute(Imm); + Imm = SignExtend64<10>(Imm); + assert(Success); (void)Success; + assert(((Imm & 0x3f) == 0) && "Lower 6 bits must be ZERO."); + O << formatImm(Imm/64); +} + +void HexagonInstPrinter::prints4_7ImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const { + int64_t Imm; + bool Success = MI->getOperand(OpNo).getExpr()->evaluateAsAbsolute(Imm); + Imm = SignExtend64<11>(Imm); + assert(Success); (void)Success; + assert(((Imm & 0x7f) == 0) && "Lower 7 bits must be ZERO."); + O << formatImm(Imm/128); +} + +void HexagonInstPrinter::printGlobalOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const { + printOperand(MI, OpNo, O); +} + +void HexagonInstPrinter::printJumpTable(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const { + assert(MI->getOperand(OpNo).isExpr() && "Expecting expression"); + + printOperand(MI, OpNo, O); +} + +void HexagonInstPrinter::printConstantPool(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const { + assert(MI->getOperand(OpNo).isExpr() && "Expecting expression"); + + printOperand(MI, OpNo, O); +} + +void HexagonInstPrinter::printBranchOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const { + // Branches can take an immediate operand. This is used by the branch + // selection pass to print $+8, an eight byte displacement from the PC. + llvm_unreachable("Unknown branch operand."); +} + +void HexagonInstPrinter::printCallOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const {} + +void HexagonInstPrinter::printAbsAddrOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const {} + +void HexagonInstPrinter::printPredicateOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const {} + +void HexagonInstPrinter::printSymbol(MCInst const *MI, unsigned OpNo, + raw_ostream &O, bool hi) const { + MCOperand const &MO = MI->getOperand(OpNo); + + O << '#' << (hi ? 
"HI" : "LO") << '('; + if (MO.isImm()) { + O << '#'; + printOperand(MI, OpNo, O); + } else { + printOperand(MI, OpNo, O); + assert("Unknown symbol operand"); + } + O << ')'; +} + +void HexagonInstPrinter::printBrtarget(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const { + MCOperand const &MO = MI->getOperand(OpNo); + assert (MO.isExpr()); + MCExpr const &Expr = *MO.getExpr(); + int64_t Value; + if (Expr.evaluateAsAbsolute(Value)) + O << format("0x%" PRIx64, Value); + else { + if (HasExtender || HexagonMCInstrInfo::isConstExtended(MII, *MI)) + if (HexagonMCInstrInfo::getExtendableOp(MII, *MI) == OpNo) + O << "##"; + O << Expr; + } +} diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h new file mode 100644 index 0000000..5f42118 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h @@ -0,0 +1,92 @@ +//===-- HexagonInstPrinter.h - Convert Hexagon MCInst to assembly syntax --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HEXAGON_INSTPRINTER_HEXAGONINSTPRINTER_H +#define LLVM_LIB_TARGET_HEXAGON_INSTPRINTER_HEXAGONINSTPRINTER_H + +#include "llvm/MC/MCInstPrinter.h" + +namespace llvm { +/// Prints bundles as a newline separated list of individual instructions +/// Duplexes are separated by a vertical tab \v character +/// A trailing line includes bundle properties such as endloop0/1 +/// +/// r0 = add(r1, r2) +/// r0 = #0 \v jump 0x0 +/// :endloop0 :endloop1 +class HexagonInstPrinter : public MCInstPrinter { +public: + explicit HexagonInstPrinter(MCAsmInfo const &MAI, MCInstrInfo const &MII, + MCRegisterInfo const &MRI); + void printInst(MCInst const *MI, raw_ostream &O, StringRef Annot, + const MCSubtargetInfo &STI) override; + virtual StringRef getOpcodeName(unsigned Opcode) const; + void printInstruction(MCInst const *MI, raw_ostream &O); + + StringRef getRegName(unsigned RegNo) const; + static char const *getRegisterName(unsigned RegNo); + void printRegName(raw_ostream &O, unsigned RegNo) const override; + + void printOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const; + void printExtOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const; + void printUnsignedImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const; + void printNegImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const; + void printNOneImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const; + void prints3_6ImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const; + void prints3_7ImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const; + void prints4_6ImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const; + void prints4_7ImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const; + void printBranchOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const; + void printCallOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const; + void printAbsAddrOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const; + void printPredicateOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const; + void printGlobalOperand(MCInst const *MI, unsigned OpNo, + 
raw_ostream &O) const; + void printJumpTable(MCInst const *MI, unsigned OpNo, raw_ostream &O) const; + void printBrtarget(MCInst const *MI, unsigned OpNo, raw_ostream &O) const; + + void printConstantPool(MCInst const *MI, unsigned OpNo, raw_ostream &O) const; + + void printSymbolHi(MCInst const *MI, unsigned OpNo, raw_ostream &O) const { + printSymbol(MI, OpNo, O, true); + } + void printSymbolLo(MCInst const *MI, unsigned OpNo, raw_ostream &O) const { + printSymbol(MI, OpNo, O, false); + } + + MCAsmInfo const &getMAI() const { return MAI; } + MCInstrInfo const &getMII() const { return MII; } + +protected: + void printSymbol(MCInst const *MI, unsigned OpNo, raw_ostream &O, + bool hi) const; + +private: + MCInstrInfo const &MII; + + bool HasExtender; + void setExtender(MCInst const &MCI); +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp new file mode 100644 index 0000000..51d2f1c --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp @@ -0,0 +1,37 @@ +//===-- HexagonMCAsmInfo.cpp - Hexagon asm properties ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declarations of the HexagonMCAsmInfo properties. +// +//===----------------------------------------------------------------------===// + +#include "HexagonMCAsmInfo.h" + +using namespace llvm; + +// Pin the vtable to this file. +void HexagonMCAsmInfo::anchor() {} + +HexagonMCAsmInfo::HexagonMCAsmInfo(const Triple &TT) { + Data16bitsDirective = "\t.half\t"; + Data32bitsDirective = "\t.word\t"; + Data64bitsDirective = nullptr; // .xword is only supported by V9. + ZeroDirective = "\t.skip\t"; + CommentString = "//"; + + LCOMMDirectiveAlignmentType = LCOMM::ByteAlignment; + InlineAsmStart = "# InlineAsm Start"; + InlineAsmEnd = "# InlineAsm End"; + ZeroDirective = "\t.space\t"; + AscizDirective = "\t.string\t"; + + SupportsDebugInformation = true; + UsesELFSectionDirectiveForBSS = true; + ExceptionsType = ExceptionHandling::DwarfCFI; +} diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h new file mode 100644 index 0000000..a8456b4 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h @@ -0,0 +1,32 @@ +//===-- HexagonTargetAsmInfo.h - Hexagon asm properties --------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the HexagonMCAsmInfo class. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCASMINFO_H +#define LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCASMINFO_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/MC/MCAsmInfoELF.h" + +namespace llvm { +class Triple; + +class HexagonMCAsmInfo : public MCAsmInfoELF { + void anchor() override; + +public: + explicit HexagonMCAsmInfo(const Triple &TT); +}; + +} // namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp new file mode 100644 index 0000000..46b7b41 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp @@ -0,0 +1,581 @@ +//===----- HexagonMCChecker.cpp - Instruction bundle checking -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the checking of insns inside a bundle according to the +// packet constraint rules of the Hexagon ISA. +// +//===----------------------------------------------------------------------===// + +#include "HexagonMCChecker.h" + +#include "HexagonBaseInfo.h" + +#include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +static cl::opt<bool> RelaxNVChecks("relax-nv-checks", cl::init(false), + cl::ZeroOrMore, cl::Hidden, cl::desc("Relax checks of new-value validity")); + +const HexagonMCChecker::PredSense + HexagonMCChecker::Unconditional(Hexagon::NoRegister, false); + +void HexagonMCChecker::init() { + // Initialize read-only registers set. + ReadOnly.insert(Hexagon::PC); + + // Figure out the loop-registers definitions. + if (HexagonMCInstrInfo::isInnerLoop(MCB)) { + Defs[Hexagon::SA0].insert(Unconditional); // FIXME: define or change SA0? + Defs[Hexagon::LC0].insert(Unconditional); + } + if (HexagonMCInstrInfo::isOuterLoop(MCB)) { + Defs[Hexagon::SA1].insert(Unconditional); // FIXME: define or change SA0? + Defs[Hexagon::LC1].insert(Unconditional); + } + + if (HexagonMCInstrInfo::isBundle(MCB)) + // Unfurl a bundle. + for (auto const&I : HexagonMCInstrInfo::bundleInstructions(MCB)) { + init(*I.getInst()); + } + else + init(MCB); +} + +void HexagonMCChecker::init(MCInst const& MCI) { + const MCInstrDesc& MCID = HexagonMCInstrInfo::getDesc(MCII, MCI); + unsigned PredReg = Hexagon::NoRegister; + bool isTrue = false; + + // Get used registers. + for (unsigned i = MCID.getNumDefs(); i < MCID.getNumOperands(); ++i) + if (MCI.getOperand(i).isReg()) { + unsigned R = MCI.getOperand(i).getReg(); + + if (HexagonMCInstrInfo::isPredicated(MCII, MCI) && isPredicateRegister(R)) { + // Note an used predicate register. + PredReg = R; + isTrue = HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI); + + // Note use of new predicate register. + if (HexagonMCInstrInfo::isPredicatedNew(MCII, MCI)) + NewPreds.insert(PredReg); + } + else + // Note register use. Super-registers are not tracked directly, + // but their components. + for(MCRegAliasIterator SRI(R, &RI, !MCSubRegIterator(R, &RI).isValid()); + SRI.isValid(); + ++SRI) + if (!MCSubRegIterator(*SRI, &RI).isValid()) + // Skip super-registers used indirectly. 
+ Uses.insert(*SRI); + } + + // Get implicit register definitions. + if (const MCPhysReg *ImpDef = MCID.getImplicitDefs()) + for (; *ImpDef; ++ImpDef) { + unsigned R = *ImpDef; + + if (Hexagon::R31 != R && MCID.isCall()) + // Any register other than the LR and the PC are actually volatile ones + // as defined by the ABI, not modified implicitly by the call insn. + continue; + if (Hexagon::PC == R) + // Branches are the only insns that can change the PC, + // otherwise a read-only register. + continue; + + if (Hexagon::USR_OVF == R) + // Many insns change the USR implicitly, but only one or another flag. + // The instruction table models the USR.OVF flag, which can be implicitly + // modified more than once, but cannot be modified in the same packet + // with an instruction that modifies is explicitly. Deal with such situ- + // ations individually. + SoftDefs.insert(R); + else if (isPredicateRegister(R) && + HexagonMCInstrInfo::isPredicateLate(MCII, MCI)) + // Include implicit late predicates. + LatePreds.insert(R); + else + Defs[R].insert(PredSense(PredReg, isTrue)); + } + + // Figure out explicit register definitions. + for (unsigned i = 0; i < MCID.getNumDefs(); ++i) { + unsigned R = MCI.getOperand(i).getReg(), + S = Hexagon::NoRegister; + + // Note register definitions, direct ones as well as indirect side-effects. + // Super-registers are not tracked directly, but their components. + for(MCRegAliasIterator SRI(R, &RI, !MCSubRegIterator(R, &RI).isValid()); + SRI.isValid(); + ++SRI) { + if (MCSubRegIterator(*SRI, &RI).isValid()) + // Skip super-registers defined indirectly. + continue; + + if (R == *SRI) { + if (S == R) + // Avoid scoring the defined register multiple times. + continue; + else + // Note that the defined register has already been scored. + S = R; + } + + if (Hexagon::P3_0 != R && Hexagon::P3_0 == *SRI) + // P3:0 is a special case, since multiple predicate register definitions + // in a packet is allowed as the equivalent of their logical "and". + // Only an explicit definition of P3:0 is noted as such; if a + // side-effect, then note as a soft definition. + SoftDefs.insert(*SRI); + else if (HexagonMCInstrInfo::isPredicateLate(MCII, MCI) && isPredicateRegister(*SRI)) + // Some insns produce predicates too late to be used in the same packet. + LatePreds.insert(*SRI); + else if (i == 0 && llvm::HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCVI_VM_CUR_LD) + // Current loads should be used in the same packet. + // TODO: relies on the impossibility of a current and a temporary loads + // in the same packet. + CurDefs.insert(*SRI), Defs[*SRI].insert(PredSense(PredReg, isTrue)); + else if (i == 0 && llvm::HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCVI_VM_TMP_LD) + // Temporary loads should be used in the same packet, but don't commit + // results, so it should be disregarded if another insn changes the same + // register. + // TODO: relies on the impossibility of a current and a temporary loads + // in the same packet. + TmpDefs.insert(*SRI); + else if (i <= 1 && llvm::HexagonMCInstrInfo::hasNewValue2(MCII, MCI) ) + // vshuff(Vx, Vy, Rx) <- Vx(0) and Vy(1) are both source and + // destination registers with this instruction. same for vdeal(Vx,Vy,Rx) + Uses.insert(*SRI); + else + Defs[*SRI].insert(PredSense(PredReg, isTrue)); + } + } + + // Figure out register definitions that produce new values. 
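+  // A "new-value" producer makes its result visible to another insn in the
+  // same packet through the Rn.new syntax.  Hedged illustration (syntax per
+  // the Hexagon PRM, not taken from this file):
+  //   { r2 = add(r0, r1)                          // producer
+  //     if (cmp.eq(r2.new, #0)) jump:nt done }    // new-value consumer
+  // NewDefs records, per register, whether each producer was predicated (and
+  // in which sense) and whether it produces a floating-point value, so that
+  // checkNewValues() can validate every consumer recorded in NewUses.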
+ if (HexagonMCInstrInfo::hasNewValue(MCII, MCI)) { + unsigned R = HexagonMCInstrInfo::getNewValueOperand(MCII, MCI).getReg(); + + if (HexagonMCInstrInfo::isCompound(MCII, MCI)) + compoundRegisterMap(R); // Compound insns have a limited register range. + + for(MCRegAliasIterator SRI(R, &RI, !MCSubRegIterator(R, &RI).isValid()); + SRI.isValid(); + ++SRI) + if (!MCSubRegIterator(*SRI, &RI).isValid()) + // No super-registers defined indirectly. + NewDefs[*SRI].push_back(NewSense::Def(PredReg, HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI), + HexagonMCInstrInfo::isFloat(MCII, MCI))); + + // For fairly unique 2-dot-new producers, example: + // vdeal(V1, V9, R0) V1.new and V9.new can be used by consumers. + if (HexagonMCInstrInfo::hasNewValue2(MCII, MCI)) { + unsigned R2 = HexagonMCInstrInfo::getNewValueOperand2(MCII, MCI).getReg(); + + for(MCRegAliasIterator SRI(R2, &RI, !MCSubRegIterator(R2, &RI).isValid()); + SRI.isValid(); + ++SRI) + if (!MCSubRegIterator(*SRI, &RI).isValid()) + NewDefs[*SRI].push_back(NewSense::Def(PredReg, HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI), + HexagonMCInstrInfo::isFloat(MCII, MCI))); + } + } + + // Figure out definitions of new predicate registers. + if (HexagonMCInstrInfo::isPredicatedNew(MCII, MCI)) + for (unsigned i = MCID.getNumDefs(); i < MCID.getNumOperands(); ++i) + if (MCI.getOperand(i).isReg()) { + unsigned P = MCI.getOperand(i).getReg(); + + if (isPredicateRegister(P)) + NewPreds.insert(P); + } + + // Figure out uses of new values. + if (HexagonMCInstrInfo::isNewValue(MCII, MCI)) { + unsigned N = HexagonMCInstrInfo::getNewValueOperand(MCII, MCI).getReg(); + + if (!MCSubRegIterator(N, &RI).isValid()) { + // Super-registers cannot use new values. + if (MCID.isBranch()) + NewUses[N] = NewSense::Jmp(llvm::HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeNV); + else + NewUses[N] = NewSense::Use(PredReg, HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI)); + } + } +} + +HexagonMCChecker::HexagonMCChecker(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, MCInst &mcb, MCInst &mcbdx, + MCRegisterInfo const &ri) + : MCB(mcb), MCBDX(mcbdx), RI(ri), MCII(MCII), STI(STI), + bLoadErrInfo(false) { + init(); +} + +bool HexagonMCChecker::check() { + bool chkB = checkBranches(); + bool chkP = checkPredicates(); + bool chkNV = checkNewValues(); + bool chkR = checkRegisters(); + bool chkS = checkSolo(); + bool chkSh = checkShuffle(); + bool chkSl = checkSlots(); + bool chk = chkB && chkP && chkNV && chkR && chkS && chkSh && chkSl; + + return chk; +} + +bool HexagonMCChecker::checkSlots() + +{ + unsigned slotsUsed = 0; + for (auto HMI: HexagonMCInstrInfo::bundleInstructions(MCBDX)) { + MCInst const& MCI = *HMI.getInst(); + if (HexagonMCInstrInfo::isImmext(MCI)) + continue; + if (HexagonMCInstrInfo::isDuplex(MCII, MCI)) + slotsUsed += 2; + else + ++slotsUsed; + } + + if (slotsUsed > HEXAGON_PACKET_SIZE) { + HexagonMCErrInfo errInfo; + errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_NOSLOTS); + addErrInfo(errInfo); + return false; + } + return true; +} + +// Check legal use of branches. 
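+// Illustrative sketch (not part of the upstream file): the ordering rule that
+// checkBranches() enforces, reduced to plain per-insn flags.  A loop-end
+// packet may not contain a branch at all, and a packet with two branches is
+// rejected when neither of them is conditional or when the conditional one is
+// placed after the unconditional one.
+#if 0
+struct BranchFlags { bool IsBranch, IsConditional; };
+static bool branchOrderIsLegal(const BranchFlags *Packet, unsigned Size,
+                               bool IsLoopEnd) {
+  unsigned Branches = 0;
+  bool HasConditional = false;
+  unsigned Conditional = Size, Unconditional = Size;
+  for (unsigned i = 0; i < Size; ++i) {
+    if (!Packet[i].IsBranch)
+      continue;
+    ++Branches;
+    if (Packet[i].IsConditional) {
+      HasConditional = true;
+      Conditional = i;   // position of the (last) conditional branch
+    } else
+      Unconditional = i; // position of the unconditional branch
+  }
+  if (Branches && IsLoopEnd)
+    return false; // CHECK_ERROR_ENDLOOP
+  if (Branches > 1 && (!HasConditional || Conditional > Unconditional))
+    return false; // CHECK_ERROR_BRANCHES
+  return true;
+}
+#endif
+// checkBranches() below applies the same rule to the actual MCInst bundle.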
+bool HexagonMCChecker::checkBranches() { + HexagonMCErrInfo errInfo; + if (HexagonMCInstrInfo::isBundle(MCB)) { + bool hasConditional = false; + unsigned Branches = 0, Returns = 0, NewIndirectBranches = 0, + NewValueBranches = 0, Conditional = HEXAGON_PRESHUFFLE_PACKET_SIZE, + Unconditional = HEXAGON_PRESHUFFLE_PACKET_SIZE; + + for (unsigned i = HexagonMCInstrInfo::bundleInstructionsOffset; + i < MCB.size(); ++i) { + MCInst const &MCI = *MCB.begin()[i].getInst(); + + if (HexagonMCInstrInfo::isImmext(MCI)) + continue; + if (HexagonMCInstrInfo::getDesc(MCII, MCI).isBranch() || + HexagonMCInstrInfo::getDesc(MCII, MCI).isCall()) { + ++Branches; + if (HexagonMCInstrInfo::getDesc(MCII, MCI).isIndirectBranch() && + HexagonMCInstrInfo::isPredicatedNew(MCII, MCI)) + ++NewIndirectBranches; + if (HexagonMCInstrInfo::isNewValue(MCII, MCI)) + ++NewValueBranches; + + if (HexagonMCInstrInfo::isPredicated(MCII, MCI) || + HexagonMCInstrInfo::isPredicatedNew(MCII, MCI)) { + hasConditional = true; + Conditional = i; // Record the position of the conditional branch. + } else { + Unconditional = i; // Record the position of the unconditional branch. + } + } + if (HexagonMCInstrInfo::getDesc(MCII, MCI).isReturn() && + HexagonMCInstrInfo::getDesc(MCII, MCI).mayLoad()) + ++Returns; + } + + if (Branches) // FIXME: should "Defs.count(Hexagon::PC)" be here too? + if (HexagonMCInstrInfo::isInnerLoop(MCB) || + HexagonMCInstrInfo::isOuterLoop(MCB)) { + // Error out if there's any branch in a loop-end packet. + errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_ENDLOOP, Hexagon::PC); + addErrInfo(errInfo); + return false; + } + if (Branches > 1) + if (!hasConditional || Conditional > Unconditional) { + // Error out if more than one unconditional branch or + // the conditional branch appears after the unconditional one. + errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_BRANCHES); + addErrInfo(errInfo); + return false; + } + } + + return true; +} + +// Check legal use of predicate registers. +bool HexagonMCChecker::checkPredicates() { + HexagonMCErrInfo errInfo; + // Check for proper use of new predicate registers. + for (const auto& I : NewPreds) { + unsigned P = I; + + if (!Defs.count(P) || LatePreds.count(P)) { + // Error out if the new predicate register is not defined, + // or defined "late" + // (e.g., "{ if (p3.new)... ; p3 = sp1loop0(#r7:2, Rs) }"). + errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_NEWP, P); + addErrInfo(errInfo); + return false; + } + } + + // Check for proper use of auto-anded of predicate registers. + for (const auto& I : LatePreds) { + unsigned P = I; + + if (LatePreds.count(P) > 1 || Defs.count(P)) { + // Error out if predicate register defined "late" multiple times or + // defined late and regularly defined + // (e.g., "{ p3 = sp1loop0(...); p3 = cmp.eq(...) }". + errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, P); + addErrInfo(errInfo); + return false; + } + } + + return true; +} + +// Check legal use of new values. +bool HexagonMCChecker::checkNewValues() { + HexagonMCErrInfo errInfo; + memset(&errInfo, 0, sizeof(errInfo)); + for (auto& I : NewUses) { + unsigned R = I.first; + NewSense &US = I.second; + + if (!hasValidNewValueDef(US, NewDefs[R])) { + errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_NEWV, R); + addErrInfo(errInfo); + return false; + } + } + + return true; +} + +// Check for legal register uses and definitions. +bool HexagonMCChecker::checkRegisters() { + HexagonMCErrInfo errInfo; + // Check for proper register definitions. 
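+  // A general-purpose register may normally be written only once per packet.
+  // The loop below allows two exceptions: predicate registers may be written
+  // by several insns (the results are effectively ANDed together), and a pair
+  // of writes under complementary predicates is legal because only one of
+  // them can execute.  Hedged examples in the spirit of the comments below:
+  //   { r0 = add(r1, r2); r0 = sub(r3, r4) }          // rejected
+  //   { if (p0) r0 = add(r1, r2); if (!p0) r0 = #0 }  // accepted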
+ for (const auto& I : Defs) { + unsigned R = I.first; + + if (ReadOnly.count(R)) { + // Error out for definitions of read-only registers. + errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_READONLY, R); + addErrInfo(errInfo); + return false; + } + if (isLoopRegister(R) && Defs.count(R) > 1 && + (HexagonMCInstrInfo::isInnerLoop(MCB) || + HexagonMCInstrInfo::isOuterLoop(MCB))) { + // Error out for definitions of loop registers at the end of a loop. + errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_LOOP, R); + addErrInfo(errInfo); + return false; + } + if (SoftDefs.count(R)) { + // Error out for explicit changes to registers also weakly defined + // (e.g., "{ usr = r0; r0 = sfadd(...) }"). + unsigned UsrR = Hexagon::USR; // Silence warning about mixed types in ?:. + unsigned BadR = RI.isSubRegister(Hexagon::USR, R) ? UsrR : R; + errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, BadR); + addErrInfo(errInfo); + return false; + } + if (!isPredicateRegister(R) && Defs[R].size() > 1) { + // Check for multiple register definitions. + PredSet &PM = Defs[R]; + + // Check for multiple unconditional register definitions. + if (PM.count(Unconditional)) { + // Error out on an unconditional change when there are any other + // changes, conditional or not. + unsigned UsrR = Hexagon::USR; + unsigned BadR = RI.isSubRegister(Hexagon::USR, R) ? UsrR : R; + errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, BadR); + addErrInfo(errInfo); + return false; + } + // Check for multiple conditional register definitions. + for (const auto& J : PM) { + PredSense P = J; + + // Check for multiple uses of the same condition. + if (PM.count(P) > 1) { + // Error out on conditional changes based on the same predicate + // (e.g., "{ if (!p0) r0 =...; if (!p0) r0 =... }"). + errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, R); + addErrInfo(errInfo); + return false; + } + // Check for the use of the complementary condition. + P.second = !P.second; + if (PM.count(P) && PM.size() > 2) { + // Error out on conditional changes based on the same predicate + // multiple times + // (e.g., "{ if (p0) r0 =...; if (!p0) r0 =... }; if (!p0) r0 =... }"). + errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, R); + addErrInfo(errInfo); + return false; + } + } + } + } + + // Check for use of current definitions. + for (const auto& I : CurDefs) { + unsigned R = I; + + if (!Uses.count(R)) { + // Warn on an unused current definition. + errInfo.setWarning(HexagonMCErrInfo::CHECK_WARN_CURRENT, R); + addErrInfo(errInfo); + return true; + } + } + + // Check for use of temporary definitions. + for (const auto& I : TmpDefs) { + unsigned R = I; + + if (!Uses.count(R)) { + // special case for vhist + bool vHistFound = false; + for (auto const&HMI : HexagonMCInstrInfo::bundleInstructions(MCB)) { + if(llvm::HexagonMCInstrInfo::getType(MCII, *HMI.getInst()) == HexagonII::TypeCVI_HIST) { + vHistFound = true; // vhist() implicitly uses ALL REGxx.tmp + break; + } + } + // Warn on an unused temporary definition. + if (vHistFound == false) { + errInfo.setWarning(HexagonMCErrInfo::CHECK_WARN_TEMPORARY, R); + addErrInfo(errInfo); + return true; + } + } + } + + return true; +} + +// Check for legal use of solo insns. 
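+// A solo insn is one that the instruction tables mark as requiring a packet of
+// its own; bundling it with any other insn is reported as CHECK_ERROR_SOLO.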
+bool HexagonMCChecker::checkSolo() { + HexagonMCErrInfo errInfo; + if (HexagonMCInstrInfo::isBundle(MCB) && + HexagonMCInstrInfo::bundleSize(MCB) > 1) { + for (auto const&I : HexagonMCInstrInfo::bundleInstructions(MCB)) { + if (llvm::HexagonMCInstrInfo::isSolo(MCII, *I.getInst())) { + errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_SOLO); + addErrInfo(errInfo); + return false; + } + } + } + + return true; +} + +bool HexagonMCChecker::checkShuffle() { + HexagonMCErrInfo errInfo; + // Branch info is lost when duplexing. The unduplexed insns must be + // checked and only branch errors matter for this case. + HexagonMCShuffler MCS(MCII, STI, MCB); + if (!MCS.check()) { + if (MCS.getError() == HexagonShuffler::SHUFFLE_ERROR_BRANCHES) { + errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_SHUFFLE); + errInfo.setShuffleError(MCS.getError()); + addErrInfo(errInfo); + return false; + } + } + HexagonMCShuffler MCSDX(MCII, STI, MCBDX); + if (!MCSDX.check()) { + errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_SHUFFLE); + errInfo.setShuffleError(MCSDX.getError()); + addErrInfo(errInfo); + return false; + } + return true; +} + +void HexagonMCChecker::compoundRegisterMap(unsigned& Register) { + switch (Register) { + default: + break; + case Hexagon::R15: + Register = Hexagon::R23; + break; + case Hexagon::R14: + Register = Hexagon::R22; + break; + case Hexagon::R13: + Register = Hexagon::R21; + break; + case Hexagon::R12: + Register = Hexagon::R20; + break; + case Hexagon::R11: + Register = Hexagon::R19; + break; + case Hexagon::R10: + Register = Hexagon::R18; + break; + case Hexagon::R9: + Register = Hexagon::R17; + break; + case Hexagon::R8: + Register = Hexagon::R16; + break; + } +} + +bool HexagonMCChecker::hasValidNewValueDef(const NewSense &Use, + const NewSenseList &Defs) const { + bool Strict = !RelaxNVChecks; + + for (unsigned i = 0, n = Defs.size(); i < n; ++i) { + const NewSense &Def = Defs[i]; + // NVJ cannot use a new FP value [7.6.1] + if (Use.IsNVJ && (Def.IsFloat || Def.PredReg != 0)) + continue; + // If the definition was not predicated, then it does not matter if + // the use is. + if (Def.PredReg == 0) + return true; + // With the strict checks, both the definition and the use must be + // predicated on the same register and condition. + if (Strict) { + if (Def.PredReg == Use.PredReg && Def.Cond == Use.Cond) + return true; + } else { + // With the relaxed checks, if the definition was predicated, the only + // detectable violation is if the use is predicated on the opposing + // condition, otherwise, it's ok. + if (Def.PredReg != Use.PredReg || Def.Cond == Use.Cond) + return true; + } + } + return false; +} + diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h new file mode 100644 index 0000000..5fc0bde --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h @@ -0,0 +1,218 @@ +//===----- HexagonMCChecker.h - Instruction bundle checking ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the checking of insns inside a bundle according to the +// packet constraint rules of the Hexagon ISA. 
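+// A typical driver builds one checker per bundle and, when check() fails,
+// drains the queued diagnostics.  Hedged usage sketch (not taken from the
+// upstream sources; report() and the bundle names are placeholders):
+//
+//   HexagonMCChecker Check(MCII, STI, Bundle, BundleWithDuplexes, MRI);
+//   if (!Check.check())
+//     while (Check.getNextErrInfo())
+//       report(Check.getError(), Check.getErrRegister());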
+// +//===----------------------------------------------------------------------===// + +#ifndef HEXAGONMCCHECKER_H +#define HEXAGONMCCHECKER_H + +#include <map> +#include <set> +#include <queue> +#include "MCTargetDesc/HexagonMCShuffler.h" + +using namespace llvm; + +namespace llvm { +class MCOperandInfo; + +typedef struct { + unsigned Error, Warning, ShuffleError; + unsigned Register; +} ErrInfo_T; + +class HexagonMCErrInfo { +public: + enum { + CHECK_SUCCESS = 0, + // Errors. + CHECK_ERROR_BRANCHES = 0x00001, + CHECK_ERROR_NEWP = 0x00002, + CHECK_ERROR_NEWV = 0x00004, + CHECK_ERROR_REGISTERS = 0x00008, + CHECK_ERROR_READONLY = 0x00010, + CHECK_ERROR_LOOP = 0x00020, + CHECK_ERROR_ENDLOOP = 0x00040, + CHECK_ERROR_SOLO = 0x00080, + CHECK_ERROR_SHUFFLE = 0x00100, + CHECK_ERROR_NOSLOTS = 0x00200, + CHECK_ERROR_UNKNOWN = 0x00400, + // Warnings. + CHECK_WARN_CURRENT = 0x10000, + CHECK_WARN_TEMPORARY = 0x20000 + }; + ErrInfo_T s; + + void reset() { + s.Error = CHECK_SUCCESS; + s.Warning = CHECK_SUCCESS; + s.ShuffleError = HexagonShuffler::SHUFFLE_SUCCESS; + s.Register = Hexagon::NoRegister; + }; + HexagonMCErrInfo() { + reset(); + }; + + void setError(unsigned e, unsigned r = Hexagon::NoRegister) + { s.Error = e; s.Register = r; }; + void setWarning(unsigned w, unsigned r = Hexagon::NoRegister) + { s.Warning = w; s.Register = r; }; + void setShuffleError(unsigned e) { s.ShuffleError = e; }; +}; + +/// Check for a valid bundle. +class HexagonMCChecker { + /// Insn bundle. + MCInst& MCB; + MCInst& MCBDX; + const MCRegisterInfo& RI; + MCInstrInfo const &MCII; + MCSubtargetInfo const &STI; + bool bLoadErrInfo; + + /// Set of definitions: register #, if predicated, if predicated true. + typedef std::pair<unsigned, bool> PredSense; + static const PredSense Unconditional; + typedef std::multiset<PredSense> PredSet; + typedef std::multiset<PredSense>::iterator PredSetIterator; + + typedef llvm::DenseMap<unsigned, PredSet>::iterator DefsIterator; + llvm::DenseMap<unsigned, PredSet> Defs; + + /// Information about how a new-value register is defined or used: + /// PredReg = predicate register, 0 if use/def not predicated, + /// Cond = true/false for if(PredReg)/if(!PredReg) respectively, + /// IsFloat = true if definition produces a floating point value + /// (not valid for uses), + /// IsNVJ = true if the use is a new-value branch (not valid for + /// definitions). + struct NewSense { + unsigned PredReg; + bool IsFloat, IsNVJ, Cond; + // The special-case "constructors": + static NewSense Jmp(bool isNVJ) { + NewSense NS = { /*PredReg=*/ 0, /*IsFloat=*/ false, /*IsNVJ=*/ isNVJ, + /*Cond=*/ false }; + return NS; + } + static NewSense Use(unsigned PR, bool True) { + NewSense NS = { /*PredReg=*/ PR, /*IsFloat=*/ false, /*IsNVJ=*/ false, + /*Cond=*/ True }; + return NS; + } + static NewSense Def(unsigned PR, bool True, bool Float) { + NewSense NS = { /*PredReg=*/ PR, /*IsFloat=*/ Float, /*IsNVJ=*/ false, + /*Cond=*/ True }; + return NS; + } + }; + /// Set of definitions that produce new register: + typedef llvm::SmallVector<NewSense,2> NewSenseList; + typedef llvm::DenseMap<unsigned, NewSenseList>::iterator NewDefsIterator; + llvm::DenseMap<unsigned, NewSenseList> NewDefs; + + /// Set of weak definitions whose clashes should be enforced selectively. + typedef std::set<unsigned>::iterator SoftDefsIterator; + std::set<unsigned> SoftDefs; + + /// Set of current definitions committed to the register file. 
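+  /// These come from ".cur" loads (TypeCVI_VM_CUR_LD): the result is written
+  /// to the register file, but it is expected to be read by another insn in
+  /// the same packet, so an unused entry is reported as CHECK_WARN_CURRENT.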
+ typedef std::set<unsigned>::iterator CurDefsIterator; + std::set<unsigned> CurDefs; + + /// Set of temporary definitions not committed to the register file. + typedef std::set<unsigned>::iterator TmpDefsIterator; + std::set<unsigned> TmpDefs; + + /// Set of new predicates used. + typedef std::set<unsigned>::iterator NewPredsIterator; + std::set<unsigned> NewPreds; + + /// Set of predicates defined late. + typedef std::multiset<unsigned>::iterator LatePredsIterator; + std::multiset<unsigned> LatePreds; + + /// Set of uses. + typedef std::set<unsigned>::iterator UsesIterator; + std::set<unsigned> Uses; + + /// Set of new values used: new register, if new-value jump. + typedef llvm::DenseMap<unsigned, NewSense>::iterator NewUsesIterator; + llvm::DenseMap<unsigned, NewSense> NewUses; + + /// Pre-defined set of read-only registers. + typedef std::set<unsigned>::iterator ReadOnlyIterator; + std::set<unsigned> ReadOnly; + + std::queue<ErrInfo_T> ErrInfoQ; + HexagonMCErrInfo CrntErrInfo; + + void getErrInfo() { + if (bLoadErrInfo == true) { + if (ErrInfoQ.empty()) { + CrntErrInfo.reset(); + } else { + CrntErrInfo.s = ErrInfoQ.front(); + ErrInfoQ.pop(); + } + } + bLoadErrInfo = false; + } + + void init(); + void init(MCInst const&); + + // Checks performed. + bool checkBranches(); + bool checkPredicates(); + bool checkNewValues(); + bool checkRegisters(); + bool checkSolo(); + bool checkShuffle(); + bool checkSlots(); + + static void compoundRegisterMap(unsigned&); + + bool isPredicateRegister(unsigned R) const { + return (Hexagon::P0 == R || Hexagon::P1 == R || + Hexagon::P2 == R || Hexagon::P3 == R); + }; + bool isLoopRegister(unsigned R) const { + return (Hexagon::SA0 == R || Hexagon::LC0 == R || + Hexagon::SA1 == R || Hexagon::LC1 == R); + }; + + bool hasValidNewValueDef(const NewSense &Use, + const NewSenseList &Defs) const; + + public: + explicit HexagonMCChecker(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, MCInst& mcb, MCInst &mcbdx, + const MCRegisterInfo& ri); + + bool check(); + + /// add a new error/warning + void addErrInfo(HexagonMCErrInfo &err) { ErrInfoQ.push(err.s); }; + + /// Return the error code for the last operation in the insn bundle. + unsigned getError() { getErrInfo(); return CrntErrInfo.s.Error; }; + unsigned getWarning() { getErrInfo(); return CrntErrInfo.s.Warning; }; + unsigned getShuffleError() { getErrInfo(); return CrntErrInfo.s.ShuffleError; }; + unsigned getErrRegister() { getErrInfo(); return CrntErrInfo.s.Register; }; + bool getNextErrInfo() { + bLoadErrInfo = true; + return (ErrInfoQ.empty()) ? false : (getErrInfo(), true); + } +}; + +} + +#endif // HEXAGONMCCHECKER_H diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp new file mode 100644 index 0000000..c2c6275 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp @@ -0,0 +1,741 @@ +//===-- HexagonMCCodeEmitter.cpp - Hexagon Target Descriptions ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "Hexagon.h" +#include "MCTargetDesc/HexagonBaseInfo.h" +#include "MCTargetDesc/HexagonFixupKinds.h" +#include "MCTargetDesc/HexagonMCCodeEmitter.h" +#include "MCTargetDesc/HexagonMCInstrInfo.h" +#include "MCTargetDesc/HexagonMCTargetDesc.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/EndianStream.h" +#include "llvm/Support/raw_ostream.h" + +#define DEBUG_TYPE "mccodeemitter" + +using namespace llvm; +using namespace Hexagon; + +STATISTIC(MCNumEmitted, "Number of MC instructions emitted"); + +HexagonMCCodeEmitter::HexagonMCCodeEmitter(MCInstrInfo const &aMII, + MCContext &aMCT) + : MCT(aMCT), MCII(aMII), Addend(new unsigned(0)), + Extended(new bool(false)), CurrentBundle(new MCInst const *) {} + +uint32_t HexagonMCCodeEmitter::parseBits(size_t Instruction, size_t Last, + MCInst const &MCB, + MCInst const &MCI) const { + bool Duplex = HexagonMCInstrInfo::isDuplex(MCII, MCI); + if (Instruction == 0) { + if (HexagonMCInstrInfo::isInnerLoop(MCB)) { + assert(!Duplex); + assert(Instruction != Last); + return HexagonII::INST_PARSE_LOOP_END; + } + } + if (Instruction == 1) { + if (HexagonMCInstrInfo::isOuterLoop(MCB)) { + assert(!Duplex); + assert(Instruction != Last); + return HexagonII::INST_PARSE_LOOP_END; + } + } + if (Duplex) { + assert(Instruction == Last); + return HexagonII::INST_PARSE_DUPLEX; + } + if(Instruction == Last) + return HexagonII::INST_PARSE_PACKET_END; + return HexagonII::INST_PARSE_NOT_END; +} + +void HexagonMCCodeEmitter::encodeInstruction(MCInst const &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups, + MCSubtargetInfo const &STI) const { + MCInst &HMB = const_cast<MCInst &>(MI); + + assert(HexagonMCInstrInfo::isBundle(HMB)); + DEBUG(dbgs() << "Encoding bundle\n";); + *Addend = 0; + *Extended = false; + *CurrentBundle = &MI; + size_t Instruction = 0; + size_t Last = HexagonMCInstrInfo::bundleSize(HMB) - 1; + for (auto &I : HexagonMCInstrInfo::bundleInstructions(HMB)) { + MCInst &HMI = const_cast<MCInst &>(*I.getInst()); + EncodeSingleInstruction(HMI, OS, Fixups, STI, + parseBits(Instruction, Last, HMB, HMI), + Instruction); + *Extended = HexagonMCInstrInfo::isImmext(HMI); + *Addend += HEXAGON_INSTR_SIZE; + ++Instruction; + } + return; +} + +/// EncodeSingleInstruction - Emit a single +void HexagonMCCodeEmitter::EncodeSingleInstruction( + const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI, uint32_t Parse, size_t Index) const { + MCInst HMB = MI; + assert(!HexagonMCInstrInfo::isBundle(HMB)); + uint64_t Binary; + + // Compound instructions are limited to using registers 0-7 and 16-23 + // and here we make a map 16-23 to 8-15 so they can be correctly encoded. + static unsigned RegMap[8] = {Hexagon::R8, Hexagon::R9, Hexagon::R10, + Hexagon::R11, Hexagon::R12, Hexagon::R13, + Hexagon::R14, Hexagon::R15}; + + // Pseudo instructions don't get encoded and shouldn't be here + // in the first place! 
+ assert(!HexagonMCInstrInfo::getDesc(MCII, HMB).isPseudo() && + "pseudo-instruction found"); + DEBUG(dbgs() << "Encoding insn" + " `" << HexagonMCInstrInfo::getName(MCII, HMB) << "'" + "\n"); + + if (llvm::HexagonMCInstrInfo::getType(MCII, HMB) == HexagonII::TypeCOMPOUND) { + for (unsigned i = 0; i < HMB.getNumOperands(); ++i) + if (HMB.getOperand(i).isReg()) { + unsigned Reg = + MCT.getRegisterInfo()->getEncodingValue(HMB.getOperand(i).getReg()); + if ((Reg <= 23) && (Reg >= 16)) + HMB.getOperand(i).setReg(RegMap[Reg - 16]); + } + } + + if (HexagonMCInstrInfo::isNewValue(MCII, HMB)) { + // Calculate the new value distance to the associated producer + MCOperand &MCO = + HMB.getOperand(HexagonMCInstrInfo::getNewValueOp(MCII, HMB)); + unsigned SOffset = 0; + unsigned Register = MCO.getReg(); + unsigned Register1; + auto Instructions = HexagonMCInstrInfo::bundleInstructions(**CurrentBundle); + auto i = Instructions.begin() + Index - 1; + for (;; --i) { + assert(i != Instructions.begin() - 1 && "Couldn't find producer"); + MCInst const &Inst = *i->getInst(); + if (HexagonMCInstrInfo::isImmext(Inst)) + continue; + ++SOffset; + Register1 = + HexagonMCInstrInfo::hasNewValue(MCII, Inst) + ? HexagonMCInstrInfo::getNewValueOperand(MCII, Inst).getReg() + : static_cast<unsigned>(Hexagon::NoRegister); + if (Register != Register1) + // This isn't the register we're looking for + continue; + if (!HexagonMCInstrInfo::isPredicated(MCII, Inst)) + // Producer is unpredicated + break; + assert(HexagonMCInstrInfo::isPredicated(MCII, HMB) && + "Unpredicated consumer depending on predicated producer"); + if (HexagonMCInstrInfo::isPredicatedTrue(MCII, Inst) == + HexagonMCInstrInfo::isPredicatedTrue(MCII, HMB)) + // Producer predicate sense matched ours + break; + } + // Hexagon PRM 10.11 Construct Nt from distance + unsigned Offset = SOffset; + Offset <<= 1; + MCO.setReg(Offset + Hexagon::R0); + } + + Binary = getBinaryCodeForInstr(HMB, Fixups, STI); + // Check for unimplemented instructions. Immediate extenders + // are encoded as zero, so they need to be accounted for. + if ((!Binary) && + ((HMB.getOpcode() != DuplexIClass0) && (HMB.getOpcode() != A4_ext) && + (HMB.getOpcode() != A4_ext_b) && (HMB.getOpcode() != A4_ext_c) && + (HMB.getOpcode() != A4_ext_g))) { + // Use a A2_nop for unimplemented instructions. 
+ DEBUG(dbgs() << "Unimplemented inst: " + " `" << HexagonMCInstrInfo::getName(MCII, HMB) << "'" + "\n"); + llvm_unreachable("Unimplemented Instruction"); + } + Binary |= Parse; + + // if we need to emit a duplexed instruction + if (HMB.getOpcode() >= Hexagon::DuplexIClass0 && + HMB.getOpcode() <= Hexagon::DuplexIClassF) { + assert(Parse == HexagonII::INST_PARSE_DUPLEX && + "Emitting duplex without duplex parse bits"); + unsigned dupIClass; + switch (HMB.getOpcode()) { + case Hexagon::DuplexIClass0: + dupIClass = 0; + break; + case Hexagon::DuplexIClass1: + dupIClass = 1; + break; + case Hexagon::DuplexIClass2: + dupIClass = 2; + break; + case Hexagon::DuplexIClass3: + dupIClass = 3; + break; + case Hexagon::DuplexIClass4: + dupIClass = 4; + break; + case Hexagon::DuplexIClass5: + dupIClass = 5; + break; + case Hexagon::DuplexIClass6: + dupIClass = 6; + break; + case Hexagon::DuplexIClass7: + dupIClass = 7; + break; + case Hexagon::DuplexIClass8: + dupIClass = 8; + break; + case Hexagon::DuplexIClass9: + dupIClass = 9; + break; + case Hexagon::DuplexIClassA: + dupIClass = 10; + break; + case Hexagon::DuplexIClassB: + dupIClass = 11; + break; + case Hexagon::DuplexIClassC: + dupIClass = 12; + break; + case Hexagon::DuplexIClassD: + dupIClass = 13; + break; + case Hexagon::DuplexIClassE: + dupIClass = 14; + break; + case Hexagon::DuplexIClassF: + dupIClass = 15; + break; + default: + llvm_unreachable("Unimplemented DuplexIClass"); + break; + } + // 29 is the bit position. + // 0b1110 =0xE bits are masked off and down shifted by 1 bit. + // Last bit is moved to bit position 13 + Binary = ((dupIClass & 0xE) << (29 - 1)) | ((dupIClass & 0x1) << 13); + + const MCInst *subInst0 = HMB.getOperand(0).getInst(); + const MCInst *subInst1 = HMB.getOperand(1).getInst(); + + // get subinstruction slot 0 + unsigned subInstSlot0Bits = getBinaryCodeForInstr(*subInst0, Fixups, STI); + // get subinstruction slot 1 + unsigned subInstSlot1Bits = getBinaryCodeForInstr(*subInst1, Fixups, STI); + + Binary |= subInstSlot0Bits | (subInstSlot1Bits << 16); + } + support::endian::Writer<support::little>(OS).write<uint32_t>(Binary); + ++MCNumEmitted; +} + +static Hexagon::Fixups getFixupNoBits(MCInstrInfo const &MCII, const MCInst &MI, + const MCOperand &MO, + const MCSymbolRefExpr::VariantKind kind) { + const MCInstrDesc &MCID = HexagonMCInstrInfo::getDesc(MCII, MI); + unsigned insnType = llvm::HexagonMCInstrInfo::getType(MCII, MI); + + if (insnType == HexagonII::TypePREFIX) { + switch (kind) { + case llvm::MCSymbolRefExpr::VK_GOTOFF: + return Hexagon::fixup_Hexagon_GOTREL_32_6_X; + case llvm::MCSymbolRefExpr::VK_GOT: + return Hexagon::fixup_Hexagon_GOT_32_6_X; + case llvm::MCSymbolRefExpr::VK_TPREL: + return Hexagon::fixup_Hexagon_TPREL_32_6_X; + case llvm::MCSymbolRefExpr::VK_DTPREL: + return Hexagon::fixup_Hexagon_DTPREL_32_6_X; + case llvm::MCSymbolRefExpr::VK_Hexagon_GD_GOT: + return Hexagon::fixup_Hexagon_GD_GOT_32_6_X; + case llvm::MCSymbolRefExpr::VK_Hexagon_LD_GOT: + return Hexagon::fixup_Hexagon_LD_GOT_32_6_X; + case llvm::MCSymbolRefExpr::VK_Hexagon_IE: + return Hexagon::fixup_Hexagon_IE_32_6_X; + case llvm::MCSymbolRefExpr::VK_Hexagon_IE_GOT: + return Hexagon::fixup_Hexagon_IE_GOT_32_6_X; + default: + if (MCID.isBranch()) + return Hexagon::fixup_Hexagon_B32_PCREL_X; + else + return Hexagon::fixup_Hexagon_32_6_X; + } + } else if (MCID.isBranch()) + return (Hexagon::fixup_Hexagon_B13_PCREL); + + switch (MCID.getOpcode()) { + case Hexagon::HI: + case Hexagon::A2_tfrih: + switch (kind) { + case 
llvm::MCSymbolRefExpr::VK_GOT: + return Hexagon::fixup_Hexagon_GOT_HI16; + case llvm::MCSymbolRefExpr::VK_GOTOFF: + return Hexagon::fixup_Hexagon_GOTREL_HI16; + case llvm::MCSymbolRefExpr::VK_Hexagon_GD_GOT: + return Hexagon::fixup_Hexagon_GD_GOT_HI16; + case llvm::MCSymbolRefExpr::VK_Hexagon_LD_GOT: + return Hexagon::fixup_Hexagon_LD_GOT_HI16; + case llvm::MCSymbolRefExpr::VK_Hexagon_IE: + return Hexagon::fixup_Hexagon_IE_HI16; + case llvm::MCSymbolRefExpr::VK_Hexagon_IE_GOT: + return Hexagon::fixup_Hexagon_IE_GOT_HI16; + case llvm::MCSymbolRefExpr::VK_TPREL: + return Hexagon::fixup_Hexagon_TPREL_HI16; + case llvm::MCSymbolRefExpr::VK_DTPREL: + return Hexagon::fixup_Hexagon_DTPREL_HI16; + default: + return Hexagon::fixup_Hexagon_HI16; + } + + case Hexagon::LO: + case Hexagon::A2_tfril: + switch (kind) { + case llvm::MCSymbolRefExpr::VK_GOT: + return Hexagon::fixup_Hexagon_GOT_LO16; + case llvm::MCSymbolRefExpr::VK_GOTOFF: + return Hexagon::fixup_Hexagon_GOTREL_LO16; + case llvm::MCSymbolRefExpr::VK_Hexagon_GD_GOT: + return Hexagon::fixup_Hexagon_GD_GOT_LO16; + case llvm::MCSymbolRefExpr::VK_Hexagon_LD_GOT: + return Hexagon::fixup_Hexagon_LD_GOT_LO16; + case llvm::MCSymbolRefExpr::VK_Hexagon_IE: + return Hexagon::fixup_Hexagon_IE_LO16; + case llvm::MCSymbolRefExpr::VK_Hexagon_IE_GOT: + return Hexagon::fixup_Hexagon_IE_GOT_LO16; + case llvm::MCSymbolRefExpr::VK_TPREL: + return Hexagon::fixup_Hexagon_TPREL_LO16; + case llvm::MCSymbolRefExpr::VK_DTPREL: + return Hexagon::fixup_Hexagon_DTPREL_LO16; + default: + return Hexagon::fixup_Hexagon_LO16; + } + + // The only relocs left should be GP relative: + default: + if (MCID.mayStore() || MCID.mayLoad()) { + for (const MCPhysReg *ImpUses = MCID.getImplicitUses(); *ImpUses; + ++ImpUses) { + if (*ImpUses == Hexagon::GP) { + switch (HexagonMCInstrInfo::getAccessSize(MCII, MI)) { + case HexagonII::MemAccessSize::ByteAccess: + return fixup_Hexagon_GPREL16_0; + case HexagonII::MemAccessSize::HalfWordAccess: + return fixup_Hexagon_GPREL16_1; + case HexagonII::MemAccessSize::WordAccess: + return fixup_Hexagon_GPREL16_2; + case HexagonII::MemAccessSize::DoubleWordAccess: + return fixup_Hexagon_GPREL16_3; + default: + llvm_unreachable("unhandled fixup"); + } + } + } + } else + llvm_unreachable("unhandled fixup"); + } + + return LastTargetFixupKind; +} + +namespace llvm { +extern const MCInstrDesc HexagonInsts[]; +} + +namespace { + bool isPCRel (unsigned Kind) { + switch(Kind){ + case fixup_Hexagon_B22_PCREL: + case fixup_Hexagon_B15_PCREL: + case fixup_Hexagon_B7_PCREL: + case fixup_Hexagon_B13_PCREL: + case fixup_Hexagon_B9_PCREL: + case fixup_Hexagon_B32_PCREL_X: + case fixup_Hexagon_B22_PCREL_X: + case fixup_Hexagon_B15_PCREL_X: + case fixup_Hexagon_B13_PCREL_X: + case fixup_Hexagon_B9_PCREL_X: + case fixup_Hexagon_B7_PCREL_X: + case fixup_Hexagon_32_PCREL: + case fixup_Hexagon_PLT_B22_PCREL: + case fixup_Hexagon_GD_PLT_B22_PCREL: + case fixup_Hexagon_LD_PLT_B22_PCREL: + case fixup_Hexagon_6_PCREL_X: + return true; + default: + return false; + } + } +} + +unsigned HexagonMCCodeEmitter::getExprOpValue(const MCInst &MI, + const MCOperand &MO, + const MCExpr *ME, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const + +{ + int64_t Res; + + if (ME->evaluateAsAbsolute(Res)) + return Res; + + MCExpr::ExprKind MK = ME->getKind(); + if (MK == MCExpr::Constant) { + return cast<MCConstantExpr>(ME)->getValue(); + } + if (MK == MCExpr::Binary) { + getExprOpValue(MI, MO, cast<MCBinaryExpr>(ME)->getLHS(), Fixups, STI); + getExprOpValue(MI, MO, 
cast<MCBinaryExpr>(ME)->getRHS(), Fixups, STI); + return 0; + } + + assert(MK == MCExpr::SymbolRef); + + Hexagon::Fixups FixupKind = + Hexagon::Fixups(Hexagon::fixup_Hexagon_TPREL_LO16); + const MCSymbolRefExpr *MCSRE = static_cast<const MCSymbolRefExpr *>(ME); + const MCInstrDesc &MCID = HexagonMCInstrInfo::getDesc(MCII, MI); + unsigned bits = HexagonMCInstrInfo::getExtentBits(MCII, MI) - + HexagonMCInstrInfo::getExtentAlignment(MCII, MI); + const MCSymbolRefExpr::VariantKind kind = MCSRE->getKind(); + + DEBUG(dbgs() << "----------------------------------------\n"); + DEBUG(dbgs() << "Opcode Name: " << HexagonMCInstrInfo::getName(MCII, MI) + << "\n"); + DEBUG(dbgs() << "Opcode: " << MCID.getOpcode() << "\n"); + DEBUG(dbgs() << "Relocation bits: " << bits << "\n"); + DEBUG(dbgs() << "Addend: " << *Addend << "\n"); + DEBUG(dbgs() << "----------------------------------------\n"); + + switch (bits) { + default: + DEBUG(dbgs() << "unrecognized bit count of " << bits << '\n'); + break; + + case 32: + switch (kind) { + case llvm::MCSymbolRefExpr::VK_Hexagon_PCREL: + FixupKind = Hexagon::fixup_Hexagon_32_PCREL; + break; + case llvm::MCSymbolRefExpr::VK_GOT: + FixupKind = *Extended ? Hexagon::fixup_Hexagon_GOT_32_6_X + : Hexagon::fixup_Hexagon_GOT_32; + break; + case llvm::MCSymbolRefExpr::VK_GOTOFF: + FixupKind = *Extended ? Hexagon::fixup_Hexagon_GOTREL_32_6_X + : Hexagon::fixup_Hexagon_GOTREL_32; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_GD_GOT: + FixupKind = *Extended ? Hexagon::fixup_Hexagon_GD_GOT_32_6_X + : Hexagon::fixup_Hexagon_GD_GOT_32; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_LD_GOT: + FixupKind = *Extended ? Hexagon::fixup_Hexagon_LD_GOT_32_6_X + : Hexagon::fixup_Hexagon_LD_GOT_32; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_IE: + FixupKind = *Extended ? Hexagon::fixup_Hexagon_IE_32_6_X + : Hexagon::fixup_Hexagon_IE_32; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_IE_GOT: + FixupKind = *Extended ? Hexagon::fixup_Hexagon_IE_GOT_32_6_X + : Hexagon::fixup_Hexagon_IE_GOT_32; + break; + case llvm::MCSymbolRefExpr::VK_TPREL: + FixupKind = *Extended ? Hexagon::fixup_Hexagon_TPREL_32_6_X + : Hexagon::fixup_Hexagon_TPREL_32; + break; + case llvm::MCSymbolRefExpr::VK_DTPREL: + FixupKind = *Extended ? Hexagon::fixup_Hexagon_DTPREL_32_6_X + : Hexagon::fixup_Hexagon_DTPREL_32; + break; + default: + FixupKind = + *Extended ? Hexagon::fixup_Hexagon_32_6_X : Hexagon::fixup_Hexagon_32; + break; + } + break; + + case 22: + switch (kind) { + case llvm::MCSymbolRefExpr::VK_Hexagon_GD_PLT: + FixupKind = Hexagon::fixup_Hexagon_GD_PLT_B22_PCREL; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_LD_PLT: + FixupKind = Hexagon::fixup_Hexagon_LD_PLT_B22_PCREL; + break; + default: + if (MCID.isBranch() || MCID.isCall()) { + FixupKind = *Extended ? 
Hexagon::fixup_Hexagon_B22_PCREL_X + : Hexagon::fixup_Hexagon_B22_PCREL; + } else { + errs() << "unrecognized relocation, bits: " << bits << "\n"; + errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n"; + } + break; + } + break; + + case 16: + if (*Extended) { + switch (kind) { + default: + FixupKind = Hexagon::fixup_Hexagon_16_X; + break; + case llvm::MCSymbolRefExpr::VK_GOT: + FixupKind = Hexagon::fixup_Hexagon_GOT_16_X; + break; + case llvm::MCSymbolRefExpr::VK_GOTOFF: + FixupKind = Hexagon::fixup_Hexagon_GOTREL_16_X; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_GD_GOT: + FixupKind = Hexagon::fixup_Hexagon_GD_GOT_16_X; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_LD_GOT: + FixupKind = Hexagon::fixup_Hexagon_LD_GOT_16_X; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_IE: + FixupKind = Hexagon::fixup_Hexagon_IE_16_X; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_IE_GOT: + FixupKind = Hexagon::fixup_Hexagon_IE_GOT_16_X; + break; + case llvm::MCSymbolRefExpr::VK_TPREL: + FixupKind = Hexagon::fixup_Hexagon_TPREL_16_X; + break; + case llvm::MCSymbolRefExpr::VK_DTPREL: + FixupKind = Hexagon::fixup_Hexagon_DTPREL_16_X; + break; + } + } else + switch (kind) { + default: + errs() << "unrecognized relocation, bits " << bits << "\n"; + errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n"; + break; + case llvm::MCSymbolRefExpr::VK_GOTOFF: + if ((MCID.getOpcode() == Hexagon::HI) || + (MCID.getOpcode() == Hexagon::LO_H)) + FixupKind = Hexagon::fixup_Hexagon_GOTREL_HI16; + else + FixupKind = Hexagon::fixup_Hexagon_GOTREL_LO16; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_GPREL: + FixupKind = Hexagon::fixup_Hexagon_GPREL16_0; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_LO16: + FixupKind = Hexagon::fixup_Hexagon_LO16; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_HI16: + FixupKind = Hexagon::fixup_Hexagon_HI16; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_GD_GOT: + FixupKind = Hexagon::fixup_Hexagon_GD_GOT_16; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_LD_GOT: + FixupKind = Hexagon::fixup_Hexagon_LD_GOT_16; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_IE_GOT: + FixupKind = Hexagon::fixup_Hexagon_IE_GOT_16; + break; + case llvm::MCSymbolRefExpr::VK_TPREL: + FixupKind = Hexagon::fixup_Hexagon_TPREL_16; + break; + case llvm::MCSymbolRefExpr::VK_DTPREL: + FixupKind = Hexagon::fixup_Hexagon_DTPREL_16; + break; + } + break; + + case 15: + if (MCID.isBranch() || MCID.isCall()) + FixupKind = *Extended ? 
Hexagon::fixup_Hexagon_B15_PCREL_X + : Hexagon::fixup_Hexagon_B15_PCREL; + break; + + case 13: + if (MCID.isBranch()) + FixupKind = Hexagon::fixup_Hexagon_B13_PCREL; + else { + errs() << "unrecognized relocation, bits " << bits << "\n"; + errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n"; + } + break; + + case 12: + if (*Extended) + switch (kind) { + default: + FixupKind = Hexagon::fixup_Hexagon_12_X; + break; + // There isn't a GOT_12_X, both 11_X and 16_X resolve to 6/26 + case llvm::MCSymbolRefExpr::VK_GOT: + FixupKind = Hexagon::fixup_Hexagon_GOT_16_X; + break; + case llvm::MCSymbolRefExpr::VK_GOTOFF: + FixupKind = Hexagon::fixup_Hexagon_GOTREL_16_X; + break; + } + else { + errs() << "unrecognized relocation, bits " << bits << "\n"; + errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n"; + } + break; + + case 11: + if (*Extended) + switch (kind) { + default: + FixupKind = Hexagon::fixup_Hexagon_11_X; + break; + case llvm::MCSymbolRefExpr::VK_GOT: + FixupKind = Hexagon::fixup_Hexagon_GOT_11_X; + break; + case llvm::MCSymbolRefExpr::VK_GOTOFF: + FixupKind = Hexagon::fixup_Hexagon_GOTREL_11_X; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_GD_GOT: + FixupKind = Hexagon::fixup_Hexagon_GD_GOT_11_X; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_LD_GOT: + FixupKind = Hexagon::fixup_Hexagon_LD_GOT_11_X; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_IE_GOT: + FixupKind = Hexagon::fixup_Hexagon_IE_GOT_11_X; + break; + case llvm::MCSymbolRefExpr::VK_TPREL: + FixupKind = Hexagon::fixup_Hexagon_TPREL_11_X; + break; + case llvm::MCSymbolRefExpr::VK_DTPREL: + FixupKind = Hexagon::fixup_Hexagon_DTPREL_11_X; + break; + } + else { + errs() << "unrecognized relocation, bits " << bits << "\n"; + errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n"; + } + break; + + case 10: + if (*Extended) + FixupKind = Hexagon::fixup_Hexagon_10_X; + break; + + case 9: + if (MCID.isBranch() || + (llvm::HexagonMCInstrInfo::getType(MCII, MI) == HexagonII::TypeCR)) + FixupKind = *Extended ? Hexagon::fixup_Hexagon_B9_PCREL_X + : Hexagon::fixup_Hexagon_B9_PCREL; + else if (*Extended) + FixupKind = Hexagon::fixup_Hexagon_9_X; + else { + errs() << "unrecognized relocation, bits " << bits << "\n"; + errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n"; + } + break; + + case 8: + if (*Extended) + FixupKind = Hexagon::fixup_Hexagon_8_X; + else { + errs() << "unrecognized relocation, bits " << bits << "\n"; + errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n"; + } + break; + + case 7: + if (MCID.isBranch() || + (llvm::HexagonMCInstrInfo::getType(MCII, MI) == HexagonII::TypeCR)) + FixupKind = *Extended ? Hexagon::fixup_Hexagon_B7_PCREL_X + : Hexagon::fixup_Hexagon_B7_PCREL; + else if (*Extended) + FixupKind = Hexagon::fixup_Hexagon_7_X; + else { + errs() << "unrecognized relocation, bits " << bits << "\n"; + errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n"; + } + break; + + case 6: + if (*Extended) { + switch (kind) { + default: + FixupKind = Hexagon::fixup_Hexagon_6_X; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_PCREL: + FixupKind = Hexagon::fixup_Hexagon_6_PCREL_X; + break; + // This is part of an extender, GOT_11 is a + // Word32_U6 unsigned/truncated reloc. 
+ case llvm::MCSymbolRefExpr::VK_GOT: + FixupKind = Hexagon::fixup_Hexagon_GOT_11_X; + break; + case llvm::MCSymbolRefExpr::VK_GOTOFF: + FixupKind = Hexagon::fixup_Hexagon_GOTREL_11_X; + break; + } + } else { + errs() << "unrecognized relocation, bits " << bits << "\n"; + errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n"; + } + break; + + case 0: + FixupKind = getFixupNoBits(MCII, MI, MO, kind); + break; + } + + MCExpr const *FixupExpression = (*Addend > 0 && isPCRel(FixupKind)) ? + MCBinaryExpr::createAdd(MO.getExpr(), + MCConstantExpr::create(*Addend, MCT), MCT) : + MO.getExpr(); + + MCFixup fixup = MCFixup::create(*Addend, FixupExpression, + MCFixupKind(FixupKind), MI.getLoc()); + Fixups.push_back(fixup); + // All of the information is in the fixup. + return (0); +} + +unsigned +HexagonMCCodeEmitter::getMachineOpValue(MCInst const &MI, MCOperand const &MO, + SmallVectorImpl<MCFixup> &Fixups, + MCSubtargetInfo const &STI) const { + if (MO.isReg()) + return MCT.getRegisterInfo()->getEncodingValue(MO.getReg()); + if (MO.isImm()) + return static_cast<unsigned>(MO.getImm()); + + // MO must be an ME. + assert(MO.isExpr()); + return getExprOpValue(MI, MO, MO.getExpr(), Fixups, STI); +} + +MCCodeEmitter *llvm::createHexagonMCCodeEmitter(MCInstrInfo const &MII, + MCRegisterInfo const &MRI, + MCContext &MCT) { + return new HexagonMCCodeEmitter(MII, MCT); +} + +#include "HexagonGenMCCodeEmitter.inc" diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h new file mode 100644 index 0000000..2a154da --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h @@ -0,0 +1,70 @@ +//===-- HexagonMCCodeEmitter.h - Hexagon Target Descriptions ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Definition for classes that emit Hexagon machine code from MCInsts +/// +//===----------------------------------------------------------------------===// + +#ifndef HEXAGONMCCODEEMITTER_H +#define HEXAGONMCCODEEMITTER_H + +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { + +class HexagonMCCodeEmitter : public MCCodeEmitter { + MCContext &MCT; + MCInstrInfo const &MCII; + std::unique_ptr<unsigned> Addend; + std::unique_ptr<bool> Extended; + std::unique_ptr<MCInst const *> CurrentBundle; + + // helper routine for getMachineOpValue() + unsigned getExprOpValue(const MCInst &MI, const MCOperand &MO, + const MCExpr *ME, SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + +public: + HexagonMCCodeEmitter(MCInstrInfo const &aMII, MCContext &aMCT); + + // Return parse bits for instruction `MCI' inside bundle `MCB' + uint32_t parseBits(size_t Instruction, size_t Last, MCInst const &MCB, + MCInst const &MCI) const; + + void encodeInstruction(MCInst const &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups, + MCSubtargetInfo const &STI) const override; + + void EncodeSingleInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI, + uint32_t Parse, size_t Index) const; + + // \brief TableGen'erated function for getting the + // binary encoding for an instruction. + uint64_t getBinaryCodeForInstr(MCInst const &MI, + SmallVectorImpl<MCFixup> &Fixups, + MCSubtargetInfo const &STI) const; + + /// \brief Return binary encoding of operand. + unsigned getMachineOpValue(MCInst const &MI, MCOperand const &MO, + SmallVectorImpl<MCFixup> &Fixups, + MCSubtargetInfo const &STI) const; +}; // class HexagonMCCodeEmitter + +} // namespace llvm + +#endif /* HEXAGONMCCODEEMITTER_H */ diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp new file mode 100644 index 0000000..d194bea --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp @@ -0,0 +1,427 @@ + +//=== HexagonMCCompound.cpp - Hexagon Compound checker -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file is looks at a packet and tries to form compound insns +// +//===----------------------------------------------------------------------===// +#include "Hexagon.h" +#include "MCTargetDesc/HexagonBaseInfo.h" +#include "MCTargetDesc/HexagonMCShuffler.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; +using namespace Hexagon; + +#define DEBUG_TYPE "hexagon-mccompound" + +enum OpcodeIndex { + fp0_jump_nt = 0, + fp0_jump_t, + fp1_jump_nt, + fp1_jump_t, + tp0_jump_nt, + tp0_jump_t, + tp1_jump_nt, + tp1_jump_t +}; + +static const unsigned tstBitOpcode[8] = { + J4_tstbit0_fp0_jump_nt, J4_tstbit0_fp0_jump_t, J4_tstbit0_fp1_jump_nt, + J4_tstbit0_fp1_jump_t, J4_tstbit0_tp0_jump_nt, J4_tstbit0_tp0_jump_t, + J4_tstbit0_tp1_jump_nt, J4_tstbit0_tp1_jump_t}; +static const unsigned cmpeqBitOpcode[8] = { + J4_cmpeq_fp0_jump_nt, J4_cmpeq_fp0_jump_t, J4_cmpeq_fp1_jump_nt, + J4_cmpeq_fp1_jump_t, J4_cmpeq_tp0_jump_nt, J4_cmpeq_tp0_jump_t, + J4_cmpeq_tp1_jump_nt, J4_cmpeq_tp1_jump_t}; +static const unsigned cmpgtBitOpcode[8] = { + J4_cmpgt_fp0_jump_nt, J4_cmpgt_fp0_jump_t, J4_cmpgt_fp1_jump_nt, + J4_cmpgt_fp1_jump_t, J4_cmpgt_tp0_jump_nt, J4_cmpgt_tp0_jump_t, + J4_cmpgt_tp1_jump_nt, J4_cmpgt_tp1_jump_t}; +static const unsigned cmpgtuBitOpcode[8] = { + J4_cmpgtu_fp0_jump_nt, J4_cmpgtu_fp0_jump_t, J4_cmpgtu_fp1_jump_nt, + J4_cmpgtu_fp1_jump_t, J4_cmpgtu_tp0_jump_nt, J4_cmpgtu_tp0_jump_t, + J4_cmpgtu_tp1_jump_nt, J4_cmpgtu_tp1_jump_t}; +static const unsigned cmpeqiBitOpcode[8] = { + J4_cmpeqi_fp0_jump_nt, J4_cmpeqi_fp0_jump_t, J4_cmpeqi_fp1_jump_nt, + J4_cmpeqi_fp1_jump_t, J4_cmpeqi_tp0_jump_nt, J4_cmpeqi_tp0_jump_t, + J4_cmpeqi_tp1_jump_nt, J4_cmpeqi_tp1_jump_t}; +static const unsigned cmpgtiBitOpcode[8] = { + J4_cmpgti_fp0_jump_nt, J4_cmpgti_fp0_jump_t, J4_cmpgti_fp1_jump_nt, + J4_cmpgti_fp1_jump_t, J4_cmpgti_tp0_jump_nt, J4_cmpgti_tp0_jump_t, + J4_cmpgti_tp1_jump_nt, J4_cmpgti_tp1_jump_t}; +static const unsigned cmpgtuiBitOpcode[8] = { + J4_cmpgtui_fp0_jump_nt, J4_cmpgtui_fp0_jump_t, J4_cmpgtui_fp1_jump_nt, + J4_cmpgtui_fp1_jump_t, J4_cmpgtui_tp0_jump_nt, J4_cmpgtui_tp0_jump_t, + J4_cmpgtui_tp1_jump_nt, J4_cmpgtui_tp1_jump_t}; +static const unsigned cmpeqn1BitOpcode[8] = { + J4_cmpeqn1_fp0_jump_nt, J4_cmpeqn1_fp0_jump_t, J4_cmpeqn1_fp1_jump_nt, + J4_cmpeqn1_fp1_jump_t, J4_cmpeqn1_tp0_jump_nt, J4_cmpeqn1_tp0_jump_t, + J4_cmpeqn1_tp1_jump_nt, J4_cmpeqn1_tp1_jump_t}; +static const unsigned cmpgtn1BitOpcode[8] = { + J4_cmpgtn1_fp0_jump_nt, J4_cmpgtn1_fp0_jump_t, J4_cmpgtn1_fp1_jump_nt, + J4_cmpgtn1_fp1_jump_t, J4_cmpgtn1_tp0_jump_nt, J4_cmpgtn1_tp0_jump_t, + J4_cmpgtn1_tp1_jump_nt, J4_cmpgtn1_tp1_jump_t, +}; + +// enum HexagonII::CompoundGroup +namespace { +unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) { + unsigned DstReg, SrcReg, Src1Reg, Src2Reg; + + switch (MI.getOpcode()) { + default: + return HexagonII::HCG_None; + // + // Compound pairs. 
+ // "p0=cmp.eq(Rs16,Rt16); if (p0.new) jump:nt #r9:2" + // "Rd16=#U6 ; jump #r9:2" + // "Rd16=Rs16 ; jump #r9:2" + // + case Hexagon::C2_cmpeq: + case Hexagon::C2_cmpgt: + case Hexagon::C2_cmpgtu: + if (IsExtended) + return false; + DstReg = MI.getOperand(0).getReg(); + Src1Reg = MI.getOperand(1).getReg(); + Src2Reg = MI.getOperand(2).getReg(); + if ((Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) && + HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) && + HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg)) + return HexagonII::HCG_A; + break; + case Hexagon::C2_cmpeqi: + case Hexagon::C2_cmpgti: + case Hexagon::C2_cmpgtui: + if (IsExtended) + return false; + // P0 = cmp.eq(Rs,#u2) + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(1).getReg(); + if ((Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) && + HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && + (HexagonMCInstrInfo::inRange<5>(MI, 2) || + HexagonMCInstrInfo::minConstant(MI, 2) == -1)) + return HexagonII::HCG_A; + break; + case Hexagon::A2_tfr: + if (IsExtended) + return false; + // Rd = Rs + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(1).getReg(); + if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) && + HexagonMCInstrInfo::isIntRegForSubInst(SrcReg)) + return HexagonII::HCG_A; + break; + case Hexagon::A2_tfrsi: + if (IsExtended) + return false; + // Rd = #u6 + DstReg = MI.getOperand(0).getReg(); + if (HexagonMCInstrInfo::minConstant(MI, 1) <= 63 && + HexagonMCInstrInfo::minConstant(MI, 1) >= 0 && + HexagonMCInstrInfo::isIntRegForSubInst(DstReg)) + return HexagonII::HCG_A; + break; + case Hexagon::S2_tstbit_i: + if (IsExtended) + return false; + DstReg = MI.getOperand(0).getReg(); + Src1Reg = MI.getOperand(1).getReg(); + if ((Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) && + HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) && + HexagonMCInstrInfo::minConstant(MI, 2) == 0) + return HexagonII::HCG_A; + break; + // The fact that .new form is used pretty much guarantees + // that predicate register will match. Nevertheless, + // there could be some false positives without additional + // checking. + case Hexagon::J2_jumptnew: + case Hexagon::J2_jumpfnew: + case Hexagon::J2_jumptnewpt: + case Hexagon::J2_jumpfnewpt: + Src1Reg = MI.getOperand(0).getReg(); + if (Hexagon::P0 == Src1Reg || Hexagon::P1 == Src1Reg) + return HexagonII::HCG_B; + break; + // Transfer and jump: + // Rd=#U6 ; jump #r9:2 + // Rd=Rs ; jump #r9:2 + // Do not test for jump range here. + case Hexagon::J2_jump: + case Hexagon::RESTORE_DEALLOC_RET_JMP_V4: + return HexagonII::HCG_C; + break; + } + + return HexagonII::HCG_None; +} +} + +/// getCompoundOp - Return the index from 0-7 into the above opcode lists. +namespace { +unsigned getCompoundOp(MCInst const &HMCI) { + const MCOperand &Predicate = HMCI.getOperand(0); + unsigned PredReg = Predicate.getReg(); + + assert((PredReg == Hexagon::P0) || (PredReg == Hexagon::P1) || + (PredReg == Hexagon::P2) || (PredReg == Hexagon::P3)); + + switch (HMCI.getOpcode()) { + default: + llvm_unreachable("Expected match not found.\n"); + break; + case Hexagon::J2_jumpfnew: + return (PredReg == Hexagon::P0) ? fp0_jump_nt : fp1_jump_nt; + case Hexagon::J2_jumpfnewpt: + return (PredReg == Hexagon::P0) ? fp0_jump_t : fp1_jump_t; + case Hexagon::J2_jumptnew: + return (PredReg == Hexagon::P0) ? tp0_jump_nt : tp1_jump_nt; + case Hexagon::J2_jumptnewpt: + return (PredReg == Hexagon::P0) ? 
tp0_jump_t : tp1_jump_t; + } +} +} + +namespace { +MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) { + MCInst *CompoundInsn = 0; + unsigned compoundOpcode; + MCOperand Rs, Rt; + int64_t Value; + bool Success; + + switch (L.getOpcode()) { + default: + DEBUG(dbgs() << "Possible compound ignored\n"); + return CompoundInsn; + + case Hexagon::A2_tfrsi: + Rt = L.getOperand(0); + compoundOpcode = J4_jumpseti; + CompoundInsn = new (Context) MCInst; + CompoundInsn->setOpcode(compoundOpcode); + + CompoundInsn->addOperand(Rt); + CompoundInsn->addOperand(L.getOperand(1)); // Immediate + CompoundInsn->addOperand(R.getOperand(0)); // Jump target + break; + + case Hexagon::A2_tfr: + Rt = L.getOperand(0); + Rs = L.getOperand(1); + + compoundOpcode = J4_jumpsetr; + CompoundInsn = new (Context) MCInst; + CompoundInsn->setOpcode(compoundOpcode); + CompoundInsn->addOperand(Rt); + CompoundInsn->addOperand(Rs); + CompoundInsn->addOperand(R.getOperand(0)); // Jump target. + + break; + + case Hexagon::C2_cmpeq: + DEBUG(dbgs() << "CX: C2_cmpeq\n"); + Rs = L.getOperand(1); + Rt = L.getOperand(2); + + compoundOpcode = cmpeqBitOpcode[getCompoundOp(R)]; + CompoundInsn = new (Context) MCInst; + CompoundInsn->setOpcode(compoundOpcode); + CompoundInsn->addOperand(Rs); + CompoundInsn->addOperand(Rt); + CompoundInsn->addOperand(R.getOperand(1)); + break; + + case Hexagon::C2_cmpgt: + DEBUG(dbgs() << "CX: C2_cmpgt\n"); + Rs = L.getOperand(1); + Rt = L.getOperand(2); + + compoundOpcode = cmpgtBitOpcode[getCompoundOp(R)]; + CompoundInsn = new (Context) MCInst; + CompoundInsn->setOpcode(compoundOpcode); + CompoundInsn->addOperand(Rs); + CompoundInsn->addOperand(Rt); + CompoundInsn->addOperand(R.getOperand(1)); + break; + + case Hexagon::C2_cmpgtu: + DEBUG(dbgs() << "CX: C2_cmpgtu\n"); + Rs = L.getOperand(1); + Rt = L.getOperand(2); + + compoundOpcode = cmpgtuBitOpcode[getCompoundOp(R)]; + CompoundInsn = new (Context) MCInst; + CompoundInsn->setOpcode(compoundOpcode); + CompoundInsn->addOperand(Rs); + CompoundInsn->addOperand(Rt); + CompoundInsn->addOperand(R.getOperand(1)); + break; + + case Hexagon::C2_cmpeqi: + DEBUG(dbgs() << "CX: C2_cmpeqi\n"); + Success = L.getOperand(2).getExpr()->evaluateAsAbsolute(Value); + (void)Success; + assert(Success); + if (Value == -1) + compoundOpcode = cmpeqn1BitOpcode[getCompoundOp(R)]; + else + compoundOpcode = cmpeqiBitOpcode[getCompoundOp(R)]; + + Rs = L.getOperand(1); + CompoundInsn = new (Context) MCInst; + CompoundInsn->setOpcode(compoundOpcode); + CompoundInsn->addOperand(Rs); + if (Value != -1) + CompoundInsn->addOperand(L.getOperand(2)); + CompoundInsn->addOperand(R.getOperand(1)); + break; + + case Hexagon::C2_cmpgti: + DEBUG(dbgs() << "CX: C2_cmpgti\n"); + Success = L.getOperand(2).getExpr()->evaluateAsAbsolute(Value); + (void)Success; + assert(Success); + if (Value == -1) + compoundOpcode = cmpgtn1BitOpcode[getCompoundOp(R)]; + else + compoundOpcode = cmpgtiBitOpcode[getCompoundOp(R)]; + + Rs = L.getOperand(1); + CompoundInsn = new (Context) MCInst; + CompoundInsn->setOpcode(compoundOpcode); + CompoundInsn->addOperand(Rs); + if (Value != -1) + CompoundInsn->addOperand(L.getOperand(2)); + CompoundInsn->addOperand(R.getOperand(1)); + break; + + case Hexagon::C2_cmpgtui: + DEBUG(dbgs() << "CX: C2_cmpgtui\n"); + Rs = L.getOperand(1); + compoundOpcode = cmpgtuiBitOpcode[getCompoundOp(R)]; + CompoundInsn = new (Context) MCInst; + CompoundInsn->setOpcode(compoundOpcode); + CompoundInsn->addOperand(Rs); + CompoundInsn->addOperand(L.getOperand(2)); + 
CompoundInsn->addOperand(R.getOperand(1)); + break; + + case Hexagon::S2_tstbit_i: + DEBUG(dbgs() << "CX: S2_tstbit_i\n"); + Rs = L.getOperand(1); + compoundOpcode = tstBitOpcode[getCompoundOp(R)]; + CompoundInsn = new (Context) MCInst; + CompoundInsn->setOpcode(compoundOpcode); + CompoundInsn->addOperand(Rs); + CompoundInsn->addOperand(R.getOperand(1)); + break; + } + + return CompoundInsn; +} +} + +/// Non-Symmetrical. See if these two instructions are fit for compound pair. +namespace { +bool isOrderedCompoundPair(MCInst const &MIa, bool IsExtendedA, + MCInst const &MIb, bool IsExtendedB) { + unsigned MIaG = getCompoundCandidateGroup(MIa, IsExtendedA); + unsigned MIbG = getCompoundCandidateGroup(MIb, IsExtendedB); + // We have two candidates - check that this is the same register + // we are talking about. + unsigned Opca = MIa.getOpcode(); + if (MIaG == HexagonII::HCG_A && MIbG == HexagonII::HCG_C && + (Opca == Hexagon::A2_tfr || Opca == Hexagon::A2_tfrsi)) + return true; + return ((MIaG == HexagonII::HCG_A && MIbG == HexagonII::HCG_B) && + (MIa.getOperand(0).getReg() == MIb.getOperand(0).getReg())); +} +} + +namespace { +bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI) { + assert(HexagonMCInstrInfo::isBundle(MCI)); + bool JExtended = false; + for (MCInst::iterator J = + MCI.begin() + HexagonMCInstrInfo::bundleInstructionsOffset; + J != MCI.end(); ++J) { + MCInst const *JumpInst = J->getInst(); + if (HexagonMCInstrInfo::isImmext(*JumpInst)) { + JExtended = true; + continue; + } + if (llvm::HexagonMCInstrInfo::getType(MCII, *JumpInst) == + HexagonII::TypeJ) { + // Try to pair with another insn (B)undled with jump. + bool BExtended = false; + for (MCInst::iterator B = + MCI.begin() + HexagonMCInstrInfo::bundleInstructionsOffset; + B != MCI.end(); ++B) { + MCInst const *Inst = B->getInst(); + if (JumpInst == Inst) + continue; + if (HexagonMCInstrInfo::isImmext(*Inst)) { + BExtended = true; + continue; + } + DEBUG(dbgs() << "J,B: " << JumpInst->getOpcode() << "," + << Inst->getOpcode() << "\n"); + if (isOrderedCompoundPair(*Inst, BExtended, *JumpInst, JExtended)) { + MCInst *CompoundInsn = getCompoundInsn(Context, *Inst, *JumpInst); + if (CompoundInsn) { + DEBUG(dbgs() << "B: " << Inst->getOpcode() << "," + << JumpInst->getOpcode() << " Compounds to " + << CompoundInsn->getOpcode() << "\n"); + J->setInst(CompoundInsn); + MCI.erase(B); + return true; + } + } + BExtended = false; + } + } + JExtended = false; + } + return false; +} +} + +/// tryCompound - Given a bundle check for compound insns when one +/// is found update the contents fo the bundle with the compound insn. +/// If a compound instruction is found then the bundle will have one +/// additional slot. +void HexagonMCInstrInfo::tryCompound(MCInstrInfo const &MCII, + MCContext &Context, MCInst &MCI) { + assert(HexagonMCInstrInfo::isBundle(MCI) && + "Non-Bundle where Bundle expected"); + + // By definition a compound must have 2 insn. + if (MCI.size() < 2) + return; + + // Look for compounds until none are found, only update the bundle when + // a compound is found. 
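The packet is rescanned from the top every time lookForCompound succeeds, because fusing a compare/transfer with its jump erases one bundle operand and invalidates the iteration. A toy, self-contained sketch of that fixed-point shape is below; isPairModel, lookForCompoundModel and the int "packet" are invented placeholders, not the MCInst-based code in this file.

#include <cstddef>
#include <vector>

// Stand-in for "these two packet slots form a compound pair".
static bool isPairModel(int A, int B) { return A + B == 0; }

// One pass: merge the first pair found, shrinking the packet by one slot.
static bool lookForCompoundModel(std::vector<int> &Packet) {
  for (std::size_t I = 0; I < Packet.size(); ++I)
    for (std::size_t J = 0; J < Packet.size(); ++J)
      if (I != J && isPairModel(Packet[I], Packet[J])) {
        Packet[I] = 1;                    // the merged "compound" slot
        Packet.erase(Packet.begin() + J); // drop the partner slot
        return true;
      }
  return false;
}

int main() {
  std::vector<int> Packet{3, 7, -3, 2};
  while (lookForCompoundModel(Packet))
    ; // repeat until a full pass finds no pair, as tryCompound does below
  return Packet.size() == 3 ? 0 : 1;
}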
+ while (lookForCompound(MCII, Context, MCI)) + ; + + return; +} diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp new file mode 100644 index 0000000..e6194f6 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp @@ -0,0 +1,1087 @@ +//===----- HexagonMCDuplexInfo.cpp - Instruction bundle checking ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements duplexing of instructions to reduce code size +// +//===----------------------------------------------------------------------===// + +#include "HexagonBaseInfo.h" +#include "MCTargetDesc/HexagonMCInstrInfo.h" + +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#include <map> + +using namespace llvm; +using namespace Hexagon; + +#define DEBUG_TYPE "hexagon-mcduplex-info" + +// pair table of subInstructions with opcodes +static const std::pair<unsigned, unsigned> opcodeData[] = { + std::make_pair((unsigned)V4_SA1_addi, 0), + std::make_pair((unsigned)V4_SA1_addrx, 6144), + std::make_pair((unsigned)V4_SA1_addsp, 3072), + std::make_pair((unsigned)V4_SA1_and1, 4608), + std::make_pair((unsigned)V4_SA1_clrf, 6768), + std::make_pair((unsigned)V4_SA1_clrfnew, 6736), + std::make_pair((unsigned)V4_SA1_clrt, 6752), + std::make_pair((unsigned)V4_SA1_clrtnew, 6720), + std::make_pair((unsigned)V4_SA1_cmpeqi, 6400), + std::make_pair((unsigned)V4_SA1_combine0i, 7168), + std::make_pair((unsigned)V4_SA1_combine1i, 7176), + std::make_pair((unsigned)V4_SA1_combine2i, 7184), + std::make_pair((unsigned)V4_SA1_combine3i, 7192), + std::make_pair((unsigned)V4_SA1_combinerz, 7432), + std::make_pair((unsigned)V4_SA1_combinezr, 7424), + std::make_pair((unsigned)V4_SA1_dec, 4864), + std::make_pair((unsigned)V4_SA1_inc, 4352), + std::make_pair((unsigned)V4_SA1_seti, 2048), + std::make_pair((unsigned)V4_SA1_setin1, 6656), + std::make_pair((unsigned)V4_SA1_sxtb, 5376), + std::make_pair((unsigned)V4_SA1_sxth, 5120), + std::make_pair((unsigned)V4_SA1_tfr, 4096), + std::make_pair((unsigned)V4_SA1_zxtb, 5888), + std::make_pair((unsigned)V4_SA1_zxth, 5632), + std::make_pair((unsigned)V4_SL1_loadri_io, 0), + std::make_pair((unsigned)V4_SL1_loadrub_io, 4096), + std::make_pair((unsigned)V4_SL2_deallocframe, 7936), + std::make_pair((unsigned)V4_SL2_jumpr31, 8128), + std::make_pair((unsigned)V4_SL2_jumpr31_f, 8133), + std::make_pair((unsigned)V4_SL2_jumpr31_fnew, 8135), + std::make_pair((unsigned)V4_SL2_jumpr31_t, 8132), + std::make_pair((unsigned)V4_SL2_jumpr31_tnew, 8134), + std::make_pair((unsigned)V4_SL2_loadrb_io, 4096), + std::make_pair((unsigned)V4_SL2_loadrd_sp, 7680), + std::make_pair((unsigned)V4_SL2_loadrh_io, 0), + std::make_pair((unsigned)V4_SL2_loadri_sp, 7168), + std::make_pair((unsigned)V4_SL2_loadruh_io, 2048), + std::make_pair((unsigned)V4_SL2_return, 8000), + std::make_pair((unsigned)V4_SL2_return_f, 8005), + std::make_pair((unsigned)V4_SL2_return_fnew, 8007), + std::make_pair((unsigned)V4_SL2_return_t, 8004), + std::make_pair((unsigned)V4_SL2_return_tnew, 8006), + std::make_pair((unsigned)V4_SS1_storeb_io, 4096), + std::make_pair((unsigned)V4_SS1_storew_io, 0), + std::make_pair((unsigned)V4_SS2_allocframe, 7168), + 
std::make_pair((unsigned)V4_SS2_storebi0, 4608), + std::make_pair((unsigned)V4_SS2_storebi1, 4864), + std::make_pair((unsigned)V4_SS2_stored_sp, 2560), + std::make_pair((unsigned)V4_SS2_storeh_io, 0), + std::make_pair((unsigned)V4_SS2_storew_sp, 2048), + std::make_pair((unsigned)V4_SS2_storewi0, 4096), + std::make_pair((unsigned)V4_SS2_storewi1, 4352)}; + +static std::map<unsigned, unsigned> + subinstOpcodeMap(std::begin(opcodeData), std::end(opcodeData)); + +bool HexagonMCInstrInfo::isDuplexPairMatch(unsigned Ga, unsigned Gb) { + switch (Ga) { + case HexagonII::HSIG_None: + default: + return false; + case HexagonII::HSIG_L1: + return (Gb == HexagonII::HSIG_L1 || Gb == HexagonII::HSIG_A); + case HexagonII::HSIG_L2: + return (Gb == HexagonII::HSIG_L1 || Gb == HexagonII::HSIG_L2 || + Gb == HexagonII::HSIG_A); + case HexagonII::HSIG_S1: + return (Gb == HexagonII::HSIG_L1 || Gb == HexagonII::HSIG_L2 || + Gb == HexagonII::HSIG_S1 || Gb == HexagonII::HSIG_A); + case HexagonII::HSIG_S2: + return (Gb == HexagonII::HSIG_L1 || Gb == HexagonII::HSIG_L2 || + Gb == HexagonII::HSIG_S1 || Gb == HexagonII::HSIG_S2 || + Gb == HexagonII::HSIG_A); + case HexagonII::HSIG_A: + return (Gb == HexagonII::HSIG_A); + case HexagonII::HSIG_Compound: + return (Gb == HexagonII::HSIG_Compound); + } + return false; +} + +unsigned HexagonMCInstrInfo::iClassOfDuplexPair(unsigned Ga, unsigned Gb) { + switch (Ga) { + case HexagonII::HSIG_None: + default: + break; + case HexagonII::HSIG_L1: + switch (Gb) { + default: + break; + case HexagonII::HSIG_L1: + return 0; + case HexagonII::HSIG_A: + return 0x4; + } + case HexagonII::HSIG_L2: + switch (Gb) { + default: + break; + case HexagonII::HSIG_L1: + return 0x1; + case HexagonII::HSIG_L2: + return 0x2; + case HexagonII::HSIG_A: + return 0x5; + } + case HexagonII::HSIG_S1: + switch (Gb) { + default: + break; + case HexagonII::HSIG_L1: + return 0x8; + case HexagonII::HSIG_L2: + return 0x9; + case HexagonII::HSIG_S1: + return 0xA; + case HexagonII::HSIG_A: + return 0x6; + } + case HexagonII::HSIG_S2: + switch (Gb) { + default: + break; + case HexagonII::HSIG_L1: + return 0xC; + case HexagonII::HSIG_L2: + return 0xD; + case HexagonII::HSIG_S1: + return 0xB; + case HexagonII::HSIG_S2: + return 0xE; + case HexagonII::HSIG_A: + return 0x7; + } + case HexagonII::HSIG_A: + switch (Gb) { + default: + break; + case HexagonII::HSIG_A: + return 0x3; + } + case HexagonII::HSIG_Compound: + switch (Gb) { + case HexagonII::HSIG_Compound: + return 0xFFFFFFFF; + } + } + return 0xFFFFFFFF; +} + +unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { + unsigned DstReg, PredReg, SrcReg, Src1Reg, Src2Reg; + + switch (MCI.getOpcode()) { + default: + return HexagonII::HSIG_None; + // + // Group L1: + // + // Rd = memw(Rs+#u4:2) + // Rd = memub(Rs+#u4:0) + case Hexagon::L2_loadri_io: + DstReg = MCI.getOperand(0).getReg(); + SrcReg = MCI.getOperand(1).getReg(); + // Special case this one from Group L2. 
+ // Rd = memw(r29+#u5:2) + if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg)) { + if (HexagonMCInstrInfo::isIntReg(SrcReg) && + Hexagon::R29 == SrcReg && inRange<5, 2>(MCI, 2)) { + return HexagonII::HSIG_L2; + } + // Rd = memw(Rs+#u4:2) + if (HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && + inRange<4, 2>(MCI, 2)) { + return HexagonII::HSIG_L1; + } + } + break; + case Hexagon::L2_loadrub_io: + // Rd = memub(Rs+#u4:0) + DstReg = MCI.getOperand(0).getReg(); + SrcReg = MCI.getOperand(1).getReg(); + if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) && + HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && + inRange<4>(MCI, 2)) { + return HexagonII::HSIG_L1; + } + break; + // + // Group L2: + // + // Rd = memh/memuh(Rs+#u3:1) + // Rd = memb(Rs+#u3:0) + // Rd = memw(r29+#u5:2) - Handled above. + // Rdd = memd(r29+#u5:3) + // deallocframe + // [if ([!]p0[.new])] dealloc_return + // [if ([!]p0[.new])] jumpr r31 + case Hexagon::L2_loadrh_io: + case Hexagon::L2_loadruh_io: + // Rd = memh/memuh(Rs+#u3:1) + DstReg = MCI.getOperand(0).getReg(); + SrcReg = MCI.getOperand(1).getReg(); + if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) && + HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && + inRange<3, 1>(MCI, 2)) { + return HexagonII::HSIG_L2; + } + break; + case Hexagon::L2_loadrb_io: + // Rd = memb(Rs+#u3:0) + DstReg = MCI.getOperand(0).getReg(); + SrcReg = MCI.getOperand(1).getReg(); + if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) && + HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && + inRange<3>(MCI, 2)) { + return HexagonII::HSIG_L2; + } + break; + case Hexagon::L2_loadrd_io: + // Rdd = memd(r29+#u5:3) + DstReg = MCI.getOperand(0).getReg(); + SrcReg = MCI.getOperand(1).getReg(); + if (HexagonMCInstrInfo::isDblRegForSubInst(DstReg) && + HexagonMCInstrInfo::isIntReg(SrcReg) && Hexagon::R29 == SrcReg && + inRange<5, 3>(MCI, 2)) { + return HexagonII::HSIG_L2; + } + break; + + case Hexagon::L4_return: + + case Hexagon::L2_deallocframe: + + return HexagonII::HSIG_L2; + case Hexagon::EH_RETURN_JMPR: + + case Hexagon::J2_jumpr: + case Hexagon::JMPret: + // jumpr r31 + // Actual form JMPR %PC<imp-def>, %R31<imp-use>, %R0<imp-use,internal>. + DstReg = MCI.getOperand(0).getReg(); + if (Hexagon::R31 == DstReg) { + return HexagonII::HSIG_L2; + } + break; + + case Hexagon::J2_jumprt: + case Hexagon::J2_jumprf: + case Hexagon::J2_jumprtnew: + case Hexagon::J2_jumprfnew: + case Hexagon::JMPrett: + case Hexagon::JMPretf: + case Hexagon::JMPrettnew: + case Hexagon::JMPretfnew: + case Hexagon::JMPrettnewpt: + case Hexagon::JMPretfnewpt: + DstReg = MCI.getOperand(1).getReg(); + SrcReg = MCI.getOperand(0).getReg(); + // [if ([!]p0[.new])] jumpr r31 + if ((HexagonMCInstrInfo::isPredReg(SrcReg) && (Hexagon::P0 == SrcReg)) && + (Hexagon::R31 == DstReg)) { + return HexagonII::HSIG_L2; + } + break; + case Hexagon::L4_return_t: + + case Hexagon::L4_return_f: + + case Hexagon::L4_return_tnew_pnt: + + case Hexagon::L4_return_fnew_pnt: + + case Hexagon::L4_return_tnew_pt: + + case Hexagon::L4_return_fnew_pt: + // [if ([!]p0[.new])] dealloc_return + SrcReg = MCI.getOperand(0).getReg(); + if (Hexagon::P0 == SrcReg) { + return HexagonII::HSIG_L2; + } + break; + // + // Group S1: + // + // memw(Rs+#u4:2) = Rt + // memb(Rs+#u4:0) = Rt + case Hexagon::S2_storeri_io: + // Special case this one from Group S2. 
+ // memw(r29+#u5:2) = Rt + Src1Reg = MCI.getOperand(0).getReg(); + Src2Reg = MCI.getOperand(2).getReg(); + if (HexagonMCInstrInfo::isIntReg(Src1Reg) && + HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg) && + Hexagon::R29 == Src1Reg && inRange<5, 2>(MCI, 1)) { + return HexagonII::HSIG_S2; + } + // memw(Rs+#u4:2) = Rt + if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) && + HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg) && + inRange<4, 2>(MCI, 1)) { + return HexagonII::HSIG_S1; + } + break; + case Hexagon::S2_storerb_io: + // memb(Rs+#u4:0) = Rt + Src1Reg = MCI.getOperand(0).getReg(); + Src2Reg = MCI.getOperand(2).getReg(); + if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) && + HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg) && + inRange<4>(MCI, 1)) { + return HexagonII::HSIG_S1; + } + break; + // + // Group S2: + // + // memh(Rs+#u3:1) = Rt + // memw(r29+#u5:2) = Rt + // memd(r29+#s6:3) = Rtt + // memw(Rs+#u4:2) = #U1 + // memb(Rs+#u4) = #U1 + // allocframe(#u5:3) + case Hexagon::S2_storerh_io: + // memh(Rs+#u3:1) = Rt + Src1Reg = MCI.getOperand(0).getReg(); + Src2Reg = MCI.getOperand(2).getReg(); + if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) && + HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg) && + inRange<3, 1>(MCI, 1)) { + return HexagonII::HSIG_S2; + } + break; + case Hexagon::S2_storerd_io: + // memd(r29+#s6:3) = Rtt + Src1Reg = MCI.getOperand(0).getReg(); + Src2Reg = MCI.getOperand(2).getReg(); + if (HexagonMCInstrInfo::isDblRegForSubInst(Src2Reg) && + HexagonMCInstrInfo::isIntReg(Src1Reg) && Hexagon::R29 == Src1Reg && + inSRange<6, 3>(MCI, 1)) { + return HexagonII::HSIG_S2; + } + break; + case Hexagon::S4_storeiri_io: + // memw(Rs+#u4:2) = #U1 + Src1Reg = MCI.getOperand(0).getReg(); + if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) && + inRange<4, 2>(MCI, 1) && inRange<1>(MCI, 2)) { + return HexagonII::HSIG_S2; + } + break; + case Hexagon::S4_storeirb_io: + // memb(Rs+#u4) = #U1 + Src1Reg = MCI.getOperand(0).getReg(); + if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) && + inRange<4>(MCI, 1) && inRange<1>(MCI, 2)) { + return HexagonII::HSIG_S2; + } + break; + case Hexagon::S2_allocframe: + if (inRange<5, 3>(MCI, 0)) + return HexagonII::HSIG_S2; + break; + // + // Group A: + // + // Rx = add(Rx,#s7) + // Rd = Rs + // Rd = #u6 + // Rd = #-1 + // if ([!]P0[.new]) Rd = #0 + // Rd = add(r29,#u6:2) + // Rx = add(Rx,Rs) + // P0 = cmp.eq(Rs,#u2) + // Rdd = combine(#0,Rs) + // Rdd = combine(Rs,#0) + // Rdd = combine(#u2,#U2) + // Rd = add(Rs,#1) + // Rd = add(Rs,#-1) + // Rd = sxth/sxtb/zxtb/zxth(Rs) + // Rd = and(Rs,#1) + case Hexagon::A2_addi: + DstReg = MCI.getOperand(0).getReg(); + SrcReg = MCI.getOperand(1).getReg(); + if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg)) { + // Rd = add(r29,#u6:2) + if (HexagonMCInstrInfo::isIntReg(SrcReg) && Hexagon::R29 == SrcReg && + inRange<6, 2>(MCI, 2)) { + return HexagonII::HSIG_A; + } + // Rx = add(Rx,#s7) + if (DstReg == SrcReg) { + return HexagonII::HSIG_A; + } + // Rd = add(Rs,#1) + // Rd = add(Rs,#-1) + if (HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && + (minConstant(MCI, 2) == 1 || minConstant(MCI, 2) == -1)) { + return HexagonII::HSIG_A; + } + } + break; + case Hexagon::A2_add: + // Rx = add(Rx,Rs) + DstReg = MCI.getOperand(0).getReg(); + Src1Reg = MCI.getOperand(1).getReg(); + Src2Reg = MCI.getOperand(2).getReg(); + if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) && (DstReg == Src1Reg) && + HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg)) { + return HexagonII::HSIG_A; + } + break; + case Hexagon::A2_andir: + 
DstReg = MCI.getOperand(0).getReg(); + SrcReg = MCI.getOperand(1).getReg(); + if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) && + HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && + (minConstant(MCI, 2) == 1 || minConstant(MCI, 2) == 255)) { + return HexagonII::HSIG_A; + } + break; + case Hexagon::A2_tfr: + // Rd = Rs + DstReg = MCI.getOperand(0).getReg(); + SrcReg = MCI.getOperand(1).getReg(); + if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) && + HexagonMCInstrInfo::isIntRegForSubInst(SrcReg)) { + return HexagonII::HSIG_A; + } + break; + case Hexagon::A2_tfrsi: + DstReg = MCI.getOperand(0).getReg(); + + if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg)) { + return HexagonII::HSIG_A; + } + break; + case Hexagon::C2_cmoveit: + case Hexagon::C2_cmovenewit: + case Hexagon::C2_cmoveif: + case Hexagon::C2_cmovenewif: + // if ([!]P0[.new]) Rd = #0 + // Actual form: + // %R16<def> = C2_cmovenewit %P0<internal>, 0, %R16<imp-use,undef>; + DstReg = MCI.getOperand(0).getReg(); // Rd + PredReg = MCI.getOperand(1).getReg(); // P0 + if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) && + Hexagon::P0 == PredReg && minConstant(MCI, 2) == 0) { + return HexagonII::HSIG_A; + } + break; + case Hexagon::C2_cmpeqi: + // P0 = cmp.eq(Rs,#u2) + DstReg = MCI.getOperand(0).getReg(); + SrcReg = MCI.getOperand(1).getReg(); + if (Hexagon::P0 == DstReg && + HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && + inRange<2>(MCI, 2)) { + return HexagonII::HSIG_A; + } + break; + case Hexagon::A2_combineii: + case Hexagon::A4_combineii: + // Rdd = combine(#u2,#U2) + DstReg = MCI.getOperand(0).getReg(); + if (HexagonMCInstrInfo::isDblRegForSubInst(DstReg) && + inRange<2>(MCI, 1) && inRange<2>(MCI, 2)) { + return HexagonII::HSIG_A; + } + break; + case Hexagon::A4_combineri: + // Rdd = combine(Rs,#0) + DstReg = MCI.getOperand(0).getReg(); + SrcReg = MCI.getOperand(1).getReg(); + if (HexagonMCInstrInfo::isDblRegForSubInst(DstReg) && + HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && + minConstant(MCI, 2) == 0) { + return HexagonII::HSIG_A; + } + break; + case Hexagon::A4_combineir: + // Rdd = combine(#0,Rs) + DstReg = MCI.getOperand(0).getReg(); + SrcReg = MCI.getOperand(2).getReg(); + if (HexagonMCInstrInfo::isDblRegForSubInst(DstReg) && + HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && + minConstant(MCI, 1) == 0) { + return HexagonII::HSIG_A; + } + break; + case Hexagon::A2_sxtb: + case Hexagon::A2_sxth: + case Hexagon::A2_zxtb: + case Hexagon::A2_zxth: + // Rd = sxth/sxtb/zxtb/zxth(Rs) + DstReg = MCI.getOperand(0).getReg(); + SrcReg = MCI.getOperand(1).getReg(); + if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) && + HexagonMCInstrInfo::isIntRegForSubInst(SrcReg)) { + return HexagonII::HSIG_A; + } + break; + } + + return HexagonII::HSIG_None; +} + +bool HexagonMCInstrInfo::subInstWouldBeExtended(MCInst const &potentialDuplex) { + unsigned DstReg, SrcReg; + switch (potentialDuplex.getOpcode()) { + case Hexagon::A2_addi: + // testing for case of: Rx = add(Rx,#s7) + DstReg = potentialDuplex.getOperand(0).getReg(); + SrcReg = potentialDuplex.getOperand(1).getReg(); + if (DstReg == SrcReg && HexagonMCInstrInfo::isIntRegForSubInst(DstReg)) { + int64_t Value; + if (!potentialDuplex.getOperand(2).getExpr()->evaluateAsAbsolute(Value)) + return true; + if (!isShiftedInt<7, 0>(Value)) + return true; + } + break; + case Hexagon::A2_tfrsi: + DstReg = potentialDuplex.getOperand(0).getReg(); + + if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg)) { + int64_t Value; + if 
(!potentialDuplex.getOperand(1).getExpr()->evaluateAsAbsolute(Value)) + return true; + // Check for case of Rd = #-1. + if (Value == -1) + return false; + // Check for case of Rd = #u6. + if (!isShiftedUInt<6, 0>(Value)) + return true; + } + break; + default: + break; + } + return false; +} + +/// non-Symmetrical. See if these two instructions are fit for duplex pair. +bool HexagonMCInstrInfo::isOrderedDuplexPair(MCInstrInfo const &MCII, + MCInst const &MIa, bool ExtendedA, + MCInst const &MIb, bool ExtendedB, + bool bisReversable) { + // Slot 1 cannot be extended in duplexes PRM 10.5 + if (ExtendedA) + return false; + // Only A2_addi and A2_tfrsi can be extended in duplex form PRM 10.5 + if (ExtendedB) { + unsigned Opcode = MIb.getOpcode(); + if ((Opcode != Hexagon::A2_addi) && (Opcode != Hexagon::A2_tfrsi)) + return false; + } + unsigned MIaG = HexagonMCInstrInfo::getDuplexCandidateGroup(MIa), + MIbG = HexagonMCInstrInfo::getDuplexCandidateGroup(MIb); + + // If a duplex contains 2 insns in the same group, the insns must be + // ordered such that the numerically smaller opcode is in slot 1. + if ((MIaG != HexagonII::HSIG_None) && (MIaG == MIbG) && bisReversable) { + MCInst SubInst0 = HexagonMCInstrInfo::deriveSubInst(MIa); + MCInst SubInst1 = HexagonMCInstrInfo::deriveSubInst(MIb); + + unsigned zeroedSubInstS0 = + subinstOpcodeMap.find(SubInst0.getOpcode())->second; + unsigned zeroedSubInstS1 = + subinstOpcodeMap.find(SubInst1.getOpcode())->second; + + if (zeroedSubInstS0 < zeroedSubInstS1) + // subinstS0 (maps to slot 0) must be greater than + // subinstS1 (maps to slot 1) + return false; + } + + // allocframe must always be in slot 0 + if (MIb.getOpcode() == Hexagon::S2_allocframe) + return false; + + if ((MIaG != HexagonII::HSIG_None) && (MIbG != HexagonII::HSIG_None)) { + // Prevent 2 instructions with extenders from duplexing + // Note that MIb (slot1) can be extended and MIa (slot0) + // can never be extended + if (subInstWouldBeExtended(MIa)) + return false; + + // If duplexing produces an extender, but the original did not + // have an extender, do not duplex. + if (subInstWouldBeExtended(MIb) && !ExtendedB) + return false; + } + + // If jumpr r31 appears, it must be in slot 0, and never slot 1 (MIb). + if (MIbG == HexagonII::HSIG_L2) { + if ((MIb.getNumOperands() > 1) && MIb.getOperand(1).isReg() && + (MIb.getOperand(1).getReg() == Hexagon::R31)) + return false; + if ((MIb.getNumOperands() > 0) && MIb.getOperand(0).isReg() && + (MIb.getOperand(0).getReg() == Hexagon::R31)) + return false; + } + + // If a store appears, it must be in slot 0 (MIa) 1st, and then slot 1 (MIb); + // therefore, not duplexable if slot 1 is a store, and slot 0 is not. + if ((MIbG == HexagonII::HSIG_S1) || (MIbG == HexagonII::HSIG_S2)) { + if ((MIaG != HexagonII::HSIG_S1) && (MIaG != HexagonII::HSIG_S2)) + return false; + } + + return (isDuplexPairMatch(MIaG, MIbG)); +} + +/// Symmetrical. See if these two instructions are fit for duplex pair. 
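The ordered check above is deliberately asymmetric: the second instruction of the pair carries the extra restrictions on extension, jumpr r31, stores and allocframe. The sketch below shows how an ordered, table-driven check turns into the symmetric query defined next; Group, orderedMatch and symmetricMatch are invented names, and the rows only loosely mirror the L1/L2/A cases of isDuplexPairMatch.

#include <cstdint>

enum Group : std::uint8_t { G_None, G_L1, G_L2, G_A };

// Toy compatibility rows modelled on the L1/L2/A cases of isDuplexPairMatch.
static bool orderedMatch(Group Ga, Group Gb) {
  switch (Ga) {
  case G_L1: return Gb == G_L1 || Gb == G_A;
  case G_L2: return Gb == G_L1 || Gb == G_L2 || Gb == G_A;
  case G_A:  return Gb == G_A;
  default:   return false;
  }
}

// The symmetric query is the ordered query tried both ways, which is how
// isDuplexPair is built from isDuplexPairMatch just below.
static bool symmetricMatch(Group Ga, Group Gb) {
  return orderedMatch(Ga, Gb) || orderedMatch(Gb, Ga);
}

int main() {
  // (A, L2) only works in one order, but the symmetric query accepts it.
  return (symmetricMatch(G_A, G_L2) && !orderedMatch(G_A, G_L2)) ? 0 : 1;
}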
+bool HexagonMCInstrInfo::isDuplexPair(MCInst const &MIa, MCInst const &MIb) { + unsigned MIaG = getDuplexCandidateGroup(MIa), + MIbG = getDuplexCandidateGroup(MIb); + return (isDuplexPairMatch(MIaG, MIbG) || isDuplexPairMatch(MIbG, MIaG)); +} + +inline static void addOps(MCInst &subInstPtr, MCInst const &Inst, + unsigned opNum) { + if (Inst.getOperand(opNum).isReg()) { + switch (Inst.getOperand(opNum).getReg()) { + default: + llvm_unreachable("Not Duplexable Register"); + break; + case Hexagon::R0: + case Hexagon::R1: + case Hexagon::R2: + case Hexagon::R3: + case Hexagon::R4: + case Hexagon::R5: + case Hexagon::R6: + case Hexagon::R7: + case Hexagon::D0: + case Hexagon::D1: + case Hexagon::D2: + case Hexagon::D3: + case Hexagon::R16: + case Hexagon::R17: + case Hexagon::R18: + case Hexagon::R19: + case Hexagon::R20: + case Hexagon::R21: + case Hexagon::R22: + case Hexagon::R23: + case Hexagon::D8: + case Hexagon::D9: + case Hexagon::D10: + case Hexagon::D11: + subInstPtr.addOperand(Inst.getOperand(opNum)); + break; + } + } else + subInstPtr.addOperand(Inst.getOperand(opNum)); +} + +MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) { + MCInst Result; + bool Absolute; + int64_t Value; + switch (Inst.getOpcode()) { + default: + // dbgs() << "opcode: "<< Inst->getOpcode() << "\n"; + llvm_unreachable("Unimplemented subinstruction \n"); + break; + case Hexagon::A2_addi: + Absolute = Inst.getOperand(2).getExpr()->evaluateAsAbsolute(Value); + assert(Absolute);(void)Absolute; + if (Value == 1) { + Result.setOpcode(Hexagon::V4_SA1_inc); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; + } // 1,2 SUBInst $Rd = add($Rs, #1) + else if (Value == -1) { + Result.setOpcode(Hexagon::V4_SA1_dec); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; + } // 1,2 SUBInst $Rd = add($Rs,#-1) + else if (Inst.getOperand(1).getReg() == Hexagon::R29) { + Result.setOpcode(Hexagon::V4_SA1_addsp); + addOps(Result, Inst, 0); + addOps(Result, Inst, 2); + break; + } // 1,3 SUBInst $Rd = add(r29, #$u6_2) + else { + Result.setOpcode(Hexagon::V4_SA1_addi); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + addOps(Result, Inst, 2); + break; + } // 1,2,3 SUBInst $Rx = add($Rx, #$s7) + case Hexagon::A2_add: + Result.setOpcode(Hexagon::V4_SA1_addrx); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + addOps(Result, Inst, 2); + break; // 1,2,3 SUBInst $Rx = add($_src_, $Rs) + case Hexagon::S2_allocframe: + Result.setOpcode(Hexagon::V4_SS2_allocframe); + addOps(Result, Inst, 0); + break; // 1 SUBInst allocframe(#$u5_3) + case Hexagon::A2_andir: + if (minConstant(Inst, 2) == 255) { + Result.setOpcode(Hexagon::V4_SA1_zxtb); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; // 1,2 $Rd = and($Rs, #255) + } else { + Result.setOpcode(Hexagon::V4_SA1_and1); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; // 1,2 SUBInst $Rd = and($Rs, #1) + } + case Hexagon::C2_cmpeqi: + Result.setOpcode(Hexagon::V4_SA1_cmpeqi); + addOps(Result, Inst, 1); + addOps(Result, Inst, 2); + break; // 2,3 SUBInst p0 = cmp.eq($Rs, #$u2) + case Hexagon::A4_combineii: + case Hexagon::A2_combineii: + Absolute = Inst.getOperand(1).getExpr()->evaluateAsAbsolute(Value); + assert(Absolute);(void)Absolute; + if (Value == 1) { + Result.setOpcode(Hexagon::V4_SA1_combine1i); + addOps(Result, Inst, 0); + addOps(Result, Inst, 2); + break; // 1,3 SUBInst $Rdd = combine(#1, #$u2) + } + if (Value == 3) { + Result.setOpcode(Hexagon::V4_SA1_combine3i); + addOps(Result, Inst, 0); + addOps(Result, Inst, 2); + 
break; // 1,3 SUBInst $Rdd = combine(#3, #$u2) + } + if (Value == 0) { + Result.setOpcode(Hexagon::V4_SA1_combine0i); + addOps(Result, Inst, 0); + addOps(Result, Inst, 2); + break; // 1,3 SUBInst $Rdd = combine(#0, #$u2) + } + if (Value == 2) { + Result.setOpcode(Hexagon::V4_SA1_combine2i); + addOps(Result, Inst, 0); + addOps(Result, Inst, 2); + break; // 1,3 SUBInst $Rdd = combine(#2, #$u2) + } + case Hexagon::A4_combineir: + Result.setOpcode(Hexagon::V4_SA1_combinezr); + addOps(Result, Inst, 0); + addOps(Result, Inst, 2); + break; // 1,3 SUBInst $Rdd = combine(#0, $Rs) + + case Hexagon::A4_combineri: + Result.setOpcode(Hexagon::V4_SA1_combinerz); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; // 1,2 SUBInst $Rdd = combine($Rs, #0) + case Hexagon::L4_return_tnew_pnt: + case Hexagon::L4_return_tnew_pt: + Result.setOpcode(Hexagon::V4_SL2_return_tnew); + break; // none SUBInst if (p0.new) dealloc_return:nt + case Hexagon::L4_return_fnew_pnt: + case Hexagon::L4_return_fnew_pt: + Result.setOpcode(Hexagon::V4_SL2_return_fnew); + break; // none SUBInst if (!p0.new) dealloc_return:nt + case Hexagon::L4_return_f: + Result.setOpcode(Hexagon::V4_SL2_return_f); + break; // none SUBInst if (!p0) dealloc_return + case Hexagon::L4_return_t: + Result.setOpcode(Hexagon::V4_SL2_return_t); + break; // none SUBInst if (p0) dealloc_return + case Hexagon::L4_return: + Result.setOpcode(Hexagon::V4_SL2_return); + break; // none SUBInst dealloc_return + case Hexagon::L2_deallocframe: + Result.setOpcode(Hexagon::V4_SL2_deallocframe); + break; // none SUBInst deallocframe + case Hexagon::EH_RETURN_JMPR: + case Hexagon::J2_jumpr: + case Hexagon::JMPret: + Result.setOpcode(Hexagon::V4_SL2_jumpr31); + break; // none SUBInst jumpr r31 + case Hexagon::J2_jumprf: + case Hexagon::JMPretf: + Result.setOpcode(Hexagon::V4_SL2_jumpr31_f); + break; // none SUBInst if (!p0) jumpr r31 + case Hexagon::J2_jumprfnew: + case Hexagon::JMPretfnewpt: + case Hexagon::JMPretfnew: + Result.setOpcode(Hexagon::V4_SL2_jumpr31_fnew); + break; // none SUBInst if (!p0.new) jumpr:nt r31 + case Hexagon::J2_jumprt: + case Hexagon::JMPrett: + Result.setOpcode(Hexagon::V4_SL2_jumpr31_t); + break; // none SUBInst if (p0) jumpr r31 + case Hexagon::J2_jumprtnew: + case Hexagon::JMPrettnewpt: + case Hexagon::JMPrettnew: + Result.setOpcode(Hexagon::V4_SL2_jumpr31_tnew); + break; // none SUBInst if (p0.new) jumpr:nt r31 + case Hexagon::L2_loadrb_io: + Result.setOpcode(Hexagon::V4_SL2_loadrb_io); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + addOps(Result, Inst, 2); + break; // 1,2,3 SUBInst $Rd = memb($Rs + #$u3_0) + case Hexagon::L2_loadrd_io: + Result.setOpcode(Hexagon::V4_SL2_loadrd_sp); + addOps(Result, Inst, 0); + addOps(Result, Inst, 2); + break; // 1,3 SUBInst $Rdd = memd(r29 + #$u5_3) + case Hexagon::L2_loadrh_io: + Result.setOpcode(Hexagon::V4_SL2_loadrh_io); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + addOps(Result, Inst, 2); + break; // 1,2,3 SUBInst $Rd = memh($Rs + #$u3_1) + case Hexagon::L2_loadrub_io: + Result.setOpcode(Hexagon::V4_SL1_loadrub_io); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + addOps(Result, Inst, 2); + break; // 1,2,3 SUBInst $Rd = memub($Rs + #$u4_0) + case Hexagon::L2_loadruh_io: + Result.setOpcode(Hexagon::V4_SL2_loadruh_io); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + addOps(Result, Inst, 2); + break; // 1,2,3 SUBInst $Rd = memuh($Rs + #$u3_1) + case Hexagon::L2_loadri_io: + if (Inst.getOperand(1).getReg() == Hexagon::R29) { + 
Result.setOpcode(Hexagon::V4_SL2_loadri_sp); + addOps(Result, Inst, 0); + addOps(Result, Inst, 2); + break; // 2 1,3 SUBInst $Rd = memw(r29 + #$u5_2) + } else { + Result.setOpcode(Hexagon::V4_SL1_loadri_io); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + addOps(Result, Inst, 2); + break; // 1,2,3 SUBInst $Rd = memw($Rs + #$u4_2) + } + case Hexagon::S4_storeirb_io: + Absolute = Inst.getOperand(2).getExpr()->evaluateAsAbsolute(Value); + assert(Absolute);(void)Absolute; + if (Value == 0) { + Result.setOpcode(Hexagon::V4_SS2_storebi0); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; // 1,2 SUBInst memb($Rs + #$u4_0)=#0 + } else if (Value == 1) { + Result.setOpcode(Hexagon::V4_SS2_storebi1); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; // 2 1,2 SUBInst memb($Rs + #$u4_0)=#1 + } + case Hexagon::S2_storerb_io: + Result.setOpcode(Hexagon::V4_SS1_storeb_io); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + addOps(Result, Inst, 2); + break; // 1,2,3 SUBInst memb($Rs + #$u4_0) = $Rt + case Hexagon::S2_storerd_io: + Result.setOpcode(Hexagon::V4_SS2_stored_sp); + addOps(Result, Inst, 1); + addOps(Result, Inst, 2); + break; // 2,3 SUBInst memd(r29 + #$s6_3) = $Rtt + case Hexagon::S2_storerh_io: + Result.setOpcode(Hexagon::V4_SS2_storeh_io); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + addOps(Result, Inst, 2); + break; // 1,2,3 SUBInst memb($Rs + #$u4_0) = $Rt + case Hexagon::S4_storeiri_io: + Absolute = Inst.getOperand(2).getExpr()->evaluateAsAbsolute(Value); + assert(Absolute);(void)Absolute; + if (Value == 0) { + Result.setOpcode(Hexagon::V4_SS2_storewi0); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; // 3 1,2 SUBInst memw($Rs + #$u4_2)=#0 + } else if (Value == 1) { + Result.setOpcode(Hexagon::V4_SS2_storewi1); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; // 3 1,2 SUBInst memw($Rs + #$u4_2)=#1 + } else if (Inst.getOperand(0).getReg() == Hexagon::R29) { + Result.setOpcode(Hexagon::V4_SS2_storew_sp); + addOps(Result, Inst, 1); + addOps(Result, Inst, 2); + break; // 1 2,3 SUBInst memw(r29 + #$u5_2) = $Rt + } + case Hexagon::S2_storeri_io: + if (Inst.getOperand(0).getReg() == Hexagon::R29) { + Result.setOpcode(Hexagon::V4_SS2_storew_sp); + addOps(Result, Inst, 1); + addOps(Result, Inst, 2); // 1,2,3 SUBInst memw(sp + #$u5_2) = $Rt + } else { + Result.setOpcode(Hexagon::V4_SS1_storew_io); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + addOps(Result, Inst, 2); // 1,2,3 SUBInst memw($Rs + #$u4_2) = $Rt + } + break; + case Hexagon::A2_sxtb: + Result.setOpcode(Hexagon::V4_SA1_sxtb); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; // 1,2 SUBInst $Rd = sxtb($Rs) + case Hexagon::A2_sxth: + Result.setOpcode(Hexagon::V4_SA1_sxth); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; // 1,2 SUBInst $Rd = sxth($Rs) + case Hexagon::A2_tfr: + Result.setOpcode(Hexagon::V4_SA1_tfr); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; // 1,2 SUBInst $Rd = $Rs + case Hexagon::C2_cmovenewif: + Result.setOpcode(Hexagon::V4_SA1_clrfnew); + addOps(Result, Inst, 0); + break; // 2 SUBInst if (!p0.new) $Rd = #0 + case Hexagon::C2_cmovenewit: + Result.setOpcode(Hexagon::V4_SA1_clrtnew); + addOps(Result, Inst, 0); + break; // 2 SUBInst if (p0.new) $Rd = #0 + case Hexagon::C2_cmoveif: + Result.setOpcode(Hexagon::V4_SA1_clrf); + addOps(Result, Inst, 0); + break; // 2 SUBInst if (!p0) $Rd = #0 + case Hexagon::C2_cmoveit: + Result.setOpcode(Hexagon::V4_SA1_clrt); + addOps(Result, Inst, 0); + break; 
// 2 SUBInst if (p0) $Rd = #0 + case Hexagon::A2_tfrsi: + Absolute = Inst.getOperand(1).getExpr()->evaluateAsAbsolute(Value); + if (Absolute && Value == -1) { + Result.setOpcode(Hexagon::V4_SA1_setin1); + addOps(Result, Inst, 0); + break; // 2 1 SUBInst $Rd = #-1 + } else { + Result.setOpcode(Hexagon::V4_SA1_seti); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; // 1,2 SUBInst $Rd = #$u6 + } + case Hexagon::A2_zxtb: + Result.setOpcode(Hexagon::V4_SA1_zxtb); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; // 1,2 $Rd = and($Rs, #255) + + case Hexagon::A2_zxth: + Result.setOpcode(Hexagon::V4_SA1_zxth); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; // 1,2 SUBInst $Rd = zxth($Rs) + } + return Result; +} + +static bool isStoreInst(unsigned opCode) { + switch (opCode) { + case Hexagon::S2_storeri_io: + case Hexagon::S2_storerb_io: + case Hexagon::S2_storerh_io: + case Hexagon::S2_storerd_io: + case Hexagon::S4_storeiri_io: + case Hexagon::S4_storeirb_io: + case Hexagon::S2_allocframe: + return true; + default: + return false; + } +} + +SmallVector<DuplexCandidate, 8> +HexagonMCInstrInfo::getDuplexPossibilties(MCInstrInfo const &MCII, + MCInst const &MCB) { + assert(isBundle(MCB)); + SmallVector<DuplexCandidate, 8> duplexToTry; + // Use an "order matters" version of isDuplexPair. + unsigned numInstrInPacket = MCB.getNumOperands(); + + for (unsigned distance = 1; distance < numInstrInPacket; ++distance) { + for (unsigned j = HexagonMCInstrInfo::bundleInstructionsOffset, + k = j + distance; + (j < numInstrInPacket) && (k < numInstrInPacket); ++j, ++k) { + + // Check if reversable. + bool bisReversable = true; + if (isStoreInst(MCB.getOperand(j).getInst()->getOpcode()) && + isStoreInst(MCB.getOperand(k).getInst()->getOpcode())) { + DEBUG(dbgs() << "skip out of order write pair: " << k << "," << j + << "\n"); + bisReversable = false; + } + if (HexagonMCInstrInfo::isMemReorderDisabled(MCB)) // }:mem_noshuf + bisReversable = false; + + // Try in order. + if (isOrderedDuplexPair( + MCII, *MCB.getOperand(k).getInst(), + HexagonMCInstrInfo::hasExtenderForIndex(MCB, k - 1), + *MCB.getOperand(j).getInst(), + HexagonMCInstrInfo::hasExtenderForIndex(MCB, j - 1), + bisReversable)) { + // Get iClass. + unsigned iClass = iClassOfDuplexPair( + getDuplexCandidateGroup(*MCB.getOperand(k).getInst()), + getDuplexCandidateGroup(*MCB.getOperand(j).getInst())); + + // Save off pairs for duplex checking. + duplexToTry.push_back(DuplexCandidate(j, k, iClass)); + DEBUG(dbgs() << "adding pair: " << j << "," << k << ":" + << MCB.getOperand(j).getInst()->getOpcode() << "," + << MCB.getOperand(k).getInst()->getOpcode() << "\n"); + continue; + } else { + DEBUG(dbgs() << "skipping pair: " << j << "," << k << ":" + << MCB.getOperand(j).getInst()->getOpcode() << "," + << MCB.getOperand(k).getInst()->getOpcode() << "\n"); + } + + // Try reverse. + if (bisReversable) { + if (isOrderedDuplexPair( + MCII, *MCB.getOperand(j).getInst(), + HexagonMCInstrInfo::hasExtenderForIndex(MCB, j - 1), + *MCB.getOperand(k).getInst(), + HexagonMCInstrInfo::hasExtenderForIndex(MCB, k - 1), + bisReversable)) { + // Get iClass. + unsigned iClass = iClassOfDuplexPair( + getDuplexCandidateGroup(*MCB.getOperand(j).getInst()), + getDuplexCandidateGroup(*MCB.getOperand(k).getInst())); + + // Save off pairs for duplex checking. 
+ duplexToTry.push_back(DuplexCandidate(k, j, iClass)); + DEBUG(dbgs() << "adding pair:" << k << "," << j << ":" + << MCB.getOperand(j).getInst()->getOpcode() << "," + << MCB.getOperand(k).getInst()->getOpcode() << "\n"); + } else { + DEBUG(dbgs() << "skipping pair: " << k << "," << j << ":" + << MCB.getOperand(j).getInst()->getOpcode() << "," + << MCB.getOperand(k).getInst()->getOpcode() << "\n"); + } + } + } + } + return duplexToTry; +} diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp new file mode 100644 index 0000000..eaa3550 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp @@ -0,0 +1,150 @@ +//=== HexagonMCELFStreamer.cpp - Hexagon subclass of MCELFStreamer -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a stub that parses a MCInst bundle and passes the +// instructions on to the real streamer. +// +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "hexagonmcelfstreamer" + +#include "Hexagon.h" +#include "HexagonMCELFStreamer.h" +#include "MCTargetDesc/HexagonBaseInfo.h" +#include "MCTargetDesc/HexagonMCShuffler.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolELF.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +static cl::opt<unsigned> + GPSize("gpsize", cl::NotHidden, + cl::desc("Global Pointer Addressing Size. The default size is 8."), + cl::Prefix, cl::init(8)); + +void HexagonMCELFStreamer::EmitInstruction(const MCInst &MCK, + const MCSubtargetInfo &STI) { + MCInst HMI = HexagonMCInstrInfo::createBundle(); + MCInst *MCB; + + if (MCK.getOpcode() != Hexagon::BUNDLE) { + HMI.addOperand(MCOperand::createInst(&MCK)); + MCB = &HMI; + } else + MCB = const_cast<MCInst *>(&MCK); + + // Examines packet and pad the packet, if needed, when an + // end-loop is in the bundle. + HexagonMCInstrInfo::padEndloop(getContext(), *MCB); + HexagonMCShuffle(*MCII, STI, *MCB); + + assert(HexagonMCInstrInfo::bundleSize(*MCB) <= HEXAGON_PACKET_SIZE); + bool Extended = false; + for (auto &I : HexagonMCInstrInfo::bundleInstructions(*MCB)) { + MCInst *MCI = const_cast<MCInst *>(I.getInst()); + if (Extended) { + if (HexagonMCInstrInfo::isDuplex(*MCII, *MCI)) { + MCInst *SubInst = const_cast<MCInst *>(MCI->getOperand(1).getInst()); + HexagonMCInstrInfo::clampExtended(*MCII, getContext(), *SubInst); + } else { + HexagonMCInstrInfo::clampExtended(*MCII, getContext(), *MCI); + } + Extended = false; + } else { + Extended = HexagonMCInstrInfo::isImmext(*MCI); + } + } + + // At this point, MCB is a bundle + // Iterate through the bundle and assign addends for the instructions + for (auto const &I : HexagonMCInstrInfo::bundleInstructions(*MCB)) { + MCInst *MCI = const_cast<MCInst *>(I.getInst()); + EmitSymbol(*MCI); + } + MCObjectStreamer::EmitInstruction(*MCB, STI); +} + +void HexagonMCELFStreamer::EmitSymbol(const MCInst &Inst) { + // Scan for values. 
+ for (unsigned i = Inst.getNumOperands(); i--;) + if (Inst.getOperand(i).isExpr()) + visitUsedExpr(*Inst.getOperand(i).getExpr()); +} + +// EmitCommonSymbol and EmitLocalCommonSymbol are extended versions of the +// functions found in MCELFStreamer.cpp taking AccessSize as an additional +// parameter. +void HexagonMCELFStreamer::HexagonMCEmitCommonSymbol(MCSymbol *Symbol, + uint64_t Size, + unsigned ByteAlignment, + unsigned AccessSize) { + getAssembler().registerSymbol(*Symbol); + StringRef sbss[4] = {".sbss.1", ".sbss.2", ".sbss.4", ".sbss.8"}; + + auto ELFSymbol = cast<MCSymbolELF>(Symbol); + if (!ELFSymbol->isBindingSet()) { + ELFSymbol->setBinding(ELF::STB_GLOBAL); + ELFSymbol->setExternal(true); + } + + ELFSymbol->setType(ELF::STT_OBJECT); + + if (ELFSymbol->getBinding() == ELF::STB_LOCAL) { + StringRef SectionName = + ((AccessSize == 0) || (Size == 0) || (Size > GPSize)) + ? ".bss" + : sbss[(Log2_64(AccessSize))]; + + MCSection *CrntSection = getCurrentSection().first; + MCSection *Section = getAssembler().getContext().getELFSection( + SectionName, ELF::SHT_NOBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC); + SwitchSection(Section); + AssignFragment(Symbol, getCurrentFragment()); + + MCELFStreamer::EmitCommonSymbol(Symbol, Size, ByteAlignment); + SwitchSection(CrntSection); + } else { + if (ELFSymbol->declareCommon(Size, ByteAlignment)) + report_fatal_error("Symbol: " + Symbol->getName() + + " redeclared as different type"); + if ((AccessSize) && (Size <= GPSize)) { + uint64_t SectionIndex = + (AccessSize <= GPSize) + ? ELF::SHN_HEXAGON_SCOMMON + (Log2_64(AccessSize) + 1) + : (unsigned)ELF::SHN_HEXAGON_SCOMMON; + ELFSymbol->setIndex(SectionIndex); + } + } + + ELFSymbol->setSize(MCConstantExpr::create(Size, getContext())); +} + +void HexagonMCELFStreamer::HexagonMCEmitLocalCommonSymbol( + MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment, + unsigned AccessSize) { + getAssembler().registerSymbol(*Symbol); + auto ELFSymbol = cast<MCSymbolELF>(Symbol); + ELFSymbol->setBinding(ELF::STB_LOCAL); + ELFSymbol->setExternal(false); + HexagonMCEmitCommonSymbol(Symbol, Size, ByteAlignment, AccessSize); +} + +namespace llvm { +MCStreamer *createHexagonELFStreamer(MCContext &Context, MCAsmBackend &MAB, + raw_pwrite_stream &OS, MCCodeEmitter *CE) { + return new HexagonMCELFStreamer(Context, MAB, OS, CE); +} +} diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h new file mode 100644 index 0000000..d77c0cd --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h @@ -0,0 +1,45 @@ +//===- HexagonMCELFStreamer.h - Hexagon subclass of MCElfStreamer ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef HEXAGONMCELFSTREAMER_H +#define HEXAGONMCELFSTREAMER_H + +#include "MCTargetDesc/HexagonMCCodeEmitter.h" +#include "MCTargetDesc/HexagonMCInstrInfo.h" +#include "MCTargetDesc/HexagonMCTargetDesc.h" +#include "llvm/MC/MCELFStreamer.h" +#include "HexagonTargetStreamer.h" + +namespace llvm { + +class HexagonMCELFStreamer : public MCELFStreamer { + std::unique_ptr<MCInstrInfo> MCII; + +public: + HexagonMCELFStreamer(MCContext &Context, MCAsmBackend &TAB, + raw_pwrite_stream &OS, MCCodeEmitter *Emitter) + : MCELFStreamer(Context, TAB, OS, Emitter), + MCII(createHexagonMCInstrInfo()) {} + + virtual void EmitInstruction(const MCInst &Inst, + const MCSubtargetInfo &STI) override; + void EmitSymbol(const MCInst &Inst); + void HexagonMCEmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment, + unsigned AccessSize); + void HexagonMCEmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment, unsigned AccessSize); +}; + +MCStreamer *createHexagonELFStreamer(MCContext &Context, MCAsmBackend &MAB, + raw_pwrite_stream &OS, MCCodeEmitter *CE); + +} // namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp new file mode 100644 index 0000000..fc62626 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp @@ -0,0 +1,49 @@ +//===-- HexagonMCExpr.cpp - Hexagon specific MC expression classes +//----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "HexagonMCExpr.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define DEBUG_TYPE "hexagon-mcexpr" + +HexagonNoExtendOperand *HexagonNoExtendOperand::Create(MCExpr const *Expr, + MCContext &Ctx) { + return new (Ctx) HexagonNoExtendOperand(Expr); +} + +bool HexagonNoExtendOperand::evaluateAsRelocatableImpl( + MCValue &Res, MCAsmLayout const *Layout, MCFixup const *Fixup) const { + return Expr->evaluateAsRelocatable(Res, Layout, Fixup); +} + +void HexagonNoExtendOperand::visitUsedExpr(MCStreamer &Streamer) const {} + +MCFragment *llvm::HexagonNoExtendOperand::findAssociatedFragment() const { + return Expr->findAssociatedFragment(); +} + +void HexagonNoExtendOperand::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {} + +MCExpr const *HexagonNoExtendOperand::getExpr() const { return Expr; } + +bool HexagonNoExtendOperand::classof(MCExpr const *E) { + return E->getKind() == MCExpr::Target; +} + +HexagonNoExtendOperand::HexagonNoExtendOperand(MCExpr const *Expr) + : Expr(Expr) {} + +void HexagonNoExtendOperand::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { + Expr->print(OS, MAI); +} diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h new file mode 100644 index 0000000..60f180f --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h @@ -0,0 +1,35 @@ +//==- HexagonMCExpr.h - Hexagon specific MC expression classes --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONMCEXPR_H +#define LLVM_LIB_TARGET_HEXAGON_HEXAGONMCEXPR_H + +#include "llvm/MC/MCExpr.h" + +namespace llvm { +class MCInst; +class HexagonNoExtendOperand : public MCTargetExpr { +public: + static HexagonNoExtendOperand *Create(MCExpr const *Expr, MCContext &Ctx); + void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override; + bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout, + const MCFixup *Fixup) const override; + void visitUsedExpr(MCStreamer &Streamer) const override; + MCFragment *findAssociatedFragment() const override; + void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override; + static bool classof(MCExpr const *E); + MCExpr const *getExpr() const; + +private: + HexagonNoExtendOperand(MCExpr const *Expr); + MCExpr const *Expr; +}; +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONMCEXPR_H diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp new file mode 100644 index 0000000..e684207 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp @@ -0,0 +1,648 @@ +//===- HexagonMCInstrInfo.cpp - Hexagon sub-class of MCInst ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class extends MCInstrInfo to allow Hexagon specific MCInstr queries +// +//===----------------------------------------------------------------------===// + +#include "HexagonMCInstrInfo.h" + +#include "Hexagon.h" +#include "HexagonBaseInfo.h" +#include "HexagonMCChecker.h" + +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" + +namespace llvm { +void HexagonMCInstrInfo::addConstant(MCInst &MI, uint64_t Value, + MCContext &Context) { + MI.addOperand(MCOperand::createExpr(MCConstantExpr::create(Value, Context))); +} + +void HexagonMCInstrInfo::addConstExtender(MCContext &Context, + MCInstrInfo const &MCII, MCInst &MCB, + MCInst const &MCI) { + assert(HexagonMCInstrInfo::isBundle(MCB)); + MCOperand const &exOp = + MCI.getOperand(HexagonMCInstrInfo::getExtendableOp(MCII, MCI)); + + // Create the extender. + MCInst *XMCI = + new (Context) MCInst(HexagonMCInstrInfo::deriveExtender(MCII, MCI, exOp)); + + MCB.addOperand(MCOperand::createInst(XMCI)); +} + +iterator_range<MCInst::const_iterator> +HexagonMCInstrInfo::bundleInstructions(MCInst const &MCI) { + assert(isBundle(MCI)); + return make_range(MCI.begin() + bundleInstructionsOffset, MCI.end()); +} + +size_t HexagonMCInstrInfo::bundleSize(MCInst const &MCI) { + if (HexagonMCInstrInfo::isBundle(MCI)) + return (MCI.size() - bundleInstructionsOffset); + else + return (1); +} + +bool HexagonMCInstrInfo::canonicalizePacket(MCInstrInfo const &MCII, + MCSubtargetInfo const &STI, + MCContext &Context, MCInst &MCB, + HexagonMCChecker *Check) { + // Examine the packet and convert pairs of instructions to compound + // instructions when possible. + if (!HexagonDisableCompound) + HexagonMCInstrInfo::tryCompound(MCII, Context, MCB); + // Check the bundle for errors. + bool CheckOk = Check ? 
Check->check() : true; + if (!CheckOk) + return false; + HexagonMCShuffle(MCII, STI, MCB); + // Examine the packet and convert pairs of instructions to duplex + // instructions when possible. + MCInst InstBundlePreDuplex = MCInst(MCB); + if (!HexagonDisableDuplex) { + SmallVector<DuplexCandidate, 8> possibleDuplexes; + possibleDuplexes = HexagonMCInstrInfo::getDuplexPossibilties(MCII, MCB); + HexagonMCShuffle(MCII, STI, Context, MCB, possibleDuplexes); + } + // Examines packet and pad the packet, if needed, when an + // end-loop is in the bundle. + HexagonMCInstrInfo::padEndloop(Context, MCB); + // If compounding and duplexing didn't reduce the size below + // 4 or less we have a packet that is too big. + if (HexagonMCInstrInfo::bundleSize(MCB) > HEXAGON_PACKET_SIZE) + return false; + HexagonMCShuffle(MCII, STI, MCB); + return true; +} + +void HexagonMCInstrInfo::clampExtended(MCInstrInfo const &MCII, + MCContext &Context, MCInst &MCI) { + assert(HexagonMCInstrInfo::isExtendable(MCII, MCI) || + HexagonMCInstrInfo::isExtended(MCII, MCI)); + MCOperand &exOp = + MCI.getOperand(HexagonMCInstrInfo::getExtendableOp(MCII, MCI)); + // If the extended value is a constant, then use it for the extended and + // for the extender instructions, masking off the lower 6 bits and + // including the assumed bits. + int64_t Value; + if (exOp.getExpr()->evaluateAsAbsolute(Value)) { + unsigned Shift = HexagonMCInstrInfo::getExtentAlignment(MCII, MCI); + exOp.setExpr(MCConstantExpr::create((Value & 0x3f) << Shift, Context)); + } +} + +MCInst HexagonMCInstrInfo::createBundle() { + MCInst Result; + Result.setOpcode(Hexagon::BUNDLE); + Result.addOperand(MCOperand::createImm(0)); + return Result; +} + +MCInst *HexagonMCInstrInfo::deriveDuplex(MCContext &Context, unsigned iClass, + MCInst const &inst0, + MCInst const &inst1) { + assert((iClass <= 0xf) && "iClass must have range of 0 to 0xf"); + MCInst *duplexInst = new (Context) MCInst; + duplexInst->setOpcode(Hexagon::DuplexIClass0 + iClass); + + MCInst *SubInst0 = new (Context) MCInst(deriveSubInst(inst0)); + MCInst *SubInst1 = new (Context) MCInst(deriveSubInst(inst1)); + duplexInst->addOperand(MCOperand::createInst(SubInst0)); + duplexInst->addOperand(MCOperand::createInst(SubInst1)); + return duplexInst; +} + +MCInst HexagonMCInstrInfo::deriveExtender(MCInstrInfo const &MCII, + MCInst const &Inst, + MCOperand const &MO) { + assert(HexagonMCInstrInfo::isExtendable(MCII, Inst) || + HexagonMCInstrInfo::isExtended(MCII, Inst)); + + MCInstrDesc const &Desc = HexagonMCInstrInfo::getDesc(MCII, Inst); + MCInst XMI; + XMI.setOpcode((Desc.isBranch() || Desc.isCall() || + HexagonMCInstrInfo::getType(MCII, Inst) == HexagonII::TypeCR) + ? 
Hexagon::A4_ext_b + : Hexagon::A4_ext); + if (MO.isImm()) + XMI.addOperand(MCOperand::createImm(MO.getImm() & (~0x3f))); + else if (MO.isExpr()) + XMI.addOperand(MCOperand::createExpr(MO.getExpr())); + else + llvm_unreachable("invalid extendable operand"); + return XMI; +} + +MCInst const *HexagonMCInstrInfo::extenderForIndex(MCInst const &MCB, + size_t Index) { + assert(Index <= bundleSize(MCB)); + if (Index == 0) + return nullptr; + MCInst const *Inst = + MCB.getOperand(Index + bundleInstructionsOffset - 1).getInst(); + if (isImmext(*Inst)) + return Inst; + return nullptr; +} + +void HexagonMCInstrInfo::extendIfNeeded(MCContext &Context, + MCInstrInfo const &MCII, MCInst &MCB, + MCInst const &MCI, bool MustExtend) { + if (isConstExtended(MCII, MCI) || MustExtend) + addConstExtender(Context, MCII, MCB, MCI); +} + +HexagonII::MemAccessSize +HexagonMCInstrInfo::getAccessSize(MCInstrInfo const &MCII, MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + + return (HexagonII::MemAccessSize((F >> HexagonII::MemAccessSizePos) & + HexagonII::MemAccesSizeMask)); +} + +unsigned HexagonMCInstrInfo::getBitCount(MCInstrInfo const &MCII, + MCInst const &MCI) { + uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask); +} + +// Return constant extended operand number. +unsigned short HexagonMCInstrInfo::getCExtOpNum(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask); +} + +MCInstrDesc const &HexagonMCInstrInfo::getDesc(MCInstrInfo const &MCII, + MCInst const &MCI) { + return (MCII.get(MCI.getOpcode())); +} + +unsigned short HexagonMCInstrInfo::getExtendableOp(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask); +} + +MCOperand const & +HexagonMCInstrInfo::getExtendableOperand(MCInstrInfo const &MCII, + MCInst const &MCI) { + unsigned O = HexagonMCInstrInfo::getExtendableOp(MCII, MCI); + MCOperand const &MO = MCI.getOperand(O); + + assert((HexagonMCInstrInfo::isExtendable(MCII, MCI) || + HexagonMCInstrInfo::isExtended(MCII, MCI)) && + (MO.isImm() || MO.isExpr())); + return (MO); +} + +unsigned HexagonMCInstrInfo::getExtentAlignment(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::ExtentAlignPos) & HexagonII::ExtentAlignMask); +} + +unsigned HexagonMCInstrInfo::getExtentBits(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask); +} + +// Return the max value that a constant extendable operand can have +// without being extended. +int HexagonMCInstrInfo::getMaxValue(MCInstrInfo const &MCII, + MCInst const &MCI) { + uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + unsigned isSigned = + (F >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask; + unsigned bits = (F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask; + + if (isSigned) // if value is signed + return ~(-1U << (bits - 1)); + else + return ~(-1U << bits); +} + +// Return the min value that a constant extendable operand can have +// without being extended. 
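getMaxValue above and getMinValue below turn the ExtentBits/ExtentSigned fields of TSFlags into the largest and smallest values an operand can hold without a constant extender. The bit tricks reduce to the ordinary two's-complement bounds; the standalone restatement below uses invented names (fieldMin, fieldMax) and example widths (s8, u6) rather than any particular instruction's extent.

#include <cassert>
#include <cstdint>

// Range encodable in an immediate field of 'Bits' bits.
static std::int64_t fieldMin(unsigned Bits, bool IsSigned) {
  return IsSigned ? -(std::int64_t(1) << (Bits - 1)) : 0;
}
static std::int64_t fieldMax(unsigned Bits, bool IsSigned) {
  return IsSigned ? (std::int64_t(1) << (Bits - 1)) - 1
                  : (std::int64_t(1) << Bits) - 1;
}

int main() {
  assert(fieldMin(8, true) == -128 && fieldMax(8, true) == 127); // s8
  assert(fieldMin(6, false) == 0 && fieldMax(6, false) == 63);   // u6
  return 0;
}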
+int HexagonMCInstrInfo::getMinValue(MCInstrInfo const &MCII, + MCInst const &MCI) { + uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + unsigned isSigned = + (F >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask; + unsigned bits = (F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask; + + if (isSigned) // if value is signed + return -1U << (bits - 1); + else + return 0; +} + +char const *HexagonMCInstrInfo::getName(MCInstrInfo const &MCII, + MCInst const &MCI) { + return MCII.getName(MCI.getOpcode()); +} + +unsigned short HexagonMCInstrInfo::getNewValueOp(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::NewValueOpPos) & HexagonII::NewValueOpMask); +} + +MCOperand const &HexagonMCInstrInfo::getNewValueOperand(MCInstrInfo const &MCII, + MCInst const &MCI) { + uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + unsigned const O = + (F >> HexagonII::NewValueOpPos) & HexagonII::NewValueOpMask; + MCOperand const &MCO = MCI.getOperand(O); + + assert((HexagonMCInstrInfo::isNewValue(MCII, MCI) || + HexagonMCInstrInfo::hasNewValue(MCII, MCI)) && + MCO.isReg()); + return (MCO); +} + +/// Return the new value or the newly produced value. +unsigned short HexagonMCInstrInfo::getNewValueOp2(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::NewValueOpPos2) & HexagonII::NewValueOpMask2); +} + +MCOperand const & +HexagonMCInstrInfo::getNewValueOperand2(MCInstrInfo const &MCII, + MCInst const &MCI) { + unsigned O = HexagonMCInstrInfo::getNewValueOp2(MCII, MCI); + MCOperand const &MCO = MCI.getOperand(O); + + assert((HexagonMCInstrInfo::isNewValue(MCII, MCI) || + HexagonMCInstrInfo::hasNewValue2(MCII, MCI)) && + MCO.isReg()); + return (MCO); +} + +int HexagonMCInstrInfo::getSubTarget(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + + HexagonII::SubTarget Target = static_cast<HexagonII::SubTarget>( + (F >> HexagonII::validSubTargetPos) & HexagonII::validSubTargetMask); + + switch (Target) { + default: + return Hexagon::ArchV4; + case HexagonII::HasV5SubT: + return Hexagon::ArchV5; + } +} + +// Return the Hexagon ISA class for the insn. +unsigned HexagonMCInstrInfo::getType(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + + return ((F >> HexagonII::TypePos) & HexagonII::TypeMask); +} + +unsigned HexagonMCInstrInfo::getUnits(MCInstrInfo const &MCII, + MCSubtargetInfo const &STI, + MCInst const &MCI) { + + const InstrItinerary *II = STI.getSchedModel().InstrItineraries; + int SchedClass = HexagonMCInstrInfo::getDesc(MCII, MCI).getSchedClass(); + return ((II[SchedClass].FirstStage + HexagonStages)->getUnits()); +} + +bool HexagonMCInstrInfo::hasImmExt(MCInst const &MCI) { + if (!HexagonMCInstrInfo::isBundle(MCI)) + return false; + + for (const auto &I : HexagonMCInstrInfo::bundleInstructions(MCI)) { + auto MI = I.getInst(); + if (isImmext(*MI)) + return true; + } + + return false; +} + +bool HexagonMCInstrInfo::hasExtenderForIndex(MCInst const &MCB, size_t Index) { + return extenderForIndex(MCB, Index) != nullptr; +} + +// Return whether the instruction is a legal new-value producer. 
+bool HexagonMCInstrInfo::hasNewValue(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::hasNewValuePos) & HexagonII::hasNewValueMask); +} + +/// Return whether the insn produces a second value. +bool HexagonMCInstrInfo::hasNewValue2(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::hasNewValuePos2) & HexagonII::hasNewValueMask2); +} + +MCInst const &HexagonMCInstrInfo::instruction(MCInst const &MCB, size_t Index) { + assert(isBundle(MCB)); + assert(Index < HEXAGON_PACKET_SIZE); + return *MCB.getOperand(bundleInstructionsOffset + Index).getInst(); +} + +bool HexagonMCInstrInfo::isBundle(MCInst const &MCI) { + auto Result = Hexagon::BUNDLE == MCI.getOpcode(); + assert(!Result || (MCI.size() > 0 && MCI.getOperand(0).isImm())); + return Result; +} + +// Return whether the insn is an actual insn. +bool HexagonMCInstrInfo::isCanon(MCInstrInfo const &MCII, MCInst const &MCI) { + return (!HexagonMCInstrInfo::getDesc(MCII, MCI).isPseudo() && + !HexagonMCInstrInfo::isPrefix(MCII, MCI) && + HexagonMCInstrInfo::getType(MCII, MCI) != HexagonII::TypeENDLOOP); +} + +bool HexagonMCInstrInfo::isCompound(MCInstrInfo const &MCII, + MCInst const &MCI) { + return (getType(MCII, MCI) == HexagonII::TypeCOMPOUND); +} + +bool HexagonMCInstrInfo::isDblRegForSubInst(unsigned Reg) { + return ((Reg >= Hexagon::D0 && Reg <= Hexagon::D3) || + (Reg >= Hexagon::D8 && Reg <= Hexagon::D11)); +} + +bool HexagonMCInstrInfo::isDuplex(MCInstrInfo const &MCII, MCInst const &MCI) { + return HexagonII::TypeDUPLEX == HexagonMCInstrInfo::getType(MCII, MCI); +} + +// Return whether the instruction needs to be constant extended. +// 1) Always return true if the instruction has 'isExtended' flag set. +// +// isExtendable: +// 2) For immediate extended operands, return true only if the value is +// out-of-range. +// 3) For global address, always return true. + +bool HexagonMCInstrInfo::isConstExtended(MCInstrInfo const &MCII, + MCInst const &MCI) { + if (HexagonMCInstrInfo::isExtended(MCII, MCI)) + return true; + // Branch insns are handled as necessary by relaxation. + if ((HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeJ) || + (HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCOMPOUND && + HexagonMCInstrInfo::getDesc(MCII, MCI).isBranch()) || + (HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeNV && + HexagonMCInstrInfo::getDesc(MCII, MCI).isBranch())) + return false; + // Otherwise loop instructions and other CR insts are handled by relaxation + else if ((HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCR) && + (MCI.getOpcode() != Hexagon::C4_addipc)) + return false; + else if (!HexagonMCInstrInfo::isExtendable(MCII, MCI)) + return false; + + MCOperand const &MO = HexagonMCInstrInfo::getExtendableOperand(MCII, MCI); + + // We could be using an instruction with an extendable immediate and shoehorn + // a global address into it. If it is a global address it will be constant + // extended. We do this for COMBINE. + // We currently only handle isGlobal() because it is the only kind of + // object we are going to end up with here for now. + // In the future we probably should add isSymbol(), etc. 
+ assert(!MO.isImm()); + int64_t Value; + if (!MO.getExpr()->evaluateAsAbsolute(Value)) + return true; + int MinValue = HexagonMCInstrInfo::getMinValue(MCII, MCI); + int MaxValue = HexagonMCInstrInfo::getMaxValue(MCII, MCI); + return (MinValue > Value || Value > MaxValue); +} + +bool HexagonMCInstrInfo::isExtendable(MCInstrInfo const &MCII, + MCInst const &MCI) { + uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return (F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask; +} + +bool HexagonMCInstrInfo::isExtended(MCInstrInfo const &MCII, + MCInst const &MCI) { + uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return (F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask; +} + +bool HexagonMCInstrInfo::isFloat(MCInstrInfo const &MCII, MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::FPPos) & HexagonII::FPMask); +} + +bool HexagonMCInstrInfo::isImmext(MCInst const &MCI) { + auto Op = MCI.getOpcode(); + return (Op == Hexagon::A4_ext_b || Op == Hexagon::A4_ext_c || + Op == Hexagon::A4_ext_g || Op == Hexagon::A4_ext); +} + +bool HexagonMCInstrInfo::isInnerLoop(MCInst const &MCI) { + assert(isBundle(MCI)); + int64_t Flags = MCI.getOperand(0).getImm(); + return (Flags & innerLoopMask) != 0; +} + +bool HexagonMCInstrInfo::isIntReg(unsigned Reg) { + return (Reg >= Hexagon::R0 && Reg <= Hexagon::R31); +} + +bool HexagonMCInstrInfo::isIntRegForSubInst(unsigned Reg) { + return ((Reg >= Hexagon::R0 && Reg <= Hexagon::R7) || + (Reg >= Hexagon::R16 && Reg <= Hexagon::R23)); +} + +// Return whether the insn is a new-value consumer. +bool HexagonMCInstrInfo::isNewValue(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::NewValuePos) & HexagonII::NewValueMask); +} + +// Return whether the operand can be constant extended. +bool HexagonMCInstrInfo::isOperandExtended(MCInstrInfo const &MCII, + MCInst const &MCI, + unsigned short OperandNum) { + uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask) == + OperandNum; +} + +bool HexagonMCInstrInfo::isOuterLoop(MCInst const &MCI) { + assert(isBundle(MCI)); + int64_t Flags = MCI.getOperand(0).getImm(); + return (Flags & outerLoopMask) != 0; +} + +bool HexagonMCInstrInfo::isPredicated(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask); +} + +bool HexagonMCInstrInfo::isPredicateLate(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return (F >> HexagonII::PredicateLatePos & HexagonII::PredicateLateMask); +} + +/// Return whether the insn is newly predicated. 
+bool HexagonMCInstrInfo::isPredicatedNew(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask); +} + +bool HexagonMCInstrInfo::isPredicatedTrue(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ( + !((F >> HexagonII::PredicatedFalsePos) & HexagonII::PredicatedFalseMask)); +} + +bool HexagonMCInstrInfo::isPredReg(unsigned Reg) { + return (Reg >= Hexagon::P0 && Reg <= Hexagon::P3_0); +} + +bool HexagonMCInstrInfo::isPrefix(MCInstrInfo const &MCII, MCInst const &MCI) { + return (HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypePREFIX); +} + +bool HexagonMCInstrInfo::isSolo(MCInstrInfo const &MCII, MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::SoloPos) & HexagonII::SoloMask); +} + +bool HexagonMCInstrInfo::isMemReorderDisabled(MCInst const &MCI) { + assert(isBundle(MCI)); + auto Flags = MCI.getOperand(0).getImm(); + return (Flags & memReorderDisabledMask) != 0; +} + +bool HexagonMCInstrInfo::isMemStoreReorderEnabled(MCInst const &MCI) { + assert(isBundle(MCI)); + auto Flags = MCI.getOperand(0).getImm(); + return (Flags & memStoreReorderEnabledMask) != 0; +} + +bool HexagonMCInstrInfo::isSoloAX(MCInstrInfo const &MCII, MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::SoloAXPos) & HexagonII::SoloAXMask); +} + +bool HexagonMCInstrInfo::isSoloAin1(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::SoloAin1Pos) & HexagonII::SoloAin1Mask); +} + +bool HexagonMCInstrInfo::isVector(MCInstrInfo const &MCII, MCInst const &MCI) { + if ((getType(MCII, MCI) <= HexagonII::TypeCVI_LAST) && + (getType(MCII, MCI) >= HexagonII::TypeCVI_FIRST)) + return true; + return false; +} + +int64_t HexagonMCInstrInfo::minConstant(MCInst const &MCI, size_t Index) { + auto Sentinal = static_cast<int64_t>(std::numeric_limits<uint32_t>::max()) + << 8; + if (MCI.size() <= Index) + return Sentinal; + MCOperand const &MCO = MCI.getOperand(Index); + if (!MCO.isExpr()) + return Sentinal; + int64_t Value; + if (!MCO.getExpr()->evaluateAsAbsolute(Value)) + return Sentinal; + return Value; +} + +void HexagonMCInstrInfo::padEndloop(MCContext &Context, MCInst &MCB) { + MCInst Nop; + Nop.setOpcode(Hexagon::A2_nop); + assert(isBundle(MCB)); + while ((HexagonMCInstrInfo::isInnerLoop(MCB) && + (HexagonMCInstrInfo::bundleSize(MCB) < HEXAGON_PACKET_INNER_SIZE)) || + ((HexagonMCInstrInfo::isOuterLoop(MCB) && + (HexagonMCInstrInfo::bundleSize(MCB) < HEXAGON_PACKET_OUTER_SIZE)))) + MCB.addOperand(MCOperand::createInst(new (Context) MCInst(Nop))); +} + +bool HexagonMCInstrInfo::prefersSlot3(MCInstrInfo const &MCII, + MCInst const &MCI) { + if (HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCR) + return false; + + unsigned SchedClass = HexagonMCInstrInfo::getDesc(MCII, MCI).getSchedClass(); + switch (SchedClass) { + case Hexagon::Sched::ALU32_3op_tc_2_SLOT0123: + case Hexagon::Sched::ALU64_tc_2_SLOT23: + case Hexagon::Sched::ALU64_tc_3x_SLOT23: + case Hexagon::Sched::M_tc_2_SLOT23: + case Hexagon::Sched::M_tc_3x_SLOT23: + case Hexagon::Sched::S_2op_tc_2_SLOT23: + case Hexagon::Sched::S_3op_tc_2_SLOT23: + case Hexagon::Sched::S_3op_tc_3x_SLOT23: + return true; + } + return false; +} + +void 
HexagonMCInstrInfo::replaceDuplex(MCContext &Context, MCInst &MCB, + DuplexCandidate Candidate) { + assert(Candidate.packetIndexI < MCB.size()); + assert(Candidate.packetIndexJ < MCB.size()); + assert(isBundle(MCB)); + MCInst *Duplex = + deriveDuplex(Context, Candidate.iClass, + *MCB.getOperand(Candidate.packetIndexJ).getInst(), + *MCB.getOperand(Candidate.packetIndexI).getInst()); + assert(Duplex != nullptr); + MCB.getOperand(Candidate.packetIndexI).setInst(Duplex); + MCB.erase(MCB.begin() + Candidate.packetIndexJ); +} + +void HexagonMCInstrInfo::setInnerLoop(MCInst &MCI) { + assert(isBundle(MCI)); + MCOperand &Operand = MCI.getOperand(0); + Operand.setImm(Operand.getImm() | innerLoopMask); +} + +void HexagonMCInstrInfo::setMemReorderDisabled(MCInst &MCI) { + assert(isBundle(MCI)); + MCOperand &Operand = MCI.getOperand(0); + Operand.setImm(Operand.getImm() | memReorderDisabledMask); + assert(isMemReorderDisabled(MCI)); +} + +void HexagonMCInstrInfo::setMemStoreReorderEnabled(MCInst &MCI) { + assert(isBundle(MCI)); + MCOperand &Operand = MCI.getOperand(0); + Operand.setImm(Operand.getImm() | memStoreReorderEnabledMask); + assert(isMemStoreReorderEnabled(MCI)); +} + +void HexagonMCInstrInfo::setOuterLoop(MCInst &MCI) { + assert(isBundle(MCI)); + MCOperand &Operand = MCI.getOperand(0); + Operand.setImm(Operand.getImm() | outerLoopMask); +} +} diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h new file mode 100644 index 0000000..0237b28 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h @@ -0,0 +1,289 @@ +//===- HexagonMCInstrInfo.cpp - Utility functions on Hexagon MCInsts ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// Utility functions for Hexagon specific MCInst queries +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCINSTRINFO_H +#define LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCINSTRINFO_H + +#include "HexagonMCExpr.h" +#include "llvm/MC/MCInst.h" + +namespace llvm { +class HexagonMCChecker; +class MCContext; +class MCInstrDesc; +class MCInstrInfo; +class MCInst; +class MCOperand; +class MCSubtargetInfo; +namespace HexagonII { +enum class MemAccessSize; +} +class DuplexCandidate { +public: + unsigned packetIndexI, packetIndexJ, iClass; + DuplexCandidate(unsigned i, unsigned j, unsigned iClass) + : packetIndexI(i), packetIndexJ(j), iClass(iClass) {} +}; +namespace HexagonMCInstrInfo { +size_t const innerLoopOffset = 0; +int64_t const innerLoopMask = 1 << innerLoopOffset; + +size_t const outerLoopOffset = 1; +int64_t const outerLoopMask = 1 << outerLoopOffset; + +// do not reorder memory load/stores by default load/stores are re-ordered +// and by default loads can be re-ordered +size_t const memReorderDisabledOffset = 2; +int64_t const memReorderDisabledMask = 1 << memReorderDisabledOffset; + +// allow re-ordering of memory stores by default stores cannot be re-ordered +size_t const memStoreReorderEnabledOffset = 3; +int64_t const memStoreReorderEnabledMask = 1 << memStoreReorderEnabledOffset; + +size_t const bundleInstructionsOffset = 1; + +void addConstant(MCInst &MI, uint64_t Value, MCContext &Context); +void addConstExtender(MCContext &Context, MCInstrInfo const &MCII, MCInst &MCB, + MCInst const &MCI); + +// Returns a iterator range of instructions in this bundle +iterator_range<MCInst::const_iterator> bundleInstructions(MCInst const &MCI); + +// Returns the number of instructions in the bundle +size_t bundleSize(MCInst const &MCI); + +// Put the packet in to canonical form, compound, duplex, pad, and shuffle +bool canonicalizePacket(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, + MCContext &Context, MCInst &MCB, + HexagonMCChecker *Checker); + +// Clamp off upper 26 bits of extendable operand for emission +void clampExtended(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI); + +MCInst createBundle(); + +// Return the extender for instruction at Index or nullptr if none +MCInst const *extenderForIndex(MCInst const &MCB, size_t Index); +void extendIfNeeded(MCContext &Context, MCInstrInfo const &MCII, MCInst &MCB, + MCInst const &MCI, bool MustExtend); + +// Create a duplex instruction given the two subinsts +MCInst *deriveDuplex(MCContext &Context, unsigned iClass, MCInst const &inst0, + MCInst const &inst1); +MCInst deriveExtender(MCInstrInfo const &MCII, MCInst const &Inst, + MCOperand const &MO); + +// Convert this instruction in to a duplex subinst +MCInst deriveSubInst(MCInst const &Inst); + +// Return the extender for instruction at Index or nullptr if none +MCInst const *extenderForIndex(MCInst const &MCB, size_t Index); + +// Return memory access size +HexagonII::MemAccessSize getAccessSize(MCInstrInfo const &MCII, + MCInst const &MCI); + +// Return number of bits in the constant extended operand. +unsigned getBitCount(MCInstrInfo const &MCII, MCInst const &MCI); + +// Return constant extended operand number. 
+unsigned short getCExtOpNum(MCInstrInfo const &MCII, MCInst const &MCI); + +MCInstrDesc const &getDesc(MCInstrInfo const &MCII, MCInst const &MCI); + +// Return which duplex group this instruction belongs to +unsigned getDuplexCandidateGroup(MCInst const &MI); + +// Return a list of all possible instruction duplex combinations +SmallVector<DuplexCandidate, 8> getDuplexPossibilties(MCInstrInfo const &MCII, + MCInst const &MCB); + +// Return the index of the extendable operand +unsigned short getExtendableOp(MCInstrInfo const &MCII, MCInst const &MCI); + +// Return a reference to the extendable operand +MCOperand const &getExtendableOperand(MCInstrInfo const &MCII, + MCInst const &MCI); + +// Return the implicit alignment of the extendable operand +unsigned getExtentAlignment(MCInstrInfo const &MCII, MCInst const &MCI); + +// Return the number of logical bits of the extendable operand +unsigned getExtentBits(MCInstrInfo const &MCII, MCInst const &MCI); + +// Return the max value that a constant extendable operand can have +// without being extended. +int getMaxValue(MCInstrInfo const &MCII, MCInst const &MCI); + +// Return the min value that a constant extendable operand can have +// without being extended. +int getMinValue(MCInstrInfo const &MCII, MCInst const &MCI); + +// Return instruction name +char const *getName(MCInstrInfo const &MCII, MCInst const &MCI); + +// Return the operand index for the new value. +unsigned short getNewValueOp(MCInstrInfo const &MCII, MCInst const &MCI); + +// Return the operand that consumes or produces a new value. +MCOperand const &getNewValueOperand(MCInstrInfo const &MCII, MCInst const &MCI); +unsigned short getNewValueOp2(MCInstrInfo const &MCII, MCInst const &MCI); +MCOperand const &getNewValueOperand2(MCInstrInfo const &MCII, + MCInst const &MCI); + +int getSubTarget(MCInstrInfo const &MCII, MCInst const &MCI); + +// Return the Hexagon ISA class for the insn. +unsigned getType(MCInstrInfo const &MCII, MCInst const &MCI); + +/// Return the slots used by the insn. +unsigned getUnits(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, + MCInst const &MCI); + +// Does the packet have an extender for the instruction at Index +bool hasExtenderForIndex(MCInst const &MCB, size_t Index); + +bool hasImmExt(MCInst const &MCI); + +// Return whether the instruction is a legal new-value producer. +bool hasNewValue(MCInstrInfo const &MCII, MCInst const &MCI); +bool hasNewValue2(MCInstrInfo const &MCII, MCInst const &MCI); + +// Return the instruction at Index +MCInst const &instruction(MCInst const &MCB, size_t Index); + +// Returns whether this MCInst is a wellformed bundle +bool isBundle(MCInst const &MCI); + +// Return whether the insn is an actual insn. +bool isCanon(MCInstrInfo const &MCII, MCInst const &MCI); +bool isCompound(MCInstrInfo const &MCII, MCInst const &MCI); + +// Return the duplex iclass given the two duplex classes +unsigned iClassOfDuplexPair(unsigned Ga, unsigned Gb); + +int64_t minConstant(MCInst const &MCI, size_t Index); +template <unsigned N, unsigned S> +bool inRange(MCInst const &MCI, size_t Index) { + return isShiftedUInt<N, S>(minConstant(MCI, Index)); +} +template <unsigned N, unsigned S> +bool inSRange(MCInst const &MCI, size_t Index) { + return isShiftedInt<N, S>(minConstant(MCI, Index)); +} +template <unsigned N> bool inRange(MCInst const &MCI, size_t Index) { + return isUInt<N>(minConstant(MCI, Index)); +} + +// Return whether the instruction needs to be constant extended. 
+bool isConstExtended(MCInstrInfo const &MCII, MCInst const &MCI); + +// Is this double register suitable for use in a duplex subinst +bool isDblRegForSubInst(unsigned Reg); + +// Is this a duplex instruction +bool isDuplex(MCInstrInfo const &MCII, MCInst const &MCI); + +// Can these instructions be duplexed +bool isDuplexPair(MCInst const &MIa, MCInst const &MIb); + +// Can these duplex classes be combine in to a duplex instruction +bool isDuplexPairMatch(unsigned Ga, unsigned Gb); + +// Return true if the insn may be extended based on the operand value. +bool isExtendable(MCInstrInfo const &MCII, MCInst const &MCI); + +// Return whether the instruction must be always extended. +bool isExtended(MCInstrInfo const &MCII, MCInst const &MCI); + +/// Return whether it is a floating-point insn. +bool isFloat(MCInstrInfo const &MCII, MCInst const &MCI); + +// Returns whether this instruction is an immediate extender +bool isImmext(MCInst const &MCI); + +// Returns whether this bundle is an endloop0 +bool isInnerLoop(MCInst const &MCI); + +// Is this an integer register +bool isIntReg(unsigned Reg); + +// Is this register suitable for use in a duplex subinst +bool isIntRegForSubInst(unsigned Reg); +bool isMemReorderDisabled(MCInst const &MCI); +bool isMemStoreReorderEnabled(MCInst const &MCI); + +// Return whether the insn is a new-value consumer. +bool isNewValue(MCInstrInfo const &MCII, MCInst const &MCI); + +// Return true if the operand can be constant extended. +bool isOperandExtended(MCInstrInfo const &MCII, MCInst const &MCI, + unsigned short OperandNum); + +// Can these two instructions be duplexed +bool isOrderedDuplexPair(MCInstrInfo const &MCII, MCInst const &MIa, + bool ExtendedA, MCInst const &MIb, bool ExtendedB, + bool bisReversable); + +// Returns whether this bundle is an endloop1 +bool isOuterLoop(MCInst const &MCI); + +// Return whether this instruction is predicated +bool isPredicated(MCInstrInfo const &MCII, MCInst const &MCI); +bool isPredicateLate(MCInstrInfo const &MCII, MCInst const &MCI); +bool isPredicatedNew(MCInstrInfo const &MCII, MCInst const &MCI); + +// Return whether the predicate sense is true +bool isPredicatedTrue(MCInstrInfo const &MCII, MCInst const &MCI); + +// Is this a predicate register +bool isPredReg(unsigned Reg); + +// Return whether the insn is a prefix. +bool isPrefix(MCInstrInfo const &MCII, MCInst const &MCI); + +// Return whether the insn is solo, i.e., cannot be in a packet. +bool isSolo(MCInstrInfo const &MCII, MCInst const &MCI); + +/// Return whether the insn can be packaged only with A and X-type insns. +bool isSoloAX(MCInstrInfo const &MCII, MCInst const &MCI); + +/// Return whether the insn can be packaged only with an A-type insn in slot #1. 
+bool isSoloAin1(MCInstrInfo const &MCII, MCInst const &MCI); +bool isVector(MCInstrInfo const &MCII, MCInst const &MCI); + +// Pad the bundle with nops to satisfy endloop requirements +void padEndloop(MCContext &Context, MCInst &MCI); + +bool prefersSlot3(MCInstrInfo const &MCII, MCInst const &MCI); + +// Replace the instructions inside MCB, represented by Candidate +void replaceDuplex(MCContext &Context, MCInst &MCB, DuplexCandidate Candidate); + +// Marks a bundle as endloop0 +void setInnerLoop(MCInst &MCI); +void setMemReorderDisabled(MCInst &MCI); +void setMemStoreReorderEnabled(MCInst &MCI); + +// Marks a bundle as endloop1 +void setOuterLoop(MCInst &MCI); + +// Would duplexing this instruction create a requirement to extend +bool subInstWouldBeExtended(MCInst const &potentialDuplex); + +// Attempt to find and replace compound pairs +void tryCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI); +} +} + +#endif // LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCINSTRINFO_H diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp new file mode 100644 index 0000000..8e70280 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp @@ -0,0 +1,237 @@ +//===----- HexagonMCShuffler.cpp - MC bundle shuffling --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the shuffling of insns inside a bundle according to the +// packet formation rules of the Hexagon ISA. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "hexagon-shuffle" + +#include "Hexagon.h" +#include "MCTargetDesc/HexagonMCInstrInfo.h" +#include "MCTargetDesc/HexagonMCShuffler.h" +#include "MCTargetDesc/HexagonMCTargetDesc.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +static cl::opt<bool> + DisableShuffle("disable-hexagon-shuffle", cl::Hidden, cl::init(false), + cl::desc("Disable Hexagon instruction shuffling")); + +void HexagonMCShuffler::init(MCInst &MCB) { + if (HexagonMCInstrInfo::isBundle(MCB)) { + MCInst const *Extender = nullptr; + // Copy the bundle for the shuffling. + for (const auto &I : HexagonMCInstrInfo::bundleInstructions(MCB)) { + assert(!HexagonMCInstrInfo::getDesc(MCII, *I.getInst()).isPseudo()); + MCInst *MI = const_cast<MCInst *>(I.getInst()); + + if (!HexagonMCInstrInfo::isImmext(*MI)) { + append(MI, Extender, HexagonMCInstrInfo::getUnits(MCII, STI, *MI), + false); + Extender = nullptr; + } else + Extender = MI; + } + } + + BundleFlags = MCB.getOperand(0).getImm(); +} + +void HexagonMCShuffler::init(MCInst &MCB, MCInst const *AddMI, + bool bInsertAtFront) { + if (HexagonMCInstrInfo::isBundle(MCB)) { + if (bInsertAtFront && AddMI) + append(AddMI, nullptr, HexagonMCInstrInfo::getUnits(MCII, STI, *AddMI), + false); + MCInst const *Extender = nullptr; + // Copy the bundle for the shuffling. 
+ for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCB)) { + assert(!HexagonMCInstrInfo::getDesc(MCII, *I.getInst()).isPseudo()); + MCInst *MI = const_cast<MCInst *>(I.getInst()); + if (!HexagonMCInstrInfo::isImmext(*MI)) { + append(MI, Extender, HexagonMCInstrInfo::getUnits(MCII, STI, *MI), + false); + Extender = nullptr; + } else + Extender = MI; + } + if (!bInsertAtFront && AddMI) + append(AddMI, nullptr, HexagonMCInstrInfo::getUnits(MCII, STI, *AddMI), + false); + } + + BundleFlags = MCB.getOperand(0).getImm(); +} + +void HexagonMCShuffler::copyTo(MCInst &MCB) { + MCB.clear(); + MCB.addOperand(MCOperand::createImm(BundleFlags)); + // Copy the results into the bundle. + for (HexagonShuffler::iterator I = begin(); I != end(); ++I) { + + MCInst const *MI = I->getDesc(); + MCInst const *Extender = I->getExtender(); + if (Extender) + MCB.addOperand(MCOperand::createInst(Extender)); + MCB.addOperand(MCOperand::createInst(MI)); + } +} + +bool HexagonMCShuffler::reshuffleTo(MCInst &MCB) { + if (shuffle()) { + // Copy the results into the bundle. + copyTo(MCB); + } else + DEBUG(MCB.dump()); + + return (!getError()); +} + +bool llvm::HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, + MCInst &MCB) { + HexagonMCShuffler MCS(MCII, STI, MCB); + + if (DisableShuffle) + // Ignore if user chose so. + return false; + + if (!HexagonMCInstrInfo::bundleSize(MCB)) { + // There once was a bundle: + // BUNDLE %D2<imp-def>, %R4<imp-def>, %R5<imp-def>, %D7<imp-def>, ... + // * %D2<def> = IMPLICIT_DEF; flags: + // * %D7<def> = IMPLICIT_DEF; flags: + // After the IMPLICIT_DEFs were removed by the asm printer, the bundle + // became empty. + DEBUG(dbgs() << "Skipping empty bundle"); + return false; + } else if (!HexagonMCInstrInfo::isBundle(MCB)) { + DEBUG(dbgs() << "Skipping stand-alone insn"); + return false; + } + + // Reorder the bundle and copy the result. + if (!MCS.reshuffleTo(MCB)) { + // Unless there is any error, which should not happen at this point. + unsigned shuffleError = MCS.getError(); + switch (shuffleError) { + default: + llvm_unreachable("unknown error"); + case HexagonShuffler::SHUFFLE_ERROR_INVALID: + llvm_unreachable("invalid packet"); + case HexagonShuffler::SHUFFLE_ERROR_STORES: + llvm_unreachable("too many stores"); + case HexagonShuffler::SHUFFLE_ERROR_LOADS: + llvm_unreachable("too many loads"); + case HexagonShuffler::SHUFFLE_ERROR_BRANCHES: + llvm_unreachable("too many branches"); + case HexagonShuffler::SHUFFLE_ERROR_NOSLOTS: + llvm_unreachable("no suitable slot"); + case HexagonShuffler::SHUFFLE_ERROR_SLOTS: + llvm_unreachable("over-subscribed slots"); + case HexagonShuffler::SHUFFLE_SUCCESS: // Single instruction case. + return true; + } + } + + return true; +} + +unsigned +llvm::HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, + MCContext &Context, MCInst &MCB, + SmallVector<DuplexCandidate, 8> possibleDuplexes) { + + if (DisableShuffle) + return HexagonShuffler::SHUFFLE_SUCCESS; + + if (!HexagonMCInstrInfo::bundleSize(MCB)) { + // There once was a bundle: + // BUNDLE %D2<imp-def>, %R4<imp-def>, %R5<imp-def>, %D7<imp-def>, ... + // * %D2<def> = IMPLICIT_DEF; flags: + // * %D7<def> = IMPLICIT_DEF; flags: + // After the IMPLICIT_DEFs were removed by the asm printer, the bundle + // became empty. 
+ DEBUG(dbgs() << "Skipping empty bundle"); + return HexagonShuffler::SHUFFLE_SUCCESS; + } else if (!HexagonMCInstrInfo::isBundle(MCB)) { + DEBUG(dbgs() << "Skipping stand-alone insn"); + return HexagonShuffler::SHUFFLE_SUCCESS; + } + + bool doneShuffling = false; + unsigned shuffleError; + while (possibleDuplexes.size() > 0 && (!doneShuffling)) { + // case of Duplex Found + DuplexCandidate duplexToTry = possibleDuplexes.pop_back_val(); + MCInst Attempt(MCB); + HexagonMCInstrInfo::replaceDuplex(Context, Attempt, duplexToTry); + HexagonMCShuffler MCS(MCII, STI, Attempt); // copy packet to the shuffler + if (MCS.size() == 1) { // case of one duplex + // copy the created duplex in the shuffler to the bundle + MCS.copyTo(MCB); + doneShuffling = true; + return HexagonShuffler::SHUFFLE_SUCCESS; + } + // try shuffle with this duplex + doneShuffling = MCS.reshuffleTo(MCB); + shuffleError = MCS.getError(); + + if (doneShuffling) + break; + } + + if (doneShuffling == false) { + HexagonMCShuffler MCS(MCII, STI, MCB); + doneShuffling = MCS.reshuffleTo(MCB); // shuffle + shuffleError = MCS.getError(); + } + if (!doneShuffling) + return shuffleError; + + return HexagonShuffler::SHUFFLE_SUCCESS; +} + +bool llvm::HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, + MCInst &MCB, MCInst const *AddMI, int fixupCount) { + if (!HexagonMCInstrInfo::isBundle(MCB) || !AddMI) + return false; + + // if fixups present, make sure we don't insert too many nops that would + // later prevent an extender from being inserted. + unsigned int bundleSize = HexagonMCInstrInfo::bundleSize(MCB); + if (bundleSize >= HEXAGON_PACKET_SIZE) + return false; + if (fixupCount >= 2) { + return false; + } else { + if (bundleSize == HEXAGON_PACKET_SIZE - 1 && fixupCount) + return false; + } + + if (DisableShuffle) + return false; + + HexagonMCShuffler MCS(MCII, STI, MCB, AddMI); + if (!MCS.reshuffleTo(MCB)) { + unsigned shuffleError = MCS.getError(); + switch (shuffleError) { + default: + return false; + case HexagonShuffler::SHUFFLE_SUCCESS: // single instruction case + return true; + } + } + + return true; +} diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h new file mode 100644 index 0000000..a21cce1 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h @@ -0,0 +1,65 @@ +//=-- HexagonMCShuffler.h ---------------------------------------------------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This declares the shuffling of insns inside a bundle according to the +// packet formation rules of the Hexagon ISA. +// +//===----------------------------------------------------------------------===// + +#ifndef HEXAGONMCSHUFFLER_H +#define HEXAGONMCSHUFFLER_H + +#include "MCTargetDesc/HexagonShuffler.h" + +namespace llvm { + +class MCInst; + +// Insn bundle shuffler. 
+class HexagonMCShuffler : public HexagonShuffler { + bool immext_present; + bool duplex_present; + +public: + HexagonMCShuffler(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, + MCInst &MCB) + : HexagonShuffler(MCII, STI) { + init(MCB); + }; + HexagonMCShuffler(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, + MCInst &MCB, const MCInst *AddMI, + bool bInsertAtFront = false) + : HexagonShuffler(MCII, STI) { + init(MCB, AddMI, bInsertAtFront); + }; + + // Copy reordered bundle to another. + void copyTo(MCInst &MCB); + // Reorder and copy result to another. + bool reshuffleTo(MCInst &MCB); + + bool immextPresent() const { return immext_present; }; + bool duplexPresent() const { return duplex_present; }; + +private: + void init(MCInst &MCB); + void init(MCInst &MCB, const MCInst *AddMI, bool bInsertAtFront = false); +}; + +// Invocation of the shuffler. +bool HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, + MCInst &); +bool HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, + MCInst &, const MCInst *, int); +unsigned HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, + MCContext &Context, MCInst &, + SmallVector<DuplexCandidate, 8>); +} + +#endif // HEXAGONMCSHUFFLER_H diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp new file mode 100644 index 0000000..9a29257 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -0,0 +1,244 @@ +//===-- HexagonMCTargetDesc.cpp - Hexagon Target Descriptions -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides Hexagon specific target descriptions. 
+// +//===----------------------------------------------------------------------===// + +#include "HexagonMCTargetDesc.h" +#include "Hexagon.h" +#include "HexagonMCAsmInfo.h" +#include "HexagonMCELFStreamer.h" +#include "MCTargetDesc/HexagonInstPrinter.h" +#include "llvm/MC/MCCodeGenInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCELFStreamer.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCObjectStreamer.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" + +using namespace llvm; + +#define GET_INSTRINFO_MC_DESC +#include "HexagonGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_MC_DESC +#include "HexagonGenSubtargetInfo.inc" + +#define GET_REGINFO_MC_DESC +#include "HexagonGenRegisterInfo.inc" + +cl::opt<bool> llvm::HexagonDisableCompound + ("mno-compound", + cl::desc("Disable looking for compound instructions for Hexagon")); + +cl::opt<bool> llvm::HexagonDisableDuplex + ("mno-pairing", + cl::desc("Disable looking for duplex instructions for Hexagon")); + +StringRef HEXAGON_MC::selectHexagonCPU(const Triple &TT, StringRef CPU) { + if (CPU.empty()) + CPU = "hexagonv60"; + return CPU; +} + +MCInstrInfo *llvm::createHexagonMCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitHexagonMCInstrInfo(X); + return X; +} + +static MCRegisterInfo *createHexagonMCRegisterInfo(const Triple &TT) { + MCRegisterInfo *X = new MCRegisterInfo(); + InitHexagonMCRegisterInfo(X, Hexagon::R0); + return X; +} + +static MCSubtargetInfo * +createHexagonMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { + CPU = HEXAGON_MC::selectHexagonCPU(TT, CPU); + return createHexagonMCSubtargetInfoImpl(TT, CPU, FS); +} + +namespace { +class HexagonTargetAsmStreamer : public HexagonTargetStreamer { +public: + HexagonTargetAsmStreamer(MCStreamer &S, + formatted_raw_ostream &, bool, + MCInstPrinter &) + : HexagonTargetStreamer(S) {} + void prettyPrintAsm(MCInstPrinter &InstPrinter, raw_ostream &OS, + const MCInst &Inst, const MCSubtargetInfo &STI) override { + assert(HexagonMCInstrInfo::isBundle(Inst)); + assert(HexagonMCInstrInfo::bundleSize(Inst) <= HEXAGON_PACKET_SIZE); + std::string Buffer; + { + raw_string_ostream TempStream(Buffer); + InstPrinter.printInst(&Inst, TempStream, "", STI); + } + StringRef Contents(Buffer); + auto PacketBundle = Contents.rsplit('\n'); + auto HeadTail = PacketBundle.first.split('\n'); + StringRef Separator = "\n"; + StringRef Indent = "\t\t"; + OS << "\t{\n"; + while (!HeadTail.first.empty()) { + StringRef InstTxt; + auto Duplex = HeadTail.first.split('\v'); + if (!Duplex.second.empty()) { + OS << Indent << Duplex.first << Separator; + InstTxt = Duplex.second; + } else if (!HeadTail.first.trim().startswith("immext")) { + InstTxt = Duplex.first; + } + if (!InstTxt.empty()) + OS << Indent << InstTxt << Separator; + HeadTail = HeadTail.second.split('\n'); + } + OS << "\t}" << PacketBundle.second; + } +}; +} + +namespace { +class HexagonTargetELFStreamer : public HexagonTargetStreamer { +public: + MCELFStreamer &getStreamer() { + return static_cast<MCELFStreamer &>(Streamer); + } + HexagonTargetELFStreamer(MCStreamer &S, MCSubtargetInfo const &STI) + : HexagonTargetStreamer(S) { + auto Bits = STI.getFeatureBits(); + unsigned Flags; + if (Bits.to_ullong() & llvm::Hexagon::ArchV5) + Flags = ELF::EF_HEXAGON_MACH_V5; + else + Flags = ELF::EF_HEXAGON_MACH_V4; + 
getStreamer().getAssembler().setELFHeaderEFlags(Flags); + } + void EmitCommonSymbolSorted(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment, + unsigned AccessSize) override { + HexagonMCELFStreamer &HexagonELFStreamer = + static_cast<HexagonMCELFStreamer &>(getStreamer()); + HexagonELFStreamer.HexagonMCEmitCommonSymbol(Symbol, Size, ByteAlignment, + AccessSize); + } + void EmitLocalCommonSymbolSorted(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment, + unsigned AccessSize) override { + HexagonMCELFStreamer &HexagonELFStreamer = + static_cast<HexagonMCELFStreamer &>(getStreamer()); + HexagonELFStreamer.HexagonMCEmitLocalCommonSymbol( + Symbol, Size, ByteAlignment, AccessSize); + } +}; +} + +static MCAsmInfo *createHexagonMCAsmInfo(const MCRegisterInfo &MRI, + const Triple &TT) { + MCAsmInfo *MAI = new HexagonMCAsmInfo(TT); + + // VirtualFP = (R30 + #0). + MCCFIInstruction Inst = + MCCFIInstruction::createDefCfa(nullptr, Hexagon::R30, 0); + MAI->addInitialFrameState(Inst); + + return MAI; +} + +static MCCodeGenInfo *createHexagonMCCodeGenInfo(const Triple &TT, + Reloc::Model RM, + CodeModel::Model CM, + CodeGenOpt::Level OL) { + MCCodeGenInfo *X = new MCCodeGenInfo(); + if (RM == Reloc::Default) + RM = Reloc::Static; + X->initMCCodeGenInfo(RM, CM, OL); + return X; +} + +static MCInstPrinter *createHexagonMCInstPrinter(const Triple &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI) { + if (SyntaxVariant == 0) + return (new HexagonInstPrinter(MAI, MII, MRI)); + else + return nullptr; +} + +static MCTargetStreamer *createMCAsmTargetStreamer(MCStreamer &S, + formatted_raw_ostream &OS, + MCInstPrinter *InstPrint, + bool IsVerboseAsm) { + return new HexagonTargetAsmStreamer(S, OS, IsVerboseAsm, *InstPrint); +} + +static MCStreamer *createMCStreamer(Triple const &T, MCContext &Context, + MCAsmBackend &MAB, raw_pwrite_stream &OS, + MCCodeEmitter *Emitter, bool RelaxAll) { + return createHexagonELFStreamer(Context, MAB, OS, Emitter); +} + +static MCTargetStreamer * +createHexagonObjectTargetStreamer(MCStreamer &S, MCSubtargetInfo const &STI) { + return new HexagonTargetELFStreamer(S, STI); +} + +// Force static initialization. +extern "C" void LLVMInitializeHexagonTargetMC() { + // Register the MC asm info. + RegisterMCAsmInfoFn X(TheHexagonTarget, createHexagonMCAsmInfo); + + // Register the MC codegen info. + TargetRegistry::RegisterMCCodeGenInfo(TheHexagonTarget, + createHexagonMCCodeGenInfo); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(TheHexagonTarget, + createHexagonMCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(TheHexagonTarget, + createHexagonMCRegisterInfo); + + // Register the MC subtarget info. 
+ TargetRegistry::RegisterMCSubtargetInfo(TheHexagonTarget, + createHexagonMCSubtargetInfo); + + // Register the MC Code Emitter + TargetRegistry::RegisterMCCodeEmitter(TheHexagonTarget, + createHexagonMCCodeEmitter); + + // Register the asm backend + TargetRegistry::RegisterMCAsmBackend(TheHexagonTarget, + createHexagonAsmBackend); + + // Register the obj streamer + TargetRegistry::RegisterELFStreamer(TheHexagonTarget, createMCStreamer); + + // Register the asm streamer + TargetRegistry::RegisterAsmTargetStreamer(TheHexagonTarget, + createMCAsmTargetStreamer); + + // Register the MC Inst Printer + TargetRegistry::RegisterMCInstPrinter(TheHexagonTarget, + createHexagonMCInstPrinter); + + TargetRegistry::RegisterObjectTargetStreamer( + TheHexagonTarget, createHexagonObjectTargetStreamer); +} diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h new file mode 100644 index 0000000..a005a01 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h @@ -0,0 +1,75 @@ +//===-- HexagonMCTargetDesc.h - Hexagon Target Descriptions -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides Hexagon specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCTARGETDESC_H +#define LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCTARGETDESC_H + +#include <cstdint> + +#include "llvm/Support/CommandLine.h" + +namespace llvm { +struct InstrItinerary; +struct InstrStage; +class MCAsmBackend; +class MCCodeEmitter; +class MCContext; +class MCInstrInfo; +class MCObjectWriter; +class MCRegisterInfo; +class MCSubtargetInfo; +class Target; +class Triple; +class StringRef; +class raw_ostream; +class raw_pwrite_stream; + +extern Target TheHexagonTarget; +extern cl::opt<bool> HexagonDisableCompound; +extern cl::opt<bool> HexagonDisableDuplex; +extern const InstrStage HexagonStages[]; + +MCInstrInfo *createHexagonMCInstrInfo(); + +MCCodeEmitter *createHexagonMCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + MCContext &MCT); + +MCAsmBackend *createHexagonAsmBackend(const Target &T, + const MCRegisterInfo &MRI, + const Triple &TT, StringRef CPU); + +MCObjectWriter *createHexagonELFObjectWriter(raw_pwrite_stream &OS, + uint8_t OSABI, StringRef CPU); + +namespace HEXAGON_MC { + StringRef selectHexagonCPU(const Triple &TT, StringRef CPU); +} + +} // End llvm namespace + +// Define symbolic names for Hexagon registers. This defines a mapping from +// register name to register number. +// +#define GET_REGINFO_ENUM +#include "HexagonGenRegisterInfo.inc" + +// Defines symbolic names for the Hexagon instructions. 
+// +#define GET_INSTRINFO_ENUM +#include "HexagonGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_ENUM +#include "HexagonGenSubtargetInfo.inc" + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp new file mode 100644 index 0000000..6ceb848 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp @@ -0,0 +1,470 @@ +//===----- HexagonShuffler.cpp - Instruction bundle shuffling -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the shuffling of insns inside a bundle according to the +// packet formation rules of the Hexagon ISA. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "hexagon-shuffle" + +#include <algorithm> +#include <utility> +#include "Hexagon.h" +#include "MCTargetDesc/HexagonBaseInfo.h" +#include "MCTargetDesc/HexagonMCTargetDesc.h" +#include "MCTargetDesc/HexagonMCInstrInfo.h" +#include "HexagonShuffler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { +// Insn shuffling priority. +class HexagonBid { + // The priority is directly proportional to how restricted the insn is based + // on its flexibility to run on the available slots. So, the fewer slots it + // may run on, the higher its priority. + enum { MAX = 360360 }; // LCD of 1/2, 1/3, 1/4,... 1/15. + unsigned Bid; + +public: + HexagonBid() : Bid(0){}; + HexagonBid(unsigned B) { Bid = B ? MAX / countPopulation(B) : 0; }; + + // Check if the insn priority is overflowed. + bool isSold() const { return (Bid >= MAX); }; + + HexagonBid &operator+=(const HexagonBid &B) { + Bid += B.Bid; + return *this; + }; +}; + +// Slot shuffling allocation. +class HexagonUnitAuction { + HexagonBid Scores[HEXAGON_PACKET_SIZE]; + // Mask indicating which slot is unavailable. + unsigned isSold : HEXAGON_PACKET_SIZE; + +public: + HexagonUnitAuction() : isSold(0){}; + + // Allocate slots. + bool bid(unsigned B) { + // Exclude already auctioned slots from the bid. + unsigned b = B & ~isSold; + if (b) { + for (unsigned i = 0; i < HEXAGON_PACKET_SIZE; ++i) + if (b & (1 << i)) { + // Request candidate slots. + Scores[i] += HexagonBid(b); + isSold |= Scores[i].isSold() << i; + } + return true; + ; + } else + // Error if the desired slots are already full. + return false; + }; +}; +} // end anonymous namespace + +unsigned HexagonResource::setWeight(unsigned s) { + const unsigned SlotWeight = 8; + const unsigned MaskWeight = SlotWeight - 1; + bool Key = (1 << s) & getUnits(); + + // TODO: Improve this API so that we can prevent misuse statically. + assert(SlotWeight * s < 32 && "Argument to setWeight too large."); + + // Calculate relative weight of the insn for the given slot, weighing it the + // heavier the more restrictive the insn is and the lowest the slots that the + // insn may be executed in. 
+ Weight = + (Key << (SlotWeight * s)) * ((MaskWeight - countPopulation(getUnits())) + << countTrailingZeros(getUnits())); + return (Weight); +} + +HexagonCVIResource::TypeUnitsAndLanes *HexagonCVIResource::TUL; + +bool HexagonCVIResource::SetUp = HexagonCVIResource::setup(); + +bool HexagonCVIResource::setup() { + assert(!TUL); + TUL = new (TypeUnitsAndLanes); + + (*TUL)[HexagonII::TypeCVI_VA] = + UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1); + (*TUL)[HexagonII::TypeCVI_VA_DV] = UnitsAndLanes(CVI_XLANE | CVI_MPY0, 2); + (*TUL)[HexagonII::TypeCVI_VX] = UnitsAndLanes(CVI_MPY0 | CVI_MPY1, 1); + (*TUL)[HexagonII::TypeCVI_VX_DV] = UnitsAndLanes(CVI_MPY0, 2); + (*TUL)[HexagonII::TypeCVI_VP] = UnitsAndLanes(CVI_XLANE, 1); + (*TUL)[HexagonII::TypeCVI_VP_VS] = UnitsAndLanes(CVI_XLANE, 2); + (*TUL)[HexagonII::TypeCVI_VS] = UnitsAndLanes(CVI_SHIFT, 1); + (*TUL)[HexagonII::TypeCVI_VINLANESAT] = UnitsAndLanes(CVI_SHIFT, 1); + (*TUL)[HexagonII::TypeCVI_VM_LD] = + UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1); + (*TUL)[HexagonII::TypeCVI_VM_TMP_LD] = UnitsAndLanes(CVI_NONE, 0); + (*TUL)[HexagonII::TypeCVI_VM_CUR_LD] = + UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1); + (*TUL)[HexagonII::TypeCVI_VM_VP_LDU] = UnitsAndLanes(CVI_XLANE, 1); + (*TUL)[HexagonII::TypeCVI_VM_ST] = + UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1); + (*TUL)[HexagonII::TypeCVI_VM_NEW_ST] = UnitsAndLanes(CVI_NONE, 0); + (*TUL)[HexagonII::TypeCVI_VM_STU] = UnitsAndLanes(CVI_XLANE, 1); + (*TUL)[HexagonII::TypeCVI_HIST] = UnitsAndLanes(CVI_XLANE, 4); + + return true; +} + +HexagonCVIResource::HexagonCVIResource(MCInstrInfo const &MCII, unsigned s, + MCInst const *id) + : HexagonResource(s) { + unsigned T = HexagonMCInstrInfo::getType(MCII, *id); + + if (TUL->count(T)) { + // For an HVX insn. + Valid = true; + setUnits((*TUL)[T].first); + setLanes((*TUL)[T].second); + setLoad(HexagonMCInstrInfo::getDesc(MCII, *id).mayLoad()); + setStore(HexagonMCInstrInfo::getDesc(MCII, *id).mayStore()); + } else { + // For core insns. + Valid = false; + setUnits(0); + setLanes(0); + setLoad(false); + setStore(false); + } +} + +HexagonShuffler::HexagonShuffler(MCInstrInfo const &MCII, + MCSubtargetInfo const &STI) + : MCII(MCII), STI(STI) { + reset(); +} + +void HexagonShuffler::reset() { + Packet.clear(); + BundleFlags = 0; + Error = SHUFFLE_SUCCESS; +} + +void HexagonShuffler::append(MCInst const *ID, MCInst const *Extender, + unsigned S, bool X) { + HexagonInstr PI(MCII, ID, Extender, S, X); + + Packet.push_back(PI); +} + +/// Check that the packet is legal and enforce relative insn order. +bool HexagonShuffler::check() { + // Descriptive slot masks. + const unsigned slotSingleLoad = 0x1, slotSingleStore = 0x1, slotOne = 0x2, + slotThree = 0x8, slotFirstJump = 0x8, slotLastJump = 0x4, + slotFirstLoadStore = 0x2, slotLastLoadStore = 0x1; + // Highest slots for branches and stores used to keep their original order. + unsigned slotJump = slotFirstJump; + unsigned slotLoadStore = slotFirstLoadStore; + // Number of branches, solo branches, indirect branches. + unsigned jumps = 0, jump1 = 0, jumpr = 0; + // Number of memory operations, loads, solo loads, stores, solo stores, single + // stores. + unsigned memory = 0, loads = 0, load0 = 0, stores = 0, store0 = 0, store1 = 0; + // Number of HVX loads, HVX stores. + unsigned CVIloads = 0, CVIstores = 0; + // Number of duplex insns, solo insns. 
+ unsigned duplex = 0, solo = 0; + // Number of insns restricting other insns in the packet to A and X types, + // which is neither A or X types. + unsigned onlyAX = 0, neitherAnorX = 0; + // Number of insns restricting other insns in slot #1 to A type. + unsigned onlyAin1 = 0; + // Number of insns restricting any insn in slot #1, except A2_nop. + unsigned onlyNo1 = 0; + unsigned xtypeFloat = 0; + unsigned pSlot3Cnt = 0; + iterator slot3ISJ = end(); + + // Collect information from the insns in the packet. + for (iterator ISJ = begin(); ISJ != end(); ++ISJ) { + MCInst const *ID = ISJ->getDesc(); + + if (HexagonMCInstrInfo::isSolo(MCII, *ID)) + solo += !ISJ->isSoloException(); + else if (HexagonMCInstrInfo::isSoloAX(MCII, *ID)) + onlyAX += !ISJ->isSoloException(); + else if (HexagonMCInstrInfo::isSoloAin1(MCII, *ID)) + onlyAin1 += !ISJ->isSoloException(); + if (HexagonMCInstrInfo::getType(MCII, *ID) != HexagonII::TypeALU32 && + HexagonMCInstrInfo::getType(MCII, *ID) != HexagonII::TypeXTYPE) + ++neitherAnorX; + if (HexagonMCInstrInfo::prefersSlot3(MCII, *ID)) { + ++pSlot3Cnt; + slot3ISJ = ISJ; + } + + switch (HexagonMCInstrInfo::getType(MCII, *ID)) { + case HexagonII::TypeXTYPE: + if (HexagonMCInstrInfo::isFloat(MCII, *ID)) + ++xtypeFloat; + break; + case HexagonII::TypeJR: + ++jumpr; + // Fall-through. + case HexagonII::TypeJ: + ++jumps; + break; + case HexagonII::TypeCVI_VM_VP_LDU: + ++onlyNo1; + case HexagonII::TypeCVI_VM_LD: + case HexagonII::TypeCVI_VM_TMP_LD: + case HexagonII::TypeCVI_VM_CUR_LD: + ++CVIloads; + case HexagonII::TypeLD: + ++loads; + ++memory; + if (ISJ->Core.getUnits() == slotSingleLoad) + ++load0; + if (HexagonMCInstrInfo::getDesc(MCII, *ID).isReturn()) + ++jumps, ++jump1; // DEALLOC_RETURN is of type LD. + break; + case HexagonII::TypeCVI_VM_STU: + ++onlyNo1; + case HexagonII::TypeCVI_VM_ST: + case HexagonII::TypeCVI_VM_NEW_ST: + ++CVIstores; + case HexagonII::TypeST: + ++stores; + ++memory; + if (ISJ->Core.getUnits() == slotSingleStore) + ++store0; + break; + case HexagonII::TypeMEMOP: + ++loads; + ++stores; + ++store1; + ++memory; + break; + case HexagonII::TypeNV: + ++memory; // NV insns are memory-like. + if (HexagonMCInstrInfo::getDesc(MCII, *ID).isBranch()) + ++jumps, ++jump1; + break; + case HexagonII::TypeCR: + // Legacy conditional branch predicated on a register. + case HexagonII::TypeSYSTEM: + if (HexagonMCInstrInfo::getDesc(MCII, *ID).mayLoad()) + ++loads; + break; + } + } + + // Check if the packet is legal. + if ((load0 > 1 || store0 > 1 || CVIloads > 1 || CVIstores > 1) || + (duplex > 1 || (duplex && memory)) || (solo && size() > 1) || + (onlyAX && neitherAnorX > 1) || (onlyAX && xtypeFloat)) { + Error = SHUFFLE_ERROR_INVALID; + return false; + } + + if (jump1 && jumps > 1) { + // Error if single branch with another branch. + Error = SHUFFLE_ERROR_BRANCHES; + return false; + } + + // Modify packet accordingly. + // TODO: need to reserve slots #0 and #1 for duplex insns. + bool bOnlySlot3 = false; + for (iterator ISJ = begin(); ISJ != end(); ++ISJ) { + MCInst const *ID = ISJ->getDesc(); + + if (!ISJ->Core.getUnits()) { + // Error if insn may not be executed in any slot. + Error = SHUFFLE_ERROR_UNKNOWN; + return false; + } + + // Exclude from slot #1 any insn but A2_nop. + if (HexagonMCInstrInfo::getDesc(MCII, *ID).getOpcode() != Hexagon::A2_nop) + if (onlyNo1) + ISJ->Core.setUnits(ISJ->Core.getUnits() & ~slotOne); + + // Exclude from slot #1 any insn but A-type. 
+ if (HexagonMCInstrInfo::getType(MCII, *ID) != HexagonII::TypeALU32) + if (onlyAin1) + ISJ->Core.setUnits(ISJ->Core.getUnits() & ~slotOne); + + // Branches must keep the original order. + if (HexagonMCInstrInfo::getDesc(MCII, *ID).isBranch() || + HexagonMCInstrInfo::getDesc(MCII, *ID).isCall()) + if (jumps > 1) { + if (jumpr || slotJump < slotLastJump) { + // Error if indirect branch with another branch or + // no more slots available for branches. + Error = SHUFFLE_ERROR_BRANCHES; + return false; + } + // Pin the branch to the highest slot available to it. + ISJ->Core.setUnits(ISJ->Core.getUnits() & slotJump); + // Update next highest slot available to branches. + slotJump >>= 1; + } + + // A single load must use slot #0. + if (HexagonMCInstrInfo::getDesc(MCII, *ID).mayLoad()) { + if (loads == 1 && loads == memory) + // Pin the load to slot #0. + ISJ->Core.setUnits(ISJ->Core.getUnits() & slotSingleLoad); + } + + // A single store must use slot #0. + if (HexagonMCInstrInfo::getDesc(MCII, *ID).mayStore()) { + if (!store0) { + if (stores == 1) + ISJ->Core.setUnits(ISJ->Core.getUnits() & slotSingleStore); + else if (stores > 1) { + if (slotLoadStore < slotLastLoadStore) { + // Error if no more slots available for stores. + Error = SHUFFLE_ERROR_STORES; + return false; + } + // Pin the store to the highest slot available to it. + ISJ->Core.setUnits(ISJ->Core.getUnits() & slotLoadStore); + // Update the next highest slot available to stores. + slotLoadStore >>= 1; + } + } + if (store1 && stores > 1) { + // Error if a single store with another store. + Error = SHUFFLE_ERROR_STORES; + return false; + } + } + + // flag if an instruction can only be executed in slot 3 + if (ISJ->Core.getUnits() == slotThree) + bOnlySlot3 = true; + + if (!ISJ->Core.getUnits()) { + // Error if insn may not be executed in any slot. + Error = SHUFFLE_ERROR_NOSLOTS; + return false; + } + } + + bool validateSlots = true; + if (bOnlySlot3 == false && pSlot3Cnt == 1 && slot3ISJ != end()) { + // save off slot mask of instruction marked with A_PREFER_SLOT3 + // and then pin it to slot #3 + unsigned saveUnits = slot3ISJ->Core.getUnits(); + slot3ISJ->Core.setUnits(saveUnits & slotThree); + + HexagonUnitAuction AuctionCore; + std::sort(begin(), end(), HexagonInstr::lessCore); + + // see if things ok with that instruction being pinned to slot #3 + bool bFail = false; + for (iterator I = begin(); I != end() && bFail != true; ++I) + if (!AuctionCore.bid(I->Core.getUnits())) + bFail = true; + + // if yes, great, if not then restore original slot mask + if (!bFail) + validateSlots = false; // all good, no need to re-do auction + else + for (iterator ISJ = begin(); ISJ != end(); ++ISJ) { + MCInst const *ID = ISJ->getDesc(); + if (HexagonMCInstrInfo::prefersSlot3(MCII, *ID)) + ISJ->Core.setUnits(saveUnits); + } + } + + // Check if any slot, core, is over-subscribed. + // Verify the core slot subscriptions. + if (validateSlots) { + HexagonUnitAuction AuctionCore; + + std::sort(begin(), end(), HexagonInstr::lessCore); + + for (iterator I = begin(); I != end(); ++I) + if (!AuctionCore.bid(I->Core.getUnits())) { + Error = SHUFFLE_ERROR_SLOTS; + return false; + } + } + // Verify the CVI slot subscriptions. + { + HexagonUnitAuction AuctionCVI; + + std::sort(begin(), end(), HexagonInstr::lessCVI); + + for (iterator I = begin(); I != end(); ++I) + for (unsigned i = 0; i < I->CVI.getLanes(); ++i) // TODO: I->CVI.isValid? 
+ if (!AuctionCVI.bid(I->CVI.getUnits() << i)) {
+ Error = SHUFFLE_ERROR_SLOTS;
+ return false;
+ }
+ }
+
+ Error = SHUFFLE_SUCCESS;
+ return true;
+}
+
+bool HexagonShuffler::shuffle() {
+ if (size() > HEXAGON_PACKET_SIZE) {
+ // Ignore a packet with more insns than a packet can hold
+ // or with compound or duplex insns for now.
+ Error = SHUFFLE_ERROR_INVALID;
+ return false;
+ }
+
+ // Check and prepare packet.
+ if (size() > 1 && check())
+ // Reorder the handles for each slot.
+ for (unsigned nSlot = 0, emptySlots = 0; nSlot < HEXAGON_PACKET_SIZE;
+ ++nSlot) {
+ iterator ISJ, ISK;
+ unsigned slotSkip, slotWeight;
+
+ // Prioritize the handles considering their restrictions.
+ for (ISJ = ISK = Packet.begin(), slotSkip = slotWeight = 0;
+ ISK != Packet.end(); ++ISK, ++slotSkip)
+ if (slotSkip < nSlot - emptySlots)
+ // Note which handle to begin at.
+ ++ISJ;
+ else
+ // Calculate the weight of the slot.
+ slotWeight += ISK->Core.setWeight(HEXAGON_PACKET_SIZE - nSlot - 1);
+
+ if (slotWeight)
+ // Sort the packet, favoring source order,
+ // beginning after the previous slot.
+ std::sort(ISJ, Packet.end());
+ else
+ // Skip unused slot.
+ ++emptySlots;
+ }
+
+ for (iterator ISJ = begin(); ISJ != end(); ++ISJ)
+ DEBUG(dbgs().write_hex(ISJ->Core.getUnits());
+ dbgs() << ':'
+ << HexagonMCInstrInfo::getDesc(MCII, *ISJ->getDesc())
+ .getOpcode();
+ dbgs() << '\n');
+ DEBUG(dbgs() << '\n');
+
+ return (!getError());
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
new file mode 100644
index 0000000..174f10f
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
@@ -0,0 +1,184 @@
+//===----- HexagonShuffler.h - Instruction bundle shuffling ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the shuffling of insns inside a bundle according to the
+// packet formation rules of the Hexagon ISA.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGONSHUFFLER_H
+#define HEXAGONSHUFFLER_H
+
+#include "Hexagon.h"
+#include "MCTargetDesc/HexagonMCInstrInfo.h"
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCInstrInfo.h"
+
+using namespace llvm;
+
+namespace llvm {
+// Insn resources.
+class HexagonResource {
+ // Mask of the slots or units that may execute the insn and
+ // the weight or priority that the insn requires to be assigned a slot.
+ unsigned Slots, Weight;
+
+public:
+ HexagonResource(unsigned s) { setUnits(s); };
+
+ void setUnits(unsigned s) {
+ Slots = s & ~(~0U << HEXAGON_PACKET_SIZE);
+ };
+ unsigned setWeight(unsigned s);
+
+ unsigned getUnits() const { return (Slots); };
+ unsigned getWeight() const { return (Weight); };
+
+ // Check if the resources are in ascending slot order.
+ static bool lessUnits(const HexagonResource &A, const HexagonResource &B) {
+ return (countPopulation(A.getUnits()) < countPopulation(B.getUnits()));
+ };
+ // Check if the resources are in ascending weight order.
+ static bool lessWeight(const HexagonResource &A, const HexagonResource &B) {
+ return (A.getWeight() < B.getWeight());
+ };
+};
+
+// HVX insn resources.
+class HexagonCVIResource : public HexagonResource { + typedef std::pair<unsigned, unsigned> UnitsAndLanes; + typedef llvm::DenseMap<unsigned, UnitsAndLanes> TypeUnitsAndLanes; + + // Available HVX slots. + enum { + CVI_NONE = 0, + CVI_XLANE = 1 << 0, + CVI_SHIFT = 1 << 1, + CVI_MPY0 = 1 << 2, + CVI_MPY1 = 1 << 3 + }; + + static bool SetUp; + static bool setup(); + static TypeUnitsAndLanes *TUL; + + // Count of adjacent slots that the insn requires to be executed. + unsigned Lanes; + // Flag whether the insn is a load or a store. + bool Load, Store; + // Flag whether the HVX resources are valid. + bool Valid; + + void setLanes(unsigned l) { Lanes = l; }; + void setLoad(bool f = true) { Load = f; }; + void setStore(bool f = true) { Store = f; }; + +public: + HexagonCVIResource(MCInstrInfo const &MCII, unsigned s, MCInst const *id); + + bool isValid() const { return (Valid); }; + unsigned getLanes() const { return (Lanes); }; + bool mayLoad() const { return (Load); }; + bool mayStore() const { return (Store); }; +}; + +// Handle to an insn used by the shuffling algorithm. +class HexagonInstr { + friend class HexagonShuffler; + + MCInst const *ID; + MCInst const *Extender; + HexagonResource Core; + HexagonCVIResource CVI; + bool SoloException; + +public: + HexagonInstr(MCInstrInfo const &MCII, MCInst const *id, + MCInst const *Extender, unsigned s, bool x = false) + : ID(id), Extender(Extender), Core(s), CVI(MCII, s, id), + SoloException(x){}; + + MCInst const *getDesc() const { return (ID); }; + + MCInst const *getExtender() const { return Extender; } + + unsigned isSoloException() const { return (SoloException); }; + + // Check if the handles are in ascending order for shuffling purposes. + bool operator<(const HexagonInstr &B) const { + return (HexagonResource::lessWeight(B.Core, Core)); + }; + // Check if the handles are in ascending order by core slots. + static bool lessCore(const HexagonInstr &A, const HexagonInstr &B) { + return (HexagonResource::lessUnits(A.Core, B.Core)); + }; + // Check if the handles are in ascending order by HVX slots. + static bool lessCVI(const HexagonInstr &A, const HexagonInstr &B) { + return (HexagonResource::lessUnits(A.CVI, B.CVI)); + }; +}; + +// Bundle shuffler. +class HexagonShuffler { + typedef SmallVector<HexagonInstr, HEXAGON_PRESHUFFLE_PACKET_SIZE> + HexagonPacket; + + // Insn handles in a bundle. + HexagonPacket Packet; + + // Shuffling error code. + unsigned Error; + +protected: + int64_t BundleFlags; + MCInstrInfo const &MCII; + MCSubtargetInfo const &STI; + +public: + typedef HexagonPacket::iterator iterator; + + enum { + SHUFFLE_SUCCESS = 0, ///< Successful operation. + SHUFFLE_ERROR_INVALID, ///< Invalid bundle. + SHUFFLE_ERROR_STORES, ///< No free slots for store insns. + SHUFFLE_ERROR_LOADS, ///< No free slots for load insns. + SHUFFLE_ERROR_BRANCHES, ///< No free slots for branch insns. + SHUFFLE_ERROR_NOSLOTS, ///< No free slots for other insns. + SHUFFLE_ERROR_SLOTS, ///< Over-subscribed slots. + SHUFFLE_ERROR_ERRATA2, ///< Errata violation (v60). + SHUFFLE_ERROR_STORE_LOAD_CONFLICT, ///< store/load conflict + SHUFFLE_ERROR_UNKNOWN ///< Unknown error. + }; + + explicit HexagonShuffler(MCInstrInfo const &MCII, MCSubtargetInfo const &STI); + + // Reset to initial state. + void reset(); + // Check if the bundle may be validly shuffled. + bool check(); + // Reorder the insn handles in the bundle. 
+ bool shuffle();
+
+ unsigned size() const { return (Packet.size()); };
+
+ iterator begin() { return (Packet.begin()); };
+ iterator end() { return (Packet.end()); };
+
+ // Add insn handle to the bundle.
+ void append(MCInst const *ID, MCInst const *Extender, unsigned S,
+ bool X = false);
+
+ // Set or return the error code for the last check or shuffling of the bundle.
+ void setError(unsigned Err) { Error = Err; };
+ unsigned getError() const { return (Error); };
+};
+}
+
+#endif // HEXAGONSHUFFLER_H
diff --git a/contrib/llvm/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp b/contrib/llvm/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp
new file mode 100644
index 0000000..40f6c8d
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp
@@ -0,0 +1,19 @@
+//===-- HexagonTargetInfo.cpp - Hexagon Target Implementation ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Hexagon.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheHexagonTarget;
+
+extern "C" void LLVMInitializeHexagonTargetInfo() {
+ RegisterTarget<Triple::hexagon, /*HasJIT=*/false> X(TheHexagonTarget, "hexagon", "Hexagon");
+}
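The core-slot check in HexagonShuffler::check() above boils down to a small greedy auction: handles are ordered so that the ones with the fewest candidate slots bid first (HexagonInstr::lessCore compares slot-mask population counts), and a bid fails when none of the slots in a handle's mask is still free, which is reported as SHUFFLE_ERROR_SLOTS. The standalone C++ sketch below models that idea with no LLVM dependencies; the four-slot width matches HEXAGON_PACKET_SIZE, but the example slot masks, the helper name auctionPacket, and the lowest-free-slot placement policy are illustrative assumptions, since HexagonUnitAuction::bid() itself is defined elsewhere in HexagonShuffler.cpp and is not reproduced here.

#include <algorithm>
#include <bitset>
#include <cstdio>
#include <vector>

// Greedy slot auction over a 4-slot packet. Each mask lists the slots an
// insn may use; the most constrained insns bid first, and each claims the
// lowest free slot allowed by its mask.
static bool auctionPacket(std::vector<unsigned> SlotMasks) {
  std::sort(SlotMasks.begin(), SlotMasks.end(), [](unsigned A, unsigned B) {
    return std::bitset<32>(A).count() < std::bitset<32>(B).count();
  });
  unsigned Taken = 0; // bit i set => slot i already assigned
  for (unsigned Mask : SlotMasks) {
    unsigned Free = Mask & ~Taken & 0xFu;
    if (!Free)
      return false;        // over-subscribed: no slot left for this insn
    Taken |= Free & -Free; // claim the lowest free slot in the mask
  }
  return true;
}

int main() {
  // A store pinned to slot #0, a branch limited to slots #2-#3, and two
  // insns that may use any slot: the packet fits.
  std::printf("%d\n", auctionPacket({0x1, 0xC, 0xF, 0xF})); // prints 1
  // Two insns both pinned to slot #0: the packet cannot be placed.
  std::printf("%d\n", auctionPacket({0x1, 0x1, 0xF}));      // prints 0
  return 0;
}

Because the toy version is greedy it may reject a packet that a cleverer assignment would accept; it is meant only to show the shape of the check, not to reproduce the exact behaviour of the LLVM classes.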
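HexagonShuffler::shuffle() then reorders the surviving handles slot by slot: for each slot it weights the remaining handles via HexagonResource::setWeight() and sorts them so that the handle that most needs the current slot comes first while source order is otherwise preserved ("favoring source order" in the comments above). The sketch below is a simplified stand-in for that step; the Weight values and the use of std::stable_sort are assumptions for illustration, since the real setWeight() formula lives earlier in HexagonShuffler.cpp and the real code sorts with HexagonInstr::operator<.

#include <algorithm>
#include <cstdio>
#include <vector>

struct Handle {
  unsigned SrcIdx; // original position in the packet
  unsigned Weight; // priority for the slot currently being filled
};

int main() {
  // Three handles; the middle one is assumed to need the current slot most.
  std::vector<Handle> Packet = {{0, 1}, {1, 4}, {2, 1}};

  // Heavier handles first; stable, so equally weighted handles keep their
  // original source order.
  std::stable_sort(Packet.begin(), Packet.end(),
                   [](const Handle &A, const Handle &B) {
                     return A.Weight > B.Weight;
                   });

  for (const Handle &H : Packet)
    std::printf("insn %u (weight %u)\n", H.SrcIdx, H.Weight);
  // Output: insn 1 first, then insns 0 and 2 in their original order.
  return 0;
}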